getSegment - Get Ig segment allele, gene and family names
Description¶
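getSegment
performs generic matching of delimited segment calls with a custom
regular expression. getAllele, getGene and getFamily extract
the allele, gene and family names, respectively, from a character vector of
immunoglobulin (Ig) or TCR segment allele calls in IMGT format.
getLocus and getChain similarly return the locus (e.g. IGK, IGH) and the
chain (e.g. VL, VH) of each call, as shown in the examples below.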
Usage¶
getSegment(
segment_call,
segment_regex,
first = TRUE,
collapse = TRUE,
strip_d = TRUE,
omit_nl = FALSE,
sep = ","
)
getAllele(
segment_call,
first = TRUE,
collapse = TRUE,
strip_d = TRUE,
omit_nl = FALSE,
sep = ","
)
getGene(
segment_call,
first = TRUE,
collapse = TRUE,
strip_d = TRUE,
omit_nl = FALSE,
sep = ","
)
getFamily(
segment_call,
first = TRUE,
collapse = TRUE,
strip_d = TRUE,
omit_nl = FALSE,
sep = ","
)
getLocus(
segment_call,
first = TRUE,
collapse = TRUE,
strip_d = TRUE,
omit_nl = FALSE,
sep = ","
)
getChain(
segment_call,
first = TRUE,
collapse = TRUE,
strip_d = TRUE,
omit_nl = FALSE,
sep = ","
)
Arguments¶
- segment_call
- character vector containing segment calls delimited by commas.
- segment_regex
- string defining the segment match regular expression.
- first
- if `TRUE` return only the first call in `segment_call`; if `FALSE` return all calls delimited by commas.
- collapse
- if `TRUE` check for duplicates and return only unique segment assignments; if `FALSE` return all assignments (faster). Has no effect if `first=TRUE`.
- strip_d
- if `TRUE` remove the “D” from the end of gene annotations (denoting a duplicate gene in the locus); if `FALSE` do not alter gene names.
- omit_nl
- if `TRUE` remove non-localized (NL) genes from the result. Only applies at the gene or allele level.
- sep
- character defining both the input and output segment call delimiter.
Value¶
A character vector containing allele, gene, family, locus or chain names, depending on the function called.
References¶
Examples¶
# Light chain examples
kappa_call <- c("Homsap IGKV1D-39*01 F,Homsap IGKV1-39*02 F,Homsap IGKV1-39*01",
"Homsap IGKJ5*01 F")
getAllele(kappa_call)
[1] "IGKV1-39*01" "IGKJ5*01"
getAllele(kappa_call, first=FALSE)
[1] "IGKV1-39*01,IGKV1-39*02" "IGKJ5*01"
getAllele(kappa_call, first=FALSE, strip_d=FALSE)
[1] "IGKV1D-39*01,IGKV1-39*02,IGKV1-39*01"
[2] "IGKJ5*01"
getGene(kappa_call)
[1] "IGKV1-39" "IGKJ5"
getGene(kappa_call, first=FALSE)
[1] "IGKV1-39" "IGKJ5"
getGene(kappa_call, first=FALSE, strip_d=FALSE)
[1] "IGKV1D-39,IGKV1-39" "IGKJ5"
getFamily(kappa_call)
[1] "IGKV1" "IGKJ5"
getFamily(kappa_call, first=FALSE)
[1] "IGKV1" "IGKJ5"
getFamily(kappa_call, first=FALSE, collapse=FALSE)
[1] "IGKV1,IGKV1,IGKV1" "IGKJ5"
getFamily(kappa_call, first=FALSE, strip_d=FALSE)
[1] "IGKV1D,IGKV1" "IGKJ5"
getLocus(kappa_call)
[1] "IGK" "IGK"
getChain(kappa_call)
[1] "VL" "VL"
# Heavy chain examples
heavy_call <- c("Homsap IGHV1-69*01 F,Homsap IGHV1-69D*01 F",
"Homsap IGHD1-1*01 F",
"Homsap IGHJ1*01 F")
getAllele(heavy_call, first=FALSE)
[1] "IGHV1-69*01" "IGHD1-1*01" "IGHJ1*01"
getAllele(heavy_call, first=FALSE, strip_d=FALSE)
[1] "IGHV1-69*01,IGHV1-69D*01" "IGHD1-1*01"
[3] "IGHJ1*01"
getGene(heavy_call, first=FALSE)
[1] "IGHV1-69" "IGHD1-1" "IGHJ1"
getGene(heavy_call, first=FALSE, strip_d=FALSE)
[1] "IGHV1-69,IGHV1-69D" "IGHD1-1" "IGHJ1"
getFamily(heavy_call)
[1] "IGHV1" "IGHD1" "IGHJ1"
getLocus(heavy_call)
[1] "IGH" "IGH" "IGH"
getChain(heavy_call)
[1] "VH" "VH" "VH"
# Filtering non-localized genes
nl_call <- c("IGHV3-NL1*01,IGHV3-30-3*01,IGHV3-30*01",
"Homosap IGHV3-30*01 F,Homsap IGHV3-NL1*01 F",
"IGHV1-NL1*01")
getAllele(nl_call, first=FALSE, omit_nl=TRUE)
[1] "IGHV3-30-3*01,IGHV3-30*01" "IGHV3-30*01"
[3] ""
getGene(nl_call, first=FALSE, omit_nl=TRUE)
[1] "IGHV3-30-3,IGHV3-30" "IGHV3-30" ""
getFamily(nl_call, first=FALSE, omit_nl=TRUE)
[1] "IGHV3" "IGHV3" ""
# Temporary designation examples
tmp_call <- c("IGHV9S3*01", "IGKV10S12*01")
getAllele(tmp_call)
[1] "IGHV9S3*01" "IGKV10S12*01"
getGene(tmp_call)
[1] "IGHV9S3" "IGKV10S12"
getFamily(tmp_call)
[1] "IGHV9" "IGKV10"