Extract and replace all subsequences from bstr sequences

bstr_sub_all(bstrobj, from = list(1L), to = list(-1L), length)

bstr_sub_all(bstrobj, from = list(1L), to = list(-1L), length, omit_na = FALSE) <- value

bstr_sub_replace_all(..., replacement, value = replacement)

bstr_sub_all_replace(..., replacement, value = replacement)

Arguments

bstrobj

bstr class object or character vector

from

a list of integer vectors giving the start indexes, or a list of two-column matrices, each of the form cbind(from, to)

to

a list of integer vectors giving the end indexes

length

a list of integer vectors giving the substring lengths

omit_na

a single logical value; indicates whether missing values in any of the indexes or in value leave the corresponding part of the input string unchanged [replacement function only]

value

a list of character vectors defining the replacement strings [replacement function only]

...

arguments to be passed to bstr_sub_all<-

replacement

alias of value [wherever applicable]

Examples

(temp <- dstr_rand_seq(3, 20, "[AT]", seed = 1))
#> class: dstr,bstr,character
#> number of sequences: 3
#> [1] no name 1 : AATTATTTTAAATATATTAT 20
#> [2] no name 2 : TATAAAAATAATAATTTATA 20
#> [3] no name 3 : TTTTTTAATTATAAAAATTA 20
(pos_A_trails <- stringr::str_locate_all(temp, "A{2,}"))
#> [[1]]
#>      start end
#> [1,]     1   2
#> [2,]    10  12
#>
#> [[2]]
#>      start end
#> [1,]     4   8
#> [2,]    10  11
#> [3,]    13  14
#>
#> [[3]]
#>      start end
#> [1,]     7   8
#> [2,]    13  17
#>
# Extract all A trails
bstr_sub_all(temp, pos_A_trails)
#> $`no name 1`
#> class: bstr,character
#> number of sequences: 2
#> [1] no name 1 1 : AA 2
#> [2] no name 1 2 : AAA 3
#>
#> $`no name 2`
#> class: bstr,character
#> number of sequences: 3
#> [1] no name 2 1 : AAAAA 5
#> [2] no name 2 2 : AA 2
#> [3] no name 2 3 : AA 2
#>
#> $`no name 3`
#> class: bstr,character
#> number of sequences: 2
#> [1] no name 3 1 : AA 2
#> [2] no name 3 2 : AAAAA 5
#>
# Substitute all A trails by x
bstr_sub_all(temp, pos_A_trails) <- "x"
temp
#> class: dstr,bstr,character
#> number of sequences: 3
#> [1] no name 1 : xTTATTTTxTATATTAT 17
#> [2] no name 2 : TATxTxTxTTTATA 14
#> [3] no name 3 : TTTTTTxTTATxTTA 15
# Substitute all A trails by case switched ones
original <- change <- dstr_rand_seq(2, 10, "[AaT]", seed = 1)
pos_A_trails <- stringr::str_locate_all(change, "[Aa]{2,}")
switched_A_trails <- bstr_sub_all(change, pos_A_trails) %>% lapply(bstr_switch_case)
bstr_sub_all(change, pos_A_trails) <- switched_A_trails
c(original[1], change[1], original[2], change[2])
#> class: bstr,character
#> number of sequences: 4
#> [1] no name 1 : ATTaAaaTTA 10
#> [2] no name 1 : ATTAaAATTA 10
#> [3] no name 2 : AAaTaTaaTa 10
#> [4] no name 2 : aaATaTAATa 10