Biostrings

Biostrings

Biostrings

## Install
# The legacy installer `source("http://bioconductor.org/biocLite.R")` has been
# retired by Bioconductor (and the original line used typographic quotes,
# which R cannot parse). Biostrings is installed through BiocManager instead.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  # BiocManager itself lives on CRAN; install it only when it is missing.
  install.packages("BiocManager")
}
BiocManager::install("Biostrings")

Quick Start

test.txt

library(Biostrings)

# Read the FASTA records stored in test.txt as a set of DNA sequences.
s <- readDNAStringSet("test.txt")

length(s) # number of sequences
nchar(s) # length of each sequence
reverse(s) # each sequence reversed
translate(s) # translate each sequence to amino acids

# `dna2rna()` and `cDNA()` are no longer available in Biostrings; the
# conversions below are the constructor-based replacements.
RNAStringSet(s) # same sequences as RNA (T -> U); replaces dna2rna(s)
# Replaces cDNA(dna2rna(s)).
# NOTE(review): assumes cDNA() returned the (non-reversed) complementary DNA
# of its RNA input -- confirm against the Biostrings defunct-functions page.
complement(DNAStringSet(RNAStringSet(s)))

# NOTE(review): base tolower() presumably coerces its argument with
# as.character(), so this yields a plain lower-case character vector rather
# than a DNAStringSet -- confirm.
tolower(s)

letterFrequency(s, DNA_BASES) # counts of A, C, G, T
# Counts over the full alphabet:
# A, C, G, T, M, R, W, S, Y, K, V, H, D, B, N, -, +, .
letterFrequency(s, DNA_ALPHABET)
letterFrequency(s, DNA_BASES, as.prob = TRUE) # proportions of A, C, G, T
letterFrequency(s, "GC", as.prob = TRUE) # GC proportion per sequence

1. FASTA Calculations

library(Biostrings)
## Reading a fasta file
# This is a TransDecoder peptide (.pep) file, i.e. protein sequences, so it
# must be read as an amino-acid set. Reading it as DNA does not match the
# data: only residues that also happen to be IUPAC nucleotide codes fit the
# DNA alphabet.
A <- readAAStringSet("predict.coding.fa.transdecoder.pep.sel.fa")
# `A` is already an AAStringSet; no re-wrapping is needed before printing.
head(A)
  width seq names               
[1]   604 ASSVASTASSAHHHASAASTGTV...  TRINITY_DN100000_...
[2]   616 MDYMDSGRYTKSDKDWDTNVASD...  TRINITY_DN100001_...
[3]   157 SRAKKVKKDSKKGGGGGGGGSSW...  TRINITY_DN100002_...

Get the Distance Matrix from the Tree

Raw post

library(TDbook) # example data
library(Biostrings)

# Example tree provided by TDbook.
tree <- tree_HPV58

# Strip the "<prefix>|" part of each tip label to obtain the bare accession
# number, and keep a lookup from accession to the original tip label.
tl <- tree$tip.label
acc <- sub("\\w+\\|", "", tl)
names(tl) <- acc

# Download the sequences from GenBank. The original chained these steps with
# `%>%`, but no attached package here visibly provides that operator, so the
# steps are written out explicitly.
tipseq_chars <- as.character(ape::read.GenBank(acc))
# Collapse each sequence's vector of single bases into one string; vapply()
# guarantees exactly one string per accession and keeps the names.
tipseq_strings <- vapply(tipseq_chars, paste0, character(1), collapse = "")
tipseq <- Biostrings::DNAStringSet(tipseq_strings)

# Align the sequences, then convert the alignment back to a DNAStringSet so
# that stringDist() can be applied to it.
tipseq_aln <- muscle::muscle(tipseq)
tipseq_aln <- DNAStringSet(tipseq_aln)

# Pairwise Hamming distances between the aligned sequences.
tipseq_dist <- stringDist(tipseq_aln, method = "hamming")
as.matrix(tipseq_dist)[1:5, 1:5]
FJ385264 D90400 FJ385265 FJ385263 FJ385261
FJ385264 0 15 16 18 20
D90400 15 0 7 7 9
FJ385265 16 7 0 8 12
FJ385263 18 7 8 0 12
FJ385261 20 9 12 12 0
Author

Karobben

Posted on

2020-05-01

Updated on

2024-01-11

Licensed under

Comments