# Write the raw BLAST output to disk so it can be parsed from the file later.
# NOTE(review): result_handle is created outside this cell - presumably by an
# earlier NCBIWWW.qblast call; confirm.
with open('results.xml', 'w') as save_file:
    blast_results = result_handle.read()
    save_file.write(blast_results)
print(sequence_data)

# E-value cutoff. NOTE(review): defined here but not applied to any hit below;
# confirm whether matches were meant to be filtered by it.
E_VALUE_THRESH = 1e-20

# Parse the saved BLAST XML and list the title of every alignment per query.
# The file is opened in a `with` block so the handle is closed once parsing
# is finished (NCBIXML.parse reads lazily, so iterate inside the block).
with open("results.xml") as results_file:
    for record in NCBIXML.parse(results_file):
        if record.alignments:
            print("\n")
            print("query: %s" % record.query[:100])
            for align in record.alignments:
                # Titles are truncated to 100 characters for readability.
                print("match: %s " % align.title[:100])
We can use esearch to download the FASTA file
source: NCBI Book
conda install -c bioconda entrez-direct

# for nucleotide
esearch -db nucleotide -query "KAG7629426.1" | efetch -format fasta

# for protein sequences
esearch -db protein -query "KAG7629426.1" | efetch -format fasta
Practices
Find homologous genes from 10 different species using BLAST
# BLAST the query protein against 'nr' once per organism in Organ_list, save
# each result to results.xml, and print the FASTA record of the reported hits.
#
# NOTE(review): the original cell had lost its line breaks. The save / parse /
# fetch steps are placed inside the per-organism loop on the assumption that
# hits should be fetched for every organism, not only the last one - confirm.

# The query sequence is identical for every organism, so read it once. It is
# kept under its own name because `record` is reused below for parsed BLAST
# records.
query_record = SeqIO.read("chain_N.faa", format="fasta")

# E-value cutoff. NOTE(review): defined but never applied to the hits below.
E_VALUE_THRESH = 1e-20

for i in Organ_list:
    # Restrict the search to a single taxon; `i` must be a string because it
    # is concatenated into the Entrez query.
    result_handle = NCBIWWW.qblast(
        'blastp', 'nr', query_record.seq,
        entrez_query="txid" + i + "[ORGN]")

    # results.xml is overwritten on every iteration.
    with open('results.xml', 'w') as save_file:
        blast_results = result_handle.read()
        save_file.write(blast_results)
    result_handle.close()

    # Num counts parsed BLAST records (incremented once per record), not
    # individual alignments. NOTE(review): if the intent was to keep only the
    # first few hits per organism, this counter needs to move - confirm.
    Num = 0
    with open("results.xml") as results_file:
        for record in NCBIXML.parse(results_file):
            Num += 1
            if record.alignments:
                print("\n")
                print("query: %s" % record.query[:100])
                for align in record.alignments:
                    if Num < 4:
                        # The second '|'-separated field of the hit title is
                        # used as the identifier passed to efetch.
                        A = align.title
                        handle = Entrez.efetch(
                            db="protein",
                            id=A.split("|")[1],
                            rettype="fasta",
                            retmode="text")
                        try:
                            print(handle.read())
                        finally:
                            # Release the network handle after each fetch.
                            handle.close()