Package Bio :: Package SeqIO :: Module InsdcIO
[hide private]
[frames | no frames]

Source Code for Module Bio.SeqIO.InsdcIO

 1  # Copyright 2007 by Peter Cock.  All rights reserved. 
 2  # 
 3  # This code is part of the Biopython distribution and governed by its 
 4  # license.  Please see the LICENSE file that should have been included 
 5  # as part of this package. 
 6   
 7  from Bio.GenBank.Scanner import GenBankScanner, EmblScanner 
 8  from Bio.Alphabet import generic_protein 
 9   
10  # NOTE 
11  # ==== 
12  # The "brains" for parsing GenBank and EMBL files (and any 
13  # other flat file variants from the INSDC in future) is in 
14  # Bio.GenBank.Scanner (plus the _FeatureConsumer in Bio.GenBank) 
15  # 
16  # See also 
17  # ======== 
18  # International Nucleotide Sequence Database Collaboration 
19  # http://www.insdc.org/ 
20  #  
21  # GenBank 
22  # http://www.ncbi.nlm.nih.gov/Genbank/ 
23  # 
24  # EMBL Nucleotide Sequence Database 
25  # http://www.ebi.ac.uk/embl/ 
26  # 
27  # DDBJ (DNA Data Bank of Japan) 
28  # http://www.ddbj.nig.ac.jp/ 
29   
def GenBankIterator(handle):
    """Split a GenBank file into SeqRecord objects.

    Each section running from a LOCUS line to its terminating // is
    returned as one SeqRecord with associated annotation and features.

    Note that a genome or chromosome file typically holds only one
    record.

    Arguments:
     - handle - the input handle, passed unchanged to the scanner.

    Returns the result of GenBankScanner.parse_records(), i.e. an
    iterator over the records.
    """
    # parse_records is a generator function, so the records are
    # produced lazily as the caller iterates over the result.
    scanner = GenBankScanner(debug=0)
    return scanner.parse_records(handle)
40
def EmblIterator(handle):
    """Split an EMBL file into SeqRecord objects.

    Each section running from an ID line to its terminating // is
    returned as one SeqRecord with associated annotation and features.
    (EMBL entries begin with an ID line; the LOCUS line is the GenBank
    equivalent.)

    Note that a genome or chromosome file typically holds only one
    record.

    Arguments:
     - handle - the input handle, passed unchanged to the scanner.

    Returns the result of EmblScanner.parse_records(), i.e. an
    iterator over the records.
    """
    # parse_records is a generator function, so the records are
    # produced lazily as the caller iterates over the result.
    scanner = EmblScanner(debug=0)
    return scanner.parse_records(handle)
51
def GenBankCdsFeatureIterator(handle, alphabet=generic_protein):
    """Split a GenBank file into one SeqRecord per CDS feature.

    Each section running from a LOCUS line to its terminating // can
    contain many CDS features.  Each of these is returned as a
    SeqRecord carrying the stated amino acid translation sequence
    (if one is given).

    Arguments:
     - handle - the input handle, passed unchanged to the scanner.
     - alphabet - alphabet forwarded to the scanner; defaults to
       generic_protein.

    Returns the result of GenBankScanner.parse_cds_features(), i.e. an
    iterator over the CDS records.
    """
    # parse_cds_features is a generator function, so the records are
    # produced lazily as the caller iterates over the result.
    scanner = GenBankScanner(debug=0)
    return scanner.parse_cds_features(handle, alphabet)
61
def EmblCdsFeatureIterator(handle, alphabet=generic_protein):
    """Split an EMBL file into one SeqRecord per CDS feature.

    Each section running from an ID line to its terminating // can
    contain many CDS features.  Each of these is returned as a
    SeqRecord carrying the stated amino acid translation sequence
    (if one is given).  (EMBL entries begin with an ID line; the
    LOCUS line is the GenBank equivalent.)

    Arguments:
     - handle - the input handle, passed unchanged to the scanner.
     - alphabet - alphabet forwarded to the scanner; defaults to
       generic_protein.

    Returns the result of EmblScanner.parse_cds_features(), i.e. an
    iterator over the CDS records.
    """
    # parse_cds_features is a generator function, so the records are
    # produced lazily as the caller iterates over the result.
    scanner = EmblScanner(debug=0)
    return scanner.parse_cds_features(handle, alphabet)
71