Package Bio :: Package LocusLink :: Module locus_format
[hide private]
[frames] | no frames]

Source Code for Module Bio.LocusLink.locus_format

  1   
  2   
  3   
  4  """Martel based parser to read LocusLink flat files. 
  5   
  6  This is a huge regular expression for LocusLink, 
  7  built using the 'regular expressions on steroids' capabilities of 
  8  Martel. 
  9   
 10  A description of the format can be found in the 'ligand.doc' file 
 11  from the Ligand distribution, available from: 
 12   
 13   http://www.ncbi.nih.gov/LocusLink 
 14   
 15   
 16  """ 
 17   
 18  # Martel 
 19  from Martel import Str 
 20  from Martel import Str1 
 21  from Martel import Alt 
 22  from Martel import Rep 
 23  from Martel import Group 
 24  from Martel import ToEol 
 25  from Martel import AnyEol 
 26  from Martel import Any 
 27  from Martel import Word 
 28  from Martel import Opt 
 29  from Martel import AssertNot 
 30   
 31  from Martel import RecordReader 
 32   
 33  # --- First set up some helper constants and functions 
 34  INDENT = 12 
 35   
 36  blank_spaces = Rep(Str1(" ")) 
 37  point = Str1(".") 
 38   
 39  white_space = Rep( Any( "       " ) ) 
 40  locus_keys = [ \ 
 41          'LOCUSID', \ 
 42          'LOCUS_CONFIRMED', \ 
 43          'LOCUS_TYPE', \ 
 44          'ORGANISM', \ 
 45          'STATUS', \ 
 46          'NM', \ 
 47          'NP', \ 
 48          'CDD', \ 
 49          'PRODUCT', \ 
 50          'ASSEMBLY', \ 
 51          'CONTIG', \ 
 52          'EVID', \ 
 53          'XM', \ 
 54          'XP', \ 
 55          'ACCNUM', \ 
 56          'TYPE', \ 
 57          'PROT', \ 
 58          'OFFICIAL_SYMBOL', \ 
 59          'OFFICIAL_GENE_NAME', \ 
 60          'PREFERRED_PRODUCT', \ 
 61          'ALIAS_SYMBOL', \ 
 62          'SUMMARY', \ 
 63          'CHR', \ 
 64          'STS', \ 
 65          'COMP', \ 
 66          'ALIAS_PROT', \ 
 67          'UNIGENE', \ 
 68          'BUTTON', \ 
 69          'LINK', \ 
 70          'OMIM', \ 
 71          'MAP', \ 
 72          'MAPLINK', \ 
 73          'ECNUM', \ 
 74          'PROTOTYPE', \ 
 75          'DB_DESCR', \ 
 76          'DB_LINK', \ 
 77          'PMID', \ 
 78          'GRIF', \ 
 79          'SUBFUNC', \ 
 80          'GO', \ 
 81          'EXTANNOT' 
 82           
 83          ] 
 84   
 85  accnum_block_keys = [ \ 
 86      'ACCNUM', \ 
 87      'TYPE', \ 
 88      'PROT' \ 
 89      ] 
 90  phenotype = Str1( 'PHENOTYPE' ) 
 91  db = Str1( 'DB' ) 
 92  accnum_block_key = Str( *accnum_block_keys ) 
 93   
 94   
 95   
 96  valid_locus_key = Str( *locus_keys ) 
def define_locus_line(entry_tag):
    """Build the expression for one ``TAG : value`` line.

    entry_tag -- literal tag text expected on the line.

    The returned expression is, in order: white_space, the tag,
    white_space, a ':', white_space, and the remainder of the line.
    """
    pieces = (
        white_space,
        Str1(entry_tag),
        white_space,
        Str1(":"),
        white_space,
        ToEol(),
    )
    # Fold left to right so the pieces are combined in the same order and
    # association as a plain chained '+' expression.
    line = pieces[0]
    for piece in pieces[1:]:
        line = line + piece
    return line
105
def define_locus_group(entry_name, entry_tag):
    """Wrap a single ``TAG : value`` line in a named group.

    entry_name -- name given to the resulting Group.
    entry_tag -- literal tag text, passed on to define_locus_line.
    """
    return Group(entry_name, define_locus_line(entry_tag))


# ACCNUM and TYPE lines, optionally followed by a PROT line.
accnum_block = Group('accnum_block',
                     define_locus_line('ACCNUM') +
                     define_locus_line('TYPE') +
                     Opt(define_locus_line('PROT')))

# A PHENOTYPE line, optionally followed by a PHENOTYPE_ID line.
phenotype_block = Group('phenotype_block',
                        define_locus_line('PHENOTYPE') +
                        Opt(define_locus_line('PHENOTYPE_ID')))

# A DB_DESCR line followed by a DB_LINK line.
db_block = Group('db_block',
                 define_locus_line('DB_DESCR') +
                 define_locus_line('DB_LINK'))

# Every record opens with a line starting with '>>'.
begin_record_line = Str1('>>') + ToEol()

# Any other ``TAG : value`` line.  Lines owned by one of the blocks above
# are excluded with negative lookaheads.
#
# The accession-key lookahead is anchored to the complete tag (key, optional
# white space, then ':').  A bare prefix test on 'PROT' would also reject
# 'PROTOTYPE', which is listed in locus_keys and is not handled by any block,
# so such a line could never be matched.  The PHENOTYPE and DB lookaheads
# stay prefix tests because PHENOTYPE_ID, DB_DESCR and DB_LINK are all
# handled by their blocks.
locus_line = Group('locus_line',
                   white_space +
                   AssertNot(accnum_block_key + white_space + Str1(':')) +
                   AssertNot(phenotype) +
                   AssertNot(db) +
                   Word() + white_space + Str1(':') + ToEol())

# A full record: the '>>' header followed by any mix of plain lines and blocks.
locus_record = begin_record_line + Rep(Alt(locus_line,
                                           accnum_block,
                                           phenotype_block,
                                           db_block))