Package Bio :: Package NeuralNetwork :: Package Gene :: Module Pattern
[hide private]
[frames | no frames]

Source Code for Module Bio.NeuralNetwork.Gene.Pattern

  1  """Generic functionality useful for all gene representations. 
  2   
  3  This module contains classes which can be used for all the different 
  4  types of patterns available for representing gene information (ie. motifs, 
  5  signatures and schemas). These are the general classes which should 
  6  handle any of the different specific patterns. 
  7  """ 
  8  # standard library 
  9  import string 
 10  import random 
 11   
 12  # biopython 
 13  from Bio import utils 
 14  from Bio.Seq import Seq, MutableSeq 
 15   
class PatternIO:
    """Allow reading and writing of patterns to files.

    This just defines a simple persistence class for patterns, making
    it easy to write them to a file and read them back. Each pattern is
    stored on its own line; the items of a signature (a list or tuple of
    patterns) are joined on that line with the separator character.
    """

    def __init__(self, alphabet=None):
        """Initialize the reader and writer class.

        Arguments:

        o alphabet - An optional argument specifying the alphabet
        which patterns should follow. If an alphabet is set it'll be used
        to verify that all patterns read from a file follow it.

        Attributes:

        o separator - A character to use in separating items in a signature
        when it is written to a file and read back. This character should
        not be in the possible alphabet of the sequences, or there will
        be trouble.
        """
        self._alphabet = alphabet

        self.separator = ";"

    def write(self, pattern_list, output_handle):
        """Write a list of patterns to the given handle.

        Arguments:

        o pattern_list - The patterns to write. Lists and tuples are
        treated as signatures and their items are joined with the
        separator; anything else is written as is.

        o output_handle - An object with a write method; one line is
        written to it per pattern.
        """
        for pattern in pattern_list:
            # deal with signatures, concatenate them with the separator
            if isinstance(pattern, (list, tuple)):
                string_pattern = self.separator.join(pattern)
            # deal with the normal cases
            else:
                string_pattern = pattern

            output_handle.write("%s\n" % string_pattern)

    def write_seq(self, seq_pattern_list, output_handle):
        """Convenience function to write Seq objects to a file.

        This can take Seqs and MutableSeqs, and write them to a file
        as strings.

        Arguments:

        o seq_pattern_list - The Seq or MutableSeq objects to write.

        o output_handle - The handle passed on to write().

        Raises ValueError if an item is neither a Seq nor a MutableSeq.
        """
        # convert the seq patterns into just string patterns
        all_patterns = []

        for seq_pattern in seq_pattern_list:
            # NOTE(review): this relies on the Seq 'data' attribute; confirm
            # it is available in the targeted Biopython version.
            if isinstance(seq_pattern, MutableSeq):
                seq = seq_pattern.toseq()
                all_patterns.append(seq.data)
            elif isinstance(seq_pattern, Seq):
                all_patterns.append(seq_pattern.data)
            else:
                raise ValueError("Unexpected pattern type %r" % seq_pattern)

        self.write(all_patterns, output_handle)

    def read(self, input_handle):
        """Read patterns from the specified handle.

        Lines are read with readline() until it returns an empty value.
        Trailing whitespace is stripped from every line. A line containing
        the separator is returned as a tuple of its items (a signature),
        any other line is returned as a single string.

        Arguments:

        o input_handle - An object with a readline method.

        Returns a list with one entry per line read.

        Raises ValueError if an alphabet was given and a pattern does not
        pass utils.verify_alphabet for it.
        """
        all_patterns = []

        while True:
            cur_line = input_handle.readline()

            if not cur_line:
                break

            cur_pattern = cur_line.rstrip()
            # split up signatures
            if self.separator in cur_pattern:
                cur_pattern = tuple(cur_pattern.split(self.separator))

            if self._alphabet is not None:
                # make single patterns (not signatures) into lists, so we
                # can check signatures and single patterns the same
                if isinstance(cur_pattern, tuple):
                    test_pattern = cur_pattern
                else:
                    test_pattern = [cur_pattern]
                for pattern_item in test_pattern:
                    pattern_seq = Seq(pattern_item, self._alphabet)
                    if not utils.verify_alphabet(pattern_seq):
                        raise ValueError("Pattern %s not matching alphabet %s"
                                         % (cur_pattern, self._alphabet))

            all_patterns.append(cur_pattern)

        return all_patterns
107
class PatternRepository:
    """This holds a list of specific patterns found in sequences.

    This is designed to be a general holder for a set of patterns and
    should be subclassed for specific implementations (ie. holding Motifs
    or Signatures).
    """

    def __init__(self, pattern_info):
        """Initialize a repository with patterns.

        Arguments:

        o pattern_info - A representation of all of the patterns found in
        a *Finder search. This should be a dictionary, where the keys
        are patterns, and the values are the number of times a pattern is
        found.

        The patterns are represented internally as a list of two
        tuples, where the first element is the number of times a pattern
        occurs, and the second is the pattern itself. This makes it easy
        to sort the list and return the top N patterns.
        """
        self._pattern_dict = pattern_info

        # create the list representation, most frequent patterns first
        self._pattern_list = [(count, pattern)
                              for pattern, count in pattern_info.items()]
        self._pattern_list.sort(reverse=True)

    def get_all(self):
        """Retrieve all of the patterns in the repository.

        The patterns are returned in the order of the internal list,
        which is sorted from the highest count to the lowest.
        """
        return [pattern_info[1] for pattern_info in self._pattern_list]

    def get_random(self, num_patterns):
        """Retrieve the specified number of patterns randomly.

        Randomly selects distinct patterns from the list and returns them.

        Arguments:

        o num_patterns - The total number of patterns to return. Zero or
        a negative number gives an empty list.

        Raises ValueError if more patterns are requested than the
        repository holds, since that request can never be satisfied.
        """
        if num_patterns <= 0:
            return []

        candidates = self.get_all()
        if num_patterns > len(candidates):
            raise ValueError("Requested %s random patterns, but only %s "
                             "are available" % (num_patterns,
                                                len(candidates)))

        # sample picks without replacement, so no pattern is repeated
        return random.sample(candidates, num_patterns)

    def get_top_percentage(self, percent):
        """Return a percentage of the patterns.

        This returns the top 'percent' percentage of the patterns in the
        repository.

        Arguments:

        o percent - The fraction of the patterns to return; it is
        multiplied by the number of patterns and truncated to an integer.
        """
        all_patterns = self.get_all()

        num_to_return = int(len(all_patterns) * percent)

        return all_patterns[:num_to_return]

    def get_top(self, num_patterns):
        """Return the specified number of most frequently occurring patterns.

        Arguments:

        o num_patterns - The number of patterns to return.
        """
        return [pattern_info[1]
                for pattern_info in self._pattern_list[:num_patterns]]

    def get_differing(self, top_num, bottom_num):
        """Retrieve patterns that are at the extreme ranges.

        This returns both patterns at the top of the list (ie. the same as
        returned by get_top) and at the bottom of the list. This
        is especially useful for patterns that are the differences between
        two sets of patterns.

        Arguments:

        o top_num - The number of patterns to take from the top of the list.

        o bottom_num - The number of patterns to take from the bottom of
        the list. Zero or a negative number takes nothing from the bottom.
        """
        # first get from the top of the list
        selected = self._pattern_list[:top_num]

        # then from the bottom; a plain [-bottom_num:] slice would return
        # the whole list when bottom_num is zero, so guard against that
        if bottom_num > 0:
            selected = selected + self._pattern_list[-bottom_num:]

        return [pattern_info[1] for pattern_info in selected]

    def remove_polyA(self, at_percentage=.9):
        """Remove patterns which are likely due to polyA tails from the lists.

        This is just a helper function to remove patterns which are likely
        just due to polyA tails, and thus are not really great motifs.
        This will also get rid of stuff like ATATAT, which might be a
        useful motif, so use at your own discretion.

        Only the internal pattern list is filtered; the counts reported by
        count() are not changed.

        XXX Could we write a more general function, based on info content
        or something like that?

        Arguments:

        o at_percentage - The fraction of A and T residues in a pattern
        that qualifies it for being removed. Patterns with a fraction
        strictly above this value are removed.
        """
        kept = []
        for pattern_info in self._pattern_list:
            pattern = pattern_info[1]
            # an empty pattern has no AT content, and would otherwise
            # cause a division by zero
            if pattern:
                pattern_at = (float(pattern.count('A') + pattern.count('T'))
                              / float(len(pattern)))
                if pattern_at > at_percentage:
                    continue
            kept.append(pattern_info)

        # update in place so the list object itself stays the same
        self._pattern_list[:] = kept

    def count(self, pattern):
        """Return the number of times the specified pattern is found.

        Patterns which are not in the repository have a count of zero.
        """
        try:
            return self._pattern_dict[pattern]
        except KeyError:
            return 0
257