1 """A Martel format to parse the output from transfac.
2
3 Formats:
    format    Format for a whole file.
5
6 """
7
import warnings

# Importing this module emits a DeprecationWarning. The message is split
# across adjacent literals purely for line length; the text is unchanged.
warnings.warn(
    "Bio.expressions was deprecated, as it does not work with recent "
    "versions of mxTextTools. If you want to continue to use this module, "
    "please get in contact with the Biopython developers at "
    "biopython-dev@biopython.org to avoid permanent removal of this module "
    "from Biopython",
    category=DeprecationWarning
)
10
11
12 import sys
13
14 from Martel import *
15 from Martel import RecordReader
16
# A separator line: optional whitespace followed by an end-of-line.
blank_line = (
    Opt(Spaces())
    + AnyEol()
)
18
# Header lines. Each one is a fixed label, whitespace, and then the rest of
# the line stored under the field name passed to UntilEol / UntilSep.
MATRIX_LINE = (
    Str("Search for sites by WeightMatrix library:")
    + Spaces()
    + UntilEol("matrix_file")
    + AnyEol()
)

SEQUENCE_LINE = (
    Str("Sequence file:")
    + Spaces()
    + UntilEol("sequence_file")
    + AnyEol()
)

# The profile line holds two fields: the first runs up to a space, the
# second takes whatever remains on the line.
PROFILE_LINE = (
    Str("Site selection profile:")
    + Spaces()
    + UntilSep("profile_file", sep=" ")
    + Spaces()
    + UntilEol("profile_description")
    + AnyEol()
)
26
# Title line, laid out as:
#   Inspecting sequence ID <entryname> <dataclass>; <molecule>; <division>;
#   <sequencelength> BP<anything up to the end of the line>
TITLE_LINE = (
    Str("Inspecting sequence ID")
    + Spaces()
    # entry name ends at the first space
    + UntilSep("entryname", sep=" ")
    + Spaces()
    # three fields, each terminated by a semicolon
    + UntilSep("dataclass", sep=";")
    + Str(";")
    + Spaces()
    + UntilSep("molecule", sep=";")
    + Str(";")
    + Spaces()
    + UntilSep("division", sep=";")
    + Str(";")
    + Spaces()
    # length followed by the literal "BP"
    + UntilSep("sequencelength", sep=" ")
    + Spaces()
    + Str("BP")
    # the unnamed remainder of the line
    + UntilEol()
    + AnyEol()
)
34
def SS(exp):
    """Return *exp* with optional whitespace allowed on either side.

    The result is ``Opt(Spaces()) + exp + Opt(Spaces())``; it is used to
    build the whitespace-padded columns of DATA_LINE.

    exp -- the Martel expression to wrap.
    """
    # The ``def`` header is restored here: without it the ``return`` sits at
    # module level (a SyntaxError) and ``SS`` is undefined for DATA_LINE.
    return Opt(Spaces()) + exp + Opt(Spaces())
37
38
# One result row. Columns are separated by "|" and each is wrapped in SS():
#   matrix_identifier | position (strand) | core_match | matrix_match | sequence
DATA_LINE = (
    # column 1: matrix identifier, ending at a space or "|"
    SS(UntilSep("matrix_identifier", sep=" |")) + Str("|")
    # column 2: position, then the strand sign in parentheses
    + SS(UntilSep("position", sep=" "))
    + SS(Str("(") + Group("strand", Any("+-")) + Str(")")) + Str("|")
    # columns 3 and 4: the two match scores
    + SS(Float("core_match")) + Str("|")
    + SS(Float("matrix_match")) + Str("|")
    # column 5: the rest of the line, after optional leading whitespace
    + Opt(Spaces()) + UntilEol("sequence") + AnyEol()
)
50
# Summary lines. Each is leading whitespace, a fixed "label=" string, and
# one numeric field.
SEQUENCES_LENGTH_LINE = (
    Spaces()
    + Str("Total sequences length=")
    + Integer("sequences_length")
    + AnyEol()
)

FOUND_SITES_LINE = (
    Spaces()
    + Str("Total number of found sites=")
    + Integer("found_sites")
    + AnyEol()
)

SITE_FREQUENCY_LINE = (
    Spaces()
    + Str("Frequency of sites per nucleotide=")
    + Float("sites_per_nucleotide")
    + AnyEol()
)
62
# Expression for a whole file: three header lines, the title line, any
# number of data rows, then the three summary lines, with blank lines in
# between. The name shadows the builtin ``format`` but is kept because it
# is this module's documented public name.
format = (
    # header
    MATRIX_LINE + SEQUENCE_LINE + PROFILE_LINE
    + blank_line
    # title
    + TITLE_LINE
    + blank_line
    # data rows
    + Rep(DATA_LINE)
    + blank_line
    # summary
    + SEQUENCES_LENGTH_LINE
    + blank_line
    + FOUND_SITES_LINE
    + blank_line
    + SITE_FREQUENCY_LINE
)
76