Package Martel :: Package test :: Module test_swissprot38
[hide private]
[frames | no frames]

Source Code for Module Martel.test.test_swissprot38

  1  from testformats.swissprot38 import * 
  2   
  3   
  4  import support 
  5   
  6  test_list = support.Storage() 
  7  add_test = test_list.add_test 
  8  add_test_lines = test_list.add_test_lines 
  9   
 10  add_test_lines("ID line", ID, """\ 
 11  ID   100K_RAT       STANDARD;      PRT;   889 AA. 
 12  ID   CYC_BOVIN      STANDARD;      PRT;   104 AA. 
 13  ID   GIA2_GIALA     STANDARD;      PRT;   296 AA. 
 14  """) 
 15   
 16  add_test_lines("AC line", AC, """\ 
 17  AC   Q62671; 
 18  AC   P00321; P05348; 
 19  """) 
 20   
 21  add_test("AC (block)", AC_block, """\ 
 22  AC   Q62671; Q05349; Q05351; Q05352; Q05353; Q05354; Q05355; Q05356; 
 23  AC   Q92671; Q95349; Q95351; Q95352; Q95353; Q95354; Q95355; Q95356; 
 24  AC   Q98763; 
 25  """) 
 26   
 27  add_test("date 1", DT_created + DT_seq_update + DT_ann_update, """\ 
 28  DT   01-OCT-1996 (Rel. 34, Created) 
 29  DT   01-OCT-1996 (Rel. 34, Last sequence update) 
 30  DT   01-NOV-1997 (Rel. 35, Last annotation update) 
 31  """) 
 32   
 33  add_test("date 2", DT_created + DT_seq_update + DT_ann_update, """\ 
 34  DT   01-AUG-1988 (Rel. 08, Created) 
 35  DT   01-JAN-1990 (Rel. 13, Last sequence update) 
 36  DT   15-APR-1999 (Rel. 38, Last annotation update) 
 37  """) 
 38   
 39  add_test_lines("DE (single line)", DE, """\ 
 40  DE   100 KD PROTEIN (EC 6.3.2.-). 
 41  DE   10 KD PROTEIN PRECURSOR (CLONE PSAS10). 
 42  """) 
 43  add_test("DE (muliline) 1", DE_block, """\ 
 44  DE   14-3-3 PROTEIN BETA/ALPHA (PROTEIN KINASE C INHIBITOR PROTEIN-1) 
 45  DE   (KCIP-1). 
 46  """) 
 47  add_test("DE (muliline) 2", DE_block, """\ 
 48  DE   ANNEXIN V (LIPOCORTIN V) (ENDONEXIN II) (CALPHOBINDIN I) (CBP-I) 
 49  DE   (PLACENTAL ANTICOAGULANT PROTEIN I) (PAP-I) (PP4) (THROMBOPLASTIN 
 50  DE   INHIBITOR) (VASCULAR ANTICOAGULANT-ALPHA) (VAC-ALPHA) (ANCHORIN CII). 
 51  """) 
 52   
 53  add_test_lines("GN (single line)", GN, """\ 
 54  GN   HAG3. 
 55  GN   REX-1. 
 56  GN   HNS OR DRDX OR OSMZ OR BGLY. 
 57  GN   GVPA AND (GVPB OR GVPA2). 
 58  """) 
 59   
 60  # from CALM_HUMAN 
 61  add_test("GN (block)", GN_block, """\ 
 62  GN   (CALM1 OR CAM1 OR CALM OR CAM) AND (CALM2 OR CAM2 OR CAMB) AND 
 63  GN   (CALM3 OR CAM3 OR CAMC). 
 64  """) 
 65   
 66  add_test_lines("OS (single line)", OS, """\ 
 67  OS   Helianthus annuus (Common sunflower). 
 68  OS   Escherichia coli. 
 69  OS   Homo sapiens (Human). 
 70  OS   Acer spicatum (Moose maple) (Mountain maple). 
 71  OS   Rous sarcoma virus (strain Schmidt-Ruppin). 
 72  """) 
 73   
 74  add_test("OS (block) 1", OS_block, """\ 
 75  OS   Oncorhynchus nerka (Sockeye salmon), and 
 76  OS   Oncorhynchus masou (Cherry salmon) (Masu salmon). 
 77  """) 
 78   
 79  add_test("OS (block) 2", OS_block, """\ 
 80  OS   Mus musculus (Mouse), Rattus norvegicus (Rat), and 
 81  OS   Bos taurus (Bovine). 
 82  """) 
 83   
 84  add_test_lines("OG (single line)", OG, """\ 
 85  OG   Chloroplast. 
 86  OG   Cyanelle. 
 87  OG   Mitochondrion. 
 88  OG   Plasmid name. 
 89  OG   Plasmid IncI1 ColIb. 
 90  """) 
 91   
 92  add_test("OG (block)", OG_block, """\ 
 93  OG   Plasmid pDGO100, Plasmid IncQ pIE723, Plasmid pBP201, and 
 94  OG   Plasmid IncM pBWH1. 
 95  """) 
 96   
 97  add_test("OG (block)", OG_block, """\ 
 98  OG   Plasmid R6-5, Plasmid IncFII NR1, and 
 99  OG   Plasmid IncFII R1-19 (R1 drd-19). 
100  """) 
101  add_test_lines("OC (single line)", OC, """\ 
102  OC   Eukaryota; Alveolata; Apicomplexa; Haemosporida; Plasmodium. 
103  OC   Eukaryota; Entamoebidae; Entamoeba. 
104  """) 
105   
106  add_test("OC (block) 1", OC_block, """\ 
107  OC   Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; 
108  OC   euphyllophytes; Spermatophyta; Magnoliophyta; eudicotyledons; 
109  OC   core eudicots; Asteridae; euasterids I; Solanales; Solanaceae; 
110  OC   Solanum. 
111  """) 
112   
113  add_test("OC (block) 2", OC_block, """\ 
114  OC   Eukaryota; Metazoa; Chordata; Vertebrata; Mammalia; Eutheria; 
115  OC   Primates; Catarrhini; Hominidae; Homo. 
116  """) 
117   
118  add_test_lines("RN", RN, """\ 
119  RN   [1] 
120  RN   [2] 
121  RN   [3] 
122  RN   [23] 
123  RN   [876543] 
124  """) 
125   
126  add_test_lines("RP", RP, """\ 
127  RP   SEQUENCE FROM N.A. 
128  RP   SEQUENCE FROM N.A., AND SEQUENCE OF 12-35. 
129  RP   SEQUENCE OF 34-56; 67-73 AND 123-345, AND DISULFIDE BONDS. 
130  RP   REVISIONS TO 67-89. 
131  RP   STRUCTURE BY NMR. 
132  RP   X-RAY CRYSTALLOGRAPHY (1.8 ANGSTROMS). 
133  RP   CHARACTERIZATION. 
134  RP   MUTAGENESIS OF TYR-56. 
135  RP   REVIEW. 
136  RP   VARIANT ALA-58. 
137  RP   VARIANTS XLI LEU-341; ARG-372 AND TYR-446. 
138  """) 
139   
140  add_test_lines("RC (single line)", RC, """\ 
141  RC   STRAIN=SPRAGUE-DAWLEY; TISSUE=LIVER; 
142  RC   STRAIN=HOLSTEIN; TISSUE=MAMMARY GLAND, AND LYMPH NODE; 
143  RC   SPECIES=RAT; STRAIN=WISTAR; 
144  RC   PLASMID=INCFII R100; 
145  """) 
146   
147  add_test("RC (block)", RC_block, """\ 
148  RC   STRAIN=MVZ CATALOG 172969, 172970, 174109, 174110, 174229, AND 174230; 
149  RC   TISSUE=LIVER; 
150  """) 
151   
152  add_test_lines("RX (single line)", RX, """\ 
153  RX   MEDLINE; 91002678. 
154  RX   MEDLINE; 93144687. 
155  """) 
156   
157  add_test("RA (block) 1", RA_block, """\ 
158  RA   SMITH H. JR., VON BRAUN M.T. III; 
159  """) 
160   
161  add_test("RA (block) 2", RA_block, """\ 
162  RA   YANOFSKY C., PLATT T., CRAWFORD I.P., NICHOLS B.P., CHRISTIE G.E., 
163  RA   HOROWITZ H., VAN CLEEMPUT M., WU A.M.; 
164  """) 
165  add_test("RT (single line)", RT, """\ 
166  RT   "Organization of the sunflower 11S storage protein gene family."; 
167  """) 
168   
169  add_test("RT (block) 1", RT_block, """\ 
170  RT   "New insulin-like proteins with atypical disulfide bond pattern 
171  RT   characterized in Caenorhabditis elegans by comparative sequence 
172  RT   analysis and homology modeling."; 
173  """) 
174   
175  add_test("RT (block) 2", RT_block, """\ 
176  RT   "Stored mRNA in cotyledons of Vigna unguiculata seeds: nucleotide 
177  RT   sequence of cloned cDNA for a stored mRNA and induction of its 
178  RT   synthesis by precocious germination."; 
179  """) 
180  add_test_lines("RL (single line)", RL, """\ 
181  RL   J. Mol. Biol. 168:321-331(1983). 
182  RL   Nucleic Acids Res. 27:0-0(1999). 
183  RL   Thesis (1972), University of Geneva, Switzerland. 
184  """) 
185   
186  add_test("RL (block) 1", RL_block, """\ 
187  RL   (In) Boyer P.D. (eds.); 
188  RL   The enzymes (3rd ed.), pp.11:397-547, Academic Press, New York (1975). 
189  """) 
190   
191  add_test("RL (block) 2", RL_block, """\ 
192  RL   (In) Rich D.H., Gross E. (eds.); 
193  RL   Proceedings of the 7th american peptide symposium, pp.69-72, 
194  RL   Pierce Chemical Co., Rockford Il. (1981). 
195  """) 
196   
197  add_test("RL (block) 3", RL_block, """\ 
198  RL   (In) Magnusson S., Ottesen M., Foltmann B., Dano K., 
199  RL   Neurath H. (eds.); 
200  RL   Regulatory proteolytic enzymes and their inhibitors, pp.163-172, 
201  RL   Pergamon Press, New York (1978). 
202  """) 
203   
204  add_test("RL (block) 4", RL_block, """\ 
205  RL   (In) Plant Gene Register PGR98-023. 
206  RL   (In) Worm Breeder's Gazette 15(3):34(1998). 
207  """) 
208   
209  add_test("reference 1", reference, """\ 
210  RN   [1] 
211  RP   SEQUENCE FROM N.A. 
212  RC   STRAIN=WISTAR; TISSUE=TESTIS; 
213  RX   MEDLINE; 92253337. 
214  RA   MUELLER D., REHBEIN M., BAUMEISTER H., RICHTER D.; 
215  RT   "Molecular characterization of a novel rat protein structurally 
216  RT   related to poly(A) binding proteins and the 70K protein of the U1 
217  RT   small nuclear ribonucleoprotein particle (snRNP)."; 
218  RL   Nucleic Acids Res. 20:1471-1475(1992). 
219  """) 
220   
221  add_test("reference 2", reference, """\ 
222  RN   [2] 
223  RP   ERRATUM. 
224  RA   MUELLER D., REHBEIN M., BAUMEISTER H., RICHTER D.; 
225  RL   Nucleic Acids Res. 20:2624-2624(1992). 
226  """) 
227  s1 = """\ 
228  CC   -!- FUNCTION: E3 UBIQUITIN-PROTEIN LIGASE WHICH ACCEPTS UBIQUITIN FROM 
229  CC       AN E2 UBIQUITIN-CONJUGATING ENZYME IN THE FORM OF A THIOESTER AND 
230  CC       THEN DIRECTLY TRANSFERS THE UBIQUITIN TO TARGETED SUBSTRATES (BY 
231  CC       SIMILARITY). THIS PROTEIN MAY BE INVOLVED IN MATURATION AND/OR 
232  CC       POST-TRANSCRIPTIONAL REGULATION OF MRNA. 
233  """ 
234  s2 = """\ 
235  CC   -!- TISSUE SPECIFICITY: HIGHEST LEVELS FOUND IN TESTIS. ALSO PRESENT 
236  CC       IN LIVER, KIDNEY, LUNG AND BRAIN. 
237  """ 
238  s3 = """\ 
239  CC   -!- DEVELOPMENTAL STAGE: IN EARLY POST-NATAL LIFE, EXPRESSION IN 
240  CC       THE TESTIS INCREASES TO REACH A MAXIMUM AROUND DAY 28. 
241  """ 
242  add_test("single comment 1", single_comment, s1) 
243  add_test("single comment 2", single_comment, s2) 
244  add_test("single comment 3", single_comment, s3) 
245   
246  copyright = """\ 
247  CC   -------------------------------------------------------------------------- 
248  CC   This SWISS-PROT entry is copyright. It is produced through a collaboration 
249  CC   between  the Swiss Institute of Bioinformatics  and the  EMBL outstation - 
250  CC   the European Bioinformatics Institute.  There are no  restrictions on  its 
251  CC   use  by  non-profit  institutions as long  as its content  is  in  no  way 
252  CC   modified and this statement is not removed.  Usage  by  and for commercial 
253  CC   entities requires a license agreement (See http://www.isb-sib.ch/announce/ 
254  CC   or send an email to license@isb-sib.ch). 
255  CC   -------------------------------------------------------------------------- 
256  """                      
257   
258   
259  add_test("set of comments 1", comment, s1) 
260  add_test("set of comments 2", comment, s1+s2) 
261  add_test("set of comments 3", comment, s1+s3) 
262  add_test("set of comments 4", comment, s1+s2+s3) 
263  add_test("set of comments 5", comment, s1+copyright) 
264  add_test("set of comments 6", comment, s1+s2+s3+copyright) 
265   
266   
267  s = """\ 
268  DR   AARHUS/GHENT-2DPAGE; 8006; IEF. 
269  DR   DICTYDB; DD01047; MYOA. 
270  DR   ECO2DBASE; G052.0; 6TH EDITION. 
271  DR   ECOGENE; EG10054; ARAC. 
272  DR   FLYBASE; FBgn0000055; Adh. 
273  DR   GCRDB; GCR_0087; -. 
274  DR   HIV; K02013; NEF$BRU. 
275  DR   HSC-2DPAGE; P47985; HUMAN. 
276  DR   HSSP; P00438; 1DOB. 
277  DR   MAIZEDB; 25342; -. 
278  DR   MAIZE-2DPAGE; P80607; COLEOPTILE. 
279  DR   MENDEL; 2596; AMAhy;psbA;1. 
280  DR   MGD; MGI:87920; ADFP. 
281  DR   MGD; MGI:95401; EPB4.1. 
282  DR   MIM; 249900; -. 
283  DR   PDB; 3ADK; 16-APR-88. 
284  DR   PIR; A02768; R5EC7. 
285  DR   REBASE; RB00005; EcoRI. 
286  DR   SGD; L0000008; AAR2. 
287  DR   STYGENE; SG10312; PROV. 
288  DR   SUBTILIST; BG10774; OPPD. 
289  DR   SWISS-2DPAGE; P10599; HUMAN. 
290  DR   TIGR; MJ0125; -. 
291  DR   TRANSFAC; T00141; -. 
292  DR   WORMPEP; ZK637.7; CE00437. 
293  DR   YEPD; 4270; -. 
294  DR   ZFIN; ZDB-GENE-980526-290; hoxa1. 
295  DR   EMBL; Y00312; CAA68412.1; -. 
296  DR   EMBL; L29151; AAA99430.1; ALT_INIT. 
297  DR   EMBL; L20562; AAA26884.1; ALT_TERM. 
298  DR   EMBL; X56420; CAA39814.1; ALT_FRAME. 
299  DR   EMBL; M28482; AAA26378.1; ALT_SEQ. 
300  DR   EMBL; M63397; AAA51662.1; -. 
301  DR   EMBL; M63395; AAA51662.1; JOINED. 
302  DR   EMBL; M63396; AAA51662.1; JOINED. 
303  DR   EMBL; J04126; -; NOT_ANNOTATED_CDS. 
304  DR   PROSITE; PS00107; PROTEIN_KINASE_ATP; 1. 
305  DR   PROSITE; PS00028; ZINC_FINGER_C2H2; 6. 
306  DR   PROSITE; PS00237; G_PROTEIN_RECEPTOR; FALSE_NEG. 
307  DR   PROSITE; PS01128; SHIKIMATE_KINASE; PARTIAL. 
308  DR   PROSITE; PS00383; TYR_PHOSPHATASE_1; UNKNOWN_1. 
309  DR   PFAM; PF00017; SH2; 1. 
310  DR   PFAM; PF00008; EGF; 8. 
311  DR   PFAM; PF00595; PDZ; PARTIAL. 
312  """ 
313   
314  add_test_lines("DR", DR, s) 
315  add_test("DR (block)", DR_block, s) 
316   
317  add_test_lines("KW (single line)", KW, """\ 
318  KW   Oxidoreductase; Acetylation. 
319  KW   Acetylation; Oxidoreductase. 
320  KW   Ubiquitin conjugation; Ligase. 
321  KW   Signal. 
322  KW   Seed storage protein; Multigene family; Signal. 
323  """) 
324   
325  add_test("KW (block) 1", KW_block, """\ 
326  KW   Brain; Neurone; Phosphorylation; Acetylation; Multigene family; 
327  KW   3D-structure. 
328  """) 
329   
330  add_test("KW (block) 2", KW_block, """\ 
331  KW   Steroidogenesis; Oxidoreductase; NAD; Isomerase; Mitochondrion; 
332  KW   Multigene family; Multifunctional enzyme; Transmembrane; 
333  KW   Endoplasmic reticulum. 
334  """) 
335   
336  add_test("KW (block) 3", KW_block, """\ 
337  KW   Hydrolase; Ligase; Oxidoreductase; NADP; Multifunctional enzyme; 
338  KW   One-carbon metabolism; ATP-binding; Purine biosynthesis; 
339  KW   Amino-acid biosynthesis; Methionine biosynthesis; 
340  KW   Histidine biosynthesis. 
341  """) 
342   
343  add_test_lines("FT range / single line", FT_range, """\ 
344  FT   DOMAIN       77     88       ASP/GLU-RICH (ACIDIC). 
345  FT   DOMAIN      127    150       PRO-RICH. 
346  FT   DOMAIN      420    439       ARG/GLU-RICH (MIXED CHARGE). 
347  FT   BINDING     858    858       UBIQUITIN (BY SIMILARITY). 
348  FT   DOMAIN       43     57       PRO/THR-RICH. 
349  FT   SIGNAL       <1      8       BY SIMILARITY. 
350  FT   NON_TER       1      1 
351  FT   DISULFID     56     67 
352  FT   CARBOHYD    114    114       POTENTIAL. 
353  FT   CONFLICT    102    102       D -> S (IN REF. 2). 
354  FT   CONFLICT    105    105       MISSING (IN REF. 3). 
355  FT   CHAIN         ?     75       10 KD PROTEIN. 
356  FT   SIGNAL        1    ?24       POTENTIAL. 
357  FT   PROPEP      ?25    ?31       POTENTIAL. 
358  FT   SIGNAL        1      ? 
359  FT   INIT_MET      0      0 
360  """) 
361   
362  add_test("FT w/ continuation 1", FT, """\ 
363  FT   MOD_RES       9      9       AMIDATION (G-10 PROVIDE AMIDE GROUP) 
364  FT                                (BY SIMILARITY). 
365  """) 
366   
367  add_test("FT w/ continuation 2", FT, """\ 
368  FT   DOMAIN      131    296       13.5 X 12 AA TANDEM REPEATS OF E-E-T-Q-K- 
369  FT                                T-V-E-P-E-Q-T. 
370  """) 
371   
372  add_test("FT w/ continuation 3", FT, """\ 
373  FT   VARIANT      33     33       F -> Y (IN A*0205, A*0206, A*0208, A*0210 
374  FT                                AND A*0221). 
375  FT                                /FTId=VAR_004334. 
376  """) 
377   
378   
379  add_test("feature (block)", feature_block, """\ 
380  FT   DOMAIN       77     88       ASP/GLU-RICH (ACIDIC). 
381  FT   DOMAIN      127    150       PRO-RICH. 
382  FT   DOMAIN      420    439       ARG/GLU-RICH (MIXED CHARGE). 
383  FT   DOMAIN      131    296       13.5 X 12 AA TANDEM REPEATS OF E-E-T-Q-K- 
384  FT                                T-V-E-P-E-Q-T. 
385  FT   BINDING     858    858       UBIQUITIN (BY SIMILARITY). 
386  FT   DOMAIN       43     57       PRO/THR-RICH. 
387  FT   SIGNAL       <1      8       BY SIMILARITY. 
388  FT   NON_TER       1      1 
389  FT   DISULFID     56     67 
390  FT   CARBOHYD    114    114       POTENTIAL. 
391  FT   VARIANT      33     33       F -> Y (IN A*0205, A*0206, A*0208, A*0210 
392  FT                                AND A*0221). 
393  FT                                /FTId=VAR_004334. 
394  FT   CONFLICT    102    102       D -> S (IN REF. 2). 
395  FT   CONFLICT    105    105       MISSING (IN REF. 3). 
396  FT   CHAIN         ?     75       10 KD PROTEIN. 
397  FT   SIGNAL        1    ?24       POTENTIAL. 
398  FT   PROPEP      ?25    ?31       POTENTIAL. 
399  FT   MOD_RES       9      9       AMIDATION (G-10 PROVIDE AMIDE GROUP) 
400  FT                                (BY SIMILARITY). 
401  FT   SIGNAL        1      ? 
402  FT   INIT_MET      0      0 
403  """) 
404   
405  add_test_lines("SQ header", SQ, """\ 
406  SQ   SEQUENCE   889 AA;  100368 MW;  DD7E6C7A CRC32; 
407  SQ   SEQUENCE   111 AA;  12416 MW;  103BBA8B CRC32; 
408  SQ   SEQUENCE   29 AA;  2900 MW;  BA38C516 CRC32; 
409  SQ   SEQUENCE   1707 AA;  194328 MW;  31FDA77C CRC32; 
410  """) 
411   
412  add_test_lines("SQ_data", SQ_data, """\ 
413       ISFTSFNDES GENAEKLLQF KRWFWSIVER MSMTERQDLV YFWTSSPSLP ASEEGFQPMP 
414       SITIRPPDDQ HLPTANTCIS RLYVPLYSSK QILKQKLLLA IKTKNFGFV 
415       SITIRPPDDQ HLP 
416       A 
417  """) 
418   
419  add_test("sequence 1", sequence, """\ 
420  SQ   SEQUENCE   889 AA;  100368 MW;  DD7E6C7A CRC32; 
421       MMSARGDFLN YALSLMRSHN DEHSDVLPVL DVCSLKHVAY VFQALIYWIK AMNQQTTLDT 
422       PQLERKRTRE LLELGIDNED SEHENDDDTS QSATLNDKDD ESLPAETGQN HPFFRRSDSM 
423       S 
424  """) 
425  add_test("sequence 2", sequence, """\ 
426  SQ   SEQUENCE   4 AA;  408 MW;  34BC4AD8 CRC32; 
427       GFAD 
428  """) 
429   
430  add_test("end", end, """\ 
431  // 
432  """) 
433   
434   
435  record1 = """ID   100K_RAT       STANDARD;      PRT;   889 AA. 
436  AC   Q62671; 
437  DT   01-NOV-1997 (Rel. 35, Created) 
438  DT   01-NOV-1997 (Rel. 35, Last sequence update) 
439  DT   15-JUL-1999 (Rel. 38, Last annotation update) 
440  DE   100 KD PROTEIN (EC 6.3.2.-). 
441  OS   Rattus norvegicus (Rat). 
442  OC   Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Mammalia; 
443  OC   Eutheria; Rodentia; Sciurognathi; Muridae; Murinae; Rattus. 
444  RN   [1] 
445  RP   SEQUENCE FROM N.A. 
446  RC   STRAIN=WISTAR; TISSUE=TESTIS; 
447  RX   MEDLINE; 92253337. 
448  RA   MUELLER D., REHBEIN M., BAUMEISTER H., RICHTER D.; 
449  RT   "Molecular characterization of a novel rat protein structurally 
450  RT   related to poly(A) binding proteins and the 70K protein of the U1 
451  RT   small nuclear ribonucleoprotein particle (snRNP)."; 
452  RL   Nucleic Acids Res. 20:1471-1475(1992). 
453  RN   [2] 
454  RP   ERRATUM. 
455  RA   MUELLER D., REHBEIN M., BAUMEISTER H., RICHTER D.; 
456  RL   Nucleic Acids Res. 20:2624-2624(1992). 
457  CC   -!- FUNCTION: E3 UBIQUITIN-PROTEIN LIGASE WHICH ACCEPTS UBIQUITIN FROM 
458  CC       AN E2 UBIQUITIN-CONJUGATING ENZYME IN THE FORM OF A THIOESTER AND 
459  CC       THEN DIRECTLY TRANSFERS THE UBIQUITIN TO TARGETED SUBSTRATES (BY 
460  CC       SIMILARITY). THIS PROTEIN MAY BE INVOLVED IN MATURATION AND/OR 
461  CC       POST-TRANSCRIPTIONAL REGULATION OF MRNA. 
462  CC   -!- TISSUE SPECIFICITY: HIGHEST LEVELS FOUND IN TESTIS. ALSO PRESENT 
463  CC       IN LIVER, KIDNEY, LUNG AND BRAIN. 
464  CC   -!- DEVELOPMENTAL STAGE: IN EARLY POST-NATAL LIFE, EXPRESSION IN 
465  CC       THE TESTIS INCREASES TO REACH A MAXIMUM AROUND DAY 28. 
466  CC   -!- MISCELLANEOUS: A CYSTEINE RESIDUE IS REQUIRED FOR 
467  CC       UBIQUITIN-THIOLESTER FORMATION. 
468  CC   -!- SIMILARITY: CONTAINS AN HECT-TYPE E3 UBIQUITIN-PROTEIN LIGASE 
469  CC       DOMAIN. 
470  CC   -!- SIMILARITY: A CENTRAL REGION (AA 485-514) IS SIMILAR TO THE 
471  CC       C-TERMINAL DOMAINS OF MAMMALIAN AND YEAST POLY (A) RNA BINDING 
472  CC       PROTEINS (PABP). 
473  CC   -!- SIMILARITY: THE C-TERMINAL HALF SHOWS HIGH SIMILARITY TO 
474  CC       DROSOPHILA HYPERPLASMIC DISC PROTEIN AND SOME, TO HUMAN E6-AP. 
475  CC   -!- SIMILARITY: CONTAINS MIXED-CHARGE DOMAINS SIMILAR TO RNA-BINDING 
476  CC       PROTEINS. 
477  CC   -------------------------------------------------------------------------- 
478  CC   This SWISS-PROT entry is copyright. It is produced through a collaboration 
479  CC   between  the Swiss Institute of Bioinformatics  and the  EMBL outstation - 
480  CC   the European Bioinformatics Institute.  There are no  restrictions on  its 
481  CC   use  by  non-profit  institutions as long  as its content  is  in  no  way 
482  CC   modified and this statement is not removed.  Usage  by  and for commercial 
483  CC   entities requires a license agreement (See http://www.isb-sib.ch/announce/ 
484  CC   or send an email to license@isb-sib.ch). 
485  CC   -------------------------------------------------------------------------- 
486  DR   EMBL; X64411; CAA45756.1; -. 
487  DR   PFAM; PF00632; HECT; 1. 
488  DR   PFAM; PF00658; PABP; 1. 
489  DR   PROSITE; PS00107; PROTEIN_KINASE_ATP; 1. 
490  DR   AARHUS/GHENT-2DPAGE; 8006; IEF. 
491  DR   DICTYDB; DD01047; MYOA. 
492  KW   Ubiquitin conjugation; G-protein coupled receptor; Transmembrane; 
493  KW   Glycoprotein; Ligase. 
494  FT   DOMAIN       77     88       ASP/GLU-RICH (ACIDIC). 
495  FT   DOMAIN      127    150       PRO-RICH. 
496  FT   DOMAIN      420    439       ARG/GLU-RICH (MIXED CHARGE). 
497  FT   DOMAIN      448    457       ARG/ASP-RICH (MIXED CHARGE). 
498  FT   DOMAIN      485    514       PABP-LIKE. 
499  FT   DOMAIN      579    590       ASP/GLU-RICH (ACIDIC). 
500  FT   DOMAIN      786    889       HECT DOMAIN. 
501  FT   DOMAIN      827    847       PRO-RICH. 
502  FT   BINDING     858    858       UBIQUITIN (BY SIMILARITY). 
503  SQ   SEQUENCE   889 AA;  100368 MW;  DD7E6C7A CRC32; 
504       MMSARGDFLN YALSLMRSHN DEHSDVLPVL DVCSLKHVAY VFQALIYWIK AMNQQTTLDT 
505       PQLERKRTRE LLELGIDNED SEHENDDDTS QSATLNDKDD ESLPAETGQN HPFFRRSDSM 
506       TFLGCIPPNP FEVPLAEAIP LADQPHLLQP NARKEDLFGR PSQGLYSSSA GSGKCLVEVT 
507       MDRNCLEVLP TKMSYAANLK NVMNMQNRQK KAGEDQSMLA EEADSSKPGP SAHDVAAQLK 
508       SSLLAEIGLT ESEGPPLTSF RPQCSFMGMV ISHDMLLGRW RLSLELFGRV FMEDVGAEPG 
509       SILTELGGFE VKESKFRREM EKLRNQQSRD LSLEVDRDRD LLIQQTMRQL NNHFGRRCAT 
510       TPMAVHRVKV TFKDEPGEGS GVARSFYTAI AQAFLSNEKL PNLDCIQNAN KGTHTSLMQR 
511       LRNRGERDRE REREREMRRS SGLRAGSRRD RDRDFRRQLS IDTRPFRPAS EGNPSDDPDP 
512       LPAHRQALGE RLYPRVQAMQ PAFASKITGM LLELSPAQLL LLLASEDSLR ARVEEAMELI 
513       VAHGRENGAD SILDLGLLDS SEKVQENRKR HGSSRSVVDM DLDDTDDGDD NAPLFYQPGK 
514       RGFYTPRPGK NTEARLNCFR NIGRILGLCL LQNELCPITL NRHVIKVLLG RKVNWHDFAF 
515       FDPVMYESLR QLILASQSSD ADAVFSAMDL AFAVDLCKEE GGGQVELIPN GVNIPVTPQN 
516       VYEYVRKYAE HRMLVVAEQP LHAMRKGLLD VLPKNSLEDL TAEDFRLLVN GCGEVNVQML 
517       ISFTSFNDES GENAEKLLQF KRWFWSIVER MSMTERQDLV YFWTSSPSLP ASEEGFQPMP 
518       SITIRPPDDQ HLPTANTCIS RLYVPLYSSK QILKQKLLLA IKTKNFGFV 
519  // 
520  """ 
521   
522  record2 = """\ 
523  ID   12KD_FRAAN     STANDARD;      PRT;   111 AA. 
524  AC   Q05349; 
525  DT   01-OCT-1996 (Rel. 34, Created) 
526  DT   01-OCT-1996 (Rel. 34, Last sequence update) 
527  DT   01-NOV-1997 (Rel. 35, Last annotation update) 
528  DE   AUXIN-REPRESSED 12.5 KD PROTEIN. 
529  OS   Fragaria ananassa (Strawberry). 
530  OC   Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; 
531  OC   euphyllophytes; Spermatophyta; Magnoliophyta; eudicotyledons; 
532  OC   core eudicots; Rosidae; eurosids I; Rosales; Rosaceae; Fragaria. 
533  RN   [1] 
534  RP   SEQUENCE FROM N.A. 
535  RC   STRAIN=CV. OZARK BEAUTY; TISSUE=FLOWER; 
536  RX   MEDLINE; 91329668. 
537  RA   REDDY A.S.N., POOVAIAH B.W.; 
538  RT   "Molecular cloning and sequencing of a cDNA for an auxin-repressed 
539  RT   mRNA: correlation between fruit growth and repression of the 
540  RT   auxin-regulated gene."; 
541  RL   Plant Mol. Biol. 14:127-136(1990). 
542  CC   -!- FUNCTION: E3 UBIQUITIN-PROTEIN LIGASE WHICH ACCEPTS UBIQUITIN FROM 
543  CC       AN E2 UBIQUITIN-CONJUGATING ENZYME IN THE FORM OF A THIOESTER AND 
544  CC       THEN DIRECTLY TRANSFERS THE UBIQUITIN TO TARGETED SUBSTRATES (BY 
545  CC       SIMILARITY). THIS PROTEIN MAY BE INVOLVED IN MATURATION AND/OR 
546  CC       POST-TRANSCRIPTIONAL REGULATION OF MRNA. 
547  CC   -!- TISSUE SPECIFICITY: HIGHEST LEVELS FOUND IN TESTIS. ALSO PRESENT 
548  CC       IN LIVER, KIDNEY, LUNG AND BRAIN. 
549  CC   -!- DEVELOPMENTAL STAGE: IN EARLY POST-NATAL LIFE, EXPRESSION IN 
550  CC       THE TESTIS INCREASES TO REACH A MAXIMUM AROUND DAY 28. 
551  CC   -!- MISCELLANEOUS: A CYSTEINE RESIDUE IS REQUIRED FOR 
552  CC       UBIQUITIN-THIOLESTER FORMATION. 
553  CC   -!- SIMILARITY: CONTAINS AN HECT-TYPE E3 UBIQUITIN-PROTEIN LIGASE 
554  CC       DOMAIN. 
555  CC   -!- SIMILARITY: A CENTRAL REGION (AA 485-514) IS SIMILAR TO THE 
556  CC       C-TERMINAL DOMAINS OF MAMMALIAN AND YEAST POLY (A) RNA BINDING 
557  CC       PROTEINS (PABP). 
558  CC   -!- SIMILARITY: THE C-TERMINAL HALF SHOWS HIGH SIMILARITY TO 
559  CC       DROSOPHILA HYPERPLASMIC DISC PROTEIN AND SOME, TO HUMAN E6-AP. 
560  CC   -!- SIMILARITY: CONTAINS MIXED-CHARGE DOMAINS SIMILAR TO RNA-BINDING 
561  CC       PROTEINS. 
562  CC   -------------------------------------------------------------------------- 
563  CC   This SWISS-PROT entry is copyright. It is produced through a collaboration 
564  CC   between  the Swiss Institute of Bioinformatics  and the  EMBL outstation - 
565  CC   the European Bioinformatics Institute.  There are no  restrictions on  its 
566  CC   use  by  non-profit  institutions as long  as its content  is  in  no  way 
567  CC   modified and this statement is not removed.  Usage  by  and for commercial 
568  CC   entities requires a license agreement (See http://www.isb-sib.ch/announce/ 
569  CC   or send an email to license@isb-sib.ch). 
570  CC   -------------------------------------------------------------------------- 
571  DR   EMBL; X52429; CAA36676.1; -. 
572  DR   EMBL; X64411; CAA45756.1; -. 
573  DR   PFAM; PF00632; HECT; 1. 
574  DR   PFAM; PF00658; PABP; 1. 
575  DR   PROSITE; PS00107; PROTEIN_KINASE_ATP; 1. 
576  DR   AARHUS/GHENT-2DPAGE; 8006; IEF. 
577  DR   DICTYDB; DD01047; MYOA. 
578  KW   Ubiquitin conjugation; G-protein coupled receptor; Transmembrane; 
579  KW   Glycoprotein; Ligase. 
580  FT   DOMAIN       43     57       PRO/THR-RICH. 
581  SQ   SEQUENCE   111 AA;  12416 MW;  103BBA8B CRC32; 
582       MVLLDKLWDD IVAGPQPERG LGMLRKVPQP LNLKDEGESS KITMPTTPTT PVTPTTPISA 
583       RKDNVWRSVF HPGSNLSSKT MGNQVFDSPQ PNSPTVYDWM YSGETRSKHH R 
584  // 
585  """ 
586   
587  add_test("record 1", record, record1) 
588  add_test("record 2", record, record2) 
589  add_test("format", format, record1 + record2) 
590   
591   
def test():
    """Run the SWISS-PROT 38 format tests registered in this module.

    Thin wrapper that delegates to ``test_list.test()``.  ``test_list`` is
    the module-level ``support.Storage()`` instance that the ``add_test`` /
    ``add_test_lines`` calls in this module populate; how each registered
    case is actually executed is defined by the ``support`` module, which
    is not visible from here.
    """
    test_list.test()
594
def dump():
    """Dump the test cases registered in this module.

    Thin wrapper that delegates to ``test_list.dump()`` on the module-level
    ``support.Storage()`` instance.  The output format and destination are
    decided by ``support.Storage.dump`` -- NOTE(review): presumably it
    writes out the parse results for each registered case; confirm against
    the ``support`` module.
    """
    test_list.dump()
# Script entry point: run the registered tests when this module is executed
# directly rather than imported.
if __name__ == "__main__":
    test()