diff --git a/sbol_utilities/sbol3_sbol2_conversion.py b/sbol_utilities/sbol3_sbol2_conversion.py index ac44446f..0be263d8 100644 --- a/sbol_utilities/sbol3_sbol2_conversion.py +++ b/sbol_utilities/sbol3_sbol2_conversion.py @@ -1,3 +1,6 @@ +import tempfile +from pathlib import Path + import sbol3 import sbol2 from sbol2 import mapsto, model, sequenceconstraint @@ -142,38 +145,58 @@ def visit_combinatorial_derivation(self, a: sbol3.CombinatorialDerivation): # Priority: 2 raise NotImplementedError('Conversion of CombinatorialDerivation from SBOL3 to SBOL2 not yet implemented') - def visit_component(self, cp3: sbol3.Component): + def visit_component(self, comp3: sbol3.Component): # Remap type if it's one of the ones that needs remapping; otherwise pass through unchanged type_map = {sbol3.SBO_DNA: sbol2.BIOPAX_DNA, # TODO: distinguish BioPAX Dna from DnaRegion sbol3.SBO_RNA: sbol2.BIOPAX_RNA, # TODO: distinguish BioPAX Rna from RnaRegion sbol3.SBO_PROTEIN: sbol2.BIOPAX_PROTEIN, sbol3.SBO_SIMPLE_CHEMICAL: sbol2.BIOPAX_SMALL_MOLECULE, sbol3.SBO_NON_COVALENT_COMPLEX: sbol2.BIOPAX_COMPLEX} - types2 = [type_map.get(t, t) for t in cp3.types] + types2 = [type_map.get(t, t) for t in comp3.types] # Make the Component object and add it to the document - cp2 = sbol2.ComponentDefinition(cp3.identity, types2, version=self._sbol2_version(cp3)) - self.doc2.addComponentDefinition(cp2) + comp_def2 = sbol2.ComponentDefinition(comp3.identity, types2, version=self._sbol2_version(comp3)) + self.doc2.addComponentDefinition(comp_def2) # Convert the Component properties not covered by the constructor - cp2.roles = cp3.roles - cp2.sequences = cp3.sequences - if cp3.features: - raise NotImplementedError('Conversion of Component features from SBOL3 to SBOL2 not yet implemented') - if cp3.interactions: - raise NotImplementedError('Conversion of Component interactions from SBOL3 to SBOL2 not yet implemented') - if cp3.constraints: - raise NotImplementedError('Conversion of Component constraints from SBOL3 to SBOL2 not yet implemented') - if cp3.interface: + comp_def2.roles = comp3.roles + comp_def2.sequences = comp3.sequences + if comp3.features: + for feature in comp3.features: + if type(feature) == sbol3.subcomponent.SubComponent: + self.visit_sub_component(feature, comp_def2) + elif type(feature) == sbol3.compref.ComponentReference: + try: + self.visit_component_reference(feature) + except NotImplementedError as e: + # highlights the error message in red. + print(f"\033[91m{e}\033[0m") + else: + raise NotImplementedError( + 'Conversion of Component features from SBOL3 to SBOL2 not yet implemented') + if comp3.interactions: + for interaction in comp3.interactions: + try: + self.visit_interaction(interaction) + except NotImplementedError as e: + print(f"\033[91m{e}\033[0m") + if comp3.constraints: + for constraint in comp3.constraints: + try: + pass + self.visit_constraint(constraint) + except NotImplementedError as e: + print(f"\033[91m{e}\033[0m") + if comp3.interface: raise NotImplementedError('Conversion of Component interface from SBOL3 to SBOL2 not yet implemented') - if cp3.models: + if comp3.models: raise NotImplementedError('Conversion of Component models from SBOL3 to SBOL2 not yet implemented') # Map over all other TopLevel properties and extensions not covered by the constructor - self._convert_toplevel(cp3, cp2) + self._convert_toplevel(comp3, comp_def2) - def visit_component_reference(self, a: sbol3.ComponentReference): + def visit_component_reference(self, comp_ref3: sbol3.ComponentReference): # Priority: 3 raise NotImplementedError('Conversion of ComponentReference from SBOL3 to SBOL2 not yet implemented') - def visit_constraint(self, a: sbol3.Constraint): + def visit_constraint(self, constraint: sbol3.Constraint): # Priority: 2 raise NotImplementedError('Conversion of Constraint from SBOL3 to SBOL2 not yet implemented') @@ -262,7 +285,7 @@ def visit_sequence(self, seq3: sbol3.Sequence): # Map over all other TopLevel properties and extensions not covered by the constructor self._convert_toplevel(seq3, seq2) - def visit_sequence_feature(self, a: sbol3.SequenceFeature): + def visit_sequence_feature(self, feat3: sbol3.SequenceFeature): # Priority: 1 raise NotImplementedError('Conversion of SequenceFeature from SBOL3 to SBOL2 not yet implemented') @@ -270,9 +293,18 @@ def visit_singular_unit(self, a: sbol3.SingularUnit): # Priority: 4 raise NotImplementedError('Conversion of SingularUnit from SBOL3 to SBOL2 not yet implemented') - def visit_sub_component(self, a: sbol3.SubComponent): + def visit_sub_component(self, sub3: sbol3.SubComponent, + comp_def2: sbol2.ComponentDefinition): # Priority: 1 - raise NotImplementedError('Conversion of SubComponent from SBOL3 to SBOL2 not yet implemented') + # Make the Component, Module, or Functional_Component objects and add them to the document + # TODO Handle converting sub_components into Modules and FunctionalEntities when necessary + comp2 = sbol2.Component(sub3.identity) + comp2.roles = sub3.roles + comp2.roleIntegration = sub3.role_integration + comp2.sourceLocations = sub3.source_locations + comp2.definition = sub3.instance_of + comp2.displayId = sub3.display_id + comp_def2.components.add(comp2) def visit_unit_division(self, a: sbol3.UnitDivision): # Priority: 4 @@ -395,7 +427,7 @@ def visit_combinatorial_derivation(self, a: sbol2.CombinatorialDerivation): # Priority: 2 raise NotImplementedError('Conversion of CombinatorialDerivation from SBOL2 to SBOL3 not yet implemented') - def visit_component_definition(self, cd2: sbol2.ComponentDefinition): + def visit_component_definition(self, comp_def2: sbol2.ComponentDefinition, sub3_comp2_equivalencies=None): # Remap type if it's one of the ones that needs remapping; otherwise pass through unchanged type_map = {sbol2.BIOPAX_DNA: sbol3.SBO_DNA, 'http://www.biopax.org/release/biopax-level3.owl#Dna': sbol3.SBO_DNA, # TODO: make reversible @@ -404,27 +436,56 @@ def visit_component_definition(self, cd2: sbol2.ComponentDefinition): sbol2.BIOPAX_PROTEIN: sbol3.SBO_PROTEIN, sbol2.BIOPAX_SMALL_MOLECULE: sbol3.SBO_SIMPLE_CHEMICAL, sbol2.BIOPAX_COMPLEX: sbol3.SBO_NON_COVALENT_COMPLEX} - types3 = [type_map.get(t, t) for t in cd2.types] + types3 = [type_map.get(t, t) for t in comp_def2.types] # Make the Component object and add it to the document - cp3 = sbol3.Component(cd2.identity, types3, namespace=self._sbol3_namespace(cd2), - roles=cd2.roles, sequences=cd2.sequences) - self.doc3.add(cp3) + comp3 = sbol3.Component(comp_def2.identity, types3, namespace=self._sbol3_namespace(comp_def2), + roles=comp_def2.roles, sequences=comp_def2.sequences) + self.doc3.add(comp3) + # Convert the Component properties not covered by the constructor - if cd2.components: - raise NotImplementedError('Conversion of ComponentDefinition components ' - 'from SBOL2 to SBOL3 not yet implemented') - if cd2.sequenceAnnotations: + identity_mappings = {} + if comp_def2.components: + for comp2 in comp_def2.components: + self.visit_component(comp2, comp3, identity_mappings) + + if comp_def2.sequenceAnnotations: raise NotImplementedError('Conversion of ComponentDefinition sequenceAnnotations ' 'from SBOL2 to SBOL3 not yet implemented') - if cd2.sequenceConstraints: + if comp_def2.sequenceConstraints: raise NotImplementedError('Conversion of ComponentDefinition sequenceConstraints ' 'from SBOL2 to SBOL3 not yet implemented') # Map over all other TopLevel properties and extensions not covered by the constructor - self._convert_toplevel(cd2, cp3) + self._convert_toplevel(comp_def2, comp3) + self.handle_subcomponent_identity_triple_surgery(identity_mappings) - def visit_component(self, a: sbol2.Component): + def visit_component(self, comp2: sbol2.Component, comp3: sbol3.Component, identity_mappings): # Priority: 2 - raise NotImplementedError('Conversion of Component from SBOL2 to SBOL3 not yet implemented') + sub3 = sbol3.SubComponent(comp2.identity) + sub3.roles = comp2.roles + if comp2.roleIntegration: + sub3.role_integration = comp2.roleIntegration + if comp2.sourceLocations: + sub3.source_locations = comp2.sourceLocations + sub3.instance_of = comp2.definition + comp3.features += [sub3] + identity_mappings[sub3.identity] = comp2.identity + + def handle_subcomponent_identity_triple_surgery(self, identity_mappings): + with tempfile.TemporaryDirectory() as tmpdir: + temporary_file = Path(tmpdir) / 'temporary_file.nt' + self.doc3.write(temporary_file) + with open(temporary_file, 'r+') as file: + + triples = file.readlines() + for index, triple in enumerate(triples): + for old_identity, new_identity in identity_mappings.items(): + if f"<{old_identity}> " in triple: + triples[index] = triple.replace(old_identity, new_identity) + + file.seek(0) + file.writelines(triples) + file.truncate() + self.doc3.read(temporary_file) def visit_cut(self, a: sbol2.Cut): # Priority: 2 @@ -504,7 +565,7 @@ def visit_module(self, a: sbol2.Module): # Priority: 3 raise NotImplementedError('Conversion of Module from SBOL2 to SBOL3 not yet implemented') - def visit_module_definition(self, a: sbol2.ModuleDefinition): + def visit_module_definition(self, md2: sbol2.ModuleDefinition): # Priority: 3 raise NotImplementedError('Conversion of ModuleDefinition from SBOL2 to SBOL3 not yet implemented') diff --git a/test/sbol3.nt b/test/sbol3.nt new file mode 100644 index 00000000..ed4e70d4 --- /dev/null +++ b/test/sbol3.nt @@ -0,0 +1,436 @@ + "Range1" . + "3242"^^ . + . + . + "0"^^ . + . + "GenbankReference1" . + "Tyson,G.H., McDermott,P.F., Li,C., Chen,Y., Tadesse,D.A., Mukherjee,S., Bodeis-Jones,S., Kabera,C., Gaines,S.A., Loneragan,G.H., Edrington,T.S., Torrence,M., Harhay,D.M. and Zhao,S." . + "" . + "JWYZ01000115" . + "" . + "J. Antimicrob. Chemother. 70 (10), 2763-2769 (2015)" . + . + "" . + "26142410" . + "WGS accurately predicts antimicrobial resistance in Escherichia coli" . + . + . + "Range1" . + "3242"^^ . + . + . + "0"^^ . + . + "GenbankReference2" . + "Tyson,G.H., McDermott,P.F., Li,C., Tadesse,D.A., Mukherjee,S., Bodeis-Jones,S., Kabera,C., Gaines,S.A., Loneragan,G.H., Edrington,T.S., Torrence,M., Harhay,D.M. and Zhao,S." . + "" . + "JWYZ01000115" . + "" . + "Submitted (17-NOV-2014) CVM, FDA, 8401 Muirkirk Rd, Laurel, MD 20708, USA" . + . + "" . + "" . + "Direct Submission" . + . + . + "GenbankStructuredComment1" . + "JWYZ01000115" . + "Genome-Assembly-Data" . + "1::Assembly Method" . + "2::Assembly Name" . + "3::Genome Coverage" . + "4::Sequencing Technology" . + "1::CLC Genomics Workbench v. 7.5" . + "2::Escherichia coli CVM N37069PS v1.0" . + "3::48.5x" . + "4::Illumina MiSeq" . + . + . + "GenbankStructuredComment2" . + "JWYZ01000115" . + "Genome-Annotation-Data" . + "10::CRISPR Arrays" . + "11::rRNAs" . + "12::tRNAs" . + "13::ncRNA" . + "14::Frameshifted Genes" . + "1::Annotation Provider" . + "2::Annotation Date" . + "3::Annotation Pipeline" . + "4::Annotation Method" . + "5::Annotation Software revision" . + "6::Features Annotated" . + "7::Genes" . + "8::CDS" . + "9::Pseudo Genes" . + "10::2" . + "11::11 (5S, 16S, 23S)" . + "12::78" . + "13::17" . + "14::41" . + "1::NCBI" . + "2::12/29/2014 14:07:05" . + "3::NCBI Prokaryotic Genome Annotation Pipeline" . + "4::Best-placed reference protein set; GeneMarkS+" . + "5::2.9 (rev. 455303)" . + "6::Gene; CDS; rRNA; tRNA; ncRNA; repeat_region" . + "7::4,855" . + "8::4,642" . + "9::107" . + . + . + "Range1" . + "115"^^ . + . + . + "1"^^ . + . + "Range1" . + "2299"^^ . + . + . + "1706"^^ . + . + "SequenceFeature10" . + . + . + . + "0:locus_tag" . + "0:PU64_23680" . + . + "Range1" . + "2299"^^ . + . + . + "1706"^^ . + . + "SequenceFeature11" . + . + . + . + "0:locus_tag" . + "1:inference" . + "2:note" . + "3:codon_start" . + "4:transl_table" . + "5:product" . + "6:protein_id" . + "7:translation" . + "0:PU64_23680" . + "1:EXISTENCE: similar to AA sequence:RefSeq:WP_001544295.1" . + "2:Derived by automated computational analysis using gene prediction method: Protein Homology." . + "3:1" . + "4:11" . + "5:TetR family transcriptional regulator" . + "6:KIG36582.1" . + "7:MVTKKQSRVPGRPRRFAPEQAISAAKVLFHQKGFDAVSVAEVTDYLGINPPSLYAAFGSKAGLFSRVLNEYVGTEAIPLADILRDDRPVGECLVEVLKEAARRYSQNGGCAGCMVLEGIHSHDPLARDIAVQYYHAAETTIYDYIARRHPQSAQCVTDFMSTVMSGLSAKAREGHSIEQLCATAALAGEAIKTLLKE" . + . + "Range1" . + "2896"^^ . + . + . + "2444"^^ . + . + "SequenceFeature12" . + . + . + . + "0:locus_tag" . + "0:PU64_23685" . + . + "Range1" . + "2896"^^ . + . + . + "2444"^^ . + . + "SequenceFeature13" . + . + . + . + "0:locus_tag" . + "1:inference" . + "2:note" . + "3:codon_start" . + "4:transl_table" . + "5:product" . + "6:protein_id" . + "7:translation" . + "0:PU64_23685" . + "1:EXISTENCE: similar to AA sequence:RefSeq:WP_001570607.1" . + "2:Derived by automated computational analysis using gene prediction method: Protein Homology." . + "3:1" . + "4:11" . + "5:toxin-antitoxin biofilm protein TabA" . + "6:KIG36583.1" . + "7:MIIGNIHNLQPWLPQELRQAIEHIKAHVTAETPKGKHDIEGNHLFYLISEDMTEPYEARRAEYHARYLDIQIVLKGQEGMTFSTQPAGTPDTDWLADKDIAFLPEGVDEKTVILNEGDFVVFYPGEVHKPLCAVGAPAQVRKAVVKMLMA" . + . + "Range1" . + "3242"^^ . + . + . + "3019"^^ . + . + "SequenceFeature14" . + . + . + . + "0:locus_tag" . + "0:PU64_23690" . + . + "Range1" . + "3242"^^ . + . + . + "3019"^^ . + . + "SequenceFeature15" . + . + . + . + "0:locus_tag" . + "1:inference" . + "2:note" . + "3:codon_start" . + "4:transl_table" . + "5:product" . + "6:protein_id" . + "7:translation" . + "0:PU64_23690" . + "1:EXISTENCE: similar to AA sequence:RefSeq:WP_000036524.1" . + "2:Derived by automated computational analysis using gene prediction method: Protein Homology." . + "3:1" . + "4:11" . + "5:DNA-binding protein" . + "6:KIG36584.1" . + "7:MSKISGWNFSQNITSADNCKQKNEDLDTWYVGMNDFARIAGGQNSRSNILSPRAFLEFLAKIFTLGYVDFSKRS" . + . + "SequenceFeature1" . + . + . + . + "0:locus_tag" . + "0:PU64_23660" . + . + "Range1" . + "115"^^ . + . + . + "1"^^ . + . + "SequenceFeature2" . + . + . + . + "0:locus_tag" . + "1:inference" . + "2:note" . + "3:codon_start" . + "4:transl_table" . + "5:product" . + "6:protein_id" . + "7:translation" . + "0:PU64_23660" . + "1:EXISTENCE: similar to AA sequence:RefSeq:WP_005059815.1" . + "2:Derived by automated computational analysis using gene prediction method: Protein Homology." . + "3:1" . + "4:11" . + "5:pyrBI operon leader peptide" . + "6:KIG36579.1" . + "7:MVQCVRHFVLPRLKKDAGLPFFFPLITHSQPLNRGAFF" . + . + "Range1" . + "3242"^^ . + . + . + "1"^^ . + . + "SequenceFeature3" . + . + . + . + "0:organism" . + "1:mol_type" . + "2:submitter_seqid" . + "3:strain" . + "4:isolation_source" . + "5:db_xref" . + "6:country" . + "7:collection_date" . + "0:Escherichia coli" . + "1:genomic DNA" . + "2:N37069PS_contig_115" . + "3:CVM N37069PS" . + "4:Farm" . + "5:taxon:562" . + "6:USA" . + "7:20-Jan-2012" . + . + "Range1" . + "427"^^ . + . + . + "95"^^ . + . + "SequenceFeature4" . + . + . + . + "0:locus_tag" . + "0:PU64_23665" . + . + "Range1" . + "427"^^ . + . + . + "95"^^ . + . + "SequenceFeature5" . + . + . + . + "0:locus_tag" . + "1:inference" . + "2:note" . + "3:codon_start" . + "4:transl_table" . + "5:product" . + "6:protein_id" . + "7:translation" . + "0:PU64_23665" . + "1:EXISTENCE: similar to AA sequence:RefSeq:WP_001349257.1" . + "2:Derived by automated computational analysis using gene prediction method: Protein Homology." . + "3:1" . + "4:11" . + "5:hypothetical protein" . + "6:KIG36585.1" . + "7:MSNTLNHTSSRQIVRHYTHRQKRRKHLMQYFVSANGLFELKVKVYAFLFDVILQGNCPSVSIIADIPCFFLFHFHAIRYAFYSIHPTYRAECESERLTLLLTAQGCALSL" . + . + "Range1" . + "791"^^ . + . + . + "396"^^ . + . + "SequenceFeature6" . + . + . + . + "0:locus_tag" . + "0:PU64_23670" . + . + "Range1" . + "791"^^ . + . + . + "396"^^ . + . + "SequenceFeature7" . + . + . + . + "0:locus_tag" . + "1:inference" . + "2:note" . + "3:codon_start" . + "4:transl_table" . + "5:product" . + "6:protein_id" . + "7:translation" . + "0:PU64_23670" . + "1:EXISTENCE: similar to AA sequence:RefSeq:WP_001701843.1" . + "2:Derived by automated computational analysis using gene prediction method: Protein Homology." . + "3:1" . + "4:11" . + "5:mRNA endoribonuclease" . + "6:KIG36580.1" . + "7:MVERTAVFPAGRHSLYAEHRYSAAIRSGDLLFVSGQVGSREDGTPEPDFQQQVRLAFDNLHATLAAAGCTFDDIIDVTSFHTDPENQFEDIMTVKNEIFSAPPYPNWTAVGVTWLAGFDFEIKVIARIPEQ" . + . + "Range1" . + "1635"^^ . + . + . + "922"^^ . + . + "SequenceFeature8" . + . + . + . + "0:locus_tag" . + "0:PU64_23675" . + . + "Range1" . + "1635"^^ . + . + . + "922"^^ . + . + "SequenceFeature9" . + . + . + . + "0:locus_tag" . + "1:inference" . + "2:note" . + "3:codon_start" . + "4:transl_table" . + "5:product" . + "6:protein_id" . + "7:translation" . + "0:PU64_23675" . + "1:EXISTENCE: similar to AA sequence:SwissProt:P39333.2" . + "2:Derived by automated computational analysis using gene prediction method: Protein Homology." . + "3:1" . + "4:11" . + "5:oxidoreductase" . + "6:KIG36581.1" . + "7:MGAFTGKTVLILGGSRGIGAAIVRRFVTDGANVRFTYAGSKDAAKRLAQETGATAVFTDSADRDAVIDVVRKSGALDILVVNAGIGVFGEALELNADDIDRLFKINIHAPYHASVEAARQMPEGGRILIIGSVNGDRMPVAGMAAYAASKSALQGMARGLARDFGPRGITINVVQPGPIDTDANPANGPMRDMLHSLMAIKRHGQPEEVAGMVAWLAGPEASFVTGAMHTIDGAFGA" . + . + "Escherichia coli strain CVM N37069PS N37069PS_contig_115, whole genome shotgun sequence" . + "JWYZ01000115" . + . + . + . + . + . + . + . + . + . + . + . + . + . + . + . + . + . + . + . + . + "JWYZ01000115" . + "Annotation was added by the NCBI Prokaryotic Genome Annotation\nPipeline (released 2013). Information about the Pipeline can be\nfound here: http://www.ncbi.nlm.nih.gov/genome/annotation_prok/" . + "16-JAN-2015" . + "BioProject:PRJNA266657::BioSample:SAMN03177677" . + "BCT" . + "JWYZ01000115.1" . + "WGS" . + "JWYZ01000115" . + "DNA" . + "JWYZ01000115" . + "Escherichia coli" . + . + . + "1"^^ . + "Escherichia coli" . + . + . + "Bacteria,Proteobacteria,Gammaproteobacteria,Enterobacterales,Enterobacteriaceae,Escherichia" . + "linear" . + . + "JWYZ01000115_sequence" . + "aaaaaaaagcccctcgattgaggggctgggaatgggtgatcaacgggaagaaaaacggcaggccagcgtcttttttcagacgcggtaagacaaaatgtcgaacacactgaaccatacatcctcccggcaaattgtccggcattatactcatcgtcagaagcggcgcaagcatttgatgcaatattttgtcagcgcaaacggtttatttgaattaaaagtcaaggtatatgcatttttatttgatgtgattctgcaggggaactgtccttcggtatcaataattgcagacattccctgctttttcctttttcactttcacgcaatcagatatgcattttattccattcatccgacttatagggcggagtgtgaaagcgaacggctaacactattgcttactgctcagggatgcgcgctatcactttaatttcaaaatcaaagcctgccagccatgtaacacccaccgccgtccagtttggataaggtggggcgctaaatatttcatttttcaccgtcatgatgtcttcaaattggttttctggatcggtatggaagctcgtaacatcaatgatatcgtcaaaagtgcatcccgcagctgccagggtcgcatgcaaattatcaaatgccagtctgacttgttgctgaaaatcgggttctggtgttccgtcctctcgacttcctacttgcccggaaacaaacagcaaatcgccggaacgaatagccgcagaataacgatgctcagcatatagtgaatgtcggccagcagggaaaacagcggttctttctaccatttggttatcctcaagatttacgacatgaacagaagatttctctttaccgggagccgcttttagcggacgacgtgagtaaacaaaacccagacatcatggataatggctgggcttaattgagcgtagtcggttatgcgccaaacgcgccatcaatggtatgcattgcgccggtaacaaaactggcttctggccctgctaaccatgcgaccataccagcgacctcttccggttgcccatgtcttttgatagccatcaaactatgcaacatatcgcgcattggcccgttggcgggattagcgtcggtatcaattggccctggctggacgacgttaatggtgatcccacgcggtccaaaatcacgggccagcccgcgcgccatgccttgcagggcagatttgctggcggcataagcagccatgcctgcaacaggcatacgatcgccattcacggagccgatgattaagatgcgcccgccttcgggcatctgccgggcggcttcaacagaggcatgataaggagcatgaatattgattttgaaaaggcgatcaatatcgtcggcatttaattccagggcctcgccaaagacgccaatacctgcatttaccaccaggatatccaatgcgccgctcttacgaacgacatcaatgacagcgtctctgtcagcactatctgtgaatactgctgtcgctccagtctcttgtgccaggcgtttagcggcatctttcgaccccgcataggtgaatcgtacattggccccatcggtgacgaaacgacgtacgatagcggcaccgataccacgactgccaccgaggatgagaactgtcttacctgtaaaagcgcccataaggactccttgatttattatgtaacatgcattacaaaactgttttaactttctgtcaacatgttttgtaatggtcactaaaaaacaatctcgcgttccaggtcgtcccagacgtttcgctcctgagcaggcaatctctgcggcaaaagtgctttttcaccaaaaaggtttcgatgctgtcagtgttgctgaagttactgattatcttggtattaaccccccgagcctctacgcggcttttggcagtaaagctgggttatttagccgtgtactcaatgaatacgtcggtacggaagctattccgcttgccgatattcttcgtgatgatcgtccagtaggcgagtgcctggttgaggtattaaaagaagcggcgcgcagatatagccaaaacggcggctgcgctggctgtatggttcttgaaggtattcatagtcatgatccactagcgcgtgatattgccgttcaatattatcacgccgcagaaacgaccatttatgactatatcgccaggcggcatccacaaagcgcacaatgtgtgactgattttatgagtaccgtgatgtcagggctttctgcgaaggcacgggaggggcactcaatcgaacaactctgtgcaacagctgcactggcgggggaagcgataaaaactcttctcaaggagtgatgctggccttgatccgaaaggcgggaacgcgcctgccgataagttgtgataagacaataattcacgcattaaggctagcggaattgatcatcttttcgtataacgatagaaatgaaacgttgttttaattaaggagtggaaaagatgatcatcggaaatattcataatcttcagccgtggctaccccaggagttacgccaggcgattgagcatatcaaagcacacgttacggcagaaacgccaaagggcaagcacgatatcgaaggcaatcatctgttttatcttatctcggaagatatgaccgagccgtacgaagcccgccgtgcggagtaccatgcccgctatctcgacattcagattgtgttaaaaggtcaggaaggcatgaccttcagcacgcaacctgcaggcacgccggataccgactggttagctgataaagacatcgcatttttgccggaaggcgttgatgagaaaacagttatccttaatgaaggtgattttgttgtgttttatccgggggaagtgcataaaccgctgtgcgcagtgggtgcaccagcccaggttcgcaaagcagtagtgaagatgctgatggcgtgatgacttttcgccgtaaataactccaggtttacggcgagtttgtgaaaagagcgttttttgatatttttttgtgagtaaaatttgtaatgcttagacgttcttattaactcaaggagtccgtcatgagcaaaatatcaggttggaatttttctcaaaacattacatcagccgacaattgtaaacaaaaaaatgaagacttagatacctggtatgtgggaatgaatgattttgcccgaattgccggagggcagaatagcagaagcaatattctttctcccagagcatttttggagtttttggctaagatatttaccctgggttatgtggattttagcaaacgctccaa" . + . + . + . diff --git a/test/test.nt b/test/test.nt new file mode 100644 index 00000000..e69de29b diff --git a/test/test.xml b/test/test.xml new file mode 100644 index 00000000..e69de29b diff --git a/test/test_files/sbol3_toggle_switch.nt b/test/test_files/sbol3_toggle_switch.nt new file mode 100644 index 00000000..d67e540b --- /dev/null +++ b/test/test_files/sbol3_toggle_switch.nt @@ -0,0 +1,370 @@ + . + . + "lacI" . + . + "lacI" . + "lacI coding sequence" . + . + . + "SubComponent6" . + . + . + . + "tetR terminator" . + . + "ter_tetR" . + "Terminator" . + . + . + "IPTG_LacI" . + . + "IPTG_LacI" . + "IPTG_LacI complex" . + . + . + . + . + "Constraint2" . + . + . + . + "lacI terminator" . + . + "ter_lacI" . + "Terminator" . + . + . + . + "SubComponent6" . + . + . + . + "IPTG" . + . + "IPTG" . + "IPTG" . + . + . + . + "Participation1" . + . + . + . + "Participation1" . + . + . + . + "pTetR" . + . + "pTetR" . + "TetR repressible promoter" . + . + . + . + "LacI" . + . + "LacI_protein" . + "LacI protein" . + . + . + . + "rbs" . + . + "rbs_gfp" . + "RBS" . + . + . + "SubComponent5" . + . + . + . + . + . + "Interaction4" . + . + . + "atC_TetR" . + . + "atC_TetR" . + "atC_TetR complex" . + . + . + . + . + "Constraint1" . + . + . + . + "SubComponent5" . + . + . + . + "ComponentReference4" . + . + . + . + "SubComponent4" . + . + . + . + . + . + "Interaction3" . + . + . + "SubComponent10" . + . + . + . + . + "Interaction3" . + . + . + . + "Participation3" . + . + . + . + "TetR" . + . + "TetR_protein" . + "TetR protein" . + . + . + . + . + "toggle_switch" . + "Toggle Switch genetic circuit" . + . + . + . + . + . + . + "Toggle Switch" . + . + . + . + . + "SubComponent4" . + . + . + . + "pLacI promoter" . + . + "pLacI" . + "LacI repressible promoter" . + . + . + "SubComponent9" . + . + . + . + "ComponentReference3" . + . + . + . + "rbs" . + . + "rbs_tetR" . + "tetR RBS" . + . + . + . + "gfp" . + . + "gfp" . + "gfp coding sequence" . + . + . + . + . + . + . + "LacI producer" . + "LacI_producer" . + . + . + . + . + . + . + . + . + . + "LacI producer" . + . + . + . + . + . + . + "SubComponent3" . + . + . + . + . + "Interaction2" . + . + . + . + "Participation2" . + . + . + "SubComponent2" . + . + . + . + "Participation2" . + . + . + . + . + "Interaction2" . + . + . + . + "Participation2" . + . + . + . + "Participation2" . + . + . + "SubComponent9" . + . + . + . + "rbs" . + . + "rbs_lacI" . + "RBS" . + . + . + . + . + "TetR_producer" . + . + . + . + . + . + . + . + . + "TetR producer" . + . + . + "TetR device" . + . + . + . + . + . + "SubComponent3" . + . + . + "SubComponent8" . + . + . + . + "ComponentReference2" . + . + . + . + "tetR" . + . + "tetR" . + "tetR coding sequence" . + . + . + . + "Participation2" . + . + . + . + "SubComponent2" . + . + . + . + . + "Interaction1" . + . + . + . + . + "Interaction1" . + . + . + "SubComponent1" . + . + . + . + "Participation1" . + . + . + . + "Participation1" . + . + . + . + "Participation1" . + . + . + . + "Participation1" . + . + . + "SubComponent8" . + . + . + "GFP" . + . + "GFP_protein" . + "GFP" . + . + . + . + "SubComponent2" . + . + . + . + "Participation3" . + . + . + . + "aTC" . + . + "aTC" . + "aTC" . + . + . + "SubComponent7" . + . + . + . + "ComponentReference1" . + . + . + . + "Participation1" . + . + . + . + "SubComponent1" . + . + . + "SubComponent7" . + . + . + . + "SubComponent1" . + . + . + . + "Participation2" . + . + . + . + "Participation2" . + . diff --git a/test/test_files/subcomponent_test_3.nt b/test/test_files/subcomponent_test_3.nt new file mode 100644 index 00000000..26ab10bc --- /dev/null +++ b/test/test_files/subcomponent_test_3.nt @@ -0,0 +1,62 @@ + "B0015" . + . + . + "double terminator" . + . + . + . + "B0015_seq" . + "ccaggcatcaaataaaacgaaaggctcagtcgaaagactgggcctttcgttttatctgttgtttgtcggtgaacgctctctactagagtcacactggctcaccttcgggtgggcctttctgcgtttata" . + . + . + . + "B0034" . + . + . + "RBS (Elowitz 1999)" . + . + . + . + "B0034_seq" . + "aaagaggagaaa" . + . + . + . + "E0040" . + . + . + "GFP" . + . + . + . + "E0040_seq" . + "atgcgtaaaggagaagaacttttcactggagttgtcccaattcttgttgaattagatggtgatgttaatgggcacaaattttctgtcagtggagagggtgaaggtgatgcaacatacggaaaacttacccttaaatttatttgcactactggaaaactacctgttccatggccaacacttgtcactactttcggttatggtgttcaatgctttgcgagatacccagatcatatgaaacagcatgactttttcaagagtgccatgcccgaaggttatgtacaggaaagaactatatttttcaaagatgacgggaactacaagacacgtgctgaagtcaagtttgaaggtgatacccttgttaatagaatcgagttaaaaggtattgattttaaagaagatggaaacattcttggacacaaattggaatacaactataactcacacaatgtatacatcatggcagacaaacaaaagaatggaatcaaagttaacttcaaaattagacacaacattgaagatggaagcgttcaactagcagaccattatcaacaaaatactccaattggcgatggccctgtccttttaccagacaaccattacctgtccacacaatctgccctttcgaaagatcccaacgaaaagagagaccacatggtccttcttgagtttgtaacagctgctgggattacacatggcatggatgaactatacaaataataa" . + . + . + . + "SubComponent1" . + . + . + "SubComponent2" . + . + . + "SubComponent3" . + . + . + "GFP expression cassette used for 2016 iGEM interlab study" . + "i13504" . + . + . + . + . + "iGEM 2016 interlab reporter" . + . + . + . + "1" . + "1" . + "1" . + "1" . + "1" . + "1" . + "1" . diff --git a/test/test_sbol2_sbol3_direct.py b/test/test_sbol2_sbol3_direct.py index 94791fcf..5450a991 100644 --- a/test/test_sbol2_sbol3_direct.py +++ b/test/test_sbol2_sbol3_direct.py @@ -22,8 +22,8 @@ def test_3to2_conversion(self): doc3.read(TEST_FILES / 'BBa_J23101_patched.nt') # Convert to SBOL2 and check contents doc2 = convert3to2(doc3, True) - #report = doc2.validate() - #self.assertEqual(len(report), 0, f'Validation failed: {report}') + # report = doc2.validate() + # self.assertEqual(len(report), 0, f'Validation failed: {report}') with tempfile.TemporaryDirectory() as tmpdir: tmp2 = Path(tmpdir) / 'doc2.xml' doc2.write(tmp2) @@ -60,8 +60,8 @@ def test_3to2_implementation_conversion(self): doc3.read(TEST_FILES / 'sbol3_implementation.nt') # Convert to SBOL2 and check contents doc2 = convert3to2(doc3, True) - #report = doc2.validate() - #self.assertEqual(len(report), 0, f'Validation failed: {report}') + # report = doc2.validate() + # self.assertEqual(len(report), 0, f'Validation failed: {report}') with tempfile.TemporaryDirectory() as tmpdir: tmp2 = Path(tmpdir) / 'doc2.xml' doc2.write(tmp2) @@ -98,8 +98,8 @@ def test_3to2_collection_conversion(self): doc3.read(TEST_FILES / 'sbol3_collection.nt') # Convert to SBOL2 and check contents doc2 = convert3to2(doc3, True) - #report = doc2.validate() - #self.assertEqual(len(report), 0, f'Validation failed: {report}') + # report = doc2.validate() + # self.assertEqual(len(report), 0, f'Validation failed: {report}') with tempfile.TemporaryDirectory() as tmpdir: tmp2 = Path(tmpdir) / 'doc2.xml' doc2.write(tmp2) @@ -129,6 +129,37 @@ def test_2to3_collection_conversion(self): doc2_loop.write(tmp2) self.assertFalse(file_diff(str(tmp2), str(TEST_FILES / 'sbol_3to2_collection.xml'))) + # @unittest.skip("Feature in Progress") + def test_3to2_subcomponent_test(self): + """Test ability to convert a sub_component from SBOL3 to SBOL2""" + # Load an SBOL3 document and check its contents + doc3 = sbol3.Document() + doc3.read(TEST_FILES / 'subcomponent_test_3.nt') + + # Convert to SBOL2 and check contents + doc2 = sbol2.Document() + doc2 = convert3to2(doc3, use_native_converter=True) + + # report = doc2.validate() + # self.assertEqual(len(report), 0, f'Validation failed: {report}') + with tempfile.TemporaryDirectory() as tmpdir: + tmp2 = Path(tmpdir) / 'doc2.xml' + doc2.write(tmp2) + with open(tmp2, "r") as file: + # Read the contents of the file + file_contents = file.read() + + # self.assertFalse(file_diff(str(tmp2), str(TEST_FILES / 'sbol_3to2_collection.xml'))) + + doc3_loop = convert2to3(doc2, use_native_converter=True) + self.assertEqual(len(doc3_loop.validate()), 0) + tmp3 = Path(tmpdir) / 'doc3_loop.nt' + doc3_loop.write(tmp3) + + self.assertFalse(file_diff(str(tmp3), str(TEST_FILES / 'subcomponent_test_3.nt'))) + + # ToDo: add a test with two components, each with two subcomponents + if __name__ == '__main__': unittest.main()