diff --git a/sbol_utilities/component.py b/sbol_utilities/component.py index f2375024..ec047ec0 100644 --- a/sbol_utilities/component.py +++ b/sbol_utilities/component.py @@ -1,15 +1,16 @@ from __future__ import annotations -from typing import Callable, Dict, Iterable, List, Union, Set, Optional, Tuple +from typing import Dict, Iterable, List, Union, Optional, Tuple import sbol3 import tyto -from sbol_utilities.helper_functions import id_sort, find_child, find_top_level, SBOL3PassiveVisitor, cached_references, is_plasmid +from sbol_utilities.helper_functions import id_sort, find_child, find_top_level, SBOL3PassiveVisitor, cached_references, is_plasmid, is_circular from sbol_utilities.workarounds import get_parent from Bio import Restriction - +from pydna.dseqrecord import Dseqrecord +from itertools import product # TODO: consider allowing return of LocalSubComponent and ExternallyDefined def contained_components(roots: Union[sbol3.TopLevel, Iterable[sbol3.TopLevel]]) -> set[sbol3.Component]: @@ -88,34 +89,13 @@ def has_dna_type(component: sbol3.Component) -> bool: # there must be atleast 1 SO role, among others def check_roles(component: sbol3.Component) -> bool: - try: - return any(tyto.SO.get_term_by_uri(role) for role in component.roles) - except LookupError: - return False + return any(tyto.SO.get_term_by_uri(role) for role in component.roles) # check all conditions return isinstance(obj, sbol3.Component) and check_roles(obj) \ and has_dna_type(obj) and len(obj.sequences) == 1 -def by_roles(required_role: str) -> Callable[[sbol3.TopLevel], bool]: - """Given an object and a role, check if it is one of the roles of the object. - - :param required_role: the role which must be present in given object - :return: lambda function taking an obj to check roles in, returns bool - """ - return lambda obj: isinstance(obj, sbol3.Component) and required_role in obj.roles - - -def by_types(required_type: str) -> Callable[[sbol3.TopLevel], bool]: - """Given an object and a type, check if it is one of the types of the object. - - :param required_type: the type which must be present in given object - :return: lambda function taking an obj to check types in, returns bool - """ - return lambda obj: isinstance(obj, sbol3.Component) and required_type in obj.types - - def ensure_singleton_feature(system: sbol3.Component, target: Union[sbol3.Feature, sbol3.Component]): """Return a feature associated with the target, i.e., the target itself if a feature, or a SubComponent. If the target is not already in the system, add it. @@ -367,7 +347,7 @@ def rbs(identity: str, sequence: str, **kwargs) -> Tuple[sbol3.Component, sbol3. :return: A tuple of Component and Sequence. """ rbs_component, rbs_seq = dna_component_with_sequence(identity, sequence, **kwargs) - rbs_component.roles. append(sbol3.SO_RBS) + rbs_component.roles.append(sbol3.SO_RBS) return rbs_component, rbs_seq @@ -380,7 +360,7 @@ def cds(identity: str, sequence: str, **kwargs) -> Tuple[sbol3.Component, sbol3. :return: A tuple of Component and Sequence. """ cds_component, cds_seq = dna_component_with_sequence(identity, sequence, **kwargs) - cds_component.roles. append(sbol3.SO_CDS) + cds_component.roles.append(sbol3.SO_CDS) return cds_component, cds_seq @@ -393,7 +373,7 @@ def terminator(identity: str, sequence: str, **kwargs) -> Tuple[sbol3.Component, :return: A tuple of Component and Sequence. """ terminator_component, terminator_seq = dna_component_with_sequence(identity, sequence, **kwargs) - terminator_component.roles. append(sbol3.SO_TERMINATOR) + terminator_component.roles.append(sbol3.SO_TERMINATOR) return terminator_component, terminator_seq @@ -406,7 +386,7 @@ def protein_stability_element(identity: str, sequence: str, **kwargs) -> Tuple[s :return: A tuple of Component and Sequence. """ pse_component, protein_stability_element_seq = dna_component_with_sequence(identity, sequence, **kwargs) - pse_component.roles. append(tyto.SO.protein_stability_element) + pse_component.roles.append(tyto.SO.protein_stability_element) return pse_component, protein_stability_element_seq @@ -419,7 +399,7 @@ def gene(identity: str, sequence: str, **kwargs) -> Tuple[sbol3.Component, sbol3 :return: A tuple of Component and Sequence. """ gene_component, gene_seq = dna_component_with_sequence(identity, sequence, **kwargs) - gene_component.roles. append(sbol3.SO_GENE) + gene_component.roles.append(sbol3.SO_GENE) return gene_component, gene_seq @@ -432,11 +412,11 @@ def operator(identity: str, sequence: str, **kwargs) -> Tuple[sbol3.Component, s :return: A tuple of Component and Sequence. """ operator_component, operator_seq = dna_component_with_sequence(identity, sequence, **kwargs) - operator_component.roles. append(sbol3.SO_OPERATOR) + operator_component.roles.append(sbol3.SO_OPERATOR) return operator_component, operator_seq -def engineered_region(identity: str, features: Union[List[sbol3.SubComponent], List[sbol3.Component]], **kwargs) \ +def engineered_region(identity: str, features: Union[List[sbol3.SubComponent], List[sbol3.Component]], fix_order: bool=True, **kwargs) \ -> sbol3.Component: """Creates an Engineered Region Component, with features assumed to be in linear order @@ -451,12 +431,13 @@ def engineered_region(identity: str, features: Union[List[sbol3.SubComponent], L if isinstance(to_add, sbol3.Component): to_add = sbol3.SubComponent(to_add) er_component.features.append(to_add) - if len(er_component.features) > 1: - for i in range(len(er_component.features)-1): - constraint = sbol3.Constraint(sbol3.SBOL_PRECEDES, er_component.features[i], er_component.features[i + 1]) - er_component.constraints = [constraint] - else: - pass + if fix_order == True: + if len(er_component.features) > 1: + for i in range(len(er_component.features)-1): + constraint = sbol3.Constraint(sbol3.SBOL_PRECEDES, er_component.features[i], er_component.features[i+1]) + er_component.constraints.append(constraint) + else: + pass return er_component @@ -469,7 +450,7 @@ def mrna(identity: str, sequence: str, **kwargs) -> Tuple[sbol3.Component, sbol3 :return: A tuple of Component and Sequence. """ mrna_component, mrna_seq = rna_component_with_sequence(identity, sequence, **kwargs) - mrna_component.roles. append(sbol3.SO_MRNA) + mrna_component.roles.append(sbol3.SO_MRNA) return mrna_component, mrna_seq @@ -482,7 +463,7 @@ def transcription_factor(identity: str, sequence: str, **kwargs) -> Tuple[sbol3. :return: A tuple of Component and Sequence. """ tf_component, transcription_factor_seq = protein_component_with_sequence(identity, sequence, **kwargs) - tf_component.roles. append(sbol3.SO_TRANSCRIPTION_FACTOR) + tf_component.roles.append(sbol3.SO_TRANSCRIPTION_FACTOR) return tf_component, transcription_factor_seq @@ -561,7 +542,7 @@ def backbone(identity: str, sequence: str, dropout_location: List[int], fusion_s :param sequence: The DNA sequence of the Component encoded in IUPAC. :param dropout_location: List of 2 integers that indicates the start and the end of the dropout sequence including overhangs. Note that the index of the first location is 1, as is typical practice in biology, rather than 0, as is typical practice in computer science. :param fusion_site_length: Integer of the lenght of the fusion sites (eg. BsaI fusion site lenght is 4, SapI fusion site lenght is 3) - :param linear: Boolean than indicates if the backbone is linear, by default it is seted to Flase which means that it has a circular topology. + :param linear: Boolean than indicates if the backbone is linear, by default it is seted to False which means that it has a circular topology. :param kwargs: Keyword arguments of any other Component attribute. :return: A tuple of Component and Sequence. """ @@ -593,6 +574,52 @@ def backbone(identity: str, sequence: str, dropout_location: List[int], fusion_s backbone_component.constraints.append(backbone_dropout_meets) return backbone_component, backbone_seq +def backbone_from_sbol(identity: Union[str,None], sbol_comp: sbol3.Component, dropout_location: List[int], fusion_site_length:int, linear:bool, **kwargs) -> Tuple[sbol3.Component, sbol3.Sequence]: + """Creates a Backbone Component and its Sequence. + + :param identity: The identity of the Component. The identity of Sequence is also identity with the suffix '_seq'. + :param sbol_comp: The SBOL Component containing the DNA sequence to use. + :param dropout_location: List of 2 integers that indicates the start and the end of the dropout sequence including overhangs. Note that the index of the first location is 1, as is typical practice in biology, rather than 0, as is typical practice in computer science. + :param fusion_site_length: Integer of the lenght of the fusion sites (eg. BsaI fusion site lenght is 4, SapI fusion site lenght is 3) + :param linear: Boolean than indicates if the backbone is linear, by default it is seted to False which means that it has a circular topology. + :param kwargs: Keyword arguments of any other Component attribute. + :return: A tuple of Component and Sequence. + """ + if len(dropout_location) != 2: + raise ValueError('The dropout_location only accepts 2 int values in a list.') + if len(sbol_comp.sequences)!=1: + raise ValueError(f'The reactant needs to have precisely one sequence. The input reactant has {len(sbol_comp.sequences)} sequences') + sequence = str(sbol_comp.sequences[0].lookup().elements) + if identity == None: + backbone_component = sbol_comp + backbone_seq = sbol_comp.sequences[0] + else: + backbone_component, backbone_seq = dna_component_with_sequence(identity, sequence, **kwargs) + backbone_component.roles.append(sbol3.SO_DOUBLE_STRANDED) + dropout_location_comp = sbol3.Range(sequence=backbone_seq, start=dropout_location[0], end=dropout_location[1]) + insertion_site_location1 = sbol3.Range(sequence=backbone_seq, start=dropout_location[0], end=dropout_location[0]+fusion_site_length, order=1) + insertion_site_location2 = sbol3.Range(sequence=backbone_seq, start=dropout_location[1]-fusion_site_length, end=dropout_location[1], order=3) + dropout_sequence_feature = sbol3.SequenceFeature(locations=[dropout_location_comp], roles=[tyto.SO.deletion]) + insertion_sites_feature = sbol3.SequenceFeature(locations=[insertion_site_location1, insertion_site_location2], roles=[tyto.SO.insertion_site]) + if linear: + backbone_component.types.append(sbol3.SO_LINEAR) + backbone_component.roles.append(sbol3.SO_ENGINEERED_REGION) + open_backbone_location1 = sbol3.Range(sequence=backbone_seq, start=1, end=dropout_location[0]+fusion_site_length-1, order=1) + open_backbone_location2 = sbol3.Range(sequence=backbone_seq, start=dropout_location[1]-fusion_site_length, end=len(sequence), order=3) + open_backbone_feature = sbol3.SequenceFeature(locations=[open_backbone_location1, open_backbone_location2]) + else: + backbone_component.types.append(sbol3.SO_CIRCULAR) + backbone_component.roles.append(tyto.SO.plasmid_vector) + open_backbone_location1 = sbol3.Range(sequence=backbone_seq, start=1, end=dropout_location[0]+fusion_site_length-1, order=2) + open_backbone_location2 = sbol3.Range(sequence=backbone_seq, start=dropout_location[1]-fusion_site_length, end=len(sequence), order=1) + open_backbone_feature = sbol3.SequenceFeature(locations=[open_backbone_location1, open_backbone_location2]) + backbone_component.features.append(dropout_sequence_feature) + backbone_component.features.append(insertion_sites_feature) + backbone_component.features.append(open_backbone_feature) + backbone_dropout_meets = sbol3.Constraint(restriction='http://sbols.org/v3#meets', subject=dropout_sequence_feature, object=open_backbone_feature) + backbone_component.constraints.append(backbone_dropout_meets) + return backbone_component, backbone_seq + def part_in_backbone(identity: str, part: sbol3.Component, backbone: sbol3.Component, linear:bool=False, **kwargs) -> Tuple[sbol3.Component, sbol3.Sequence]: """Creates a Part in Backbone Component and its Sequence. @@ -617,10 +644,14 @@ def part_in_backbone(identity: str, part: sbol3.Component, backbone: sbol3.Compo # get backbone sequence backbone_sequence = backbone.sequences[0].lookup().elements # compute open backbone sequences - open_backbone_sequence_from_location1=backbone_sequence[backbone.features[-1].locations[0].start -1 : backbone.features[-1].locations[0].end -1] - open_backbone_sequence_from_location2=backbone_sequence[backbone.features[-1].locations[1].start -1 : backbone.features[-1].locations[1].end-1] + open_backbone_sequence_from_location1=backbone_sequence[backbone.features[-1].locations[0].start -1 : backbone.features[-1].locations[0].end] + open_backbone_sequence_from_location2=backbone_sequence[backbone.features[-1].locations[1].start -1 : backbone.features[-1].locations[1].end] # extract part sequence part_sequence = part.sequences[0].lookup().elements + covered_fusion_site_lengths = [3,4] # review can be user input + for l in covered_fusion_site_lengths: + if open_backbone_sequence_from_location1[-l:]==part_sequence[:l] and part_sequence[-l:]==open_backbone_sequence_from_location2[:l]: + part_sequence = part_sequence[l:-l] # make new component sequence if linear: part_in_backbone_seq_str = open_backbone_sequence_from_location1 + part_sequence + open_backbone_sequence_from_location2 @@ -642,4 +673,310 @@ def part_in_backbone(identity: str, part: sbol3.Component, backbone: sbol3.Compo part_in_backbone_component.features.append(backbone_subcomponent) # adding topology part_in_backbone_component.types.append(topology_type) - return part_in_backbone_component, part_in_backbone_seq \ No newline at end of file + #if len(part_in_backbone_component.name)==0: # TODO: review + # part_in_backbone_component.name = identity + return part_in_backbone_component, part_in_backbone_seq + +def part_in_backbone_from_sbol(identity: Union[str, None], sbol_comp: sbol3.Component, part_location: List[int], part_roles:List[str], fusion_site_length:int, linear:bool=False, **kwargs) -> Tuple[sbol3.Component, sbol3.Sequence]: + """Restructures a non-hierarchical plasmid Component to follow the part-in-backbone pattern following BP011. + It overwrites the SBOL3 Component provided. + A part inserted into a backbone is represented by a Component that includes both the part insert + as a feature that is a SubComponent and the backbone as another SubComponent. + For more information about BP011 visit https://github.com/SynBioDex/SBOL-examples/tree/main/SBOL/best-practices/BP011 + + :param identity: The identity of the Component, is its a String it build a new SBOL Component, if None it adds on top of the input. The identity of Sequence is also identity with the suffix '_seq'. + :param sbol_comp: The SBOL3 Component that will be used to create the part in backbone Component and Sequence. + :param part_location: List of 2 integers that indicates the start and the end of the unitary part. Note that the index of the first location is 1, as is typical practice in biology, rather than 0, as is typical practice in computer science. + :param part_roles: List of strings that indicates the roles to add on the part. + :param fusion_site_length: Integer of the length of the fusion sites (eg. BsaI fusion site lenght is 4, SapI fusion site lenght is 3) + :param linear: Boolean than indicates if the backbone is linear, by default it is seted to Flase which means that it has a circular topology. + :param kwargs: Keyword arguments of any other Component attribute. + :return: A tuple of Component and Sequence. + """ + if len(part_location) != 2: + raise ValueError('The part_location only accepts 2 int values in a list.') + if len(sbol_comp.sequences)!=1: + raise ValueError(f'The reactant needs to have precisely one sequence. The input reactant has {len(sbol_comp.sequences)} sequences') + sequence = sbol_comp.sequences[0].lookup().elements + if identity == None: + part_in_backbone_component = sbol_comp + part_in_backbone_seq = sbol_comp.sequences[0] + else: + part_in_backbone_component, part_in_backbone_seq = dna_component_with_sequence(identity, sequence, **kwargs) + part_in_backbone_component.roles.append(sbol3.SO_DOUBLE_STRANDED) + for part_role in part_roles: + part_in_backbone_component.roles.append(part_role) + # creating part feature + part_location_comp = sbol3.Range(sequence=part_in_backbone_seq, start=part_location[0], end=part_location[1]) + #TODO: add the option of fusion sites to be of different lenghts + insertion_site_location1 = sbol3.Range(sequence=part_in_backbone_seq, start=part_location[0], end=part_location[0]+fusion_site_length, order=1) + insertion_site_location2 = sbol3.Range(sequence=part_in_backbone_seq, start=part_location[1]-fusion_site_length, end=part_location[1], order=3) + part_sequence_feature = sbol3.SequenceFeature(locations=[part_location_comp], roles=part_roles) + part_sequence_feature.roles.append(tyto.SO.engineered_insert) + insertion_sites_feature = sbol3.SequenceFeature(locations=[insertion_site_location1, insertion_site_location2], roles=[tyto.SO.insertion_site]) + #TODO: infer topology from the input + if linear: + part_in_backbone_component.types.append(sbol3.SO_LINEAR) + part_in_backbone_component.roles.append(sbol3.SO_ENGINEERED_REGION) + # creating backbone feature + open_backbone_location1 = sbol3.Range(sequence=part_in_backbone_seq, start=1, end=part_location[0]+fusion_site_length-1, order=1) + open_backbone_location2 = sbol3.Range(sequence=part_in_backbone_seq, start=part_location[1]-fusion_site_length, end=len(sequence), order=3) + open_backbone_feature = sbol3.SequenceFeature(locations=[open_backbone_location1, open_backbone_location2]) + else: + part_in_backbone_component.types.append(sbol3.SO_CIRCULAR) + part_in_backbone_component.roles.append(tyto.SO.plasmid_vector) + # creating backbone feature + open_backbone_location1 = sbol3.Range(sequence=part_in_backbone_seq, start=1, end=part_location[0]+fusion_site_length-1, order=2) + open_backbone_location2 = sbol3.Range(sequence=part_in_backbone_seq, start=part_location[1]-fusion_site_length, end=len(sequence), order=1) + open_backbone_feature = sbol3.SequenceFeature(locations=[open_backbone_location1, open_backbone_location2]) + part_in_backbone_component.features.append(part_sequence_feature) + part_in_backbone_component.features.append(insertion_sites_feature) + part_in_backbone_component.features.append(open_backbone_feature) + backbone_dropout_meets = sbol3.Constraint(restriction='http://sbols.org/v3#meets', subject=part_sequence_feature, object=open_backbone_feature) + part_in_backbone_component.constraints.append(backbone_dropout_meets) + #TODO: Add a branch to create a component without overwriting the WHOLE input component + #removing repeated types and roles + part_in_backbone_component.types = set(part_in_backbone_component.types) + part_in_backbone_component.roles = set(part_in_backbone_component.roles) + return part_in_backbone_component, part_in_backbone_seq + +def digestion(reactant:sbol3.Component, restriction_enzymes:List[sbol3.ExternallyDefined], assembly_plan:sbol3.Component, **kwargs)-> Tuple[sbol3.Component, sbol3.Sequence]: + """Digests a Component using the provided restriction enzymes and creates a product Component and a digestion Interaction. + The product Component is assumed to be the insert for parts in backbone and the backbone for backbones. + + :param reactant: DNA to be digested as SBOL Component, usually a part_in_backbone. + :param restriction_enzymes: Restriction enzymes used Externally Defined. + :return: A tuple of Component and Interaction. + """ + if sbol3.SBO_DNA not in reactant.types: + raise TypeError(f'The reactant should has a DNA type. Types founded {reactant.types}.') + if len(reactant.sequences)!=1: + raise ValueError(f'The reactant needs to have precisely one sequence. The input reactant has {len(reactant.sequences)} sequences') + participations=[] + restriction_enzymes_pydna=[] + for re in restriction_enzymes: + enzyme = Restriction.__dict__[re.name] + restriction_enzymes_pydna.append(enzyme) + modifier_participation = sbol3.Participation(roles=[sbol3.SBO_MODIFIER], participant=re) + participations.append(modifier_participation) + + # Inform topology to PyDNA, if not found assuming linear. + if is_circular(reactant): + circular=True + linear=False + else: + circular=False + linear=True + + reactant_seq = reactant.sequences[0].lookup().elements + # Dseqrecord is from PyDNA package with reactant sequence + ds_reactant = Dseqrecord(reactant_seq, linear=linear, circular=circular) + digested_reactant = ds_reactant.cut(restriction_enzymes_pydna) + + if len(digested_reactant)<2 or len(digested_reactant)>3: + raise NotImplementedError(f'Not supported number of products. Found{len(digested_reactant)}') + #TODO select them based on content rather than size. + elif circular and len(digested_reactant)==2: + part_extract, backbone = sorted(digested_reactant, key=len) + elif linear and len(digested_reactant)==3: + prefix, part_extract, suffix = digested_reactant + else: raise NotImplementedError('The reactant has no valid topology type') + + # Extracting roles from features + reactant_features_roles = [] + for f in reactant.features: + for r in f.roles: + reactant_features_roles.append(r) + # if part + if any(n==tyto.SO.engineered_insert for n in reactant_features_roles): + # Compute the length of single strand sticky ends or fusion sites + product_5_prime_ss_strand, product_5_prime_ss_end = part_extract.seq.five_prime_end() + product_3_prime_ss_strand, product_3_prime_ss_end = part_extract.seq.three_prime_end() + + product_sequence = str(part_extract.seq) + prod_comp, prod_seq = dna_component_with_sequence(identity=f'{reactant.name}_part_extract', sequence=product_sequence, **kwargs) #str(product_sequence)) + # add sticky ends features + five_prime_fusion_site_location = sbol3.Range(sequence=product_sequence, start=1, end=len(product_5_prime_ss_end), order=1) + three_prime_fusion_site_location = sbol3.Range(sequence=product_sequence, start=len(product_sequence)-len(product_3_prime_ss_end)+1, end=len(product_sequence), order=3) + fusion_sites_feature = sbol3.SequenceFeature(locations=[five_prime_fusion_site_location, three_prime_fusion_site_location], roles=[tyto.SO.insertion_site]) + prod_comp.roles.append(tyto.SO.engineered_insert) + prod_comp.features.append(fusion_sites_feature) + + # if backbone + elif any(n==tyto.SO.deletion for n in reactant_features_roles): + # Compute the length of single strand sticky ends or fusion sites + product_5_prime_ss_strand, product_5_prime_ss_end = backbone.seq.five_prime_end() + product_3_prime_ss_strand, product_3_prime_ss_end = backbone.seq.three_prime_end() + product_sequence = str(backbone.seq) + prod_comp, prod_seq = dna_component_with_sequence(identity=f'{reactant.name}_backbone', sequence=product_sequence, **kwargs) #str(product_sequence)) + # add sticky ends features + five_prime_fusion_site_location = sbol3.Range(sequence=product_sequence, start=1, end=len(product_5_prime_ss_end), order=1) + three_prime_fusion_site_location = sbol3.Range(sequence=product_sequence, start=len(product_sequence)-len(product_3_prime_ss_end)+1, end=len(product_sequence), order=3) + fusion_sites_feature = sbol3.SequenceFeature(locations=[five_prime_fusion_site_location, three_prime_fusion_site_location], roles=[tyto.SO.insertion_site]) + prod_comp.roles.append(tyto.SO.plasmid_vector) + prod_comp.features.append(fusion_sites_feature) + + else: raise NotImplementedError('The reactant has no valid roles') + + #Add reference to part in backbone + reactant_subcomponent = sbol3.SubComponent(reactant) + prod_comp.features.append(reactant_subcomponent) + # Create reactant Participation. + react_subcomp = sbol3.SubComponent(reactant) + assembly_plan.features.append(react_subcomp) + reactant_participation = sbol3.Participation(roles=[sbol3.SBO_REACTANT], participant=react_subcomp) + participations.append(reactant_participation) + + prod_subcomp = sbol3.SubComponent(prod_comp) + assembly_plan.features.append(prod_subcomp) + product_participation = sbol3.Participation(roles=[sbol3.SBO_PRODUCT], participant=prod_subcomp) + participations.append(product_participation) + + # Make Interaction + interaction = sbol3.Interaction(types=[tyto.SBO.cleavage], participations=participations) + assembly_plan.interactions.append(interaction) + + return prod_comp, prod_seq + +def ligation(reactants:List[sbol3.Component], assembly_plan:sbol3.Component)-> List[Tuple[sbol3.Component, sbol3.Sequence]]: + """Ligates Components using base complementarity and creates a product Component and a ligation Interaction. + + :param reactant: DNA to be ligated as SBOL Component. + :return: A tuple of Component and Interaction. + """ + # Create a dictionary that maps each first and last 4 letters to a list of strings that have those letters. + reactant_parts = [] + fusion_sites_set = set() + for reactant in reactants: + fusion_site_3prime_length = reactant.features[0].locations[0].end - reactant.features[0].locations[0].start + fusion_site_5prime_length = reactant.features[0].locations[1].end - reactant.features[0].locations[1].start + if fusion_site_3prime_length == fusion_site_5prime_length: + fusion_site_length = fusion_site_3prime_length + 1 # if the fusion site is 4 bp long, the start will be 1 and end 4, 4-1 = 3, so we add 1 to get 4. + fusion_sites_set.add(fusion_site_length) + if len(fusion_sites_set) > 1: + raise ValueError(f'Fusion sites of different length within different parts. Check {reactant.identity} ') + else: + raise ValueError(f'Fusion sites of different length within the same part. Check {reactant.identity}') + if tyto.SO.plasmid_vector in reactant.roles: + reactant_parts.append(reactant) + elif tyto.SO.engineered_insert in reactant.roles: + reactant_parts.append(reactant) + else: + raise ValueError(f'Part {reactant.identity} does not have a valid role') + # remove the backbones if any from the reactants, to create the composite + groups = {} + for reactant in reactant_parts: + first_four_letters = reactant.sequences[0].lookup().elements[:fusion_site_length].lower() + last_four_letters = reactant.sequences[0].lookup().elements[-fusion_site_length:].lower() + part_syntax = f'{first_four_letters}_{last_four_letters}' + if part_syntax not in groups: + groups[part_syntax] = [] + groups[part_syntax].append(reactant) + else: groups[part_syntax].append(reactant) + # groups is a dictionary of lists of parts that have the same first and last 4 letters + # list_of_combinations_per_assembly is a list of tuples of parts that can be ligated together + list_of_parts_per_combination = list(product(*groups.values())) #cartesian product + # create list_of_composites_per_assembly from list_of_combinations_per_assembly + list_of_composites_per_assembly = [] + for combination in list_of_parts_per_combination: + list_of_parts_per_composite = [combination[0]] + insert_sequence = combination[0].sequences[0].lookup().elements + remaining_parts = list(combination[1:]) + it = 1 + while remaining_parts: + remaining_parts_before = len(remaining_parts) + for part in remaining_parts: + # match insert sequence 5' to part 3' + if part.sequences[0].lookup().elements[:fusion_site_length].lower() == insert_sequence[-fusion_site_length:].lower(): + insert_sequence = insert_sequence[:-fusion_site_length] + part.sequences[0].lookup().elements + list_of_parts_per_composite.append(part) + remaining_parts.remove(part) + # match insert sequence 3' to part 5' + elif part.sequences[0].lookup().elements[-fusion_site_length:].lower() == insert_sequence[:fusion_site_length].lower(): + insert_sequence = part.sequences[0].lookup().elements + insert_sequence[fusion_site_length:] + list_of_parts_per_composite.insert(0, part) + remaining_parts.remove(part) + remaining_parts_after = len(remaining_parts) + + if remaining_parts_before == remaining_parts_after: + it += 1 + if it > 5: #5 was chosen arbitrarily to avoid infinite loops + print(groups) + raise ValueError('No match found, check the parts and their fusion sites') + list_of_composites_per_assembly.append(list_of_parts_per_composite) + + # transform list_of_parts_per_assembly into list of composites + products_list = [] + participations = [] + composite_number = 1 + for composite in list_of_composites_per_assembly: # a composite of the form [A,B,C] + # calculate sequence + composite_sequence_str = "" + composite_name = "" + #part_subcomponents = [] + part_extract_subcomponents = [] + for part_extract in composite: + composite_sequence_str = composite_sequence_str + part_extract.sequences[0].lookup().elements[:-fusion_site_length] #needs a version for linear + # create participations + part_extract_subcomponent = sbol3.SubComponent(part_extract) # LocalSubComponent?? + part_extract_subcomponents.append(part_extract_subcomponent) + composite_name = composite_name +'_'+ part_extract.name + # create dna componente and sequence + composite_component, composite_seq = dna_component_with_sequence(f'composite_{composite_number}{composite_name}', composite_sequence_str) # **kwarads use in future? + composite_component.name = f'composite_{composite_number}{composite_name}' + composite_component.roles.append(sbol3.SO_ENGINEERED_REGION) + composite_component.features = part_extract_subcomponents + for i in range(len(composite_component.features )-1): + composite_component.constraints = [sbol3.Constraint(restriction='http://sbols.org/v3#meets', subject=composite_component.features[i], object=composite_component.features[i+1])] + products_list.append([composite_component, composite_seq]) + composite_number += 1 + return products_list + + +class Assembly_plan_composite_in_backbone_single_enzyme(): + """Creates a Assembly Plan. + :param name: Name of the assembly plan Component. + :param parts_in_backbone: Parts in backbone to be assembled. + :param acceptor_backbone: Backbone in which parts are inserted on the assembly. + :param restriction_enzymes: Restriction enzyme with correct name from Bio.Restriction as Externally Defined. + :param document: SBOL Document where the assembly plan will be created. + :param linear: Boolean to inform if the reactant is linear. + :param circular: Boolean to inform if the reactant is circular. + :param **kwargs: Keyword arguments of any other Component attribute for the assembled part. + """ + + def __init__(self, name: str, parts_in_backbone: List[sbol3.Component], acceptor_backbone: sbol3.Component, restriction_enzyme: Union[str,sbol3.ExternallyDefined], document:sbol3.Document): + self.name = name + self.parts_in_backbone = parts_in_backbone + self.acceptor_backbone = acceptor_backbone + self.restriction_enzyme = restriction_enzyme + self.products = [] + self.extracted_parts = [] + self.document = document + + #create assembly plan + self.assembly_plan_component = sbol3.Component(identity=f'{self.name}_assembly_plan', types=sbol3.SBO_FUNCTIONAL_ENTITY) + self.document.add(self.assembly_plan_component) + self.composites = [] + + def run(self): + self.assembly_plan_component.features.append(self.restriction_enzyme) + #extract parts + part_number = 1 + for part_in_backbone in self.parts_in_backbone: + part_comp, part_seq = digestion(reactant=part_in_backbone,restriction_enzymes=[self.restriction_enzyme], assembly_plan=self.assembly_plan_component, name=f'part_{part_number}_{part_in_backbone.display_id}') + self.document.add([part_comp, part_seq]) + self.extracted_parts.append(part_comp) + part_number += 1 + #extract backbone (should be the same?) + backbone_comp, backbone_seq = digestion(reactant=self.acceptor_backbone,restriction_enzymes=[self.restriction_enzyme], assembly_plan=self.assembly_plan_component, name=f'part_{part_number}') + self.document.add([backbone_comp, backbone_seq]) + self.extracted_parts.append(backbone_comp) + + #create composite part from extracted parts + composites_list = ligation(reactants=self.extracted_parts, assembly_plan=self.assembly_plan_component) + for composite in composites_list: + composite[0].generated_by.append(self.assembly_plan_component) # + self.composites.append(composite) + self.products.append(composite) + self.document.add(composite) diff --git a/sbol_utilities/helper_functions.py b/sbol_utilities/helper_functions.py index 168debfc..5b3b1f21 100644 --- a/sbol_utilities/helper_functions.py +++ b/sbol_utilities/helper_functions.py @@ -363,4 +363,11 @@ def is_circular(obj: Union[sbol3.Component, sbol3.LocalSubComponent, sbol3.Exter :param obj: design to be checked :return: true if circular """ - return any(n==sbol3.SO_CIRCULAR for n in obj.types) \ No newline at end of file + return any(n==sbol3.SO_CIRCULAR for n in obj.types) + +def is_linear(obj: Union[sbol3.Component, sbol3.LocalSubComponent, sbol3.ExternallyDefined]) -> bool: + """Check if an SBOL Component or Feature is linear. + :param obj: design to be checked + :return: true if linear + """ + return any(n==sbol3.SO_LINEAR for n in obj.types) \ No newline at end of file diff --git a/setup.py b/setup.py index b0b5db53..f863f823 100644 --- a/setup.py +++ b/setup.py @@ -42,7 +42,8 @@ 'tyto>=1.4', 'openpyxl', 'requests', - 'sbol_factory>=1.1' + 'sbol_factory>=1.1', + 'pydna' ], extras_require={ # requirements for development 'dev': ['pytest', 'interrogate'] diff --git a/test/test_component.py b/test/test_component.py index 63575900..0c45195e 100644 --- a/test/test_component.py +++ b/test/test_component.py @@ -7,76 +7,35 @@ import sbol3 import tyto -from sbol_utilities.component import contained_components, contains, add_feature, add_interaction, constitutive, \ - regulate, order, in_role, all_in_role, ensure_singleton_feature, by_roles, by_types, is_dna_part, ed_restriction_enzyme -from sbol_utilities.helper_functions import filter_top_level +from sbol_utilities.component import contained_components, contains, add_feature, add_interaction, \ + constitutive, ed_restriction_enzyme, \ + regulate, order, in_role, all_in_role, ensure_singleton_feature, is_dna_part from sbol_utilities.component import dna_component_with_sequence, rna_component_with_sequence, \ protein_component_with_sequence, media, functional_component, promoter, rbs, cds, terminator, \ protein_stability_element, gene, operator, engineered_region, mrna, transcription_factor, \ strain, ed_simple_chemical, ed_protein -from sbol_utilities.component import ed_restriction_enzyme, backbone, part_in_backbone +from sbol_utilities.component import ed_restriction_enzyme, backbone, backbone_from_sbol, part_in_backbone, part_in_backbone_from_sbol, \ + digestion, ligation, Assembly_plan_composite_in_backbone_single_enzyme from sbol_utilities.helper_functions import find_top_level, toplevel_named, TopLevelNotFound, outgoing_links from sbol_utilities.sbol_diff import doc_diff +from sbol_utilities.conversion import convert_from_genbank class TestComponent(unittest.TestCase): - def test_filter_by_roles(self): - """test the filter by roles utility""" - doc = sbol3.Document() - sbol3.set_namespace('http://sbolstandard.org/testfiles') - # create and add 3 components, with 2 having common role of dna - comp_1 = sbol3.Component('component_1', sbol3.SBO_DNA, roles=[tyto.SBO.deoxyribonucleic_acid]) - comp_2 = sbol3.Component('component_2', sbol3.SBO_DNA, roles=[tyto.SO.engineered_region]) - comp_3 = sbol3.Component('component_3', sbol3.SBO_DNA, roles=[tyto.SO.engineered_region, tyto.SBO.deoxyribonucleic_acid]) - doc.add(comp_1) - doc.add(comp_2) - doc.add(comp_3) - # only comp_1 and comp_3 must be returned by the function - matched = list(filter_top_level(doc, by_roles(tyto.SBO.deoxyribonucleic_acid))) - assert(comp_1 in matched and comp_3 in matched and len(matched) == 2) - - def test_filter_by_types(self): - """test the filter by types utility""" - doc = sbol3.Document() - sbol3.set_namespace('http://sbolstandard.org/testfiles') - # create and add 3 components, with 2 one of the types as SBO_DNA - comp_1 = sbol3.Component('component_1', types=[sbol3.SBO_DNA]) - comp_2 = sbol3.Component('component_2', types=[sbol3.SBO_DEGRADATION, sbol3.SBO_DNA]) - comp_3 = sbol3.Component('component_3', types=[sbol3.SBO_FUNCTIONAL_ENTITY]) - doc.add(comp_1) - doc.add(comp_2) - doc.add(comp_3) - # only comp_1 and comp_3 must be returned by the function - matched = list(filter_top_level(doc, by_types(sbol3.SBO_DNA))) - assert(comp_1 in matched and comp_2 in matched and len(matched) == 2) - def test_dna_part(self): """Test the correctness of is_dna_part check""" # create a test dna component - doc = sbol3.Document() - sbol3.set_namespace('http://sbolstandard.org/testfiles') + dna_identity = 'Test_dna_identity' dna_sequence = 'Test_dna_sequence' dna_description = 'Test_dna_description' sbol3.set_namespace('http://sbolstandard.org/testfiles') # we don't need dna_sequence object - test_dna_component_1, _ = dna_component_with_sequence('test_identity1', dna_sequence, description=dna_description) - test_dna_component_2, _ = dna_component_with_sequence('test_identity2', dna_sequence, description=dna_description) - test_dna_component_3, _ = dna_component_with_sequence('test_identity3', dna_sequence, description=dna_description) + test_dna_component, _ = dna_component_with_sequence(dna_identity, dna_sequence, description=dna_description) # adding atleast 1 SO role - test_dna_component_1.roles.append(sbol3.SO_GENE) - test_dna_component_2.roles.append(sbol3.SBO_DEGRADATION) - # created and add 3 components, with 1 satisfying all criteria - doc.add(test_dna_component_3) - doc.add(test_dna_component_2) - doc.add(test_dna_component_1) - # use filter_top_level utility to filter objects which are dna parts - matched = list(filter_top_level(doc, is_dna_part)) - # 2nd component had non SO roles, 3rd component had no role - assert test_dna_component_1 in matched - assert test_dna_component_2 not in matched - assert test_dna_component_3 not in matched + test_dna_component.roles.append(sbol3.SO_GENE) + assert is_dna_part(test_dna_component) def test_system_building(self): doc = sbol3.Document() @@ -250,7 +209,7 @@ def test_high_level_constructors(self): if len(enr_comp.features) > 1: for i in range(len(enr_comp.features)-1): constraint = sbol3.Constraint(sbol3.SBOL_PRECEDES, enr_comp.features[i], enr_comp.features[i+1]) - enr_comp.constraints = [constraint] + enr_comp.constraints.append(constraint) else: pass hlc_doc.add(hlc_enr_comp) @@ -334,16 +293,19 @@ def test_high_level_constructors(self): doc.add(media_comp) assert doc_diff(doc, hlc_doc) == 0, f'Constructor Error: {media_identity}' - def test_sep055(self): - """Test construction of components and features using helper functions: for each, build manually and compare.""" - hlc_doc = sbol3.Document() - doc = sbol3.Document() + def test_restriction_enzyme_bp011(self): + """Test the restriction_enzyme function""" sbol3.set_namespace('http://sbolstandard.org/testfiles') - restriction_enzyme_name = 'BsaI' restriction_enzyme_definition = 'http://rebase.neb.com/rebase/enz/BsaI.html' # TODO: replace with getting the URI from Enzyme when REBASE identifiers become available in biopython 1.80 bsai = ed_restriction_enzyme(restriction_enzyme_name) assert bsai.definition == restriction_enzyme_definition, 'Constructor Error: ed_restriction_enzyme' + + def test_backbone_bp011(self): + """Test the backbone function""" + hlc_doc = sbol3.Document() + doc = sbol3.Document() + sbol3.set_namespace('http://sbolstandard.org/testfiles') backbone_identity = 'backbone' backbone_sequence = 'aaGGGGttttCCCCaa' @@ -401,7 +363,25 @@ def test_sep055(self): doc.add([linear_backbone_component, linear_backbone_seq]) assert doc_diff(doc, hlc_doc) == 0, f'Constructor Error: Linear {backbone_identity}' + #Test backbone from SBOL + hlc_doc = sbol3.Document() + doc = sbol3.Document() + sbol3.set_namespace('http://sbolstandard.org/testfiles') + # build using backbone from SBOL + doc.add([circular_backbone_component, circular_backbone_seq]) + hl_circular_backbone_component, hl_circular_backbone_seq = backbone_from_sbol(identity=backbone_identity, sbol_comp=circular_backbone_component, dropout_location=dropout_location, fusion_site_length=fusion_site_length, linear=False, description=test_description) + hlc_doc.add([hl_circular_backbone_component, hl_circular_backbone_seq]) + assert doc_diff(doc, hlc_doc) == 0, f'Constructor Error: Circular {backbone_identity} from SBOL' + hlc_doc = sbol3.Document() + doc = sbol3.Document() + doc.add([linear_backbone_component, linear_backbone_seq]) + hl_linear_backbone_component, hl_linear_backbone_seq = backbone_from_sbol(identity=backbone_identity, sbol_comp=linear_backbone_component, dropout_location=dropout_location, fusion_site_length=fusion_site_length, linear=True, description=test_description) + hlc_doc.add([hl_linear_backbone_component, hl_linear_backbone_seq]) + assert doc_diff(doc, hlc_doc) == 0, f'Constructor Error: Linear {backbone_identity} from SBOL' + + def test_part_in_backbone_bp011(self): + """Test the part_in_backbone function""" hlc_doc = sbol3.Document() doc = sbol3.Document() sbol3.set_namespace('http://sbolstandard.org/testfiles') @@ -416,9 +396,11 @@ def test_sep055(self): hl_part_in_backbone_circular, hl_part_in_backbone_circular_sequence = part_in_backbone(identity_pib, part=test_promoter, backbone=test_backbone) hlc_doc.add([hl_part_in_backbone_circular, hl_part_in_backbone_circular_sequence]) + backbone_sequence = test_backbone.sequences[0].lookup().elements - open_backbone_sequence_from_location1=backbone_sequence[test_backbone.features[-1].locations[0].start -1 : test_backbone.features[-1].locations[0].end -1] - open_backbone_sequence_from_location2=backbone_sequence[test_backbone.features[-1].locations[1].start -1 : test_backbone.features[-1].locations[1].end-1] + open_backbone_sequence_from_location1=backbone_sequence[test_backbone.features[-1].locations[0].start -1 : test_backbone.features[-1].locations[0].end] + open_backbone_sequence_from_location2=backbone_sequence[test_backbone.features[-1].locations[1].start -1 : test_backbone.features[-1].locations[1].end] + part_sequence = test_promoter.sequences[0].lookup().elements part_in_backbone_seq_str = part_sequence + open_backbone_sequence_from_location2 + open_backbone_sequence_from_location1 part_in_backbone_component, part_in_backbone_seq = dna_component_with_sequence(identity_pib, part_in_backbone_seq_str) @@ -442,7 +424,6 @@ def test_sep055(self): hlc_doc.add([test_promoter, test_promoter_seq, test_backbone, test_backbone_seq]) doc.add([test_promoter, test_promoter_seq, test_backbone, test_backbone_seq]) - hl_part_in_backbone_linear, hl_part_in_backbone_linear_sequence = part_in_backbone(identity_pib, part=test_promoter, backbone=test_backbone, linear=True) hlc_doc.add([hl_part_in_backbone_linear, hl_part_in_backbone_linear_sequence]) @@ -460,6 +441,112 @@ def test_sep055(self): part_in_backbone_component_linear.types.append(sbol3.SO_LINEAR) doc.add([part_in_backbone_component_linear, part_in_backbone_seq]) assert doc_diff(doc, hlc_doc) == 0, f'Constructor Error: Linear {identity_pib}' - + + def test_part_in_backbone_from_sbol_bp011(self): + hlc_doc = sbol3.Document() + sbol3.set_namespace('http://sbolstandard.org/testfiles') + test_dir = os.path.dirname(os.path.realpath(__file__)) + b0015_dir = os.path.join(test_dir, 'test_files', 'b0015.gb') + # Part in backbone from SBOL + target_b0015_unitary_part_sequence = 'ccaggcatcaaataaaacgaaaggctcagtcgaaagactgggcctttcgttttatctgttgtttgtcggtgaacgctctctactagagtcacactggctcaccttcgggtgggcctttctgcgtttata' + b0015_doc = convert_from_genbank(b0015_dir, 'https://github.com/Gonza10V') + b0015_ef = [top_level for top_level in b0015_doc if type(top_level)==sbol3.Component][0] + hlc_b0015_ef_in_bb, hlc_b0015_ef_in_bb_seq = part_in_backbone_from_sbol('b0015_ef_in_bb', b0015_ef, [518,646], [sbol3.SO_TERMINATOR], 4, False, name='b0015_ef_in_bb') + hlc_doc.add([hlc_b0015_ef_in_bb, hlc_b0015_ef_in_bb_seq]) + for feature in hlc_b0015_ef_in_bb.features: + if feature.roles == [sbol3.SO_TERMINATOR, tyto.SO.engineered_insert]: + b0015_unitary_part_sequence = feature.locations[0].sequence.lookup().elements[feature.locations[0].start-1:feature.locations[0].end] + assert target_b0015_unitary_part_sequence == b0015_unitary_part_sequence , "Unitary part sequence does not match target sequence" + assert len(hlc_b0015_ef_in_bb.features) == 3, f"Incorrect number of features, number of features expeted is 3, got {len(hlc_b0015_ef_in_bb.features)}" + assert set(hlc_b0015_ef_in_bb.types) == set([sbol3.SBO_DNA, sbol3.SO_CIRCULAR]) , f"Incorrect types, types expected are [sbol3.SBO_DNA, sbol3.SO_CIRCULAR], got {hlc_b0015_ef_in_bb.types}" + assert set(hlc_b0015_ef_in_bb.roles) == set([sbol3.SO_DOUBLE_STRANDED, sbol3.SO_TERMINATOR, tyto.SO.plasmid_vector]), f"Incorrect roles, roles expected are [sbol3.SO_DOUBLE_STRANDED, sbol3.SO_TERMINATOR, tyto.SO.plasmid_vector], got {hlc_b0015_ef_in_bb.roles}" + features_roles = set() + for ft in hlc_b0015_ef_in_bb.features: + for role in ft.roles: + features_roles.add(role) + assert features_roles == set([tyto.SO.insertion_site, sbol3.SO_TERMINATOR, tyto.SO.engineered_insert]), f"Incorrect feature roles, roles expected are [tyto.SO.insertion_site, sbol3.SO_TERMINATOR, tyto.SO.engineered_insert], got {features_roles}" + + def test_assembly_plan_bp011(self): + """Test assembly plan class""" + doc = sbol3.Document() + sbol3.set_namespace('http://sbolstandard.org/testfiles') + test_dir = os.path.dirname(os.path.realpath(__file__)) + # Assembly plan setup + bsai = ed_restriction_enzyme('BsaI') + #lvl1 acceptor + lvl1_pOdd_acceptor_seq = 'gctcgagtcccgtcaagtcagcgtaatgctctgccagtgttacaaccaattaaccaattctgattagaaaaactcatcgagcatcaaatgaaactgcaatttattcatatcaggattatcaataccatatttttgaaaaagccgtttctgtaatgaaggagaaaactcaccgaggcagttccataggatggcaagatcctggtatcggtctgcgattccgactcgtccaacatcaatacaacctattaatttcccctcgtcaaaaataaggttatcaagtgagaaatcaccatgagtgacgactgaatccggtgagaatggcaaaagcttatgcatttctttccagacttgttcaacaggccagccattacgctcgtcatcaaaatcactcgcatcaaccaaaccgttattcattcgtgattgcgcctgagcgagacgaaatacgcgatcgctgttaaaaggacaattacaaacaggaatcgaatgcaaccggcgcaggaacactgccagcgcatcaacaatattttcacctgaatcaggatattcttctaatacctggaatgctgttttcccggggatcgcagtggtgagtaaccatgcatcatcaggagtacggataaaatgcttgatggtcggaagaggcataaattccgtcagccagtttagtctgaccatctcatctgtaacatcattggcaacgctacctttgccatgtttcagaaacaactctggcgcatcgggcttcccatacaatcgatagattgtcgcacctgattgcccgacattatcgcgagcccatttatacccatataaatcagcatccatgttggaatttaatcgcggcctggagcaagacgtttcccgttgaatatggctcataacaccccttgtattactgtttatgtaagcagacagttttattgttcatgatgatatatttttatcttgtgcaatgtaacatcagagattttgagacacaacgtggctttgttgaataaatcgaacttttgctgagttgaaggatcagctcgagtgccacctgacgtctaagaaaccattattatcatgacattaacctataaaaataggcgtatcacgaggcagaatttcagataaaaaaaatccttagctttcgctaaggatgatttctggaattcgctcttcaatgggagtgagacccaatacgcaaaccgcctctccccgcgcgttggccgattcattaatgcagctggcacgacaggtttcccgactggaaagcgggcagtgagcgcaacgcaattaatgtgagttagctcactcattaggcaccccaggctttacactttatgcttccggctcgtatgttgtgtggaattgtgagcggataacaatttcacacatactagagaaagaggagaaatactagatggcttcctccgaagacgttatcaaagagttcatgcgtttcaaagttcgtatggaaggttccgttaacggtcacgagttcgaaatcgaaggtgaaggtgaaggtcgtccgtacgaaggtacccagaccgctaaactgaaagttaccaaaggtggtccgctgccgttcgcttgggacatcctgtccccgcagttccagtacggttccaaagcttacgttaaacacccggctgacatcccggactacctgaaactgtccttcccggaaggtttcaaatgggaacgtgttatgaacttcgaagacggtggtgttgttaccgttacccaggactcctccctgcaagacggtgagttcatctacaaagttaaactgcgtggtaccaacttcccgtccgacggtccggttatgcagaaaaaaaccatgggttgggaagcttccaccgaacgtatgtacccggaagacggtgctctgaaaggtgaaatcaaaatgcgtctgaaactgaaagacggtggtcactacgacgctgaagttaaaaccacctacatggctaaaaaaccggttcagctgccgggtgcttacaaaaccgacatcaaactggacatcacctcccacaacgaagactacaccatcgttgaacagtacgaacgtgctgaaggtcgtcactccaccggtgcttaataacgctgatagtgctagtgtagatcgctactagagccaggcatcaaataaaacgaaaggctcagtcgaaagactgggcctttcgttttatctgttgtttgtcggtgaacgctctctactagagtcacactggctcaccttcgggtgggcctttctgcgtttataggtctcaGCTTgcatgaagagcctgcagtccggcaaaaaagggcaaggtgtcaccaccctgccctttttctttaaaaccgaaaagattacttcgcgttatgcaggcttcctcgctcactgactcgctgcgctcggtcgttcggctgcggcgagcggtatcagctcactcaaaggcggtaatacggttatccacagaatcaggggataacgcaggaaagaacatgtgagcaaaaggccagcaaaaggccaggaaccgtaaaaaggccgcgttgctggcgtttttccacaggctccgcccccctgacgagcatcacaaaaatcgacgctcaagtcagaggtggcgaaacccgacaggactataaagataccaggcgtttccccctggaagctccctcgtgcgctctcctgttccgaccctgccgcttaccggatacctgtccgcctttctcccttcgggaagcgtggcgctttctcatagctcacgctgtaggtatctcagttcggtgtaggtcgttcgctccaagctgggctgtgtgcacgaaccccccgttcagcccgaccgctgcgccttatccggtaactatcgtcttgagtccaacccggtaagacacgacttatcgccactggcagcagccactggtaacaggattagcagagcgaggtatgtaggcggtgctacagagttcttgaagtggtggcctaactacggctacactagaagaacagtatttggtatctgcgctctgctgaagccagttaccttcggaaaaagagttggtagctcttgatccggcaaacaaaccaccgctggtagcggtggtttttttgtttgcaagcagcagattacgcgcagaaaaaaaggatctcaagaagatcctttgatcttttctacggggtctgacgctcagtggaacgaaaactcacgttaagggattttggtcatgagattatcaaaaaggatcttcacctagatccttttaaattaaaaatgaagttttaaatcaatctaaagtatatatgagtaaacttggtctgaca' + podd_backbone, podd_backbone_seq = backbone('pOdd_bb', lvl1_pOdd_acceptor_seq, [1169,2259], 4, False, name='pOdd_bb') + doc.add([podd_backbone,podd_backbone_seq]) + #parts in backbone + ##get parts from genbank + podd1_dir = os.path.join(test_dir, 'test_files', 'podd1.gb') + j23100_b0034_dir = os.path.join(test_dir, 'test_files', 'j23100_b0034.gb') + sfgfp_dir = os.path.join(test_dir, 'test_files', 'sfgfp.gb') + rhlr_dir = os.path.join(test_dir, 'test_files', 'rhlr.gb') + b0015_dir = os.path.join(test_dir, 'test_files', 'b0015.gb') + + """Test assembly plan class""" + doc = sbol3.Document() + sbol3.set_namespace('http://sbolstandard.org/testfiles') + # Assembly plan setup + bsai = ed_restriction_enzyme('BsaI') + #lvl1 acceptor + podd1_dir = os.path.join(test_dir, 'test_files', 'podd1.gb') + podd_doc = convert_from_genbank(podd1_dir, 'https://github.com/Gonza10V') + podd_af = [top_level for top_level in podd_doc if type(top_level)==sbol3.Component][0] + podd_backbone, podd_backbone_seq = backbone_from_sbol('pOdd_bb', podd_af, [680,1770], 4, False, name='pOdd_bb') + doc.add([podd_backbone,podd_backbone_seq]) + #parts in backbone + ##get parts from genbank + j23100_dir = os.path.join(test_dir, 'test_files', 'ab_j23100.gb') + j23101_dir = os.path.join(test_dir, 'test_files', 'ab_j23101.gb') + b0034_dir = os.path.join(test_dir, 'test_files', 'bc_b0034.gb') + gfp_dir = os.path.join(test_dir, 'test_files', 'ce_gfp.gb') + rfp_dir = os.path.join(test_dir, 'test_files', 'ce_mrfp1.gb') + cfp_dir = os.path.join(test_dir, 'test_files', 'ce_ecfp.gb') + b0015_dir = os.path.join(test_dir, 'test_files', 'ef_b0015.gb') + j23100_doc = convert_from_genbank(j23100_dir, 'https://github.com/Gonza10V') + j23100_ab = [top_level for top_level in j23100_doc if type(top_level)==sbol3.Component][0] + j23101_doc = convert_from_genbank(j23101_dir, 'https://github.com/Gonza10V') + j23101_ab = [top_level for top_level in j23101_doc if type(top_level)==sbol3.Component][0] + b0034_doc = convert_from_genbank(b0034_dir, 'https://github.com/Gonza10V') + b0034_bc = [top_level for top_level in b0034_doc if type(top_level)==sbol3.Component][0] + gfp_doc = convert_from_genbank(gfp_dir, 'https://github.com/Gonza10V') + gfp_ce = [top_level for top_level in gfp_doc if type(top_level)==sbol3.Component][0] + rfp_doc = convert_from_genbank(rfp_dir, 'https://github.com/Gonza10V') + rfp_ce = [top_level for top_level in rfp_doc if type(top_level)==sbol3.Component][0] + cfp_doc = convert_from_genbank(cfp_dir, 'https://github.com/Gonza10V') + cfp_ce = [top_level for top_level in cfp_doc if type(top_level)==sbol3.Component][0] + b0015_doc = convert_from_genbank(b0015_dir, 'https://github.com/Gonza10V') + b0015_ef = [top_level for top_level in b0015_doc if type(top_level)==sbol3.Component][0] + ##SBOL parts in backbone + j23100_ab_in_bb, j23100_ab_in_bb_seq = part_in_backbone_from_sbol('j23100_ab_in_bb', j23100_ab, [479,513], [sbol3.SO_PROMOTER], 4, False, name='j23100_ab_in_bb') + doc.add([j23100_ab_in_bb, j23100_ab_in_bb_seq]) + j23101_ab_in_bb, j23101_ab_in_bb_seq = part_in_backbone_from_sbol('j23101_ab_in_bb', j23101_ab, [479,513], [sbol3.SO_PROMOTER], 4, False, name='j23101_ab_in_bb') + doc.add([j23101_ab_in_bb, j23101_ab_in_bb_seq]) + b0034_bc_in_bb, b0034_bc_in_bb_seq = part_in_backbone_from_sbol('b0034_bc_in_bb', b0034_bc, [479,499], [sbol3.SO_RBS], 4, False, name='b0034_bc_in_bb') + doc.add([b0034_bc_in_bb, b0034_bc_in_bb_seq]) + gfp_ce_in_bb, gfp_ce_in_bb_seq = part_in_backbone_from_sbol('gfp_ce_in_bb', gfp_ce, [479,1195], [sbol3.SO_CDS], 4, False, name='gfp_ce_in_bb') + doc.add([gfp_ce_in_bb, gfp_ce_in_bb_seq]) + rfp_ce_in_bb, rfp_ce_in_bb_seq = part_in_backbone_from_sbol('rfp_ce_in_bb', rfp_ce, [479,1156], [sbol3.SO_CDS], 4, False, name='rfp_ce_in_bb') + doc.add([rfp_ce_in_bb, rfp_ce_in_bb_seq]) + cfp_ce_in_bb, cfp_ce_in_bb_seq = part_in_backbone_from_sbol('cfp_ce_in_bb', cfp_ce, [479,1198], [sbol3.SO_CDS], 4, False, name='cfp_ce_in_bb') + doc.add([cfp_ce_in_bb, cfp_ce_in_bb_seq]) + b0015_ef_in_bb, b0015_ef_in_bb_seq = part_in_backbone_from_sbol('b0015_ef_in_bb', b0015_ef, [518,646], [sbol3.SO_TERMINATOR], 4, False, name='b0015_ef_in_bb') + doc.add([b0015_ef_in_bb, b0015_ef_in_bb_seq]) + + + #Assembly plan + combinatorial_assembly_plan = Assembly_plan_composite_in_backbone_single_enzyme( + name='combinatorial_rgb_transcriptional_units', + parts_in_backbone=[j23100_ab_in_bb, j23101_ab_in_bb, b0034_bc_in_bb, gfp_ce_in_bb, rfp_ce_in_bb, cfp_ce_in_bb, b0015_ef_in_bb], + acceptor_backbone=podd_backbone, + restriction_enzyme=bsai, + document=doc) + combinatorial_assembly_plan.run() + for obj in combinatorial_assembly_plan.document.objects: + if obj.identity =='http://sbolstandard.org/testfiles/composite_3_part_8_part_1_j23100_ab_in_bb_part_3_b0034_bc_in_bb_part_6_cfp_ce_in_bb_part_7_b0015_ef_in_bb': + obtained_sequence = obj.sequences[0].lookup().elements + target_sequence = 'cgctgcatgaagagcctgcagtccggcaaaaaagggcaaggtgtcaccaccctgccctttttctttaaaaccgaaaagattacttcgcgttatgcaggcttcctcgctcactgactcgctgcgctcggtcgttcggctgcggcgagcggtatcagctcactcaaaggcggtaatacggttatccacagaatcaggggataacgcaggaaagaacatgtgagcaaaaggccagcaaaaggccaggaaccgtaaaaaggccgcgttgctggcgtttttccacaggctccgcccccctgacgagcatcacaaaaatcgacgctcaagtcagaggtggcgaaacccgacaggactataaagataccaggcgtttccccctggaagctccctcgtgcgctctcctgttccgaccctgccgcttaccggatacctgtccgcctttctcccttcgggaagcgtggcgctttctcatagctcacgctgtaggtatctcagttcggtgtaggtcgttcgctccaagctgggctgtgtgcacgaaccccccgttcagcccgaccgctgcgccttatccggtaactatcgtcttgagtccaacccggtaagacacgacttatcgccactggcagcagccactggtaacaggattagcagagcgaggtatgtaggcggtgctacagagttcttgaagtggtggcctaactacggctacactagaagaacagtatttggtatctgcgctctgctgaagccagttaccttcggaaaaagagttggtagctcttgatccggcaaacaaaccaccgctggtagcggtggtttttttgtttgcaagcagcagattacgcgcagaaaaaaaggatctcaagaagatcctttgatcttttctacggggtctgacgctcagtggaacgaaaactcacgttaagggattttggtcatgagattatcaaaaaggatcttcacctagatccttttaaattaaaaatgaagttttaaatcaatctaaagtatatatgagtaaacttggtctgacagctcgagtcccgtcaagtcagcgtaatgctctgccagtgttacaaccaattaaccaattctgattagaaaaactcatcgagcatcaaatgaaactgcaatttattcatatcaggattatcaataccatatttttgaaaaagccgtttctgtaatgaaggagaaaactcaccgaggcagttccataggatggcaagatcctggtatcggtctgcgattccgactcgtccaacatcaatacaacctattaatttcccctcgtcaaaaataaggttatcaagtgagaaatcaccatgagtgacgactgaatccggtgagaatggcaaaagcttatgcatttctttccagacttgttcaacaggccagccattacgctcgtcatcaaaatcactcgcatcaaccaaaccgttattcattcgtgattgcgcctgagcgagacgaaatacgcgatcgctgttaaaaggacaattacaaacaggaatcgaatgcaaccggcgcaggaacactgccagcgcatcaacaatattttcacctgaatcaggatattcttctaatacctggaatgctgttttcccggggatcgcagtggtgagtaaccatgcatcatcaggagtacggataaaatgcttgatggtcggaagaggcataaattccgtcagccagtttagtctgaccatctcatctgtaacatcattggcaacgctacctttgccatgtttcagaaacaactctggcgcatcgggcttcccatacaatcgatagattgtcgcacctgattgcccgacattatcgcgagcccatttatacccatataaatcagcatccatgttggaatttaatcgcggcctggagcaagacgtttcccgttgaatatggctcataacaccccttgtattactgtttatgtaagcagacagttttattgttcatgatgatatatttttatcttgtgcaatgtaacatcagagattttgagacacaacgtggctttgttgaataaatcgaacttttgctgagttgaaggatcagctcgagtgccacctgacgtctaagaaaccattattatcatgacattaacctataaaaataggcgtatcacgaggcagaatttcagataaaaaaaatccttagctttcgctaaggatgatttctggaattcgctcttcaatgGGAGttgacggctagctcagtcctaggtacagtgctagcTACTagagaaagaggagaaatactaaatggtgagcaagggcgaggagctgttcaccggggtggtgcccatcctggtcgagctggacggcgacgtgaacggccacaagttcagcgtgtccggcgagggcgagggcgatgccacctacggcaagctgaccctgaagttcatctgcaccaccggcaagctgcccgtgccctggcccaccctcgtgaccaccctgacctggggcgtgcagtgcttcagccgctaccccgaccacatgaagcagcacgacttcttcaagtccgccatgcccgaaggctacgtccaggagcgcaccatcttcttcaaggacgacggcaactacaagacccgcgccgaggtgaagttcgagggcgacaccctggtgaaccgcatcgagctgaagggcatcgacttcaaggaggacggcaacatcctggggcacaagctggagtacaactacatcagccacaacgtctatatcaccgccgacaagcagaagaacggcatcaaggccaacttcaagatccgccacaacatcgaggacggcagcgtgcagctcgccgaccactaccagcagaacacccccatcggcgacggccccgtgctgctgcccgacaaccactacctgagcacccagtccgccctgagcaaagaccccaacgagaagcgcgatcacatggtcctgctggagttcgtgaccgccgccgggatcactctcggcatggacgagctgtacaagtaataaGCTTccaggcatcaaataaaacgaaaggctcagtcgaaagactgggcctttcgttttatctgttgtttgtcggtgaacgctctctactagagtcacactggctcaccttcgggtgggcctttctgcgtttata' if __name__ == '__main__': unittest.main() diff --git a/test/test_files/ab_j23100.gb b/test/test_files/ab_j23100.gb new file mode 100644 index 00000000..de329983 --- /dev/null +++ b/test/test_files/ab_j23100.gb @@ -0,0 +1,80 @@ +LOCUS AB_J23100 2095 bp ds-DNA linear 25-OCT-2023 +DEFINITION . +FEATURES Location/Qualifiers + misc_feature 346..365 + /label="VF Primer binding site" + /ApEinfo_revcolor="#b7e6d7" + /ApEinfo_fwdcolor="#b7e6d7" + misc_feature complement(422..466) + /label="Terminator" + /ApEinfo_revcolor="#c7b0e3" + /ApEinfo_fwdcolor="#c7b0e3" + BioBrick 468..473 + /label="BsaI Site" + /ApEinfo_revcolor="#f58a5e" + /ApEinfo_fwdcolor="#f58a5e" + misc_feature 475..478 + /label="A" + /ApEinfo_revcolor="#ff9ccd" + /ApEinfo_fwdcolor="#ff9ccd" + misc_feature 479..513 + /label="J23100" + /ApEinfo_revcolor="#b1ff67" + /ApEinfo_fwdcolor="#b1ff67" + misc_feature 514..517 + /label="B" + /ApEinfo_revcolor="#f8d3a9" + /ApEinfo_fwdcolor="#f8d3a9" + misc_feature 531..602 + /label="His Terminator" + /ApEinfo_revcolor="#ffef86" + /ApEinfo_fwdcolor="#ffef86" + misc_feature complement(666..685) + /label="VR Primer binding site" + /ApEinfo_revcolor="#b1ff67" + /ApEinfo_fwdcolor="#b1ff67" + misc_feature complement(770..1384) + /label="rep (pMB1)" + /ApEinfo_revcolor="#85dae9" + /ApEinfo_fwdcolor="#85dae9" + misc_feature complement(1551..1656) + /label="Terminator T0" + /ApEinfo_revcolor="#c6c9d1" + /ApEinfo_fwdcolor="#c6c9d1" +ORIGIN + 1 tcattgccat acgaaattcc ggatgagcat tcatcaggcg ggcaagaatg tgaataaagg + 61 ccggataaaa cttgtgctta tttttcttta cggtctttaa aaaggccgta atatccagct + 121 gaacggtctg gttataggta cattgagcaa ctgactgaaa tgcctcaaaa tgttctttac + 181 gatgccattg ggatatatca acggtggtat atccagtgat ttttttctcc attttagctt + 241 ccttagctcc tgaaaatctc gataactcaa aaaatacgcc cggtagtgat cttatttcat + 301 tatggtgaaa gttggaacct cttacgtgcc cgatcaactc gagtgccacc tgacgtctaa + 361 gaaaccatta ttatcatgac attaacctat aaaaataggc gtatcacgag gcagaatttc + 421 agataaaaaa aatccttagc tttcgctaag gatgatttct ggaattcggt ctcgGGAGtt + 481 gacggctagc tcagtcctag gtacagtgct agcTACTCGA Gaccctgcag tccggcaaaa + 541 aagggcaagg tgtcaccacc ctgccctttt tctttaaaac cgaaaagatt acttcgcgtt + 601 atgcaggctt cctcgctcac tgactcgctg cgctcggtcg ttcggctgcg gcgagcggta + 661 tcagctcact caaaggcggt aatacggtta tccacagaat caggggataa cgcaggaaag + 721 aacatgtgag caaaaggcca gcaaaaggcc aggaaccgta aaaaggccgc gttgctggcg + 781 tttttccaca ggctccgccc ccctgacgag catcacaaaa atcgacgctc aagtcagagg + 841 tggcgaaacc cgacaggact ataaagatac caggcgtttc cccctggaag ctccctcgtg + 901 cgctctcctg ttccgaccct gccgcttacc ggatacctgt ccgcctttct cccttcggga + 961 agcgtggcgc tttctcatag ctcacgctgt aggtatctca gttcggtgta ggtcgttcgc + 1021 tccaagctgg gctgtgtgca cgaacccccc gttcagcccg accgctgcgc cttatccggt + 1081 aactatcgtc ttgagtccaa cccggtaaga cacgacttat cgccactggc agcagccact + 1141 ggtaacagga ttagcagagc gaggtatgta ggcggtgcta cagagttctt gaagtggtgg + 1201 cctaactacg gctacactag aagaacagta tttggtatct gcgctctgct gaagccagtt + 1261 accttcggaa aaagagttgg tagctcttga tccggcaaac aaaccaccgc tggtagcggt + 1321 ggtttttttg tttgcaagca gcagattacg cgcagaaaaa aaggatctca agaagatcct + 1381 ttgatctttt ctacggggtc tgacgctcag tggaacgaaa actcacgtta agggattttg + 1441 gtcatgagat tatcaaaaag gatcttcacc tagatccttt taaattaaaa atgaagtttt + 1501 aaatcaatct aaagtatata tgagtaaact tggtctgaca gctcgaggct tggattctca + 1561 ccaataaaaa acgcccggcg gcaaccgagc gttctgaaca aatccagatg gagttctgag + 1621 gtcattactg gatctatcaa caggagtcca agcgagctcg atatcaaatt acgccccgcc + 1681 ctgccactca tcgcagtact gttgtaattc attaagcatt ctgccgacat ggaagccatc + 1741 acaaacggca tgatgaacct gaatcgccag cggcatcagc accttgtcgc cttgcgtata + 1801 atatttgccc atggtgaaaa cgggggcgaa gaagttgtcc atattggcca cgtttaaatc + 1861 aaaactggtg aaactcaccc agggattggc tgagacgaaa aacatattct caataaaccc + 1921 tttagggaaa taggccaggt tttcaccgta acacgccaca tcttgcgaat atatgtgtag + 1981 aaactgccgg aaatcgtcgt ggtattcact ccagagcgat gaaaacgttt cagtttgctc + 2041 atggaaaacg gtgtaacaag ggtgaacact atcccatatc accagctcac cgtct +// \ No newline at end of file diff --git a/test/test_files/ab_j23101.gb b/test/test_files/ab_j23101.gb new file mode 100644 index 00000000..6f534a76 --- /dev/null +++ b/test/test_files/ab_j23101.gb @@ -0,0 +1,81 @@ +LOCUS AB_J23101 2095 bp ds-DNA circular 25-OCT-2023 +DEFINITION . +COMMENT From pSB1C00 - Loop universal L0 acceptor and J23100_B0034_AC +FEATURES Location/Qualifiers + misc_feature 346..365 + /label="VF Primer binding site" + /ApEinfo_revcolor="#b7e6d7" + /ApEinfo_fwdcolor="#b7e6d7" + misc_feature complement(422..466) + /label="Terminator" + /ApEinfo_revcolor="#c7b0e3" + /ApEinfo_fwdcolor="#c7b0e3" + BioBrick 468..473 + /label="BsaI Site" + /ApEinfo_revcolor="#f58a5e" + /ApEinfo_fwdcolor="#f58a5e" + misc_feature 475..478 + /label="A" + /ApEinfo_revcolor="#ff9ccd" + /ApEinfo_fwdcolor="#ff9ccd" + misc_feature 479..513 + /label="J23101" + /ApEinfo_revcolor="#b1ff67" + /ApEinfo_fwdcolor="#b1ff67" + misc_feature 514..517 + /label="B" + /ApEinfo_revcolor="#f8d3a9" + /ApEinfo_fwdcolor="#f8d3a9" + misc_feature 531..602 + /label="His Terminator" + /ApEinfo_revcolor="#ffef86" + /ApEinfo_fwdcolor="#ffef86" + misc_feature complement(666..685) + /label="VR Primer binding site" + /ApEinfo_revcolor="#b1ff67" + /ApEinfo_fwdcolor="#b1ff67" + misc_feature complement(770..1384) + /label="rep (pMB1)" + /ApEinfo_revcolor="#85dae9" + /ApEinfo_fwdcolor="#85dae9" + misc_feature complement(1551..1656) + /label="Terminator T0" + /ApEinfo_revcolor="#c6c9d1" + /ApEinfo_fwdcolor="#c6c9d1" +ORIGIN + 1 tcattgccat acgaaattcc ggatgagcat tcatcaggcg ggcaagaatg tgaataaagg + 61 ccggataaaa cttgtgctta tttttcttta cggtctttaa aaaggccgta atatccagct + 121 gaacggtctg gttataggta cattgagcaa ctgactgaaa tgcctcaaaa tgttctttac + 181 gatgccattg ggatatatca acggtggtat atccagtgat ttttttctcc attttagctt + 241 ccttagctcc tgaaaatctc gataactcaa aaaatacgcc cggtagtgat cttatttcat + 301 tatggtgaaa gttggaacct cttacgtgcc cgatcaactc gagtgccacc tgacgtctaa + 361 gaaaccatta ttatcatgac attaacctat aaaaataggc gtatcacgag gcagaatttc + 421 agataaaaaa aatccttagc tttcgctaag gatgatttct ggaattcggt ctcgGGAGtt + 481 tacagctagc tcagtcctag gtattatgct agcTACTCGA Gaccctgcag tccggcaaaa + 541 aagggcaagg tgtcaccacc ctgccctttt tctttaaaac cgaaaagatt acttcgcgtt + 601 atgcaggctt cctcgctcac tgactcgctg cgctcggtcg ttcggctgcg gcgagcggta + 661 tcagctcact caaaggcggt aatacggtta tccacagaat caggggataa cgcaggaaag + 721 aacatgtgag caaaaggcca gcaaaaggcc aggaaccgta aaaaggccgc gttgctggcg + 781 tttttccaca ggctccgccc ccctgacgag catcacaaaa atcgacgctc aagtcagagg + 841 tggcgaaacc cgacaggact ataaagatac caggcgtttc cccctggaag ctccctcgtg + 901 cgctctcctg ttccgaccct gccgcttacc ggatacctgt ccgcctttct cccttcggga + 961 agcgtggcgc tttctcatag ctcacgctgt aggtatctca gttcggtgta ggtcgttcgc + 1021 tccaagctgg gctgtgtgca cgaacccccc gttcagcccg accgctgcgc cttatccggt + 1081 aactatcgtc ttgagtccaa cccggtaaga cacgacttat cgccactggc agcagccact + 1141 ggtaacagga ttagcagagc gaggtatgta ggcggtgcta cagagttctt gaagtggtgg + 1201 cctaactacg gctacactag aagaacagta tttggtatct gcgctctgct gaagccagtt + 1261 accttcggaa aaagagttgg tagctcttga tccggcaaac aaaccaccgc tggtagcggt + 1321 ggtttttttg tttgcaagca gcagattacg cgcagaaaaa aaggatctca agaagatcct + 1381 ttgatctttt ctacggggtc tgacgctcag tggaacgaaa actcacgtta agggattttg + 1441 gtcatgagat tatcaaaaag gatcttcacc tagatccttt taaattaaaa atgaagtttt + 1501 aaatcaatct aaagtatata tgagtaaact tggtctgaca gctcgaggct tggattctca + 1561 ccaataaaaa acgcccggcg gcaaccgagc gttctgaaca aatccagatg gagttctgag + 1621 gtcattactg gatctatcaa caggagtcca agcgagctcg atatcaaatt acgccccgcc + 1681 ctgccactca tcgcagtact gttgtaattc attaagcatt ctgccgacat ggaagccatc + 1741 acaaacggca tgatgaacct gaatcgccag cggcatcagc accttgtcgc cttgcgtata + 1801 atatttgccc atggtgaaaa cgggggcgaa gaagttgtcc atattggcca cgtttaaatc + 1861 aaaactggtg aaactcaccc agggattggc tgagacgaaa aacatattct caataaaccc + 1921 tttagggaaa taggccaggt tttcaccgta acacgccaca tcttgcgaat atatgtgtag + 1981 aaactgccgg aaatcgtcgt ggtattcact ccagagcgat gaaaacgttt cagtttgctc + 2041 atggaaaacg gtgtaacaag ggtgaacact atcccatatc accagctcac cgtct +// \ No newline at end of file diff --git a/test/test_files/b0015.gb b/test/test_files/b0015.gb new file mode 100644 index 00000000..079d75ae --- /dev/null +++ b/test/test_files/b0015.gb @@ -0,0 +1,87 @@ +LOCUS Copy_of_B0015_EF:_pSB1C 2190 bp ds-DNA circular 28-JAN-2023 +DEFINITION . +COMMENT From pSB1C00 - Loop universal L0 acceptor and B0015_EF +FEATURES Location/Qualifiers + misc_feature 385..404 + /label="VF Primer binding site" + /ApEinfo_revcolor="#b7e6d7" + /ApEinfo_fwdcolor="#b7e6d7" + misc_feature complement(461..505) + /label="Terminator" + /ApEinfo_revcolor="#c7b0e3" + /ApEinfo_fwdcolor="#c7b0e3" + BioBrick 507..512 + /label="BsaI Site" + /ApEinfo_revcolor="#f58a5e" + /ApEinfo_fwdcolor="#f58a5e" + terminator 517..646 + /label="BBa-B0015 Terminator" + /ApEinfo_revcolor="#ff9ccd" + /ApEinfo_fwdcolor="#ff9ccd" + terminator 526..597 + /label="rrnB T1 terminator" + /ApEinfo_revcolor="#c7b0e3" + /ApEinfo_fwdcolor="#c7b0e3" + terminator 613..640 + /label="T7Te terminator" + /ApEinfo_revcolor="#75c6a9" + /ApEinfo_fwdcolor="#75c6a9" + misc_feature 664..735 + /label="His Terminator" + /ApEinfo_revcolor="#ffef86" + /ApEinfo_fwdcolor="#ffef86" + misc_feature complement(799..818) + /label="VR Primer binding site" + /ApEinfo_revcolor="#b1ff67" + /ApEinfo_fwdcolor="#b1ff67" + misc_feature complement(903..1517) + /label="rep (pMB1)" + /ApEinfo_revcolor="#85dae9" + /ApEinfo_fwdcolor="#85dae9" + misc_feature complement(1684..1789) + /label="Terminator T0" + /ApEinfo_revcolor="#c6c9d1" + /ApEinfo_fwdcolor="#c6c9d1" + CDS complement(1802..271) + /label="Cam Resistance" + /ApEinfo_revcolor="#ff9ccd" + /ApEinfo_fwdcolor="#ff9ccd" +ORIGIN + 1 aagggtgaac actatcccat atcaccagct caccgtcttt cattgccata cgaaattccg + 61 gatgagcatt catcaggcgg gcaagaatgt gaataaaggc cggataaaac ttgtgcttat + 121 ttttctttac ggtctttaaa aaggccgtaa tatccagctg aacggtctgg ttataggtac + 181 attgagcaac tgactgaaat gcctcaaaat gttctttacg atgccattgg gatatatcaa + 241 cggtggtata tccagtgatt tttttctcca ttttagcttc cttagctcct gaaaatctcg + 301 ataactcaaa aaatacgccc ggtagtgatc ttatttcatt atggtgaaag ttggaacctc + 361 ttacgtgccc gatcaactcg agtgccacct gacgtctaag aaaccattat tatcatgaca + 421 ttaacctata aaaataggcg tatcacgagg cagaatttca gataaaaaaa atccttagct + 481 ttcgctaagg atgatttctg gaattcggtc tcggcttcca ggcatcaaat aaaacgaaag + 541 gctcagtcga aagactgggc ctttcgtttt atctgttgtt tgtcggtgaa cgctctctac + 601 tagagtcaca ctggctcacc ttcgggtggg cctttctgcg tttatacgct CGAGaccctg + 661 cagtccggca aaaaagggca aggtgtcacc accctgccct ttttctttaa aaccgaaaag + 721 attacttcgc gttatgcagg cttcctcgct cactgactcg ctgcgctcgg tcgttcggct + 781 gcggcgagcg gtatcagctc actcaaaggc ggtaatacgg ttatccacag aatcagggga + 841 taacgcagga aagaacatgt gagcaaaagg ccagcaaaag gccaggaacc gtaaaaaggc + 901 cgcgttgctg gcgtttttcc acaggctccg cccccctgac gagcatcaca aaaatcgacg + 961 ctcaagtcag aggtggcgaa acccgacagg actataaaga taccaggcgt ttccccctgg + 1021 aagctccctc gtgcgctctc ctgttccgac cctgccgctt accggatacc tgtccgcctt + 1081 tctcccttcg ggaagcgtgg cgctttctca tagctcacgc tgtaggtatc tcagttcggt + 1141 gtaggtcgtt cgctccaagc tgggctgtgt gcacgaaccc cccgttcagc ccgaccgctg + 1201 cgccttatcc ggtaactatc gtcttgagtc caacccggta agacacgact tatcgccact + 1261 ggcagcagcc actggtaaca ggattagcag agcgaggtat gtaggcggtg ctacagagtt + 1321 cttgaagtgg tggcctaact acggctacac tagaagaaca gtatttggta tctgcgctct + 1381 gctgaagcca gttaccttcg gaaaaagagt tggtagctct tgatccggca aacaaaccac + 1441 cgctggtagc ggtggttttt ttgtttgcaa gcagcagatt acgcgcagaa aaaaaggatc + 1501 tcaagaagat cctttgatct tttctacggg gtctgacgct cagtggaacg aaaactcacg + 1561 ttaagggatt ttggtcatga gattatcaaa aaggatcttc acctagatcc ttttaaatta + 1621 aaaatgaagt tttaaatcaa tctaaagtat atatgagtaa acttggtctg acagctcgag + 1681 gcttggattc tcaccaataa aaaacgcccg gcggcaaccg agcgttctga acaaatccag + 1741 atggagttct gaggtcatta ctggatctat caacaggagt ccaagcgagc tcgatatcaa + 1801 attacgcccc gccctgccac tcatcgcagt actgttgtaa ttcattaagc attctgccga + 1861 catggaagcc atcacaaacg gcatgatgaa cctgaatcgc cagcggcatc agcaccttgt + 1921 cgccttgcgt ataatatttg cccatggtga aaacgggggc gaagaagttg tccatattgg + 1981 ccacgtttaa atcaaaactg gtgaaactca cccagggatt ggctgagacg aaaaacatat + 2041 tctcaataaa ccctttaggg aaataggcca ggttttcacc gtaacacgcc acatcttgcg + 2101 aatatatgtg tagaaactgc cggaaatcgt cgtggtattc actccagagc gatgaaaacg + 2161 tttcagtttg ctcatggaaa acggtgtaac +// \ No newline at end of file diff --git a/test/test_files/bc_b0034.gb b/test/test_files/bc_b0034.gb new file mode 100644 index 00000000..16872bdd --- /dev/null +++ b/test/test_files/bc_b0034.gb @@ -0,0 +1,81 @@ +LOCUS BC_B0034 2081 bp ds-DNA circular 25-OCT-2023 +DEFINITION . +COMMENT From pSB1C00 - Loop universal L0 acceptor and J23100_B0034_AC +FEATURES Location/Qualifiers + misc_feature 346..365 + /label="VF Primer binding site" + /ApEinfo_revcolor="#b7e6d7" + /ApEinfo_fwdcolor="#b7e6d7" + misc_feature complement(422..466) + /label="Terminator" + /ApEinfo_revcolor="#c7b0e3" + /ApEinfo_fwdcolor="#c7b0e3" + BioBrick 468..473 + /label="BsaI Site" + /ApEinfo_revcolor="#f58a5e" + /ApEinfo_fwdcolor="#f58a5e" + misc_feature 475..478 + /label="B" + /ApEinfo_revcolor="#f8d3a9" + /ApEinfo_fwdcolor="#f8d3a9" + misc_feature 479..499 + /label="B0034" + /ApEinfo_revcolor="#f58a5e" + /ApEinfo_fwdcolor="#f58a5e" + misc_feature 500..503 + /label="C" + /ApEinfo_revcolor="#85dae9" + /ApEinfo_fwdcolor="#85dae9" + misc_feature 517..588 + /label="His Terminator" + /ApEinfo_revcolor="#ffef86" + /ApEinfo_fwdcolor="#ffef86" + misc_feature complement(652..671) + /label="VR Primer binding site" + /ApEinfo_revcolor="#b1ff67" + /ApEinfo_fwdcolor="#b1ff67" + misc_feature complement(756..1370) + /label="rep (pMB1)" + /ApEinfo_revcolor="#85dae9" + /ApEinfo_fwdcolor="#85dae9" + misc_feature complement(1537..1642) + /label="Terminator T0" + /ApEinfo_revcolor="#c6c9d1" + /ApEinfo_fwdcolor="#c6c9d1" +ORIGIN + 1 tcattgccat acgaaattcc ggatgagcat tcatcaggcg ggcaagaatg tgaataaagg + 61 ccggataaaa cttgtgctta tttttcttta cggtctttaa aaaggccgta atatccagct + 121 gaacggtctg gttataggta cattgagcaa ctgactgaaa tgcctcaaaa tgttctttac + 181 gatgccattg ggatatatca acggtggtat atccagtgat ttttttctcc attttagctt + 241 ccttagctcc tgaaaatctc gataactcaa aaaatacgcc cggtagtgat cttatttcat + 301 tatggtgaaa gttggaacct cttacgtgcc cgatcaactc gagtgccacc tgacgtctaa + 361 gaaaccatta ttatcatgac attaacctat aaaaataggc gtatcacgag gcagaatttc + 421 agataaaaaa aatccttagc tttcgctaag gatgatttct ggaattcggt ctcgTACTag + 481 agaaagagga gaaatactaa atgCGAGacc ctgcagtccg gcaaaaaagg gcaaggtgtc + 541 accaccctgc cctttttctt taaaaccgaa aagattactt cgcgttatgc aggcttcctc + 601 gctcactgac tcgctgcgct cggtcgttcg gctgcggcga gcggtatcag ctcactcaaa + 661 ggcggtaata cggttatcca cagaatcagg ggataacgca ggaaagaaca tgtgagcaaa + 721 aggccagcaa aaggccagga accgtaaaaa ggccgcgttg ctggcgtttt tccacaggct + 781 ccgcccccct gacgagcatc acaaaaatcg acgctcaagt cagaggtggc gaaacccgac + 841 aggactataa agataccagg cgtttccccc tggaagctcc ctcgtgcgct ctcctgttcc + 901 gaccctgccg cttaccggat acctgtccgc ctttctccct tcgggaagcg tggcgctttc + 961 tcatagctca cgctgtaggt atctcagttc ggtgtaggtc gttcgctcca agctgggctg + 1021 tgtgcacgaa ccccccgttc agcccgaccg ctgcgcctta tccggtaact atcgtcttga + 1081 gtccaacccg gtaagacacg acttatcgcc actggcagca gccactggta acaggattag + 1141 cagagcgagg tatgtaggcg gtgctacaga gttcttgaag tggtggccta actacggcta + 1201 cactagaaga acagtatttg gtatctgcgc tctgctgaag ccagttacct tcggaaaaag + 1261 agttggtagc tcttgatccg gcaaacaaac caccgctggt agcggtggtt tttttgtttg + 1321 caagcagcag attacgcgca gaaaaaaagg atctcaagaa gatcctttga tcttttctac + 1381 ggggtctgac gctcagtgga acgaaaactc acgttaaggg attttggtca tgagattatc + 1441 aaaaaggatc ttcacctaga tccttttaaa ttaaaaatga agttttaaat caatctaaag + 1501 tatatatgag taaacttggt ctgacagctc gaggcttgga ttctcaccaa taaaaaacgc + 1561 ccggcggcaa ccgagcgttc tgaacaaatc cagatggagt tctgaggtca ttactggatc + 1621 tatcaacagg agtccaagcg agctcgatat caaattacgc cccgccctgc cactcatcgc + 1681 agtactgttg taattcatta agcattctgc cgacatggaa gccatcacaa acggcatgat + 1741 gaacctgaat cgccagcggc atcagcacct tgtcgccttg cgtataatat ttgcccatgg + 1801 tgaaaacggg ggcgaagaag ttgtccatat tggccacgtt taaatcaaaa ctggtgaaac + 1861 tcacccaggg attggctgag acgaaaaaca tattctcaat aaacccttta gggaaatagg + 1921 ccaggttttc accgtaacac gccacatctt gcgaatatat gtgtagaaac tgccggaaat + 1981 cgtcgtggta ttcactccag agcgatgaaa acgtttcagt ttgctcatgg aaaacggtgt + 2041 aacaagggtg aacactatcc catatcacca gctcaccgtc t +// \ No newline at end of file diff --git a/test/test_files/ce_ecfp.gb b/test/test_files/ce_ecfp.gb new file mode 100644 index 00000000..24b64599 --- /dev/null +++ b/test/test_files/ce_ecfp.gb @@ -0,0 +1,93 @@ +LOCUS CE_eCFP 2780 bp ds-DNA circular 25-OCT-2023 +DEFINITION . +COMMENT From pSB1C00 - Loop universal L0 acceptor and J23100_B0034_AC +FEATURES Location/Qualifiers + misc_feature 346..365 + /label="VF Primer binding site" + /ApEinfo_revcolor="#b7e6d7" + /ApEinfo_fwdcolor="#b7e6d7" + misc_feature complement(422..466) + /label="Terminator" + /ApEinfo_revcolor="#c7b0e3" + /ApEinfo_fwdcolor="#c7b0e3" + BioBrick 468..473 + /label="BsaI Site" + /ApEinfo_revcolor="#f58a5e" + /ApEinfo_fwdcolor="#f58a5e" + misc_feature 475..478 + /label="C" + /ApEinfo_revcolor="#85dae9" + /ApEinfo_fwdcolor="#85dae9" + misc_feature 479..1198 + /label="eCFP" + /ApEinfo_revcolor="#84b0dc" + /ApEinfo_fwdcolor="#84b0dc" + misc_feature 1199..1202 + /label="E" + /ApEinfo_revcolor="#b7e6d7" + /ApEinfo_fwdcolor="#b7e6d7" + misc_feature 1216..1287 + /label="His Terminator" + /ApEinfo_revcolor="#ffef86" + /ApEinfo_fwdcolor="#ffef86" + misc_feature complement(1351..1370) + /label="VR Primer binding site" + /ApEinfo_revcolor="#b1ff67" + /ApEinfo_fwdcolor="#b1ff67" + misc_feature complement(1455..2069) + /label="rep (pMB1)" + /ApEinfo_revcolor="#85dae9" + /ApEinfo_fwdcolor="#85dae9" + misc_feature complement(2236..2341) + /label="Terminator T0" + /ApEinfo_revcolor="#c6c9d1" + /ApEinfo_fwdcolor="#c6c9d1" +ORIGIN + 1 tcattgccat acgaaattcc ggatgagcat tcatcaggcg ggcaagaatg tgaataaagg + 61 ccggataaaa cttgtgctta tttttcttta cggtctttaa aaaggccgta atatccagct + 121 gaacggtctg gttataggta cattgagcaa ctgactgaaa tgcctcaaaa tgttctttac + 181 gatgccattg ggatatatca acggtggtat atccagtgat ttttttctcc attttagctt + 241 ccttagctcc tgaaaatctc gataactcaa aaaatacgcc cggtagtgat cttatttcat + 301 tatggtgaaa gttggaacct cttacgtgcc cgatcaactc gagtgccacc tgacgtctaa + 361 gaaaccatta ttatcatgac attaacctat aaaaataggc gtatcacgag gcagaatttc + 421 agataaaaaa aatccttagc tttcgctaag gatgatttct ggaattcggt ctcgaatggt + 481 gagcaagggc gaggagctgt tcaccggggt ggtgcccatc ctggtcgagc tggacggcga + 541 cgtgaacggc cacaagttca gcgtgtccgg cgagggcgag ggcgatgcca cctacggcaa + 601 gctgaccctg aagttcatct gcaccaccgg caagctgccc gtgccctggc ccaccctcgt + 661 gaccaccctg acctggggcg tgcagtgctt cagccgctac cccgaccaca tgaagcagca + 721 cgacttcttc aagtccgcca tgcccgaagg ctacgtccag gagcgcacca tcttcttcaa + 781 ggacgacggc aactacaaga cccgcgccga ggtgaagttc gagggcgaca ccctggtgaa + 841 ccgcatcgag ctgaagggca tcgacttcaa ggaggacggc aacatcctgg ggcacaagct + 901 ggagtacaac tacatcagcc acaacgtcta tatcaccgcc gacaagcaga agaacggcat + 961 caaggccaac ttcaagatcc gccacaacat cgaggacggc agcgtgcagc tcgccgacca + 1021 ctaccagcag aacaccccca tcggcgacgg ccccgtgctg ctgcccgaca accactacct + 1081 gagcacccag tccgccctga gcaaagaccc caacgagaag cgcgatcaca tggtcctgct + 1141 ggagttcgtg accgccgccg ggatcactct cggcatggac gagctgtaca agtaataagc + 1201 ttCGAGaccc tgcagtccgg caaaaaaggg caaggtgtca ccaccctgcc ctttttcttt + 1261 aaaaccgaaa agattacttc gcgttatgca ggcttcctcg ctcactgact cgctgcgctc + 1321 ggtcgttcgg ctgcggcgag cggtatcagc tcactcaaag gcggtaatac ggttatccac + 1381 agaatcaggg gataacgcag gaaagaacat gtgagcaaaa ggccagcaaa aggccaggaa + 1441 ccgtaaaaag gccgcgttgc tggcgttttt ccacaggctc cgcccccctg acgagcatca + 1501 caaaaatcga cgctcaagtc agaggtggcg aaacccgaca ggactataaa gataccaggc + 1561 gtttccccct ggaagctccc tcgtgcgctc tcctgttccg accctgccgc ttaccggata + 1621 cctgtccgcc tttctccctt cgggaagcgt ggcgctttct catagctcac gctgtaggta + 1681 tctcagttcg gtgtaggtcg ttcgctccaa gctgggctgt gtgcacgaac cccccgttca + 1741 gcccgaccgc tgcgccttat ccggtaacta tcgtcttgag tccaacccgg taagacacga + 1801 cttatcgcca ctggcagcag ccactggtaa caggattagc agagcgaggt atgtaggcgg + 1861 tgctacagag ttcttgaagt ggtggcctaa ctacggctac actagaagaa cagtatttgg + 1921 tatctgcgct ctgctgaagc cagttacctt cggaaaaaga gttggtagct cttgatccgg + 1981 caaacaaacc accgctggta gcggtggttt ttttgtttgc aagcagcaga ttacgcgcag + 2041 aaaaaaagga tctcaagaag atcctttgat cttttctacg gggtctgacg ctcagtggaa + 2101 cgaaaactca cgttaaggga ttttggtcat gagattatca aaaaggatct tcacctagat + 2161 ccttttaaat taaaaatgaa gttttaaatc aatctaaagt atatatgagt aaacttggtc + 2221 tgacagctcg aggcttggat tctcaccaat aaaaaacgcc cggcggcaac cgagcgttct + 2281 gaacaaatcc agatggagtt ctgaggtcat tactggatct atcaacagga gtccaagcga + 2341 gctcgatatc aaattacgcc ccgccctgcc actcatcgca gtactgttgt aattcattaa + 2401 gcattctgcc gacatggaag ccatcacaaa cggcatgatg aacctgaatc gccagcggca + 2461 tcagcacctt gtcgccttgc gtataatatt tgcccatggt gaaaacgggg gcgaagaagt + 2521 tgtccatatt ggccacgttt aaatcaaaac tggtgaaact cacccaggga ttggctgaga + 2581 cgaaaaacat attctcaata aaccctttag ggaaataggc caggttttca ccgtaacacg + 2641 ccacatcttg cgaatatatg tgtagaaact gccggaaatc gtcgtggtat tcactccaga + 2701 gcgatgaaaa cgtttcagtt tgctcatgga aaacggtgta acaagggtga acactatccc + 2761 atatcaccag ctcaccgtct +// \ No newline at end of file diff --git a/test/test_files/ce_gfp.gb b/test/test_files/ce_gfp.gb new file mode 100644 index 00000000..e4bf9a15 --- /dev/null +++ b/test/test_files/ce_gfp.gb @@ -0,0 +1,93 @@ +LOCUS CE_GFP 2777 bp ds-DNA circular 26-OCT-2023 +DEFINITION . +COMMENT From pSB1C00 - Loop universal L0 acceptor and J23100_B0034_AC +FEATURES Location/Qualifiers + misc_feature 346..365 + /label="VF Primer binding site" + /ApEinfo_revcolor="#b7e6d7" + /ApEinfo_fwdcolor="#b7e6d7" + misc_feature complement(422..466) + /label="Terminator" + /ApEinfo_revcolor="#c7b0e3" + /ApEinfo_fwdcolor="#c7b0e3" + BioBrick 468..473 + /label="BsaI Site" + /ApEinfo_revcolor="#f58a5e" + /ApEinfo_fwdcolor="#f58a5e" + misc_feature 475..478 + /label="C" + /ApEinfo_revcolor="#85dae9" + /ApEinfo_fwdcolor="#85dae9" + misc_feature 479..1195 + /label="GFP" + /ApEinfo_revcolor="#b1ff67" + /ApEinfo_fwdcolor="#b1ff67" + misc_feature 1196..1199 + /label="E" + /ApEinfo_revcolor="#b7e6d7" + /ApEinfo_fwdcolor="#b7e6d7" + misc_feature 1213..1284 + /label="His Terminator" + /ApEinfo_revcolor="#ffef86" + /ApEinfo_fwdcolor="#ffef86" + misc_feature complement(1348..1367) + /label="VR Primer binding site" + /ApEinfo_revcolor="#b1ff67" + /ApEinfo_fwdcolor="#b1ff67" + misc_feature complement(1452..2066) + /label="rep (pMB1)" + /ApEinfo_revcolor="#85dae9" + /ApEinfo_fwdcolor="#85dae9" + misc_feature complement(2233..2338) + /label="Terminator T0" + /ApEinfo_revcolor="#c6c9d1" + /ApEinfo_fwdcolor="#c6c9d1" +ORIGIN + 1 tcattgccat acgaaattcc ggatgagcat tcatcaggcg ggcaagaatg tgaataaagg + 61 ccggataaaa cttgtgctta tttttcttta cggtctttaa aaaggccgta atatccagct + 121 gaacggtctg gttataggta cattgagcaa ctgactgaaa tgcctcaaaa tgttctttac + 181 gatgccattg ggatatatca acggtggtat atccagtgat ttttttctcc attttagctt + 241 ccttagctcc tgaaaatctc gataactcaa aaaatacgcc cggtagtgat cttatttcat + 301 tatggtgaaa gttggaacct cttacgtgcc cgatcaactc gagtgccacc tgacgtctaa + 361 gaaaccatta ttatcatgac attaacctat aaaaataggc gtatcacgag gcagaatttc + 421 agataaaaaa aatccttagc tttcgctaag gatgatttct ggaattcggt ctcgaatggt + 481 gagcaagggc gaggagctgt tcaccggggt ggtgcccatc ctggtcgagc tggacggcga + 541 cgtaaacggc cacaagttca gcgtgtccgg cgagggcgag ggcgatgcca cctacggcaa + 601 gctgaccctg aagttcatct gcaccaccgg caagctgccc gtgccctggc ccaccctcgt + 661 gaccaccttc agctacggcg tgcagtgctt cagccgctac cccgaccaca tgaagcagca + 721 cgacttcttc aagtccgcca tgcccgaagg ctacgtccag gagcgcacca tcttcttcaa + 781 ggacgacggc aactacaaga cccgcgccga ggtgaagttc gagggcgaca ccctggtgaa + 841 ccgcatcgag ctgaagggca tcgacttcaa ggaggacggc aacatcctgg ggcacaagct + 901 ggagtacaac tacaacagcc acaacgtcta tatcatggcc gacaagcaga agaacggcat + 961 caaggtgaac ttcaagatcc gccacaacat cgaggacggc agcgtgcagc tcgccgacca + 1021 ctaccagcag aacaccccca tcggcgacgg ccccgtgctg ctgcccgaca accactacct + 1081 gagcacccag tccgccctga gcaaagaccc caacgagaag cgcgatcaca tggtcctgct + 1141 ggagttcgtg accgccgccg ggatcactca cggcatggac gagctgtaca agtaagcttC + 1201 GAGaccctgc agtccggcaa aaaagggcaa ggtgtcacca ccctgccctt tttctttaaa + 1261 accgaaaaga ttacttcgcg ttatgcaggc ttcctcgctc actgactcgc tgcgctcggt + 1321 cgttcggctg cggcgagcgg tatcagctca ctcaaaggcg gtaatacggt tatccacaga + 1381 atcaggggat aacgcaggaa agaacatgtg agcaaaaggc cagcaaaagg ccaggaaccg + 1441 taaaaaggcc gcgttgctgg cgtttttcca caggctccgc ccccctgacg agcatcacaa + 1501 aaatcgacgc tcaagtcaga ggtggcgaaa cccgacagga ctataaagat accaggcgtt + 1561 tccccctgga agctccctcg tgcgctctcc tgttccgacc ctgccgctta ccggatacct + 1621 gtccgccttt ctcccttcgg gaagcgtggc gctttctcat agctcacgct gtaggtatct + 1681 cagttcggtg taggtcgttc gctccaagct gggctgtgtg cacgaacccc ccgttcagcc + 1741 cgaccgctgc gccttatccg gtaactatcg tcttgagtcc aacccggtaa gacacgactt + 1801 atcgccactg gcagcagcca ctggtaacag gattagcaga gcgaggtatg taggcggtgc + 1861 tacagagttc ttgaagtggt ggcctaacta cggctacact agaagaacag tatttggtat + 1921 ctgcgctctg ctgaagccag ttaccttcgg aaaaagagtt ggtagctctt gatccggcaa + 1981 acaaaccacc gctggtagcg gtggtttttt tgtttgcaag cagcagatta cgcgcagaaa + 2041 aaaaggatct caagaagatc ctttgatctt ttctacgggg tctgacgctc agtggaacga + 2101 aaactcacgt taagggattt tggtcatgag attatcaaaa aggatcttca cctagatcct + 2161 tttaaattaa aaatgaagtt ttaaatcaat ctaaagtata tatgagtaaa cttggtctga + 2221 cagctcgagg cttggattct caccaataaa aaacgcccgg cggcaaccga gcgttctgaa + 2281 caaatccaga tggagttctg aggtcattac tggatctatc aacaggagtc caagcgagct + 2341 cgatatcaaa ttacgccccg ccctgccact catcgcagta ctgttgtaat tcattaagca + 2401 ttctgccgac atggaagcca tcacaaacgg catgatgaac ctgaatcgcc agcggcatca + 2461 gcaccttgtc gccttgcgta taatatttgc ccatggtgaa aacgggggcg aagaagttgt + 2521 ccatattggc cacgtttaaa tcaaaactgg tgaaactcac ccagggattg gctgagacga + 2581 aaaacatatt ctcaataaac cctttaggga aataggccag gttttcaccg taacacgcca + 2641 catcttgcga atatatgtgt agaaactgcc ggaaatcgtc gtggtattca ctccagagcg + 2701 atgaaaacgt ttcagtttgc tcatggaaaa cggtgtaaca agggtgaaca ctatcccata + 2761 tcaccagctc accgtct +// \ No newline at end of file diff --git a/test/test_files/ce_mrfp1.gb b/test/test_files/ce_mrfp1.gb new file mode 100644 index 00000000..5ce34456 --- /dev/null +++ b/test/test_files/ce_mrfp1.gb @@ -0,0 +1,92 @@ +LOCUS CE_mRFP1 2738 bp ds-DNA circular 26-OCT-2023 +DEFINITION . +COMMENT From pSB1C00 - Loop universal L0 acceptor and J23100_B0034_AC +FEATURES Location/Qualifiers + misc_feature 346..365 + /label="VF Primer binding site" + /ApEinfo_revcolor="#b7e6d7" + /ApEinfo_fwdcolor="#b7e6d7" + misc_feature complement(422..466) + /label="Terminator" + /ApEinfo_revcolor="#c7b0e3" + /ApEinfo_fwdcolor="#c7b0e3" + BioBrick 468..473 + /label="BsaI Site" + /ApEinfo_revcolor="#f58a5e" + /ApEinfo_fwdcolor="#f58a5e" + misc_feature 475..478 + /label="C" + /ApEinfo_revcolor="#85dae9" + /ApEinfo_fwdcolor="#85dae9" + misc_feature 479..1156 + /label="mRFP1" + /ApEinfo_revcolor="#f58a5e" + /ApEinfo_fwdcolor="#f58a5e" + misc_feature 1157..1160 + /label="E" + /ApEinfo_revcolor="#b7e6d7" + /ApEinfo_fwdcolor="#b7e6d7" + misc_feature 1174..1245 + /label="His Terminator" + /ApEinfo_revcolor="#ffef86" + /ApEinfo_fwdcolor="#ffef86" + misc_feature complement(1309..1328) + /label="VR Primer binding site" + /ApEinfo_revcolor="#b1ff67" + /ApEinfo_fwdcolor="#b1ff67" + misc_feature complement(1413..2027) + /label="rep (pMB1)" + /ApEinfo_revcolor="#85dae9" + /ApEinfo_fwdcolor="#85dae9" + misc_feature complement(2194..2299) + /label="Terminator T0" + /ApEinfo_revcolor="#c6c9d1" + /ApEinfo_fwdcolor="#c6c9d1" +ORIGIN + 1 tcattgccat acgaaattcc ggatgagcat tcatcaggcg ggcaagaatg tgaataaagg + 61 ccggataaaa cttgtgctta tttttcttta cggtctttaa aaaggccgta atatccagct + 121 gaacggtctg gttataggta cattgagcaa ctgactgaaa tgcctcaaaa tgttctttac + 181 gatgccattg ggatatatca acggtggtat atccagtgat ttttttctcc attttagctt + 241 ccttagctcc tgaaaatctc gataactcaa aaaatacgcc cggtagtgat cttatttcat + 301 tatggtgaaa gttggaacct cttacgtgcc cgatcaactc gagtgccacc tgacgtctaa + 361 gaaaccatta ttatcatgac attaacctat aaaaataggc gtatcacgag gcagaatttc + 421 agataaaaaa aatccttagc tttcgctaag gatgatttct ggaattcggt ctcgaatggc + 481 ttcctccgaa gatgttatca aagagttcat gcgtttcaaa gttcgtatgg aaggttccgt + 541 taacggtcac gagttcgaaa tcgaaggtga aggtgaaggt cgtccgtacg aaggtaccca + 601 gaccgctaaa ctgaaagtta ccaaaggtgg tccgctgccg ttcgcttggg acatcctgtc + 661 cccgcagttc cagtacggtt ccaaagctta cgttaaacac ccggctgaca tcccggacta + 721 cctgaaactg tccttcccgg aaggtttcaa atgggaacgt gttatgaact tcgaggacgg + 781 tggtgttgtt accgttaccc aggactcctc cctgcaagac ggtgagttca tctacaaagt + 841 taaactgcgt ggtaccaact tcccgtccga cggtccggtt atgcagaaaa aaaccatggg + 901 ttgggaagct tccaccgaac gtatgtaccc ggaagatggt gctctgaaag gtgaaatcaa + 961 aatgcgtctg aaactgaaag acggtggtca ctacgacgct gaagttaaaa ccacctacat + 1021 ggctaaaaaa ccggttcagc tgccgggtgc ttacaaaacc gacatcaaac tggacatcac + 1081 ctcccacaac gaggactaca ccatcgttga acagtacgaa cgtgctgaag gtcgtcactc + 1141 caccggtgct taatgagctt CGAGaccctg cagtccggca aaaaagggca aggtgtcacc + 1201 accctgccct ttttctttaa aaccgaaaag attacttcgc gttatgcagg cttcctcgct + 1261 cactgactcg ctgcgctcgg tcgttcggct gcggcgagcg gtatcagctc actcaaaggc + 1321 ggtaatacgg ttatccacag aatcagggga taacgcagga aagaacatgt gagcaaaagg + 1381 ccagcaaaag gccaggaacc gtaaaaaggc cgcgttgctg gcgtttttcc acaggctccg + 1441 cccccctgac gagcatcaca aaaatcgacg ctcaagtcag aggtggcgaa acccgacagg + 1501 actataaaga taccaggcgt ttccccctgg aagctccctc gtgcgctctc ctgttccgac + 1561 cctgccgctt accggatacc tgtccgcctt tctcccttcg ggaagcgtgg cgctttctca + 1621 tagctcacgc tgtaggtatc tcagttcggt gtaggtcgtt cgctccaagc tgggctgtgt + 1681 gcacgaaccc cccgttcagc ccgaccgctg cgccttatcc ggtaactatc gtcttgagtc + 1741 caacccggta agacacgact tatcgccact ggcagcagcc actggtaaca ggattagcag + 1801 agcgaggtat gtaggcggtg ctacagagtt cttgaagtgg tggcctaact acggctacac + 1861 tagaagaaca gtatttggta tctgcgctct gctgaagcca gttaccttcg gaaaaagagt + 1921 tggtagctct tgatccggca aacaaaccac cgctggtagc ggtggttttt ttgtttgcaa + 1981 gcagcagatt acgcgcagaa aaaaaggatc tcaagaagat cctttgatct tttctacggg + 2041 gtctgacgct cagtggaacg aaaactcacg ttaagggatt ttggtcatga gattatcaaa + 2101 aaggatcttc acctagatcc ttttaaatta aaaatgaagt tttaaatcaa tctaaagtat + 2161 atatgagtaa acttggtctg acagctcgag gcttggattc tcaccaataa aaaacgcccg + 2221 gcggcaaccg agcgttctga acaaatccag atggagttct gaggtcatta ctggatctat + 2281 caacaggagt ccaagcgagc tcgatatcaa attacgcccc gccctgccac tcatcgcagt + 2341 actgttgtaa ttcattaagc attctgccga catggaagcc atcacaaacg gcatgatgaa + 2401 cctgaatcgc cagcggcatc agcaccttgt cgccttgcgt ataatatttg cccatggtga + 2461 aaacgggggc gaagaagttg tccatattgg ccacgtttaa atcaaaactg gtgaaactca + 2521 cccagggatt ggctgagacg aaaaacatat tctcaataaa ccctttaggg aaataggcca + 2581 ggttttcacc gtaacacgcc acatcttgcg aatatatgtg tagaaactgc cggaaatcgt + 2641 cgtggtattc actccagagc gatgaaaacg tttcagtttg ctcatggaaa acggtgtaac + 2701 aagggtgaac actatcccat atcaccagct caccgtct +// \ No newline at end of file diff --git a/test/test_files/ef_b0015.gb b/test/test_files/ef_b0015.gb new file mode 100644 index 00000000..0c38ecfa --- /dev/null +++ b/test/test_files/ef_b0015.gb @@ -0,0 +1,95 @@ +LOCUS EF_B0015 2190 bp ds-DNA circular 25-OCT-2023 +DEFINITION . +COMMENT From pSB1C00 - Loop universal L0 acceptor and B0015_EF +FEATURES Location/Qualifiers + misc_feature 385..404 + /label="VF Primer binding site" + /ApEinfo_revcolor="#b7e6d7" + /ApEinfo_fwdcolor="#b7e6d7" + misc_feature complement(461..505) + /label="Terminator" + /ApEinfo_revcolor="#c7b0e3" + /ApEinfo_fwdcolor="#c7b0e3" + BioBrick 507..512 + /label="BsaI Site" + /ApEinfo_revcolor="#f58a5e" + /ApEinfo_fwdcolor="#f58a5e" + misc_feature 514..517 + /label="E" + /ApEinfo_revcolor="#f8d3a9" + /ApEinfo_fwdcolor="#f8d3a9" + terminator 518..646 + /label="BBa-B0015 Terminator" + /ApEinfo_revcolor="#ff9ccd" + /ApEinfo_fwdcolor="#ff9ccd" + terminator 526..597 + /label="rrnB T1 terminator" + /ApEinfo_revcolor="#c7b0e3" + /ApEinfo_fwdcolor="#c7b0e3" + terminator 613..640 + /label="T7Te terminator" + /ApEinfo_revcolor="#75c6a9" + /ApEinfo_fwdcolor="#75c6a9" + misc_feature 647..650 + /label="F" + /ApEinfo_revcolor="#f58a5e" + /ApEinfo_fwdcolor="#f58a5e" + misc_feature 664..735 + /label="His Terminator" + /ApEinfo_revcolor="#ffef86" + /ApEinfo_fwdcolor="#ffef86" + misc_feature complement(799..818) + /label="VR Primer binding site" + /ApEinfo_revcolor="#b1ff67" + /ApEinfo_fwdcolor="#b1ff67" + misc_feature complement(903..1517) + /label="rep (pMB1)" + /ApEinfo_revcolor="#85dae9" + /ApEinfo_fwdcolor="#85dae9" + misc_feature complement(1684..1789) + /label="Terminator T0" + /ApEinfo_revcolor="#c6c9d1" + /ApEinfo_fwdcolor="#c6c9d1" + CDS complement(1802..271) + /label="Cam Resistance" + /ApEinfo_revcolor="#ff9ccd" + /ApEinfo_fwdcolor="#ff9ccd" +ORIGIN + 1 aagggtgaac actatcccat atcaccagct caccgtcttt cattgccata cgaaattccg + 61 gatgagcatt catcaggcgg gcaagaatgt gaataaaggc cggataaaac ttgtgcttat + 121 ttttctttac ggtctttaaa aaggccgtaa tatccagctg aacggtctgg ttataggtac + 181 attgagcaac tgactgaaat gcctcaaaat gttctttacg atgccattgg gatatatcaa + 241 cggtggtata tccagtgatt tttttctcca ttttagcttc cttagctcct gaaaatctcg + 301 ataactcaaa aaatacgccc ggtagtgatc ttatttcatt atggtgaaag ttggaacctc + 361 ttacgtgccc gatcaactcg agtgccacct gacgtctaag aaaccattat tatcatgaca + 421 ttaacctata aaaataggcg tatcacgagg cagaatttca gataaaaaaa atccttagct + 481 ttcgctaagg atgatttctg gaattcggtc tcgGCTTcca ggcatcaaat aaaacgaaag + 541 gctcagtcga aagactgggc ctttcgtttt atctgttgtt tgtcggtgaa cgctctctac + 601 tagagtcaca ctggctcacc ttcgggtggg cctttctgcg tttatacgct CGAGaccctg + 661 cagtccggca aaaaagggca aggtgtcacc accctgccct ttttctttaa aaccgaaaag + 721 attacttcgc gttatgcagg cttcctcgct cactgactcg ctgcgctcgg tcgttcggct + 781 gcggcgagcg gtatcagctc actcaaaggc ggtaatacgg ttatccacag aatcagggga + 841 taacgcagga aagaacatgt gagcaaaagg ccagcaaaag gccaggaacc gtaaaaaggc + 901 cgcgttgctg gcgtttttcc acaggctccg cccccctgac gagcatcaca aaaatcgacg + 961 ctcaagtcag aggtggcgaa acccgacagg actataaaga taccaggcgt ttccccctgg + 1021 aagctccctc gtgcgctctc ctgttccgac cctgccgctt accggatacc tgtccgcctt + 1081 tctcccttcg ggaagcgtgg cgctttctca tagctcacgc tgtaggtatc tcagttcggt + 1141 gtaggtcgtt cgctccaagc tgggctgtgt gcacgaaccc cccgttcagc ccgaccgctg + 1201 cgccttatcc ggtaactatc gtcttgagtc caacccggta agacacgact tatcgccact + 1261 ggcagcagcc actggtaaca ggattagcag agcgaggtat gtaggcggtg ctacagagtt + 1321 cttgaagtgg tggcctaact acggctacac tagaagaaca gtatttggta tctgcgctct + 1381 gctgaagcca gttaccttcg gaaaaagagt tggtagctct tgatccggca aacaaaccac + 1441 cgctggtagc ggtggttttt ttgtttgcaa gcagcagatt acgcgcagaa aaaaaggatc + 1501 tcaagaagat cctttgatct tttctacggg gtctgacgct cagtggaacg aaaactcacg + 1561 ttaagggatt ttggtcatga gattatcaaa aaggatcttc acctagatcc ttttaaatta + 1621 aaaatgaagt tttaaatcaa tctaaagtat atatgagtaa acttggtctg acagctcgag + 1681 gcttggattc tcaccaataa aaaacgcccg gcggcaaccg agcgttctga acaaatccag + 1741 atggagttct gaggtcatta ctggatctat caacaggagt ccaagcgagc tcgatatcaa + 1801 attacgcccc gccctgccac tcatcgcagt actgttgtaa ttcattaagc attctgccga + 1861 catggaagcc atcacaaacg gcatgatgaa cctgaatcgc cagcggcatc agcaccttgt + 1921 cgccttgcgt ataatatttg cccatggtga aaacgggggc gaagaagttg tccatattgg + 1981 ccacgtttaa atcaaaactg gtgaaactca cccagggatt ggctgagacg aaaaacatat + 2041 tctcaataaa ccctttaggg aaataggcca ggttttcacc gtaacacgcc acatcttgcg + 2101 aatatatgtg tagaaactgc cggaaatcgt cgtggtattc actccagagc gatgaaaacg + 2161 tttcagtttg ctcatggaaa acggtgtaac +// \ No newline at end of file diff --git a/test/test_files/podd1.gb b/test/test_files/podd1.gb new file mode 100644 index 00000000..66377409 --- /dev/null +++ b/test/test_files/podd1.gb @@ -0,0 +1,221 @@ +LOCUS Copy_of_pSB1K01_-_Loop_ 3286 bp ds-DNA circular 09-NOV-2022 +DEFINITION . +KEYWORDS "accession:pSB1K01" +COMMENT Imported from database: Registry of Standard Biological Parts + Entry: pSB1K01 Description: pOdd1 Loop Vector based on pSB1K3 +FEATURES Location/Qualifiers + primer 386..399 + /label="pOdd1_To_pOdd4_Fw" + /note="sequence: ctggaattcgctcttcacagggagtgagacccaatacgcaaaccgcctct" + /ApEinfo_revcolor="#9eafd2" + /ApEinfo_fwdcolor="#9eafd2" + primer_binding 547..566 + /label="VF primer binding site" + /ApEinfo_revcolor="#b1ff67" + /ApEinfo_fwdcolor="#b1ff67" + primer 547..566 + /label="VF2" + /note="sequence: ccacctgacgtctaagaaac" + /ApEinfo_revcolor="#b1ff67" + /ApEinfo_fwdcolor="#b1ff67" + primer complement(651..669) + /label="pOdd_Suff_Rev" + /note="sequence: CGAATTCCAGAAATCATCC" + /ApEinfo_revcolor="#f7977a" + /ApEinfo_fwdcolor="#f7977a" + primer 656..681 + /label="pOdd1_Insert_Fw" + /note="sequence: atttctggaattcgctcttcaatggg" + /ApEinfo_revcolor="#faac61" + /ApEinfo_fwdcolor="#faac61" + primer 656..684 + /label="pOdd2_BF_Insert_Fw" + /note="sequence: atttctggaattcgctcttcagcaTACTt" + /ApEinfo_revcolor="#84b0dc" + /ApEinfo_fwdcolor="#84b0dc" + primer 656..684 + /label="pOdd1_BF_Insert_Fw" + /note="sequence: atttctggaattcgctcttcaatgTACTt" + /ApEinfo_revcolor="#ffef86" + /ApEinfo_fwdcolor="#ffef86" + primer 660..709 + /label="pOdd1_To_pOdd4_Fw" + /note="sequence: ctggaattcgctcttcacagggagtgagacccaatacgcaaaccgcctct" + /ApEinfo_revcolor="#9eafd2" + /ApEinfo_fwdcolor="#9eafd2" + dna 663..668 + /label="EcoRI" + /ApEinfo_revcolor="#d6b295" + /ApEinfo_fwdcolor="#d6b295" + dna 669..675 + /label="SapI" + /ApEinfo_revcolor="#faac61" + /ApEinfo_fwdcolor="#faac61" + misc 677..679 + /label="5' Fusion Site ATG" + /ApEinfo_revcolor="#c7b0e3" + /ApEinfo_fwdcolor="#c7b0e3" + misc 680..683 + /label="Fusion Site GGAG" + /ApEinfo_revcolor="#ffef86" + /ApEinfo_fwdcolor="#ffef86" + primer 683..696 + /label="pOdd1_CF_Fw" + /note="sequence: CTCTTCAATGaatgTGAGACCCAATAC" + /ApEinfo_revcolor="#f7977a" + /ApEinfo_fwdcolor="#f7977a" + primer 684..702 + /label="pOdd1_BF_OR_BE_Fw" + /note="sequence: CTCTTCAATGtactTGAGACCCAATACGCAAAC" + /ApEinfo_revcolor="#f7977a" + /ApEinfo_fwdcolor="#f7977a" + dna complement(685..690) + /label="BsaI" + /ApEinfo_revcolor="#faac61" + /ApEinfo_fwdcolor="#faac61" + misc_feature 691..1759 + /label="BBa_J04454" + /ApEinfo_revcolor="#b7e6d7" + /ApEinfo_fwdcolor="#b7e6d7" + misc_feature 917..1594 + /label="RFP" + /ApEinfo_revcolor="#ffef86" + /ApEinfo_fwdcolor="#ffef86" + primer complement(1739..1756) + /label="pOdd_Pre_Rev" + /note="sequence: AAACGCAGAAAGGCCCAC" + /ApEinfo_revcolor="#f7977a" + /ApEinfo_fwdcolor="#f7977a" + primer complement(1741..1790) + /label="pOdd1_To_pOdd4_Rev" + /note="sequence: ggactgcaggctcttcaaccagcgtgagacctataaacgcagaaaggccc" + /ApEinfo_revcolor="#ff9ccd" + /ApEinfo_fwdcolor="#ff9ccd" + dna 1760..1765 + /label="BsaI" + /ApEinfo_revcolor="#84b0dc" + /ApEinfo_fwdcolor="#84b0dc" + primer complement(1761..1790) + /label="pOdd1_AE_Insert_Rev" + /note="sequence: ggactgcaggctcttcatgcAAGCtgagac" + /ApEinfo_revcolor="#c6c9d1" + /ApEinfo_fwdcolor="#c6c9d1" + misc 1767..1770 + /label="Fusion Site CGCT" + /ApEinfo_revcolor="#c6c9d1" + /ApEinfo_fwdcolor="#c6c9d1" + primer complement(1769..1790) + /label="pOdd1_Insert_Rev" + /note="sequence: ggactgcaggctcttcatgcag" + /ApEinfo_revcolor="#85dae9" + /ApEinfo_fwdcolor="#85dae9" + primer complement(1769..1790) + /label="pOdd2_Insert_Rev" + /note="sequence: ggactgcaggctcttcagtaag" + /ApEinfo_revcolor="#f58a5e" + /ApEinfo_fwdcolor="#f58a5e" + primer 1770..1787 + /label="pOdd1_AE_Fw" + /note="sequence: ATAGGTCTCAgcttGCATGAAGAGCCTGCAG" + /ApEinfo_revcolor="#f7977a" + /ApEinfo_fwdcolor="#f7977a" + misc 1771..1773 + /label="3' Fusion Site GCA" + /ApEinfo_revcolor="#d59687" + /ApEinfo_fwdcolor="#d59687" + primer 1771..1787 + /label="pOdd1_AC_Fw" + /note="sequence: ATAGGTCTCAaatgGCATGAAGAGCCTGCAG" + /ApEinfo_revcolor="#f7977a" + /ApEinfo_fwdcolor="#f7977a" + dna complement(1775..1781) + /label="SapI" + /ApEinfo_revcolor="#d6b295" + /ApEinfo_fwdcolor="#d6b295" + dna complement(1782..1787) + /label="PstI" + /ApEinfo_revcolor="#ffef86" + /ApEinfo_fwdcolor="#ffef86" + stem_loop 1788..1859 + /label="E. coli his operon terminator" + /ApEinfo_revcolor="#b4abac" + /ApEinfo_fwdcolor="#b4abac" + primer_binding complement(1923..1942) + /label="VR primer binding site" + /ApEinfo_revcolor="#ff9ccd" + /ApEinfo_fwdcolor="#ff9ccd" + primer complement(1923..1942) + /label="VR" + /note="sequence: gtattaccgcctttgagtga" + /ApEinfo_revcolor="#c6c9d1" + /ApEinfo_fwdcolor="#c6c9d1" + misc 2027..2641 + /label="rep (pMB1)" + /ApEinfo_revcolor="#f8d3a9" + /ApEinfo_fwdcolor="#f8d3a9" + primer 2356..2370 + /label="pOdd1_To_pOdd4_Rev" + /note="sequence: ggactgcaggctcttcaaccagcgtgagacctataaacgcagaaaggccc" + /ApEinfo_revcolor="#ff9ccd" + /ApEinfo_fwdcolor="#ff9ccd" + misc 2863..392 + /label="Kanamycin resistance marker" + /ApEinfo_revcolor="#faac61" + /ApEinfo_fwdcolor="#faac61" +ORIGIN + 1 ccggcgcagg aacactgcca gcgcatcaac aatattttca cctgaatcag gatattcttc + 61 taatacctgg aatgctgttt tcccggggat cgcagtggtg agtaaccatg catcatcagg + 121 agtacggata aaatgcttga tggtcggaag aggcataaat tccgtcagcc agtttagtct + 181 gaccatctca tctgtaacat cattggcaac gctacctttg ccatgtttca gaaacaactc + 241 tggcgcatcg ggcttcccat acaatcgata gattgtcgca cctgattgcc cgacattatc + 301 gcgagcccat ttatacccat ataaatcagc atccatgttg gaatttaatc gcggcctgga + 361 gcaagacgtt tcccgttgaa tatggctcat aacacccctt gtattactgt ttatgtaagc + 421 agacagtttt attgttcatg atgatatatt tttatcttgt gcaatgtaac atcagagatt + 481 ttgagacaca acgtggcttt gttgaataaa tcgaactttt gctgagttga aggatcagct + 541 cgagtgccac ctgacgtcta agaaaccatt attatcatga cattaaccta taaaaatagg + 601 cgtatcacga ggcagaattt cagataaaaa aaatccttag ctttcgctaa ggatgatttc + 661 tggaattcgc tcttcaatgg gagtgagacc caatacgcaa accgcctctc cccgcgcgtt + 721 ggccgattca ttaatgcagc tggcacgaca ggtttcccga ctggaaagcg ggcagtgagc + 781 gcaacgcaat taatgtgagt tagctcactc attaggcacc ccaggcttta cactttatgc + 841 ttccggctcg tatgttgtgt ggaattgtga gcggataaca atttcacaca tactagagaa + 901 agaggagaaa tactagatgg cttcctccga agacgttatc aaagagttca tgcgtttcaa + 961 agttcgtatg gaaggttccg ttaacggtca cgagttcgaa atcgaaggtg aaggtgaagg + 1021 tcgtccgtac gaaggtaccc agaccgctaa actgaaagtt accaaaggtg gtccgctgcc + 1081 gttcgcttgg gacatcctgt ccccgcagtt ccagtacggt tccaaagctt acgttaaaca + 1141 cccggctgac atcccggact acctgaaact gtccttcccg gaaggtttca aatgggaacg + 1201 tgttatgaac ttcgaagacg gtggtgttgt taccgttacc caggactcct ccctgcaaga + 1261 cggtgagttc atctacaaag ttaaactgcg tggtaccaac ttcccgtccg acggtccggt + 1321 tatgcagaaa aaaaccatgg gttgggaagc ttccaccgaa cgtatgtacc cggaagacgg + 1381 tgctctgaaa ggtgaaatca aaatgcgtct gaaactgaaa gacggtggtc actacgacgc + 1441 tgaagttaaa accacctaca tggctaaaaa accggttcag ctgccgggtg cttacaaaac + 1501 cgacatcaaa ctggacatca cctcccacaa cgaagactac accatcgttg aacagtacga + 1561 acgtgctgaa ggtcgtcact ccaccggtgc ttaataacgc tgatagtgct agtgtagatc + 1621 gctactagag ccaggcatca aataaaacga aaggctcagt cgaaagactg ggcctttcgt + 1681 tttatctgtt gtttgtcggt gaacgctctc tactagagtc acactggctc accttcgggt + 1741 gggcctttct gcgtttatag gtctcacgct gcatgaagag cctgcagtcc ggcaaaaaag + 1801 ggcaaggtgt caccaccctg ccctttttct ttaaaaccga aaagattact tcgcgttatg + 1861 caggcttcct cgctcactga ctcgctgcgc tcggtcgttc ggctgcggcg agcggtatca + 1921 gctcactcaa aggcggtaat acggttatcc acagaatcag gggataacgc aggaaagaac + 1981 atgtgagcaa aaggccagca aaaggccagg aaccgtaaaa aggccgcgtt gctggcgttt + 2041 ttccacaggc tccgcccccc tgacgagcat cacaaaaatc gacgctcaag tcagaggtgg + 2101 cgaaacccga caggactata aagataccag gcgtttcccc ctggaagctc cctcgtgcgc + 2161 tctcctgttc cgaccctgcc gcttaccgga tacctgtccg cctttctccc ttcgggaagc + 2221 gtggcgcttt ctcatagctc acgctgtagg tatctcagtt cggtgtaggt cgttcgctcc + 2281 aagctgggct gtgtgcacga accccccgtt cagcccgacc gctgcgcctt atccggtaac + 2341 tatcgtcttg agtccaaccc ggtaagacac gacttatcgc cactggcagc agccactggt + 2401 aacaggatta gcagagcgag gtatgtaggc ggtgctacag agttcttgaa gtggtggcct + 2461 aactacggct acactagaag aacagtattt ggtatctgcg ctctgctgaa gccagttacc + 2521 ttcggaaaaa gagttggtag ctcttgatcc ggcaaacaaa ccaccgctgg tagcggtggt + 2581 ttttttgttt gcaagcagca gattacgcgc agaaaaaaag gatctcaaga agatcctttg + 2641 atcttttcta cggggtctga cgctcagtgg aacgaaaact cacgttaagg gattttggtc + 2701 atgagattat caaaaaggat cttcacctag atccttttaa attaaaaatg aagttttaaa + 2761 tcaatctaaa gtatatatga gtaaacttgg tctgacagct cgagtcccgt caagtcagcg + 2821 taatgctctg ccagtgttac aaccaattaa ccaattctga ttagaaaaac tcatcgagca + 2881 tcaaatgaaa ctgcaattta ttcatatcag gattatcaat accatatttt tgaaaaagcc + 2941 gtttctgtaa tgaaggagaa aactcaccga ggcagttcca taggatggca agatcctggt + 3001 atcggtctgc gattccgact cgtccaacat caatacaacc tattaatttc ccctcgtcaa + 3061 aaataaggtt atcaagtgag aaatcaccat gagtgacgac tgaatccggt gagaatggca + 3121 aaagcttatg catttctttc cagacttgtt caacaggcca gccattacgc tcgtcatcaa + 3181 aatcactcgc atcaaccaaa ccgttattca ttcgtgattg cgcctgagcg agacgaaata + 3241 cgcgatcgct gttaaaagga caattacaaa caggaatcga atgcaa +// \ No newline at end of file diff --git a/test/test_files/sbol_gg.ipynb b/test/test_files/sbol_gg.ipynb new file mode 100644 index 00000000..f8b2bf77 --- /dev/null +++ b/test/test_files/sbol_gg.ipynb @@ -0,0 +1,422 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import sbol3\n", + "from sbol_utilities.component import ed_restriction_enzyme, backbone, part_in_backbone, part_in_backbone_from_sbol, \\\n", + " digestion, ligation, Assembly_plan_composite_in_backbone_single_enzyme, backbone_from_sbol\n", + "from sbol_utilities.conversion import convert_from_genbank\n", + "from itertools import product\n", + "from sbol_utilities.component import contained_components" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Simple assembly" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"Test assembly plan class\"\"\"\n", + "doc = sbol3.Document()\n", + "sbol3.set_namespace('http://sbolstandard.org/testfiles')\n", + "# Assembly plan setup\n", + "bsai = ed_restriction_enzyme('BsaI')\n", + "#lvl1 acceptor\n", + "podd1_dir = os.path.join('podd1.gb')\n", + "podd_doc = convert_from_genbank(podd1_dir, 'https://github.com/Gonza10V')\n", + "podd_af = [top_level for top_level in podd_doc if type(top_level)==sbol3.Component][0]\n", + "podd_backbone, podd_backbone_seq = backbone_from_sbol('pOdd_bb', podd_af, [680,1770], 4, False, name='pOdd_bb')\n", + "doc.add([podd_backbone,podd_backbone_seq])\n", + "#parts in backbone\n", + "##get parts from genbank\n", + "j23100_dir = os.path.join('ab_j23100.gb')\n", + "b0034_dir = os.path.join('bc_b0034.gb')\n", + "gfp_dir = os.path.join('ce_gfp.gb')\n", + "b0015_dir = os.path.join('ef_b0015.gb')\n", + "j23100_doc = convert_from_genbank(j23100_dir, 'https://github.com/Gonza10V')\n", + "j23100_ab = [top_level for top_level in j23100_doc if type(top_level)==sbol3.Component][0]\n", + "b0034_doc = convert_from_genbank(b0034_dir, 'https://github.com/Gonza10V')\n", + "b0034_bc = [top_level for top_level in b0034_doc if type(top_level)==sbol3.Component][0]\n", + "gfp_doc = convert_from_genbank(gfp_dir, 'https://github.com/Gonza10V')\n", + "gfp_ce = [top_level for top_level in gfp_doc if type(top_level)==sbol3.Component][0]\n", + "b0015_doc = convert_from_genbank(b0015_dir, 'https://github.com/Gonza10V')\n", + "b0015_ef = [top_level for top_level in b0015_doc if type(top_level)==sbol3.Component][0]\n", + "##SBOL parts in backbone\n", + "j23100_ab_in_bb, j23100_ab_in_bb_seq = part_in_backbone_from_sbol('j23100_ab_in_bb', j23100_ab, [479,513], [sbol3.SO_PROMOTER], 4, False, name='j23100_ab_in_bb')\n", + "doc.add([j23100_ab_in_bb, j23100_ab_in_bb_seq])\n", + "b0034_bc_in_bb, b0034_bc_in_bb_seq = part_in_backbone_from_sbol('b0034_bc_in_bb', b0034_bc, [479,499], [sbol3.SO_RBS], 4, False, name='b0034_bc_in_bb')\n", + "doc.add([b0034_bc_in_bb, b0034_bc_in_bb_seq])\n", + "gfp_ce_in_bb, gfp_ce_in_bb_seq = part_in_backbone_from_sbol('gfp_ce_in_bb', gfp_ce, [479,1195], [sbol3.SO_CDS], 4, False, name='gfp_ce_in_bb')\n", + "doc.add([gfp_ce_in_bb, gfp_ce_in_bb_seq])\n", + "b0015_ef_in_bb, b0015_ef_in_bb_seq = part_in_backbone_from_sbol('b0015_ef_in_bb', b0015_ef, [518,646], [sbol3.SO_TERMINATOR], 4, False, name='b0015_ef_in_bb')\n", + "doc.add([b0015_ef_in_bb, b0015_ef_in_bb_seq])\n", + "\n", + "\n", + "#Assembly plan\n", + "simple_assembly_plan = Assembly_plan_composite_in_backbone_single_enzyme( \n", + " name='simple_green_transcriptional_unit',\n", + " parts_in_backbone=[j23100_ab_in_bb, b0034_bc_in_bb, gfp_ce_in_bb, b0015_ef_in_bb], \n", + " acceptor_backbone=podd_backbone,\n", + " restriction_enzyme=bsai,\n", + " document=doc)\n", + "simple_assembly_plan.run()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Should have 1 product" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1\n" + ] + }, + { + "data": { + "text/plain": [ + "[[,\n", + " ]]" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "print(len(simple_assembly_plan.products))\n", + "simple_assembly_plan.products" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Print doc components" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "http://sbolstandard.org/testfiles/pOdd_bb\n", + "http://sbolstandard.org/testfiles/pOdd_bb_seq\n", + "http://sbolstandard.org/testfiles/j23100_ab_in_bb\n", + "http://sbolstandard.org/testfiles/j23100_ab_in_bb_seq\n", + "http://sbolstandard.org/testfiles/b0034_bc_in_bb\n", + "http://sbolstandard.org/testfiles/b0034_bc_in_bb_seq\n", + "http://sbolstandard.org/testfiles/gfp_ce_in_bb\n", + "http://sbolstandard.org/testfiles/gfp_ce_in_bb_seq\n", + "http://sbolstandard.org/testfiles/b0015_ef_in_bb\n", + "http://sbolstandard.org/testfiles/b0015_ef_in_bb_seq\n", + "http://sbolstandard.org/testfiles/simple_green_transcriptional_unit_assembly_plan\n", + "http://sbolstandard.org/testfiles/j23100_ab_in_bb_part_extract\n", + "http://sbolstandard.org/testfiles/j23100_ab_in_bb_part_extract_seq\n", + "http://sbolstandard.org/testfiles/b0034_bc_in_bb_part_extract\n", + "http://sbolstandard.org/testfiles/b0034_bc_in_bb_part_extract_seq\n", + "http://sbolstandard.org/testfiles/gfp_ce_in_bb_part_extract\n", + "http://sbolstandard.org/testfiles/gfp_ce_in_bb_part_extract_seq\n", + "http://sbolstandard.org/testfiles/b0015_ef_in_bb_part_extract\n", + "http://sbolstandard.org/testfiles/b0015_ef_in_bb_part_extract_seq\n", + "http://sbolstandard.org/testfiles/pOdd_bb_backbone\n", + "http://sbolstandard.org/testfiles/pOdd_bb_backbone_seq\n", + "http://sbolstandard.org/testfiles/composite_1_part_5_part_1_j23100_ab_in_bb_part_2_b0034_bc_in_bb_part_3_gfp_ce_in_bb_part_4_b0015_ef_in_bb\n", + "http://sbolstandard.org/testfiles/composite_1_part_5_part_1_j23100_ab_in_bb_part_2_b0034_bc_in_bb_part_3_gfp_ce_in_bb_part_4_b0015_ef_in_bb_seq\n" + ] + } + ], + "source": [ + "for obj in simple_assembly_plan.document.objects:\n", + " print(obj.identity)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "get component sequence" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for obj in simple_assembly_plan.document.objects:\n", + " if obj.identity =='http://sbolstandard.org/testfiles/composite_0_part_5_part_1_j23100_ab_in_bb_part_2_b0034_bc_in_bb_part_3_gfp_ce_in_bb_part_4_b0015_ef_in_bb':\n", + " print(obj.sequences[0].lookup().elements)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Combinatorial assembly" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"Test assembly plan class\"\"\"\n", + "doc = sbol3.Document()\n", + "sbol3.set_namespace('http://sbolstandard.org/testfiles')\n", + "# Assembly plan setup\n", + "bsai = ed_restriction_enzyme('BsaI')\n", + "#lvl1 acceptor\n", + "podd1_dir = os.path.join('podd1.gb')\n", + "podd_doc = convert_from_genbank(podd1_dir, 'https://github.com/Gonza10V')\n", + "podd_af = [top_level for top_level in podd_doc if type(top_level)==sbol3.Component][0]\n", + "podd_backbone, podd_backbone_seq = backbone_from_sbol('pOdd_bb', podd_af, [680,1770], 4, False, name='pOdd_bb')\n", + "doc.add([podd_backbone,podd_backbone_seq])\n", + "#parts in backbone\n", + "##get parts from genbank\n", + "j23100_dir = os.path.join('ab_j23100.gb')\n", + "j23101_dir = os.path.join('ab_j23101.gb')\n", + "b0034_dir = os.path.join('bc_b0034.gb')\n", + "gfp_dir = os.path.join('ce_gfp.gb')\n", + "rfp_dir = os.path.join('ce_mrfp1.gb')\n", + "cfp_dir = os.path.join('ce_ecfp.gb')\n", + "b0015_dir = os.path.join('ef_b0015.gb')\n", + "j23100_doc = convert_from_genbank(j23100_dir, 'https://github.com/Gonza10V')\n", + "j23100_ab = [top_level for top_level in j23100_doc if type(top_level)==sbol3.Component][0]\n", + "j23101_doc = convert_from_genbank(j23101_dir, 'https://github.com/Gonza10V')\n", + "j23101_ab = [top_level for top_level in j23101_doc if type(top_level)==sbol3.Component][0]\n", + "b0034_doc = convert_from_genbank(b0034_dir, 'https://github.com/Gonza10V')\n", + "b0034_bc = [top_level for top_level in b0034_doc if type(top_level)==sbol3.Component][0]\n", + "gfp_doc = convert_from_genbank(gfp_dir, 'https://github.com/Gonza10V')\n", + "gfp_ce = [top_level for top_level in gfp_doc if type(top_level)==sbol3.Component][0]\n", + "rfp_doc = convert_from_genbank(rfp_dir, 'https://github.com/Gonza10V')\n", + "rfp_ce = [top_level for top_level in rfp_doc if type(top_level)==sbol3.Component][0]\n", + "cfp_doc = convert_from_genbank(cfp_dir, 'https://github.com/Gonza10V')\n", + "cfp_ce = [top_level for top_level in cfp_doc if type(top_level)==sbol3.Component][0]\n", + "b0015_doc = convert_from_genbank(b0015_dir, 'https://github.com/Gonza10V')\n", + "b0015_ef = [top_level for top_level in b0015_doc if type(top_level)==sbol3.Component][0]\n", + "##SBOL parts in backbone\n", + "j23100_ab_in_bb, j23100_ab_in_bb_seq = part_in_backbone_from_sbol('j23100_ab_in_bb', j23100_ab, [479,513], [sbol3.SO_PROMOTER], 4, False, name='j23100_ab_in_bb')\n", + "doc.add([j23100_ab_in_bb, j23100_ab_in_bb_seq])\n", + "j23101_ab_in_bb, j23101_ab_in_bb_seq = part_in_backbone_from_sbol('j23101_ab_in_bb', j23101_ab, [479,513], [sbol3.SO_PROMOTER], 4, False, name='j23101_ab_in_bb')\n", + "doc.add([j23101_ab_in_bb, j23101_ab_in_bb_seq])\n", + "b0034_bc_in_bb, b0034_bc_in_bb_seq = part_in_backbone_from_sbol('b0034_bc_in_bb', b0034_bc, [479,499], [sbol3.SO_RBS], 4, False, name='b0034_bc_in_bb')\n", + "doc.add([b0034_bc_in_bb, b0034_bc_in_bb_seq])\n", + "gfp_ce_in_bb, gfp_ce_in_bb_seq = part_in_backbone_from_sbol('gfp_ce_in_bb', gfp_ce, [479,1195], [sbol3.SO_CDS], 4, False, name='gfp_ce_in_bb')\n", + "doc.add([gfp_ce_in_bb, gfp_ce_in_bb_seq])\n", + "rfp_ce_in_bb, rfp_ce_in_bb_seq = part_in_backbone_from_sbol('rfp_ce_in_bb', rfp_ce, [479,1156], [sbol3.SO_CDS], 4, False, name='rfp_ce_in_bb')\n", + "doc.add([rfp_ce_in_bb, rfp_ce_in_bb_seq])\n", + "cfp_ce_in_bb, cfp_ce_in_bb_seq = part_in_backbone_from_sbol('cfp_ce_in_bb', cfp_ce, [479,1198], [sbol3.SO_CDS], 4, False, name='cfp_ce_in_bb')\n", + "doc.add([cfp_ce_in_bb, cfp_ce_in_bb_seq])\n", + "b0015_ef_in_bb, b0015_ef_in_bb_seq = part_in_backbone_from_sbol('b0015_ef_in_bb', b0015_ef, [518,646], [sbol3.SO_TERMINATOR], 4, False, name='b0015_ef_in_bb')\n", + "doc.add([b0015_ef_in_bb, b0015_ef_in_bb_seq])\n", + "\n", + "\n", + "#Assembly plan\n", + "combinatorial_assembly_plan = Assembly_plan_composite_in_backbone_single_enzyme( \n", + " name='combinatorial_rgb_transcriptional_units',\n", + " parts_in_backbone=[j23100_ab_in_bb, j23101_ab_in_bb, b0034_bc_in_bb, gfp_ce_in_bb, rfp_ce_in_bb, cfp_ce_in_bb, b0015_ef_in_bb], \n", + " acceptor_backbone=podd_backbone,\n", + " restriction_enzyme=bsai,\n", + " document=doc)\n", + "combinatorial_assembly_plan.run()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "should produce 6 products" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "6\n" + ] + }, + { + "data": { + "text/plain": [ + "[[,\n", + " ],\n", + " [,\n", + " ],\n", + " [,\n", + " ],\n", + " [,\n", + " ],\n", + " [,\n", + " ],\n", + " [,\n", + " ]]" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "print(len(combinatorial_assembly_plan.products))\n", + "combinatorial_assembly_plan.products\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "get doc components" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "http://sbolstandard.org/testfiles/pOdd_bb\n", + "http://sbolstandard.org/testfiles/pOdd_bb_seq\n", + "http://sbolstandard.org/testfiles/j23100_ab_in_bb\n", + "http://sbolstandard.org/testfiles/j23100_ab_in_bb_seq\n", + "http://sbolstandard.org/testfiles/j23101_ab_in_bb\n", + "http://sbolstandard.org/testfiles/j23101_ab_in_bb_seq\n", + "http://sbolstandard.org/testfiles/b0034_bc_in_bb\n", + "http://sbolstandard.org/testfiles/b0034_bc_in_bb_seq\n", + "http://sbolstandard.org/testfiles/gfp_ce_in_bb\n", + "http://sbolstandard.org/testfiles/gfp_ce_in_bb_seq\n", + "http://sbolstandard.org/testfiles/rfp_ce_in_bb\n", + "http://sbolstandard.org/testfiles/rfp_ce_in_bb_seq\n", + "http://sbolstandard.org/testfiles/cfp_ce_in_bb\n", + "http://sbolstandard.org/testfiles/cfp_ce_in_bb_seq\n", + "http://sbolstandard.org/testfiles/b0015_ef_in_bb\n", + "http://sbolstandard.org/testfiles/b0015_ef_in_bb_seq\n", + "http://sbolstandard.org/testfiles/combinatorial_rgb_transcriptional_units_assembly_plan\n", + "http://sbolstandard.org/testfiles/j23100_ab_in_bb_part_extract\n", + "http://sbolstandard.org/testfiles/j23100_ab_in_bb_part_extract_seq\n", + "http://sbolstandard.org/testfiles/j23101_ab_in_bb_part_extract\n", + "http://sbolstandard.org/testfiles/j23101_ab_in_bb_part_extract_seq\n", + "http://sbolstandard.org/testfiles/b0034_bc_in_bb_part_extract\n", + "http://sbolstandard.org/testfiles/b0034_bc_in_bb_part_extract_seq\n", + "http://sbolstandard.org/testfiles/gfp_ce_in_bb_part_extract\n", + "http://sbolstandard.org/testfiles/gfp_ce_in_bb_part_extract_seq\n", + "http://sbolstandard.org/testfiles/rfp_ce_in_bb_part_extract\n", + "http://sbolstandard.org/testfiles/rfp_ce_in_bb_part_extract_seq\n", + "http://sbolstandard.org/testfiles/cfp_ce_in_bb_part_extract\n", + "http://sbolstandard.org/testfiles/cfp_ce_in_bb_part_extract_seq\n", + "http://sbolstandard.org/testfiles/b0015_ef_in_bb_part_extract\n", + "http://sbolstandard.org/testfiles/b0015_ef_in_bb_part_extract_seq\n", + "http://sbolstandard.org/testfiles/pOdd_bb_backbone\n", + "http://sbolstandard.org/testfiles/pOdd_bb_backbone_seq\n", + "http://sbolstandard.org/testfiles/composite_1_part_8_part_1_j23100_ab_in_bb_part_3_b0034_bc_in_bb_part_4_gfp_ce_in_bb_part_7_b0015_ef_in_bb\n", + "http://sbolstandard.org/testfiles/composite_1_part_8_part_1_j23100_ab_in_bb_part_3_b0034_bc_in_bb_part_4_gfp_ce_in_bb_part_7_b0015_ef_in_bb_seq\n", + "http://sbolstandard.org/testfiles/composite_2_part_8_part_1_j23100_ab_in_bb_part_3_b0034_bc_in_bb_part_5_rfp_ce_in_bb_part_7_b0015_ef_in_bb\n", + "http://sbolstandard.org/testfiles/composite_2_part_8_part_1_j23100_ab_in_bb_part_3_b0034_bc_in_bb_part_5_rfp_ce_in_bb_part_7_b0015_ef_in_bb_seq\n", + "http://sbolstandard.org/testfiles/composite_3_part_8_part_1_j23100_ab_in_bb_part_3_b0034_bc_in_bb_part_6_cfp_ce_in_bb_part_7_b0015_ef_in_bb\n", + "http://sbolstandard.org/testfiles/composite_3_part_8_part_1_j23100_ab_in_bb_part_3_b0034_bc_in_bb_part_6_cfp_ce_in_bb_part_7_b0015_ef_in_bb_seq\n", + "http://sbolstandard.org/testfiles/composite_4_part_8_part_2_j23101_ab_in_bb_part_3_b0034_bc_in_bb_part_4_gfp_ce_in_bb_part_7_b0015_ef_in_bb\n", + "http://sbolstandard.org/testfiles/composite_4_part_8_part_2_j23101_ab_in_bb_part_3_b0034_bc_in_bb_part_4_gfp_ce_in_bb_part_7_b0015_ef_in_bb_seq\n", + "http://sbolstandard.org/testfiles/composite_5_part_8_part_2_j23101_ab_in_bb_part_3_b0034_bc_in_bb_part_5_rfp_ce_in_bb_part_7_b0015_ef_in_bb\n", + "http://sbolstandard.org/testfiles/composite_5_part_8_part_2_j23101_ab_in_bb_part_3_b0034_bc_in_bb_part_5_rfp_ce_in_bb_part_7_b0015_ef_in_bb_seq\n", + "http://sbolstandard.org/testfiles/composite_6_part_8_part_2_j23101_ab_in_bb_part_3_b0034_bc_in_bb_part_6_cfp_ce_in_bb_part_7_b0015_ef_in_bb\n", + "http://sbolstandard.org/testfiles/composite_6_part_8_part_2_j23101_ab_in_bb_part_3_b0034_bc_in_bb_part_6_cfp_ce_in_bb_part_7_b0015_ef_in_bb_seq\n" + ] + } + ], + "source": [ + "for obj in combinatorial_assembly_plan.document.objects:\n", + " print(obj.identity)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Get a sequence" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "cgctgcatgaagagcctgcagtccggcaaaaaagggcaaggtgtcaccaccctgccctttttctttaaaaccgaaaagattacttcgcgttatgcaggcttcctcgctcactgactcgctgcgctcggtcgttcggctgcggcgagcggtatcagctcactcaaaggcggtaatacggttatccacagaatcaggggataacgcaggaaagaacatgtgagcaaaaggccagcaaaaggccaggaaccgtaaaaaggccgcgttgctggcgtttttccacaggctccgcccccctgacgagcatcacaaaaatcgacgctcaagtcagaggtggcgaaacccgacaggactataaagataccaggcgtttccccctggaagctccctcgtgcgctctcctgttccgaccctgccgcttaccggatacctgtccgcctttctcccttcgggaagcgtggcgctttctcatagctcacgctgtaggtatctcagttcggtgtaggtcgttcgctccaagctgggctgtgtgcacgaaccccccgttcagcccgaccgctgcgccttatccggtaactatcgtcttgagtccaacccggtaagacacgacttatcgccactggcagcagccactggtaacaggattagcagagcgaggtatgtaggcggtgctacagagttcttgaagtggtggcctaactacggctacactagaagaacagtatttggtatctgcgctctgctgaagccagttaccttcggaaaaagagttggtagctcttgatccggcaaacaaaccaccgctggtagcggtggtttttttgtttgcaagcagcagattacgcgcagaaaaaaaggatctcaagaagatcctttgatcttttctacggggtctgacgctcagtggaacgaaaactcacgttaagggattttggtcatgagattatcaaaaaggatcttcacctagatccttttaaattaaaaatgaagttttaaatcaatctaaagtatatatgagtaaacttggtctgacagctcgagtcccgtcaagtcagcgtaatgctctgccagtgttacaaccaattaaccaattctgattagaaaaactcatcgagcatcaaatgaaactgcaatttattcatatcaggattatcaataccatatttttgaaaaagccgtttctgtaatgaaggagaaaactcaccgaggcagttccataggatggcaagatcctggtatcggtctgcgattccgactcgtccaacatcaatacaacctattaatttcccctcgtcaaaaataaggttatcaagtgagaaatcaccatgagtgacgactgaatccggtgagaatggcaaaagcttatgcatttctttccagacttgttcaacaggccagccattacgctcgtcatcaaaatcactcgcatcaaccaaaccgttattcattcgtgattgcgcctgagcgagacgaaatacgcgatcgctgttaaaaggacaattacaaacaggaatcgaatgcaaccggcgcaggaacactgccagcgcatcaacaatattttcacctgaatcaggatattcttctaatacctggaatgctgttttcccggggatcgcagtggtgagtaaccatgcatcatcaggagtacggataaaatgcttgatggtcggaagaggcataaattccgtcagccagtttagtctgaccatctcatctgtaacatcattggcaacgctacctttgccatgtttcagaaacaactctggcgcatcgggcttcccatacaatcgatagattgtcgcacctgattgcccgacattatcgcgagcccatttatacccatataaatcagcatccatgttggaatttaatcgcggcctggagcaagacgtttcccgttgaatatggctcataacaccccttgtattactgtttatgtaagcagacagttttattgttcatgatgatatatttttatcttgtgcaatgtaacatcagagattttgagacacaacgtggctttgttgaataaatcgaacttttgctgagttgaaggatcagctcgagtgccacctgacgtctaagaaaccattattatcatgacattaacctataaaaataggcgtatcacgaggcagaatttcagataaaaaaaatccttagctttcgctaaggatgatttctggaattcgctcttcaatgGGAGttgacggctagctcagtcctaggtacagtgctagcTACTagagaaagaggagaaatactaaatggtgagcaagggcgaggagctgttcaccggggtggtgcccatcctggtcgagctggacggcgacgtgaacggccacaagttcagcgtgtccggcgagggcgagggcgatgccacctacggcaagctgaccctgaagttcatctgcaccaccggcaagctgcccgtgccctggcccaccctcgtgaccaccctgacctggggcgtgcagtgcttcagccgctaccccgaccacatgaagcagcacgacttcttcaagtccgccatgcccgaaggctacgtccaggagcgcaccatcttcttcaaggacgacggcaactacaagacccgcgccgaggtgaagttcgagggcgacaccctggtgaaccgcatcgagctgaagggcatcgacttcaaggaggacggcaacatcctggggcacaagctggagtacaactacatcagccacaacgtctatatcaccgccgacaagcagaagaacggcatcaaggccaacttcaagatccgccacaacatcgaggacggcagcgtgcagctcgccgaccactaccagcagaacacccccatcggcgacggccccgtgctgctgcccgacaaccactacctgagcacccagtccgccctgagcaaagaccccaacgagaagcgcgatcacatggtcctgctggagttcgtgaccgccgccgggatcactctcggcatggacgagctgtacaagtaataaGCTTccaggcatcaaataaaacgaaaggctcagtcgaaagactgggcctttcgttttatctgttgtttgtcggtgaacgctctctactagagtcacactggctcaccttcgggtgggcctttctgcgtttata\n" + ] + } + ], + "source": [ + "for obj in combinatorial_assembly_plan.document.objects:\n", + " if obj.identity =='http://sbolstandard.org/testfiles/composite_3_part_8_part_1_j23100_ab_in_bb_part_3_b0034_bc_in_bb_part_6_cfp_ce_in_bb_part_7_b0015_ef_in_bb':\n", + " print(obj.sequences[0].lookup().elements)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "LOICA", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.16" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/test/test_files/sfgfp.gb b/test/test_files/sfgfp.gb new file mode 100644 index 00000000..b667e882 --- /dev/null +++ b/test/test_files/sfgfp.gb @@ -0,0 +1,102 @@ +LOCUS Copy_of_sfGFP_CE:_pSB1C 2778 bp ds-DNA circular 15-NOV-2022 +DEFINITION . +COMMENT From pSB1C00 - Loop universal L0 acceptor and sfGFP_CE Sequencing + data suggests primer needs to be re-designed - appears to have part + of B0015 at the end +FEATURES Location/Qualifiers + misc_feature 1..20 + /label="VF Primer binding site" + /ApEinfo_revcolor="#b7e6d7" + /ApEinfo_fwdcolor="#b7e6d7" + misc_feature complement(77..121) + /label="Terminator" + /ApEinfo_revcolor="#c7b0e3" + /ApEinfo_fwdcolor="#c7b0e3" + BioBrick 123..128 + /label="BsaI Site" + /ApEinfo_revcolor="#f58a5e" + /ApEinfo_fwdcolor="#f58a5e" + CDS 131..850 + /label="sfGFP (BBa_I746916)" + /ApEinfo_revcolor="#84b0dc" + /ApEinfo_fwdcolor="#84b0dc" + CDS 131..850 + /label="Translation 131-850" + /translation="MRKGEELFTGVVPILVELDGDVNGHKFSVRGEGEGDATNGKLTLKFICTTGKLPVPWPTLVTTLTYGVQCFARYPDHMKQHDFFKSAMPEGYVQERTISFKDDGTYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNFNSHNVYITADKQKNGIKANFKIRHNVEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQSVLSKDPNEKRDHMVLLEFVTAAGITHGMDELYK**" + misc_feature 851..854 + /label="Fusion Site" + /ApEinfo_revcolor="#b1ff67" + /ApEinfo_fwdcolor="#b1ff67" + misc_feature 856..861 + /label="BsaI Site" + /ApEinfo_revcolor="#ff9ccd" + /ApEinfo_fwdcolor="#ff9ccd" + misc_feature 868..939 + /label="His Terminator" + /ApEinfo_revcolor="#ffef86" + /ApEinfo_fwdcolor="#ffef86" + misc_feature complement(1003..1022) + /label="VR Primer binding site" + /ApEinfo_revcolor="#b1ff67" + /ApEinfo_fwdcolor="#b1ff67" + misc_feature complement(1107..1721) + /label="rep (pMB1)" + /ApEinfo_revcolor="#85dae9" + /ApEinfo_fwdcolor="#85dae9" + misc_feature complement(1888..1993) + /label="Terminator T0" + /ApEinfo_revcolor="#c6c9d1" + /ApEinfo_fwdcolor="#c6c9d1" + CDS complement(2006..2665) + /label="Cam Resistance" + /ApEinfo_revcolor="#ff9ccd" + /ApEinfo_fwdcolor="#ff9ccd" +ORIGIN + 1 ccacctgacg tctaagaaac cattattatc atgacattaa cctataaaaa taggcgtatc + 61 acgaggcaga atttcagata aaaaaaatcc ttagctttcg ctaaggatga tttctggaat + 121 tcggtctcga atgcgtaaag gcgaggaact gttcactggt gtcgtcccta ttctggtgga + 181 actggatggt gatgtcaacg gtcataagtt ttccgtgcgt ggcgagggtg aaggtgacgc + 241 aactaatggt aaactgacgc tgaagttcat ctgtactact ggtaaactgc cggtaccttg + 301 gccgactctg gtaacgacgc tgacttatgg tgttcagtgc tttgctcgtt atccggacca + 361 tatgaagcag catgacttct tcaagtccgc catgccggaa ggctatgtgc aggaacgcac + 421 gatttccttt aaggatgacg gcacgtacaa aacgcgtgcg gaagtgaaat ttgaaggcga + 481 taccctggta aaccgcattg agctgaaagg cattgacttt aaagaagacg gcaatatcct + 541 gggccataag ctggaataca attttaacag ccacaatgtt tacatcaccg ccgataaaca + 601 aaaaaatggc attaaagcga attttaaaat tcgccacaac gtggaggatg gcagcgtgca + 661 gctggctgat cactaccagc aaaacactcc aatcggtgat ggtcctgttc tgctgccaga + 721 caatcactat ctgagcacgc aaagcgttct gtctaaagat ccgaacgaga aacgcgatca + 781 tatggttctg ctggagttcg taaccgcagc gggcatcacg catggtatgg atgaactgta + 841 caaatgatga gcttCGAGac cctgcagtcc ggcaaaaaag ggcaaggtgt caccaccctg + 901 ccctttttct ttaaaaccga aaagattact tcgcgttatg caggcttcct cgctcactga + 961 ctcgctgcgc tcggtcgttc ggctgcggcg agcggtatca gctcactcaa aggcggtaat + 1021 acggttatcc acagaatcag gggataacgc aggaaagaac atgtgagcaa aaggccagca + 1081 aaaggccagg aaccgtaaaa aggccgcgtt gctggcgttt ttccacaggc tccgcccccc + 1141 tgacgagcat cacaaaaatc gacgctcaag tcagaggtgg cgaaacccga caggactata + 1201 aagataccag gcgtttcccc ctggaagctc cctcgtgcgc tctcctgttc cgaccctgcc + 1261 gcttaccgga tacctgtccg cctttctccc ttcgggaagc gtggcgcttt ctcatagctc + 1321 acgctgtagg tatctcagtt cggtgtaggt cgttcgctcc aagctgggct gtgtgcacga + 1381 accccccgtt cagcccgacc gctgcgcctt atccggtaac tatcgtcttg agtccaaccc + 1441 ggtaagacac gacttatcgc cactggcagc agccactggt aacaggatta gcagagcgag + 1501 gtatgtaggc ggtgctacag agttcttgaa gtggtggcct aactacggct acactagaag + 1561 aacagtattt ggtatctgcg ctctgctgaa gccagttacc ttcggaaaaa gagttggtag + 1621 ctcttgatcc ggcaaacaaa ccaccgctgg tagcggtggt ttttttgttt gcaagcagca + 1681 gattacgcgc agaaaaaaag gatctcaaga agatcctttg atcttttcta cggggtctga + 1741 cgctcagtgg aacgaaaact cacgttaagg gattttggtc atgagattat caaaaaggat + 1801 cttcacctag atccttttaa attaaaaatg aagttttaaa tcaatctaaa gtatatatga + 1861 gtaaacttgg tctgacagct cgaggcttgg attctcacca ataaaaaacg cccggcggca + 1921 accgagcgtt ctgaacaaat ccagatggag ttctgaggtc attactggat ctatcaacag + 1981 gagtccaagc gagctcgata tcaaattacg ccccgccctg ccactcatcg cagtactgtt + 2041 gtaattcatt aagcattctg ccgacatgga agccatcaca aacggcatga tgaacctgaa + 2101 tcgccagcgg catcagcacc ttgtcgcctt gcgtataata tttgcccatg gtgaaaacgg + 2161 gggcgaagaa gttgtccata ttggccacgt ttaaatcaaa actggtgaaa ctcacccagg + 2221 gattggctga gacgaaaaac atattctcaa taaacccttt agggaaatag gccaggtttt + 2281 caccgtaaca cgccacatct tgcgaatata tgtgtagaaa ctgccggaaa tcgtcgtggt + 2341 attcactcca gagcgatgaa aacgtttcag tttgctcatg gaaaacggtg taacaagggt + 2401 gaacactatc ccatatcacc agctcaccgt ctttcattgc catacgaaat tccggatgag + 2461 cattcatcag gcgggcaaga atgtgaataa aggccggata aaacttgtgc ttatttttct + 2521 ttacggtctt taaaaaggcc gtaatatcca gctgaacggt ctggttatag gtacattgag + 2581 caactgactg aaatgcctca aaatgttctt tacgatgcca ttgggatata tcaacggtgg + 2641 tatatccagt gatttttttc tccattttag cttccttagc tcctgaaaat ctcgataact + 2701 caaaaaatac gcccggtagt gatcttattt cattatggtg aaagttggaa cctcttacgt + 2761 gcccgatcaa ctcgagtg +// \ No newline at end of file diff --git a/test/test_files/test_part_in_backbone_from_sbol_bp011.nt b/test/test_files/test_part_in_backbone_from_sbol_bp011.nt new file mode 100644 index 00000000..0e244d22 --- /dev/null +++ b/test/test_files/test_part_in_backbone_from_sbol_bp011.nt @@ -0,0 +1,67 @@ + "Constraint1" . + . + . + . + . + "Range1" . + "646"^^ . + . + "518"^^ . + . + "SequenceFeature1" . + . + . + . + . + "Range1" . + "522"^^ . + . + "1"^^ . + "518"^^ . + . + "Range2" . + "646"^^ . + . + "3"^^ . + "642"^^ . + . + "SequenceFeature2" . + . + . + . + . + "Range1" . + "521"^^ . + . + "2"^^ . + "1"^^ . + . + "Range2" . + "2190"^^ . + . + "1"^^ . + "642"^^ . + . + "SequenceFeature3" . + . + . + . + "b0015_ef_in_bb" . + . + . + . + . + . + . + "b0015_ef_in_bb" . + . + . + . + . + . + . + "b0015_ef_in_bb_seq" . + "aagggtgaacactatcccatatcaccagctcaccgtctttcattgccatacgaaattccggatgagcattcatcaggcgggcaagaatgtgaataaaggccggataaaacttgtgcttatttttctttacggtctttaaaaaggccgtaatatccagctgaacggtctggttataggtacattgagcaactgactgaaatgcctcaaaatgttctttacgatgccattgggatatatcaacggtggtatatccagtgatttttttctccattttagcttccttagctcctgaaaatctcgataactcaaaaaatacgcccggtagtgatcttatttcattatggtgaaagttggaacctcttacgtgcccgatcaactcgagtgccacctgacgtctaagaaaccattattatcatgacattaacctataaaaataggcgtatcacgaggcagaatttcagataaaaaaaatccttagctttcgctaaggatgatttctggaattcggtctcggcttccaggcatcaaataaaacgaaaggctcagtcgaaagactgggcctttcgttttatctgttgtttgtcggtgaacgctctctactagagtcacactggctcaccttcgggtgggcctttctgcgtttatacgctCGAGaccctgcagtccggcaaaaaagggcaaggtgtcaccaccctgccctttttctttaaaaccgaaaagattacttcgcgttatgcaggcttcctcgctcactgactcgctgcgctcggtcgttcggctgcggcgagcggtatcagctcactcaaaggcggtaatacggttatccacagaatcaggggataacgcaggaaagaacatgtgagcaaaaggccagcaaaaggccaggaaccgtaaaaaggccgcgttgctggcgtttttccacaggctccgcccccctgacgagcatcacaaaaatcgacgctcaagtcagaggtggcgaaacccgacaggactataaagataccaggcgtttccccctggaagctccctcgtgcgctctcctgttccgaccctgccgcttaccggatacctgtccgcctttctcccttcgggaagcgtggcgctttctcatagctcacgctgtaggtatctcagttcggtgtaggtcgttcgctccaagctgggctgtgtgcacgaaccccccgttcagcccgaccgctgcgccttatccggtaactatcgtcttgagtccaacccggtaagacacgacttatcgccactggcagcagccactggtaacaggattagcagagcgaggtatgtaggcggtgctacagagttcttgaagtggtggcctaactacggctacactagaagaacagtatttggtatctgcgctctgctgaagccagttaccttcggaaaaagagttggtagctcttgatccggcaaacaaaccaccgctggtagcggtggtttttttgtttgcaagcagcagattacgcgcagaaaaaaaggatctcaagaagatcctttgatcttttctacggggtctgacgctcagtggaacgaaaactcacgttaagggattttggtcatgagattatcaaaaaggatcttcacctagatccttttaaattaaaaatgaagttttaaatcaatctaaagtatatatgagtaaacttggtctgacagctcgaggcttggattctcaccaataaaaaacgcccggcggcaaccgagcgttctgaacaaatccagatggagttctgaggtcattactggatctatcaacaggagtccaagcgagctcgatatcaaattacgccccgccctgccactcatcgcagtactgttgtaattcattaagcattctgccgacatggaagccatcacaaacggcatgatgaacctgaatcgccagcggcatcagcaccttgtcgccttgcgtataatatttgcccatggtgaaaacgggggcgaagaagttgtccatattggccacgtttaaatcaaaactggtgaaactcacccagggattggctgagacgaaaaacatattctcaataaaccctttagggaaataggccaggttttcaccgtaacacgccacatcttgcgaatatatgtgtagaaactgccggaaatcgtcgtggtattcactccagagcgatgaaaacgtttcagtttgctcatggaaaacggtgtaac" . + . + . + .