Skip to content

Commit 9ec80ca

Browse files
authored
Merge pull request #145 from puja-trivedi/update_genomeannotation_20250221
updated genome_annotation.yaml to match formatting standards
2 parents 5301afe + 363164c commit 9ec80ca

File tree

11 files changed

+301
-188
lines changed

11 files changed

+301
-188
lines changed

erdiagram-autogen/bke_taxonomy.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ ColorPalette {
3535
}
3636
MatrixFile {
3737
stringList content_url
38+
uriorcurieList xref
3839
string id
3940
iri_type iri
4041
curieList category
@@ -43,7 +44,6 @@ MatrixFile {
4344
narrative_text description
4445
boolean deprecated
4546
stringList provided_by
46-
uriorcurieList xref
4747
label_type full_name
4848
label_typeList synonym
4949
}
@@ -91,6 +91,7 @@ CellSpecimen {
9191
}
9292
ObservationRow {
9393
string label
94+
uriorcurieList xref
9495
string id
9596
iri_type iri
9697
curieList category
@@ -99,12 +100,12 @@ ObservationRow {
99100
narrative_text description
100101
boolean deprecated
101102
stringList provided_by
102-
uriorcurieList xref
103103
label_type full_name
104104
label_typeList synonym
105105
}
106106
ObservationMatrix {
107107
stringList content_url
108+
uriorcurieList xref
108109
string id
109110
iri_type iri
110111
curieList category
@@ -113,7 +114,6 @@ ObservationMatrix {
113114
narrative_text description
114115
boolean deprecated
115116
stringList provided_by
116-
uriorcurieList xref
117117
label_type full_name
118118
label_typeList synonym
119119
}

json-schema-autogen/bke_taxonomy.json

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2409,7 +2409,7 @@
24092409
},
24102410
"GeneAnnotation": {
24112411
"additionalProperties": false,
2412-
"description": "An annotation describing the location, boundaries, and functions of individual genes within a genome annotation.",
2412+
"description": "Represents a single gene. Includes metadata about the gene, such as its molecular type and the genome annotation it was referenced from.",
24132413
"properties": {
24142414
"category": {
24152415
"description": "Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. In a neo4j database this MAY correspond to the neo4j label tag. In an RDF database it should be a biolink model class URI. This field is multi-valued. It should include values for ancestors of the biolink class; for example, a protein such as Shh would have category values `biolink:Protein`, `biolink:GeneProduct`, `biolink:MolecularEntity`. In an RDF database, nodes will typically have an rdf:type triples. This can be to the most specific biolink class, or potentially to a class more specific than something in biolink. For example, a sequence feature `f` may have a rdf:type assertion to a SO class such as TF_binding_site, which is more specific than anything in biolink. Here we would have categories {biolink:GenomicEntity, biolink:MolecularEntity, biolink:NamedThing}. NOTE: The category slot was modified to have a curie range and a pattern for bican categories.",
@@ -2531,7 +2531,7 @@
25312531
"type": "string"
25322532
}
25332533
],
2534-
"description": "The genome annotation that this gene annotation was referenced from.",
2534+
"description": "The genome annotation that this gene was referenced from.",
25352535
"type": "string"
25362536
},
25372537
"source_id": {
@@ -2724,7 +2724,7 @@
27242724
},
27252725
"GenomeAnnotation": {
27262726
"additionalProperties": false,
2727-
"description": "Location and nomenclature of genes and all of the coding regions in a genome assembly and the classification of genes and transcripts into types.",
2727+
"description": "Represents a genome annotation. Includes metadata about the genome, such as its version and reference assembly.",
27282728
"properties": {
27292729
"authority": {
27302730
"$ref": "#/$defs/AuthorityType",
@@ -2911,7 +2911,7 @@
29112911
},
29122912
"GenomeAssembly": {
29132913
"additionalProperties": false,
2914-
"description": "Genome assembly to contain version and label information",
2914+
"description": "Represents a genome assembly. A genome assembly is a computational representation of a genome sequence.",
29152915
"properties": {
29162916
"category": {
29172917
"description": "Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. In a neo4j database this MAY correspond to the neo4j label tag. In an RDF database it should be a biolink model class URI. This field is multi-valued. It should include values for ancestors of the biolink class; for example, a protein such as Shh would have category values `biolink:Protein`, `biolink:GeneProduct`, `biolink:MolecularEntity`. In an RDF database, nodes will typically have an rdf:type triples. This can be to the most specific biolink class, or potentially to a class more specific than something in biolink. For example, a sequence feature `f` may have a rdf:type assertion to a SO class such as TF_binding_site, which is more specific than anything in biolink. Here we would have categories {biolink:GenomicEntity, biolink:MolecularEntity, biolink:NamedThing}. NOTE: The category slot was modified to have a curie range and a pattern for bican categories.",

json-schema-autogen/genome_annotation.json

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -683,7 +683,7 @@
683683
},
684684
"GeneAnnotation": {
685685
"additionalProperties": false,
686-
"description": "An annotation describing the location, boundaries, and functions of individual genes within a genome annotation.",
686+
"description": "Represents a single gene. Includes metadata about the gene, such as its molecular type and the genome annotation it was referenced from.",
687687
"properties": {
688688
"category": {
689689
"description": "Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. In a neo4j database this MAY correspond to the neo4j label tag. In an RDF database it should be a biolink model class URI. This field is multi-valued. It should include values for ancestors of the biolink class; for example, a protein such as Shh would have category values `biolink:Protein`, `biolink:GeneProduct`, `biolink:MolecularEntity`. In an RDF database, nodes will typically have an rdf:type triples. This can be to the most specific biolink class, or potentially to a class more specific than something in biolink. For example, a sequence feature `f` may have a rdf:type assertion to a SO class such as TF_binding_site, which is more specific than anything in biolink. Here we would have categories {biolink:GenomicEntity, biolink:MolecularEntity, biolink:NamedThing}. NOTE: The category slot was modified to have a curie range and a pattern for bican categories.",
@@ -805,7 +805,7 @@
805805
"type": "string"
806806
}
807807
],
808-
"description": "The genome annotation that this gene annotation was referenced from.",
808+
"description": "The genome annotation that this gene was referenced from.",
809809
"type": "string"
810810
},
811811
"source_id": {
@@ -998,7 +998,7 @@
998998
},
999999
"GenomeAnnotation": {
10001000
"additionalProperties": false,
1001-
"description": "Location and nomenclature of genes and all of the coding regions in a genome assembly and the classification of genes and transcripts into types.",
1001+
"description": "Represents a genome annotation. Includes metadata about the genome, such as its version and reference assembly.",
10021002
"properties": {
10031003
"authority": {
10041004
"$ref": "#/$defs/AuthorityType",
@@ -1185,7 +1185,7 @@
11851185
},
11861186
"GenomeAssembly": {
11871187
"additionalProperties": false,
1188-
"description": "Genome assembly to contain version and label information",
1188+
"description": "Represents a genome assembly. A genome assembly is a computational representation of a genome sequence.",
11891189
"properties": {
11901190
"category": {
11911191
"description": "Name of the high level ontology class in which this entity is categorized. Corresponds to the label for the biolink entity type class. In a neo4j database this MAY correspond to the neo4j label tag. In an RDF database it should be a biolink model class URI. This field is multi-valued. It should include values for ancestors of the biolink class; for example, a protein such as Shh would have category values `biolink:Protein`, `biolink:GeneProduct`, `biolink:MolecularEntity`. In an RDF database, nodes will typically have an rdf:type triples. This can be to the most specific biolink class, or potentially to a class more specific than something in biolink. For example, a sequence feature `f` may have a rdf:type assertion to a SO class such as TF_binding_site, which is more specific than anything in biolink. Here we would have categories {biolink:GenomicEntity, biolink:MolecularEntity, biolink:NamedThing}. NOTE: The category slot was modified to have a curie range and a pattern for bican categories.",

jsonld-context-autogen/bke_taxonomy.context.jsonld

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -512,12 +512,12 @@
512512
"Gene": {
513513
"@id": "biolink:Gene"
514514
},
515-
"GeneAnnotation": {
516-
"@id": "GeneAnnotation"
517-
},
518515
"GeneOrGeneProduct": {
519516
"@id": "biolink:GeneOrGeneProduct"
520517
},
518+
"GeneAnnotation": {
519+
"@id": "GeneAnnotation"
520+
},
521521
"Genome": {
522522
"@id": "biolink:Genome"
523523
},

jsonld-context-autogen/genome_annotation.context.jsonld

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -214,9 +214,7 @@
214214
},
215215
"@id": "authority"
216216
},
217-
"category": {
218-
"@id": "biolink:category"
219-
},
217+
"category": "@type",
220218
"checksum_algorithm": {
221219
"@context": {
222220
"text": "skos:notation",
@@ -384,12 +382,12 @@
384382
"Gene": {
385383
"@id": "biolink:Gene"
386384
},
387-
"GeneAnnotation": {
388-
"@id": "GeneAnnotation"
389-
},
390385
"GeneOrGeneProduct": {
391386
"@id": "biolink:GeneOrGeneProduct"
392387
},
388+
"GeneAnnotation": {
389+
"@id": "GeneAnnotation"
390+
},
393391
"Genome": {
394392
"@id": "biolink:Genome"
395393
},
@@ -448,5 +446,4 @@
448446
"@id": "biolink:ThingWithTaxon"
449447
}
450448
}
451-
}
452-
449+
}

linkml-schema/bke_taxonomy.yaml

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -440,6 +440,7 @@ classes:
440440
- has_variable
441441
- was_derived_from
442442
- content_url
443+
- xref
443444
slot_usage:
444445
was_generated_by:
445446
description: The aggregation process from which an observation matrix was generated
@@ -453,7 +454,7 @@ classes:
453454
has_variable:
454455
description: One of the set of genes which together form the variable set of
455456
an observation matrix.
456-
range: gene annotation
457+
range: GeneAnnotation
457458
multivalued: true
458459
was_derived_from:
459460
description: One of many cell specimens from which observations in the matrix
@@ -464,6 +465,10 @@ classes:
464465
local_names:
465466
local_name_value: url
466467
local_name_source: allen
468+
xref:
469+
local_names:
470+
local_name_value: unique_id
471+
local_name_source: allen
467472
ObservationRow:
468473
description: One specific row of the observation matrix representing a set of
469474
measurements performed on a sample over a set of variables or features.
@@ -476,6 +481,7 @@ classes:
476481
- represented_in
477482
- was_derived_from
478483
- label
484+
- xref
479485
slot_usage:
480486
part_of_matrix:
481487
description: The observation matrix of which this observation row is a part.
@@ -487,6 +493,10 @@ classes:
487493
description: The cell specimen from which the observation was derived.
488494
range: CellSpecimen
489495
label: {}
496+
xref:
497+
local_names:
498+
local_name_value: unique_id
499+
local_name_source: allen
490500
CellSpecimen:
491501
description: ( defined as in BERS)
492502
from_schema: https:/w3id.org/brain-bican/bke-taxonomy
@@ -521,7 +531,7 @@ classes:
521531
local_name_source: allen
522532
description: One of potentially many gene annotation terms that the abbreviation
523533
denotes.
524-
range: gene annotation
534+
range: GeneAnnotation
525535
multivalued: true
526536
denotes_parcellation_term:
527537
local_names:
@@ -562,11 +572,16 @@ classes:
562572
- ProvEntity
563573
slots:
564574
- content_url
575+
- xref
565576
slot_usage:
566577
content_url:
567578
local_names:
568579
local_name_value: url
569580
local_name_source: allen
581+
xref:
582+
local_names:
583+
local_name_value: unique_id
584+
local_name_source: allen
570585
ColorPalette:
571586
description: A schematic set of display colors that can be applied to individual
572587
components of the associated entity set (for example, a taxonomy).

linkml-schema/genome_annotation.yaml

Lines changed: 20 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ id: https://identifiers.org/brain-bican/genome-annotation-schema
22
name: genome-annotation-schema
33
title: Genome Annotation Schema
44
description: |-
5-
The Genome Annotation schema is designed to represent types and relationships of an organism's annotated genome.
5+
The Genome Annotation Schema is designed to represent all the genes from a given genome annotation.
66
77
prefixes:
88
linkml: https://w3id.org/linkml/
@@ -19,75 +19,68 @@ default_range: string
1919
default_prefix: bican
2020

2121
classes:
22-
gene annotation:
22+
GeneAnnotation:
2323
is_a: gene
2424
description: >-
25-
An annotation describing the location, boundaries, and functions of
26-
individual genes within a genome annotation.
25+
Represents a single gene. Includes metadata about the gene, such as its molecular type and the genome annotation it was referenced from.
2726
slots:
28-
- molecular type
29-
- source id
27+
- molecular_type
28+
- source_id
3029
attributes:
31-
referenced in:
32-
description: The genome annotation that this gene annotation was referenced from.
30+
referenced_in:
31+
description: The genome annotation that this gene was referenced from.
3332
required: true
3433
inlined: true
3534
any_of:
36-
- range: genome annotation
35+
- range: GenomeAnnotation
3736
- range: string
38-
id_prefixes:
39-
- ENSEMBL
40-
- MGI
41-
- NCBIGene
4237

43-
genome annotation:
38+
GenomeAnnotation:
4439
is_a: genome
4540
description: >-
46-
Location and nomenclature of genes and all of the coding regions in a genome assembly
47-
and the classification of genes and transcripts into types.
41+
Represents a genome annotation. Includes metadata about the genome, such as its version and reference assembly.
4842
slots:
4943
- version
5044
- digest
5145
- content_url
5246
- authority
5347
attributes:
54-
reference assembly:
48+
reference_assembly:
5549
description: The reference genome assembly that this genome annotation was created from.
5650
required: true
5751
inlined: true
5852
any_of:
59-
- range: genome assembly
53+
- range: GenomeAssembly
6054
- range: string
6155

62-
genome assembly:
56+
GenomeAssembly:
6357
is_a: named thing
6458
mixins:
6559
- thing with taxon
6660
description: >-
67-
Genome assembly to contain version and label information
61+
Represents a genome assembly. A genome assembly is a computational representation of a genome sequence.
6862
slots:
6963
- version
7064
- strain
7165

72-
annotation collection:
66+
AnnotationCollection:
7367
tree_root: true
7468
attributes:
7569
annotations:
7670
multivalued: true
7771
inlined_as_list: true
78-
range: gene annotation
72+
range: GeneAnnotation
7973
genome_annotations:
8074
multivalued: true
8175
inlined_as_list: true
82-
range: genome annotation
76+
range: GenomeAnnotation
8377
genome_assemblies:
8478
multivalued: true
8579
inlined_as_list: true
86-
range: genome assembly
87-
80+
range: GenomeAssembly
8881

8982
slots:
90-
molecular type:
83+
molecular_type:
9184
any_of:
9285
- range: BioType
9386
- range: string
@@ -100,14 +93,13 @@ slots:
10093
description: The organization responsible for publishing the data.
10194
range: AuthorityType
10295

103-
source id:
96+
source_id:
10497
description: The authority specific identifier.
10598
slot_uri: schema:identifier
10699

107100
strain:
108101
description: The genetic variant or subtype of a species or organism.
109102

110-
111103
enums:
112104
BioType:
113105
permissible_values:

linkml-schema/source_bke_taxonomy/gsheet_output/Relations.tsv

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
Object Predicate Subject Multivalued ExactlyOneOf Slot Definition Quasi-formalism BKE Local Name Local Name Source
1+
Class Predicate Subject Multivalued ExactlyOneOf Slot Definition Quasi-formalism BKE Local Name Local Name Source
22
> class slot range multivalued ignore description ignore local_names local_names
33
> internal_separator: "|" inner_key: local_name_value inner_key: local_name_source
4-
Abbreviation denotes_gene_annotation gene annotation TRUE FALSE One of potentially many gene annotation terms to which the abbreviation denotes. Some abbreviation (a) denotes some GeneAnnotation (G) if and only if a refers to G. denotes allen
4+
Abbreviation denotes_gene_annotation GeneAnnotation TRUE FALSE One of potentially many gene annotation terms to which the abbreviation denotes. Some abbreviation (a) denotes some GeneAnnotation (G) if and only if a refers to G. denotes allen
55
Abbreviation denotes_parcellation_term ParcellationTerm TRUE FALSE One of potentially many parcellation terms (anatomical structures) which the abbreviation denotes. A denotes some PT denotes allen
66
CellTypeSet part_of_taxonomy CellTypeTaxonomy FALSE TRUE The cell types taxonomy for which the cell type set is part of. CTS part_of_taxonomy CTT if and only if CTT is a taxonomy, and CTS is a member of CTT
77
CellTypeSet contains_taxon CellTypeTaxon TRUE FALSE One of potentially many cell types taxons which together defines the cell type set. CTS contains_taxon CTT if and only if CTT is a part of CTS
@@ -28,7 +28,7 @@ DisplayColor is_color_for_taxon CellTypeTaxon FALSE TRUE The associated cell typ
2828
DisplayColor is_color_for_set CellTypeSet FALSE TRUE The associated cell type set of the specified display color. DC is_color_for CTS if DC is associated with CTS is_color_for allen
2929
ObservationMatrix was_generated_by ObservationMatrixCreationProcess FALSE TRUE The aggregation process from which a observation matrix was generated by. OM was_generated_by some OMCP if and only if OMCP is a process and OM was_output_of OMCP
3030
ObservationMatrix represented_by MatrixFile TRUE FALSE One of potentially matrix files which together represents the entire observation matrix. MF concretizes some OM
31-
ObservationMatrix has_variable gene annotation TRUE FALSE One of set of genes which together forms the variable set of an observation matrix.
31+
ObservationMatrix has_variable GeneAnnotation TRUE FALSE One of set of genes which together forms the variable set of an observation matrix.
3232
ObservationMatrix was_derived_from CellSpecimen TRUE FALSE One of many cell specimens from which observations in the matrix was derived from. OM was_derived_from some CSp if and only if OM has_input some CSp
3333
ObservationRow part_of_matrix ObservationMatrix FALSE TRUE The observation matrix for which is observation row is part of. OR part_of_matrix some OM if and only if OR part_of some OM
3434
ObservationRow represented_in MatrixFile FALSE TRUE The specific file where this observation row is represented. OR represented in MF if and only if OR is part_of some OM and OM is_concretized_in some MF

linkml-schema/source_bke_taxonomy/gsheet_output/Slots.tsv

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,5 +45,8 @@ DisplayColor id ( database GUID) FALSE string
4545
DisplayColor color_hex_triplet #450099 A hex string representing the display color for an associated entity. FALSE string
4646
DisplayColor xref unique_id allen
4747
MatrixFile content_url url allen
48+
MatrixFile xref unique_id allen
4849
ObservationMatrix content_url url allen
49-
ObservationRow label
50+
ObservationMatrix xref unique_id allen
51+
ObservationRow label
52+
ObservationRow xref unique_id allen

0 commit comments

Comments
 (0)