Skip to content

Commit 7fbcebe

Browse files
committed
several updates
1 parent 684c7f1 commit 7fbcebe

File tree

3 files changed

+15
-3
lines changed

3 files changed

+15
-3
lines changed

logsheet_schema_extended.csv

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ http://vocab.nerc.ac.uk/collection/P01/current/SO4XCLP1/"
135135
,samp_size_vol,xsd:float,xsd:float,,,http://vocab.nerc.ac.uk/collection/P21/current/MS11285/,litre,http://vocab.nerc.ac.uk/collection/P06/current/ULIT/,"water ",Sampling,mandatory,Amount of sample (volume) that was collected.,,," "
136136
,sampl_person,xsd:string,xsd:string,,,NA,NA,,water;soil,Sampling,mandatory,Name of person who sampled the sediment,Jasson Piepson,,
137137
,sampl_person_orcid,xsd:string,xsd:anyURI,https://orcid.org/,,NA,NA,,water;soil,Sampling,optional,orcid URL of the person who sampled the sediment (https://orcid.org),https://orcid.org/0000-0002-0202-8256,,
138-
,sampling_event,xsd:string,xsd:anyURI,http://example.com/,,NA,NA,,water;soil,Sampling,mandatory,"An EMO BON sampling event is a specific sampling campaign. A campaign that takes place at a specific observatory station at a specific time. The sampling campaign identifier consists of two terms: 1) Sampling Site ID; that is the Observatory ID (for example “SMN99”) supplemented with the sampling site indicator (“Wa” for water column, “So” for soft substrates and “Ha” for hard substrates) 2) Sampling Campaign Date formated as YYMMDD.For example “220315” would be the campaign on 15 March 2022. Especially for Hard substrates, both the deployment and retrieval dates have to be recorded formated as YYMMDD-YYMMDD",JIJO41 So 210828,,
138+
,sampling_event,xsd:string,xsd:string,,,NA,NA,,water;soil,Sampling,mandatory,"An EMO BON sampling event is a specific sampling campaign. A campaign that takes place at a specific observatory station at a specific time. The sampling campaign identifier consists of two terms: 1) Sampling Site ID; that is the Observatory ID (for example “SMN99”) supplemented with the sampling site indicator (“Wa” for water column, “So” for soft substrates and “Ha” for hard substrates) 2) Sampling Campaign Date formatted as YYMMDD. For example “220315” would be the campaign on 15 March 2022. Especially for Hard substrates, both the deployment and retrieval dates have to be recorded formatted as YYMMDD-YYMMDD",JIJO41 So 210828,,
139139
,ship_date,xsd:date,xsd:date,,,NA,NA,,water;soil,Sampling,mandatory,The date the samples were shipped for DNA extraction and sequencing,31/12/2021," ",
140140
,ship_date_seq,xsd:date,xsd:date,,,NA,NA,,water;soil,Sampling,mandatory,The date the samples were shipped from the EMBRC HQ to the sequencing centre according to ISO 8601,01/12/2021," ",
141141
may change: depends on whether GSC replies to Ioulia's question,size_frac_low,xsd:float,xsd:float,,,http://vocab.nerc.ac.uk/collection/Q01/current/Q0100011/,mm,https://vocab.nerc.ac.uk/collection/P06/current/UXMM/,water;soil,Sampling,mandatory,Refers to the sieve pore size used to pre-filter/pre-sort the macrobenthos sample. Materials larger than the size threshold are excluded from the sample. Complete this field only in case of sequential sieving,1,,check with ENA MIxS as terms may be swapped: Ioulia is still waiting for a reply
@@ -150,7 +150,7 @@ may change: depends on whether GSC replies to Ioulia's question,size_frac_up,xsd
150150
,failure,xsd:boolean,xsd:boolean,,,NA,NA,NA,w;s,Sampling,mandatory,"Indicates if the data generation failed. Takes True (T) or False (F) values. If data generation from a material sample failed, it needs to be indicated here. The failure may have happened at any stage of the workflow.",,yes,
151151
,scientific_name,xsd:string,xsd:string,,,NA,NA,NA,w;s,Sampling,mandatory,"Scientific name for the taxonomy ID of the organism as in the NCBI Taxonomy database. Entries in the NCBI Taxonomy database have integer taxon IDs. Each taxon ID corresponds to a scientific name. For EMO BON seawater samples the NCBI taxonomy ID is 1874687 corresponding to ""marine plankton metagenome"". For EMO BON sediment samples the NCBI taxonomy ID is 412755 corresponding to “marine sediment metagenome” ",,yes,
152152
,comm_samp,xsd:string,xsd:string,,,NA,NA,NA,s,Sampling,mandatory,"Brief description of the biological community being sampled. The terms used can be ""micro"" for microorganisms, ""meio"" for meiobenthos and ""macro"" for macrobenthos. For blank samples, add ""blank""",,yes,
153-
,source_mat_id,xsd:string,xsd:anyURI,http://example.com/,,http://rs.tdwg.org/dwc/terms/materialSampleID,NA,,water;soil,Sampling; Measured,mandatory,"A unique identifier assigned to a material sample according to EMO BON Handbook. This identifier will characterize the sample during nucleic acid extraction, and subsequent sequencing or biobanking. This identifier consists of 5 terms: 1) Project (“EMO BON”) 2) Sampling Site ID; that is the Observatory ID (for example “SMN99”) supplemented with the sampling site indicator (“Wa” for water column, “So” for soft substrates and “Ha” for hard substrates) 3) Sampling Campaign Date formated as YYMMDD.For example “220315” would be the campaign on 15 March 2022. Especially for Hard substrates, both the deployment and retrieval dates have to be recorded formated as YYMMDD-YYMMDD 4) Size fraction (Wa: “3 μm” / “0.2 μm” / “20 μm” / “200 μm”) or organisms collected (So: “micro” for microorganisms, “meio” for meiobenthos and “macro” for macrobenthos) or ARMS fraction (Ha: “SF” for sessile fraction, “MF05” for motile fraction sieved through 0.5 mm, “MF01” for motile fraction sieved through 0.1 mm) 5) Replicate (number 1-4). For labelling the negative control, this term will be replaced by the notation “blank” in parenthesis.",EMO BON JIJO41 SO 210828 micro_2," ",This is the value as corrected by IV from the source_mat_id_orig as entered by the station; it is also the value we should either QC (if exists) or create ourselves (if not)
153+
,source_mat_id,xsd:string,xsd:string,,,http://rs.tdwg.org/dwc/terms/materialSampleID,NA,,water;soil,Sampling; Measured,mandatory,"A unique identifier assigned to a material sample according to EMO BON Handbook. This identifier will characterize the sample during nucleic acid extraction, and subsequent sequencing or biobanking. This identifier consists of 5 terms: 1) Project (“EMO BON”) 2) Sampling Site ID; that is the Observatory ID (for example “SMN99”) supplemented with the sampling site indicator (“Wa” for water column, “So” for soft substrates and “Ha” for hard substrates) 3) Sampling Campaign Date formatted as YYMMDD. For example “220315” would be the campaign on 15 March 2022. Especially for Hard substrates, both the deployment and retrieval dates have to be recorded formatted as YYMMDD-YYMMDD 4) Size fraction (Wa: “3 μm” / “0.2 μm” / “20 μm” / “200 μm”) or organisms collected (So: “micro” for microorganisms, “meio” for meiobenthos and “macro” for macrobenthos) or ARMS fraction (Ha: “SF” for sessile fraction, “MF05” for motile fraction sieved through 0.5 mm, “MF01” for motile fraction sieved through 0.1 mm) 5) Replicate (number 1-4). For labelling the negative control, this term will be replaced by the notation “blank” in parenthesis.",EMO BON JIJO41 SO 210828 micro_2," ",This is the value as corrected by IV from the source_mat_id_orig as entered by the station; it is also the value we should either QC (if exists) or create ourselves (if not)
154154
,source_mat_id_orig,xsd:string,xsd:string,,,NA,NA,,water;soil,Sampling,mandatory,"A unique identifier assigned to a material sample according to EMO BON Handbook as added by the sampling operators. This identifier is not quality controlled but remains unchanged. This identifier consists of 5 terms: 1) Project (“EMOBON”) 2) Sampling Site ID; that is the Observatory ID (for example “SMN99”) supplemented with the sampling site indicator (“Wa” for water column, “So” for soft substrates and “Ha” for hard substrates) 3) Sampling Campaign Date formatted as YYMMDD. For example “220315” would be the campaign on 15 March 2022. Especially for Hard substrates, both the deployment and retrieval dates have to be recorded formatted as YYMMDD-YYMMDD 4) Size fraction (Wa: “3um” / “0.2um” / “20um” / “200um”) or organisms collected (So: “micro” for microorganisms, “meio” for meiobenthos and “macro” for macrobenthos) or ARMS fraction (Ha: “SF” for sessile fraction, “MF500” for motile fraction sieved through 0.5 mm, “MF100” for motile fraction sieved through 0.1 mm) 5) Replicate number (1-4). For labelling the negative control, this term will be replaced by the notation “blank”. All terms must be separated by ""_"".",EMOBON JIJO41 Wa 210828 3 um 3,yes,"This is the value entered by the station themselves, but can often be wrong"
155155
,samp_store_date,xsd:date,xsd:date,,,NA,NA,,water;soil,Sampling,mandatory,"Indicate the date the sample was placed in storage at appropriate temperature according to ISO 8601. Storage can be long term (replicates 3 and 4, WaSOP2 and WaSOP3 samples and usually the blanks) or short term (until samples are shipped for analyses).",01/07/2021,,
156156
" ",replicate,xsd:string,xsd:string,,,NA,NA,NA,water;soil,Sampling,mandatory,"Indicates the replicate code of the material sample. For replicate 1, add ""1"", for replicate 2, add ""2"", etc. For blank samples, add ""blank"".","1 (a numeric) or ""blank""",yes,

sembench.json

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,5 +28,17 @@
2828
"jinja_root": "templates",
2929
"file_name": "water_observatory.ldt.ttl.j2"
3030
}
31+
},
32+
{
33+
"type": "pysubyt",
34+
"input": "logsheets-transformed/water_sampling.csv",
35+
"output": "water/sampling/{sampling_event}.ttl",
36+
"template": {
37+
"jinja_root": "templates",
38+
"file_name": "water_sampling.ldt.ttl.j2"
39+
},
40+
"sets": {
41+
"observatory": "logsheets-transformed/water_observatory.csv"
42+
}
3143
}
3244
]

templates/water_observatory.ldt.ttl.j2

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ Description: Template to generate triples from {SampleType}_observatory.csv data
5353
schema:contactPoint [
5454
a emobon:Organization ;
5555
schema:legalName {{ item.strip() | ttl("xsd:string") | safe }} ;
56-
emobon:edmoId {{ _.organization_edmoid.split(';')[loop.index0].strip() | ttl("xsd:integer") | safe }} ; {# xsd:integer ~ pysubyt processing failed due to <int format does not round-trip [ 449 <> 449 ]>#}
56+
emobon:edmoId {{ _.organization_edmoid.split(';')[loop.index0].strip() | ttl("xsd:anyURI") | safe }} ; {# xsd:integer is not used here: pysubyt processing failed due to <int format does not round-trip [ 449 <> 449 ]> #}
5757
schema:address [
5858
a schema:PostalAddress ;
5959
schema:addressCountry {{ _.organization_country | ttl("xsd:string") | safe }} ;

0 commit comments

Comments
 (0)