Merge pull request #42 from poseidon-framework/janno_column_names_and_order

stschiff · web-flow · commit 2a72e08b5022 · 2022-01-28T14:23:52.000+01:00
changes for Poseidon 2.5.0
diff --git a/janno_columns.tsv b/janno_columns.tsv
@@ -1,39 +1,45 @@
-janno_column_name	description	data_type	multi	choice	range	choice_options	range_lower	range_upper	mandatory	unique	bonus
-Individual_ID	id as defined by the genetics laboratory, needs to be unique (e.g. I1234, BOT001), needs to fit to the values in the poseidon package .fam file, if multiple datasets exist for the same individual different IDs are required (e.g. loschbour_snpAD)	String	FALSE	FALSE	FALSE				TRUE	TRUE	FALSE
-Collection_ID	id as defined by the provider/owner of a sample (e.g. grave 40 skeleton 2)	String	FALSE	FALSE	FALSE				FALSE	FALSE	TRUE
-Source_Tissue	skeletal/tissue/source elements, specific bone name should be reported with an underscore (e.g. bone_phalanx), multiple values separated by ; in case of multiple libraries	String	TRUE	FALSE	FALSE				FALSE	FALSE	FALSE
-Country	present-day political country	String	FALSE	FALSE	FALSE				FALSE	FALSE	FALSE
-Location	unspecified location information like administrative or topographic region or mountains/rivers/lakes/cities nearby	String	FALSE	FALSE	FALSE				FALSE	FALSE	TRUE
-Site	site name	String	FALSE	FALSE	FALSE				FALSE	FALSE	FALSE
-Latitude	latitude with up to 5 places after the decimal point	Float	FALSE	FALSE	TRUE		-90	90	FALSE	FALSE	FALSE
-Longitude	longitude with up to 5 places after the decimal point	Float	FALSE	FALSE	TRUE		-180	180	FALSE	FALSE	FALSE
-Date_C14_Labnr	labnr of C14 date, multiple values separated by ; in case of multiple dates	String	TRUE	FALSE	FALSE				FALSE	FALSE	FALSE
-Date_C14_Uncal_BP	uncalibrated years BP (as in before 1950AD), as reported by C14 labs, multiple values separated by ; in the same order as Date_C14_Labnr in case of multiple dates	Integer	TRUE	FALSE	TRUE		0	Inf	FALSE	FALSE	FALSE
-Date_C14_Uncal_BP_Err	standard deviation (1 sigma ±), as reported by C14 labs, multiple values separated by ; in the same order as Date_C14_Labnr in case of multiple dates	Integer	TRUE	FALSE	TRUE		0	Inf	FALSE	FALSE	FALSE
-Date_BC_AD_Median	calibrated median age for C14 dates, or simple mid-points for archaeological intervals, 2000 for modern samples	Integer	FALSE	FALSE	TRUE		-Inf	2050	FALSE	FALSE	FALSE
-Date_BC_AD_Start	lower (older) bound for the age, negative numbers for BC, positive numbers for AD, in case of C14 dates 95% interval post calibration, 2000 for modern samples	Integer	FALSE	FALSE	TRUE		-Inf	2050	FALSE	FALSE	FALSE
-Date_BC_AD_Stop	upper (more recent) bound for the age, negative numbers for BC, positive numbers for AD, in case of C14 dates 95% interval post calibration, 2000 for modern samples	Integer	FALSE	FALSE	TRUE		-Inf	2050	FALSE	FALSE	FALSE
-Date_Type	“C14“ if directly from the individual, “contextual“ if based on archaeology or other C14 dates from the site, “modern” for present-day individuals	String	FALSE	TRUE	FALSE	C14;contextual;modern			FALSE	FALSE	FALSE
-No_of_Libraries	number of libraries	Integer	FALSE	FALSE	FALSE				FALSE	FALSE	FALSE
-Data_Type	specifics of data generation method, multiple values separated by ;	String	TRUE	TRUE	FALSE	Shotgun;1240K;OtherCapture;ReferenceGenome			FALSE	FALSE	FALSE
-Genotype_Ploidy	ploidy of the genotypes	String	FALSE	TRUE	FALSE	diploid;haploid			FALSE	FALSE	FALSE
-Group_Name	ideally Eisenmann rule + underscore flags, e.g. to annotate relatives or outliers or low coverage, multiple entries separated by ; to accommodate different labels, value must equal the group name in the .fam file (in case of multiple entries the first one)	String	TRUE	FALSE	FALSE				TRUE	FALSE	FALSE
-Genetic_Sex	“F“, “M“ or “U“ because eigenstrat and plink formats only support these three. Edge cases (XXY, XYY, X0) are undefined and should be grouped as F, M or U, with a note added	Char	FALSE	TRUE	FALSE	F;M;U			TRUE	FALSE	FALSE
-Nr_autosomal_SNPs	number of autosomal SNPs covered for 1240K capture or SG data pulldown	Integer	FALSE	FALSE	FALSE				FALSE	FALSE	FALSE
-Coverage_1240K	average X-fold coverage across 1240K SNP sites after quality filtering (internal data), NOT the % SNPs of 1.2M possible	Float	FALSE	FALSE	FALSE				FALSE	FALSE	FALSE
-MT_Haplogroup	mitochondrial haplogroup after phylotree.org as reported by Haplofind or Haplogrep	String	FALSE	FALSE	FALSE				FALSE	FALSE	FALSE
-Y_Haplogroup	Y-chromosome haplogroup reported as published, for internal data, please follow syntax with main branch + most terminal derived Y-SNP (e.g. R1b-P312)	String	FALSE	FALSE	FALSE				FALSE	FALSE	FALSE
-Endogenous	% endogenous DNA as estimated from SG libraries (before capture), as for example estimated by EAGER, not on target and no quality filter, in case of multiple libraries report only the highest value	Float	FALSE	FALSE	TRUE		0	100	FALSE	FALSE	FALSE
-UDG 	“mixed” in case multiple libraries with different UDG treatment were merged	String	FALSE	TRUE	FALSE	minus;half;plus;mixed			FALSE	FALSE	FALSE
-Library_Built	“ds” for double stranded, “ss” for single stranded, “mixed” in case multiple libraries with different protocols were merged	String	FALSE	TRUE	FALSE	ds;ss;other			FALSE	FALSE	FALSE
-Damage 	% damage on 5' end for the main shotgun library used for sequencing and/or capture, in case of multiple libraries report a value from the merged read alignment	Float	FALSE	FALSE	TRUE		0	100	FALSE	FALSE	FALSE
-Xcontam 	if male for captured library, in case of multiple libraries report a value from the merged read alignment	Float	FALSE	FALSE	TRUE		0	1	FALSE	FALSE	FALSE
-Xcontam_stderr	standard error of ANGSD X contamination estimate, in case of multiple libraries report a value from the merged read alignment	Float	FALSE	FALSE	TRUE		0	Inf	FALSE	FALSE	FALSE
-mtContam	mitochondrial contamination rate as estimated by ContamMix and/or Schmutzi, in case of multiple libraries report a value from the merged read alignment	Float	FALSE	FALSE	TRUE		0	1	FALSE	FALSE	FALSE
-mtContam_stderr	Standard error of ContamMix/Schmutzi estimate, in case of multiple libraries report a value from the merged read alignment	Float	FALSE	FALSE	TRUE		0	Inf	FALSE	FALSE	FALSE
-Genetic_Source_Accession_IDs	ENA or SRA Accession ID(s) pointing to the source data used to generate the genotyping data. If multiple are given they should be arranged by descending specificity (e.g. project id > sample id > sequencing run id).	String	TRUE	FALSE	FALSE				FALSE	FALSE	FALSE
-Data_Preparation_Pipeline_URL	URL pointing to a description of the pipeline used to generate the genotype data from the source data	String	FALSE	FALSE	FALSE				FALSE	FALSE	FALSE
-Primary_Contact	Project lead or first author	String	FALSE	FALSE	FALSE				FALSE	FALSE	FALSE
-Publication_Status	bibtex key (e.g. “AuthorJournalYear“) or “unpublished“	String	TRUE	FALSE	FALSE				FALSE	FALSE	FALSE
-Note	wildcard comments. e.g. note down aneuploidies here	String	FALSE	FALSE	FALSE				FALSE	FALSE	TRUE
-Keywords	Arbitrary tags separated by ; (e.g. for custom sorting purposes) 	String	TRUE	FALSE	FALSE				FALSE	FALSE	TRUE
+janno_column_name	description	data_type	multi	choice	range	choice_options	range_lower	range_upper	mandatory	unique
+Poseidon_ID	id as defined by the genetics laboratory, needs to be unique (e.g. I1234, BOT001), needs to fit to the values in the poseidon package .fam file, if multiple datasets exist for the same individual different IDs are required (e.g. loschbour_snpAD)	String	FALSE	FALSE	FALSE				TRUE	TRUE
+Genetic_Sex	“F“, “M“ or “U“ because eigenstrat and plink formats only support these three, edge cases (XXY, XYY, X0) are undefined and should be grouped as F, M or U, with a note added	Char	FALSE	TRUE	FALSE	F;M;U			TRUE	FALSE
+Group_Name	ideally Eisenmann rule + underscore flags, e.g. to annotate relatives or outliers or low coverage, multiple entries separated by ; to accommodate different labels, value must equal the group name in the .fam file (in case of multiple entries the first one)	String	TRUE	FALSE	FALSE				TRUE	FALSE
+Alternative_IDs	other identifiers for the same individual, e.g. IDs in other databases or popular names (e.g. Ötzi/Iceman)	String	TRUE	FALSE	FALSE				FALSE	FALSE
+Relation_To	other individuals (by Poseidon_ID) that are related/identical to this individual, multiple entries separated by ;	String	TRUE	FALSE	FALSE				FALSE	FALSE
+Relation_Degree	relationship degree for relatives mentioned in Related_To, multiple values separated by ; in the same order as Related_To in case of multiple relations	String	TRUE	TRUE	FALSE	identical;first;second;thirdToFifth;sixthToTenth;unrelated;other			FALSE	FALSE
+Relation_Type	relationship type for relatives mentioned in Related_To as an arbitrary string (e.g. sister_of, child_of, nephew_of, ...), multiple values separated by ; in the same order as Related_To in case of multiple relations	String	TRUE	FALSE	FALSE				FALSE	FALSE
+Relation_Note	arbitrary comments about the relations of this individual	String	FALSE	FALSE	FALSE				FALSE	FALSE
+Collection_ID	id as defined by the provider/owner of a sample (e.g. grave 40 skeleton 2)	String	FALSE	FALSE	FALSE				FALSE	FALSE
+Country	present-day political country	String	FALSE	FALSE	FALSE				FALSE	FALSE
+Location	unspecified location information like administrative or topographic region or mountains/rivers/lakes/cities nearby	String	FALSE	FALSE	FALSE				FALSE	FALSE
+Site	site name	String	FALSE	FALSE	FALSE				FALSE	FALSE
+Latitude	latitude with up to 5 places after the decimal point	Float	FALSE	FALSE	TRUE		-90	90	FALSE	FALSE
+Longitude	longitude with up to 5 places after the decimal point	Float	FALSE	FALSE	TRUE		-180	180	FALSE	FALSE
+Date_Type	“C14“ if directly from the individual, “contextual“ if based on archaeology or other C14 dates from the site, “modern” for present-day individuals	String	FALSE	TRUE	FALSE	C14;contextual;modern			FALSE	FALSE
+Date_C14_Labnr	labnr of C14 date, multiple values separated by ; in case of multiple dates	String	TRUE	FALSE	FALSE				FALSE	FALSE
+Date_C14_Uncal_BP	uncalibrated years BP (as in before 1950AD), as reported by C14 labs, multiple values separated by ; in the same order as Date_C14_Labnr in case of multiple dates	Integer	TRUE	FALSE	TRUE		0	Inf	FALSE	FALSE
+Date_C14_Uncal_BP_Err	standard deviation (1 sigma ±), as reported by C14 labs, multiple values separated by ; in the same order as Date_C14_Labnr in case of multiple dates	Integer	TRUE	FALSE	TRUE		0	Inf	FALSE	FALSE
+Date_BC_AD_Start	lower (older) bound for the age, negative numbers for BC, positive numbers for AD, in case of C14 dates 95% interval post calibration, 2000 for modern samples	Integer	FALSE	FALSE	TRUE		-Inf	2050	FALSE	FALSE
+Date_BC_AD_Median	calibrated median age for C14 dates, or simple mid-points for archaeological intervals, 2000 for modern samples	Integer	FALSE	FALSE	TRUE		-Inf	2050	FALSE	FALSE
+Date_BC_AD_Stop	upper (more recent) bound for the age, negative numbers for BC, positive numbers for AD, in case of C14 dates 95% interval post calibration, 2000 for modern samples	Integer	FALSE	FALSE	TRUE		-Inf	2050	FALSE	FALSE
+Date_Note	a free text field for arbitrary comments about the dating information	String	FALSE	FALSE	FALSE				FALSE	FALSE
+MT_Haplogroup	mitochondrial haplogroup after phylotree.org as reported by Haplofind or Haplogrep	String	FALSE	FALSE	FALSE				FALSE	FALSE
+Y_Haplogroup	Y-chromosome haplogroup reported as published, for internal data, please follow syntax with main branch + most terminal derived Y-SNP (e.g. R1b-P312)	String	FALSE	FALSE	FALSE				FALSE	FALSE
+Source_Tissue	skeletal/tissue/source elements, specific bone name should be reported with an underscore (e.g. bone_phalanx), multiple values separated by ; in case of multiple libraries	String	TRUE	FALSE	FALSE				FALSE	FALSE
+Nr_Libraries	number of libraries	Integer	FALSE	FALSE	FALSE				FALSE	FALSE
+Capture_Type	specifics of data generation method, multiple values separated by ;	String	TRUE	TRUE	FALSE	Shotgun;1240K;OtherCapture;ReferenceGenome			FALSE	FALSE
+UDG 	“mixed” in case multiple libraries with different UDG treatment were merged	String	FALSE	TRUE	FALSE	minus;half;plus;mixed			FALSE	FALSE
+Library_Built	“ds” for double stranded, “ss” for single stranded, “mixed” in case multiple libraries with different protocols were merged	String	FALSE	TRUE	FALSE	ds;ss;other			FALSE	FALSE
+Genotype_Ploidy	ploidy of the genotypes	String	FALSE	TRUE	FALSE	diploid;haploid			FALSE	FALSE
+Data_Preparation_Pipeline_URL	URL pointing to a description of the pipeline used to generate the genotype data from the source data	String	FALSE	FALSE	FALSE				FALSE	FALSE
+Endogenous	% endogenous DNA as estimated from SG libraries (before capture), as for example estimated by EAGER, not on target and no quality filter, in case of multiple libraries report only the highest value	Float	FALSE	FALSE	TRUE		0	100	FALSE	FALSE
+Nr_SNPs	number of SNPs covered	Integer	FALSE	FALSE	FALSE				FALSE	FALSE
+Coverage_on_Target_SNPs	average X-fold coverage across targeted SNP sites after quality filtering (internal data)	Float	FALSE	FALSE	FALSE				FALSE	FALSE
+Damage	% damage on 5' end for the main shotgun library used for sequencing and/or capture, in case of multiple libraries report a value from the merged read alignment	Float	FALSE	FALSE	TRUE		0	100	FALSE	FALSE
+Contamination	(modern) contamination as measured by the method in Contamination_Measure, multiple values can be separated by ;, Contamination_Err, Contamination_Meas and Contamination_Note must have the same number and order of entries, in case of multiple libraries report a value from the merged read alignment	String	TRUE	FALSE	FALSE				FALSE	FALSE
+Contamination_Err	(modern) contamination estimate error	String	TRUE	FALSE	FALSE				FALSE	FALSE
+Contamination_Meas	method to measure contamination, should be a software tool (ANGSD, Schmutzi, …) and the respective software versions, details should go to Contamination_Note	String	TRUE	FALSE	FALSE				FALSE	FALSE
+Contamination_Note	arbitrary comments about the contamination estimate	String	FALSE	FALSE	FALSE				FALSE	FALSE
+Genetic_Source_Accession_IDs	ENA or SRA Accession ID(s) pointing to the source data used to generate the genotyping data, if multiple are given they should be arranged by descending specificity (e.g. project id > sample id > sequencing run id)	String	TRUE	FALSE	FALSE				FALSE	FALSE
+Primary_Contact	Project lead or first author	String	FALSE	FALSE	FALSE				FALSE	FALSE
+Publication	bibtex key (e.g. “AuthorJournalYear“) or “unpublished“	String	TRUE	FALSE	FALSE				FALSE	FALSE
+Note	wildcard comments, e.g. note down aneuploidies here	String	FALSE	FALSE	FALSE				FALSE	FALSE
+Keywords	arbitrary tags separated by ; (e.g. for custom sorting purposes) 	String	TRUE	FALSE	FALSE				FALSE	FALSE