Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,7 @@ dmypy.json
great_expectations/
.vscode/

inputs/
outputs/
upload_check*
*manifests/
8 changes: 7 additions & 1 deletion annotations/processing-splits.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def process_csv(file_path):

# 2. Add "PublicationView_id" as a column if not present, and fill it with values from "Pubmed Id" column
if "PublicationView_id" not in df.columns and "Pubmed Id" in df.columns:
df["PublicationView_id"] = df["Pubmed Id"]
df.insert(1,'PublicationView_id',df['Pubmed Id'].copy())

# 3. Drop 'Publication Theme Name' and 'Publication Consortium Name' columns
columns_to_drop = ["Publication Theme Name", "Publication Consortium Name"]
Expand All @@ -39,6 +39,12 @@ def process_csv(file_path):
else x
)
)
# Swap the positions of "Publication Dataset Alias" and "Publication Accessibility" so the DataFrame's column order matches the union table
if "Publication Dataset Alias" in df.columns and "Publication Accessibility" in df.columns:
col_list = list(df.columns)
x,y = col_list.index('Publication Dataset Alias'), col_list.index('Publication Accessibility')
col_list[y], col_list[x] = col_list[x], col_list[y]
df = df[col_list]

# Save the modified DataFrame back to the CSV file
df.to_csv(file_path, index=False)
Expand Down
Empty file modified annotations/split_manifest_grants.py
100644 → 100755
Empty file.
39 changes: 24 additions & 15 deletions annotations/upload-workflow.sh
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -2,44 +2,56 @@
#Outlines workflow for curation processing and upload
#author: aditi.gopalan

#Make sure to run this script from the root directory (~/)


# Activate the Conda environment
echo "Activating conda environment: schematic"
conda activate schematic
echo "Activating conda environment: schematic2"
conda init
conda activate schematic2


# Run python3 split_manifest_grants.py
echo "Splitting manifests..."
python3 split_manifest_grants.py feb_manifest.csv publication ./output/output_feb --csv
python3 ~/Documents/code/mc2-center-dcc/annotations/split_manifest_grants.py ~/Documents/code/mc2-center-dcc/annotations/input/jan25_pubs.csv publication ~/Documents/code/mc2-center-dcc/annotations/output/output_jan25 --csv

# Check if split_manifest_grants.py was successful
if [ $? -eq 0 ]; then
# Generate file paths for split manifests
echo "Generating file paths..."
python3 gen-mp-csv.py ./output/output_feb feb_filepaths.csv publications
python3 ~/Documents/code/mc2-center-dcc/annotations/gen-mp-csv.py ~/Documents/code/mc2-center-dcc/annotations/output/output_nov ~/Documents/code/mc2-center-dcc/annotations/input/jan25_filepaths.csv publications

# Check if file path generation was successful
if [ $? -eq 0 ]; then
# Format manifests
echo "Processing split files..."
python3 processing-splits.py ./output/output_feb
python3 ~/Documents/code/mc2-center-dcc/annotations/processing-splits.py ~/Documents/code/mc2-center-dcc/annotations/output/output_jan25
# Check if formatting was successful
if [ $? -eq 0 ]; then
# Run schema updates, generating IDs
echo "Running schema updates..."
python3 schema_update.py feb_filepaths.csv Publication
python3 ~/Documents/code/mc2-center-dcc/annotations/schema_update.py ~/Documents/code/mc2-center-dcc/annotations/input/jan25_filepaths.csv Publication

echo "Generating Dataset/ Tool/ Educational Resource folder Ids"
python3 create_id_folders.py -m april_datasets_filepaths.csv -t DatasetView
# echo "Generating Dataset/ Tool/ Educational Resource folder Ids"
# python3 create_id_folders.py -m nov_datasets_filepaths.csv -t DatasetView

# Check if schema_update.py was successful
if [ $? -eq 0 ]; then
# Run python3 upload-manifests.py
echo "Uploading manifests..."
python3 upload-manifests.py -m feb_filepaths.csv -t PublicationView -c /Users/agopalan/schematic/config.yml
python3 ~/Documents/code/mc2-center-dcc/annotations/upload-manifests.py -m ~/Documents/code/mc2-center-dcc/annotations/input/jan25_filepaths.csv -t PublicationView -c ~/config.yml

# Check if upload-manifests.py was successful
if [ $? -eq 0 ]; then
echo "All scripts executed successfully."
# Run python3 upload_validation.py
echo "Validate Uploads..."
python3 ~/Documents/code/mc2-center-dcc/annotations/upload_validation.py ~/Documents/code/mc2-center-dcc/annotations/input/jan25_filepaths.csv Publication

# Check if upload_validation.py was successful
if [ $? -eq 0 ]; then
echo "All scripts executed successfully."
else
echo "Error: upload_validation.py failed."
fi
else
echo "Error: upload-manifests.py failed."
fi
Expand All @@ -54,7 +66,4 @@ if [ $? -eq 0 ]; then
fi
else
echo "Error: split_manifest_grants.py failed."
fi
else
echo "Error: Conda environment activation failed."
fi
fi