Snakemake data loader maintenance #166

Open
Wants to merge 5 commits into base: feat/external-deployment
Changes from all commits
22 changes: 15 additions & 7 deletions Dockerfile.snakemake
@@ -1,29 +1,31 @@
FROM node:16 as node_stage
FROM node:18 AS node_stage
WORKDIR /usr/src/app
# Bundle app source
COPY package*.json ./
RUN rm -rf node_modules
RUN npm install
RUN npm ci --only=production
# COPY everything not in dockerignore file
COPY . .
# set to avoid errors when singularity overloads working dir
ENV NODE_PATH=/usr/src/app/node_modules

FROM python:3.12
FROM ubuntu:noble
WORKDIR /usr/src/app
COPY --from=node_stage /usr/src/app/package*.json ./
COPY --from=node_stage /usr/src/app/node_modules ./node_modules

RUN apt-get update && \
apt-get install -y curl && \
curl -fsSL https://deb.nodesource.com/setup_16.x | bash - && \
apt-get install -y nodejs
apt-get install -y nodejs python3 python3-pip python3-pulp python3-wheel snakemake unzip wget nano

# Verify Node.js version
RUN node -v

RUN pip3 install --upgrade setuptools pip wheel && \
pip3 install "pulp<2.3.1" && \
pip3 install snakemake
#RUN pip3 install --upgrade setuptools pip wheel && \
#pip3 install "pulp<2.3.1" && \
#pip3 install snakemake

RUN mkdir /usr/src/app/snakemake_data
RUN mkdir /usr/src/app/snakemake_logs
@@ -32,7 +34,13 @@ RUN chmod 777 /usr/src/app/snakemake_logs

# Copy the Snakefile to the working directory (assuming it's already in the build context)
COPY . .
COPY ./download_ensembl_curl_config.txt /usr/src/app/
COPY ./downloadCosmic.sh /usr/src/app

# Still getting NODE version errors, but a rebuild is quick and helps.
RUN npm rebuild

# Run the Snakefile using Snakemake
CMD [ "sh", "-c", "snakemake --debug -j 1 --config gkb_url=$GKB_URL gkb_user=$GKB_USER gkb_pass=$GKB_PASS --until $GKB_LOADER"]
#CMD [ "sh", "-c", "snakemake --debug -j 1 --config gkb_url=$GKB_URL gkb_user=$GKB_USER gkb_pass=$GKB_PASS --until $GKB_LOADER"]
CMD [ "sh", "-c", "snakemake --debug -j 1 --config gkb_url=$GKB_URL gkb_user=$GKB_USER gkb_pass=$GKB_PASS"]

178 changes: 74 additions & 104 deletions Snakefile
@@ -1,7 +1,6 @@
import os
from textwrap import dedent

CONTAINER = 'docker://bcgsc/pori-graphkb-loader:v6.4.0'
CONTAINER = 'bcgsc/pori-graphkb-loader'
DATA_DIR = 'snakemake_data'
LOGS_DIR = 'snakemake_logs'

@@ -42,13 +41,13 @@ GITHUB_DATA = 'https://raw.githubusercontent.com/bcgsc/pori_graphkb_loader/devel
rule all:
input: f'{DATA_DIR}/civic.COMPLETE',
f'{DATA_DIR}/cgi.COMPLETE',
f'{DATA_DIR}/docm.COMPLETE',
#f'{DATA_DIR}/docm.COMPLETE',
#f'{DATA_DIR}/dgidb.COMPLETE',
f'{DATA_DIR}/PMC4468049.COMPLETE',
f'{DATA_DIR}/PMC4232638.COMPLETE',
#f'{DATA_DIR}/PMC4468049.COMPLETE',
#f'{DATA_DIR}/PMC4232638.COMPLETE',
f'{DATA_DIR}/uberon.COMPLETE',
f'{DATA_DIR}/fdaApprovals.COMPLETE',
#f'{DATA_DIR}/cancerhotspots.COMPLETE',
f'{DATA_DIR}/cancerhotspots.COMPLETE',
f'{DATA_DIR}/moa.COMPLETE',
f'{DATA_DIR}/ncitFdaXref.COMPLETE',
*([f'{DATA_DIR}/clinicaltrialsgov.COMPLETE'] if BACKFILL_TRIALS else []),
@@ -57,160 +56,131 @@ rule all:

rule download_ncit:
output: f'{DATA_DIR}/ncit/Thesaurus.txt',
shell: dedent(f'''\
cd {DATA_DIR}/ncit
wget https://evs.nci.nih.gov/ftp1/NCI_Thesaurus/Thesaurus.FLAT.zip
unzip Thesaurus.FLAT.zip
rm Thesaurus.FLAT.zip
rm -rf __MACOSX''')
shell: f'''
mkdir -p {DATA_DIR}/ncit
curl https://evs.nci.nih.gov/ftp1/NCI_Thesaurus/Thesaurus.FLAT.zip | zcat > {DATA_DIR}/ncit/Thesaurus.txt
rm -rf {DATA_DIR}/ncit/__MACOSX'''


rule download_ncit_fda:
output: f'{DATA_DIR}/ncit/FDA-UNII_NCIt_Subsets.txt'
shell: dedent(f'''\
cd {DATA_DIR}/ncit
wget https://evs.nci.nih.gov/ftp1/FDA/UNII/FDA-UNII_NCIt_Subsets.txt''')
shell: f'''
curl -L --create-dirs -o {DATA_DIR}/ncit/FDA-UNII_NCIt_Subsets.txt https://evs.nci.nih.gov/ftp1/FDA/UNII/FDA-UNII_NCIt_Subsets.txt'''


rule download_ensembl:
# This is the original query string. This has been URL encoded and put into download_ensembl_curl_config.txt
# Curl then runs using that config file to download ensembl.
#query_string='<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE Query><Query virtualSchemaName = "default" formatter = "TSV" header = "1" uniqueRows = "0" count = "" datasetConfigVersion = "0.6" ><Dataset name = "hsapiens_gene_ensembl" interface = "default" ><Filter name = "transcript_biotype" value = "protein_coding"/><Attribute name = "ensembl_gene_id" /><Attribute name = "ensembl_gene_id_version" /><Attribute name = "ensembl_transcript_id" /><Attribute name = "ensembl_transcript_id_version" /><Attribute name = "ensembl_peptide_id" /><Attribute name = "ensembl_peptide_id_version" /><Attribute name = "hgnc_id" /><Attribute name = "refseq_mrna" /><Attribute name = "description" /><Attribute name = "external_gene_name" /><Attribute name = "external_gene_source" /></Dataset></Query>'
output: f'{DATA_DIR}/ensembl/biomart_export.tsv'
shell: dedent(f'''\
cd {DATA_DIR}/ensembl
query_string='<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE Query><Query virtualSchemaName = "default" formatter = "TSV" header = "1" uniqueRows = "0" count = "" datasetConfigVersion = "0.6" ><Dataset name = "hsapiens_gene_ensembl" interface = "default" ><Filter name = "transcript_biotype" value = "protein_coding"/><Attribute name = "ensembl_gene_id" /><Attribute name = "ensembl_gene_id_version" /><Attribute name = "ensembl_transcript_id" /><Attribute name = "ensembl_transcript_id_version" /><Attribute name = "ensembl_peptide_id" /><Attribute name = "ensembl_peptide_id_version" /><Attribute name = "hgnc_id" /><Attribute name = "refseq_mrna" /><Attribute name = "description" /><Attribute name = "external_gene_name" /><Attribute name = "external_gene_source" /></Dataset></Query>'
wget -O biomart_export.tsv "http://www.ensembl.org/biomart/martservice?query=$query_string"
''')
shell: f'''
curl --config download_ensembl_curl_config.txt 2>&1 | tee -a downloadEnsemblLog.txt
'''
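A note on the config-file approach in this rule: curl can also URL-encode the BioMart query at request time, so the pre-encoded download_ensembl_curl_config.txt is a convenience rather than a requirement. A rough equivalent, assuming the original XML query string from the comment above is stored in $query_string:

# -G appends --data-urlencode values to the URL as query parameters instead of POSTing them
curl -G -L -A "Mozilla" --fail-with-body \
    --data-urlencode "query=$query_string" \
    -o snakemake_data/ensembl/biomart_export.tsv \
    "http://www.ensembl.org/biomart/martservice"

Keeping the encoded query in the committed config file lets the rule itself stay a single curl --config call, as above.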


rule download_fda_srs:
output: f'{DATA_DIR}/fda/UNII_Records.txt'
shell: dedent(f'''\
cd {DATA_DIR}/fda
wget https://precision.fda.gov/uniisearch/archive/latest/UNII_Data.zip
unzip UNII_Data.zip
rm UNII_Data.zip

mv UNII*.txt UNII_Records.txt
''')
shell: f'''
curl -L --create-dirs -o {DATA_DIR}/fda/UNII_Data.zip https://precision.fda.gov/uniisearch/archive/latest/UNII_Data.zip
unzip -o -d {DATA_DIR}/fda {DATA_DIR}/fda/UNII_Data.zip
rm {DATA_DIR}/fda/UNII_Data.zip
mv {DATA_DIR}/fda/UNII*.txt {DATA_DIR}/fda/UNII_Records.txt
'''


rule download_refseq:
output: f'{DATA_DIR}/refseq/LRG_RefSeqGene.tab'
shell: dedent(f'''\
cd {DATA_DIR}/refseq
wget -O LRG_RefSeqGene.tab ftp://ftp.ncbi.nih.gov/refseq/H_sapiens/RefSeqGene/LRG_RefSeqGene
''')
shell: f'''
curl -A "Mozilla" --fail-with-body -L --create-dirs -o {DATA_DIR}/refseq/LRG_RefSeqGene.tab ftp://ftp.ncbi.nih.gov/refseq/H_sapiens/RefSeqGene/LRG_RefSeqGene
'''


rule download_uberon:
output: f'{DATA_DIR}/uberon/uberon.owl'
shell: dedent(f'''\
cd {DATA_DIR}/uberon
wget http://purl.obolibrary.org/obo/uberon.owl
''')
shell: f'''
curl -A "Mozilla" -L --create-dirs -o {DATA_DIR}/uberon/uberon.owl https://github.com/obophenotype/uberon/releases/latest/download/uberon.owl
'''


rule download_do:
output: f'{DATA_DIR}/do/doid.json'
shell: dedent(f'''\
cd {DATA_DIR}/do;
REPO=https://github.com/DiseaseOntology/HumanDiseaseOntology.git;
LATEST=$(git ls-remote $REPO --tags v\\* | cut -f 2 | sed 's/refs\\/tags\///' | grep '\\bv[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]\\b' | sort -d | tail -n 1)
echo $LATEST
wget https://github.com/DiseaseOntology/HumanDiseaseOntology/raw/$LATEST/src/ontology/doid.json
''')
shell: f'''
curl -A "Mozilla" --create-dirs -o {DATA_DIR}/do/doid.json https://raw.githubusercontent.com/DiseaseOntology/HumanDiseaseOntology/refs/heads/main/src/ontology/doid.json
'''



rule download_drugbank:
output: f'{DATA_DIR}/drugbank/full_database.xml'
shell: dedent(f'''\
shell: f'''
cd {DATA_DIR}/drugbank
wget https://www.drugbank.ca/releases
latest=$(grep 'href="/releases/[^"]*"' -o releases | cut -f 3 -d/ | sed 's/"//' | sort -V | tail -n 2 | head -n 1)
rm releases
filename="drugbank_all_full_database_v$latest".xml

curl -Lfv -o ${{filename}}.zip -u {DRUGBANK_EMAIL}:{DRUGBANK_PASSWORD} https://go.drugbank.com/releases/5-1-8/downloads/all-full-database
curl -L -v --fail-with-body -o ${{filename}}.zip -u {DRUGBANK_EMAIL}:{DRUGBANK_PASSWORD} https://go.drugbank.com/releases/5-1-8/downloads/all-full-database
unzip ${{filename}}.zip
mv full\ database.xml full_database.xml''')

mv full\ database.xml full_database.xml'''

rule download_PMC4468049:
output: f'{DATA_DIR}/PMC4468049/NIHMS632238-supplement-2.xlsx'
shell: dedent(f'''\
cd {DATA_DIR}/PMC4468049
wget https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4468049/bin/NIHMS632238-supplement-2.xlsx
''')
shell: f''' curl -A "Mozilla" --create-dirs -o {DATA_DIR}/PMC4468049/NIHMS632238-supplement-2.xlsx https://pmc.ncbi.nlm.nih.gov/articles/instance/4468049/bin/NIHMS632238-supplement-2.xlsx'''


rule download_PMC4232638:
output: f'{DATA_DIR}/PMC4232638/13059_2014_484_MOESM2_ESM.xlsx'
shell: dedent(f'''\
cd {DATA_DIR}/PMC4232638
wget https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4232638/bin/13059_2014_484_MOESM2_ESM.xlsx
''')

shell: f''' curl -A "Mozilla" --create-dirs -o {DATA_DIR}/PMC4232638/13059_2014_484_MOESM2_ESM.xlsx https://pmc.ncbi.nlm.nih.gov/articles/instance/4232638/bin/13059_2014_484_MOESM2_ESM.xlsx'''

rule download_cgi:
output: f'{DATA_DIR}/cgi/cgi_biomarkers_per_variant.tsv'
shell: dedent(f'''\
cd {DATA_DIR}/cgi
wget https://www.cancergenomeinterpreter.org/data/biomarkers/cgi_biomarkers_20180117.zip
unzip cgi_biomarkers_20180117.zip
''')
shell: f'''
curl --create-dirs -o {DATA_DIR}/cgi/cgi_biomarkers.zip https://www.cancergenomeinterpreter.org/data/biomarkers/cgi_biomarkers_20180117.zip
unzip -d {DATA_DIR}/cgi {DATA_DIR}/cgi/cgi_biomarkers.zip
'''


rule download_local_data:
output: f'{DATA_DIR}/local/{{local}}.json'
shell: dedent(f'''\
shell: f'''
mkdir -p {DATA_DIR}/local
cd {DATA_DIR}/local
wget {GITHUB_DATA}/{{wildcards.local}}.json
''')
'''


rule download_cancerhotspots:
output: f'{DATA_DIR}/cancerhotspots/cancerhotspots.v2.maf'
shell: dedent(f'''\
mkdir -p {DATA_DIR}/cancerhotspots
cd {DATA_DIR}/cancerhotspots
wget https://cbioportal-download.s3.amazonaws.com/cancerhotspots.v2.maf.gz
gunzip cancerhotspots.v2.maf.gz
''')
shell: f'''
curl -A "Mozilla" --create-dirs -o {DATA_DIR}/cancerhotspots/cancerhotspots.v2.maf.gz https://cbioportal-download.s3.amazonaws.com/cancerhotspots.v2.maf.gz
gunzip {DATA_DIR}/cancerhotspots/cancerhotspots.v2.maf.gz
'''



rule download_cosmic_resistance:
output: f'{DATA_DIR}/cosmic/CosmicResistanceMutations.tsv'
shell: dedent(f'''
cd {DATA_DIR}/cosmic
AUTH=$( echo "{COSMIC_EMAIL}:{COSMIC_PASSWORD}" | base64 )
resp=$( curl -H "Authorization: Basic $AUTH" https://cancer.sanger.ac.uk/cosmic/file_download/GRCh38/cosmic/v92/CosmicResistanceMutations.tsv.gz );
url=$( node -e "var resp = $resp; console.log(resp.url);" );
curl "$url" -o CosmicResistanceMutations.tsv.gz
gunzip CosmicResistanceMutations.tsv.gz
''')
shell: f'''
./downloadCosmic.sh mutations {COSMIC_EMAIL} {COSMIC_PASSWORD} "https://cancer.sanger.ac.uk/api/mono/products/v1/downloads/scripted?path=grch38/cosmic/v101/Cosmic_ResistanceMutations_Tsv_v101_GRCh38.tar&bucket=downloads" {DATA_DIR}
mv {DATA_DIR}/cosmic/mutations/Cosmic_ResistanceMutations_v[0-9]*_GRCh38.tsv {DATA_DIR}/cosmic/CosmicResistanceMutations.tsv
'''


rule download_cosmic_diseases:
output: f'{DATA_DIR}/cosmic/classification.csv'
shell: dedent(f'''
cd {DATA_DIR}/cosmic
AUTH=$( echo "{COSMIC_EMAIL}:{COSMIC_PASSWORD}" | base64 )
resp=$( curl -H "Authorization: Basic $AUTH" https://cancer.sanger.ac.uk/cosmic/file_download/GRCh38/cosmic/v92/classification.csv );
url=$( node -e "var resp = $resp; console.log(resp.url);" );
curl "$url" -o classification.csv
''')
shell: f'''
./downloadCosmic.sh diseases {COSMIC_EMAIL} {COSMIC_PASSWORD} "https://cancer.sanger.ac.uk/api/mono/products/v1/downloads/scripted?path=grch38/cosmic/v101/Cosmic_Classification_Tsv_v101_GRCh38.tar&bucket=downloads" {DATA_DIR}
mv {DATA_DIR}/cosmic/diseases/Cosmic_Classification_v[0-9]*_GRCh38.tsv {DATA_DIR}/cosmic/diseases/classification.tsv
tr ',' '\\t' < {DATA_DIR}/cosmic/diseases/classification.tsv > {DATA_DIR}/cosmic/classification.csv
'''


rule download_cosmic_fusions:
output: f'{DATA_DIR}/cosmic/CosmicFusionExport.tsv'
shell: dedent(f'''
cd {DATA_DIR}/cosmic
AUTH=$( echo "{COSMIC_EMAIL}:{COSMIC_PASSWORD}" | base64 )
resp=$( curl -H "Authorization: Basic $AUTH" https://cancer.sanger.ac.uk/cosmic/file_download/GRCh38/cosmic/v92/CosmicFusionExport.tsv.gz );
url=$( node -e "var resp = $resp; console.log(resp.url);" );
curl "$url" -o CosmicFusionExport.tsv.gz
gunzip CosmicFusionExport.tsv.gz
''')
shell: f'''
./downloadCosmic.sh fusion {COSMIC_EMAIL} {COSMIC_PASSWORD} "https://cancer.sanger.ac.uk/api/mono/products/v1/downloads/scripted?path=grch38/cosmic/v101/Cosmic_Fusion_Tsv_v101_GRCh38.tar&bucket=downloads" {DATA_DIR}
mv {DATA_DIR}/cosmic/fusion/Cosmic_Fusion_v[0-9]*_GRCh38.tsv {DATA_DIR}/cosmic/CosmicFusionExport.tsv
'''


rule load_local:
@@ -394,15 +364,15 @@ rule load_cgi:
output: f'{DATA_DIR}/cgi.COMPLETE'
shell: LOADER_COMMAND + ' file cgi {input.data} &> {log}; cp {log} {output}'


rule load_docm:
input: expand(rules.load_local.output, local=['vocab', 'signatures', 'chromosomes']),
rules.load_ncit.output,
rules.load_do.output
containerized: containerchoice
log: f'{LOGS_DIR}/docm.logs.txt'
output: f'{DATA_DIR}/docm.COMPLETE'
shell: LOADER_COMMAND + ' api docm &> {log}; cp {log} {output}'
# DOCM has been retired and superseded by the CIViC project
#rule load_docm:
# input: expand(rules.load_local.output, local=['vocab', 'signatures', 'chromosomes']),
# rules.load_ncit.output,
# rules.load_do.output
# containerized: containerchoice
# log: f'{LOGS_DIR}/docm.logs.txt'
# output: f'{DATA_DIR}/docm.COMPLETE'
# shell: LOADER_COMMAND + ' api docm &> {log}; cp {log} {output}'


rule load_approvals:
@@ -457,10 +427,10 @@ rule load_moa:
# input isn't actually needed but it is a file-type loader, so a dummy file must be supplied
rule download_sources:
output: f'{DATA_DIR}/local/sources.json'
shell: dedent(f'''\
shell: f'''
cd {DATA_DIR}/local
touch sources.json
''')
'''

rule load_sources:
input: f'{DATA_DIR}/local/sources.json'
@@ -482,7 +452,7 @@ rule all_ontologies:
rules.load_sources.output,
rules.load_fda_srs.output,
rules.load_ncit_fda.output,
#rules.load_dgidb.output
rules.load_dgidb.output
containerized: containerchoice
output: f'{DATA_DIR}/all_ontologies.COMPLETE'
shell: 'touch {output}'
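For local debugging outside the container, the same pipeline can be driven directly with snakemake, mirroring the Dockerfile CMD; a sketch with placeholder URL and credentials, stopping at one of the loader rules defined above:

snakemake --debug -j 1 \
    --config gkb_url=https://graphkb-api.example.org/api gkb_user=graphkb_importer gkb_pass=secret \
    --until load_cgi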
8 changes: 8 additions & 0 deletions downloadCosmic.sh
@@ -0,0 +1,8 @@
#!/bin/bash
AUTH=$( echo "$2:$3" | base64 )
RESP=$( curl -H "Authorization: Basic $AUTH" "$4" )
# If you are getting a blank $RESP, make sure to log in to the COSMIC website and accept the terms of use.
URL=$( node -e "var resp = $RESP; console.log(resp.url);" )
curl --create-dirs "$URL" -o $5/cosmic/$1/Cosmic.tar
tar -xvf $5/cosmic/$1/Cosmic.tar -C $5/cosmic/$1/
gunzip -f $5/cosmic/$1/*.tsv.gz # should only have one
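For reference, this is how the COSMIC rules in the Snakefile above call the script. Positional arguments are the output subdirectory under cosmic/, the COSMIC account email, the password, the scripted-download URL, and the data directory; the credentials shown are placeholders:

./downloadCosmic.sh mutations user@example.org 'cosmic-password' \
    "https://cancer.sanger.ac.uk/api/mono/products/v1/downloads/scripted?path=grch38/cosmic/v101/Cosmic_ResistanceMutations_Tsv_v101_GRCh38.tar&bucket=downloads" \
    snakemake_data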
7 changes: 7 additions & 0 deletions download_ensembl_curl_config.txt
@@ -0,0 +1,7 @@
--location
--fail-with-body
--create-dirs
-o snakemake_data/ensembl/biomart_export.tsv
-v
-A "Mozilla"
url="http://www.ensembl.org/biomart/martservice?query=%3C%3Fxml%20version%3D%221.0%22%20encoding%3D%22UTF-8%22%3F%3E%3C%21DOCTYPE%20Query%3E%3CQuery%20%20virtualSchemaName%20%3D%20%22default%22%20formatter%20%3D%20%22TSV%22%20header%20%3D%20%221%22%20uniqueRows%20%3D%20%220%22%20count%20%3D%20%22%22%20datasetConfigVersion%20%3D%20%220.6%22%20%3E%3CDataset%20name%20%3D%20%22hsapiens_gene_ensembl%22%20interface%20%3D%20%22default%22%20%3E%3CFilter%20name%20%3D%20%22transcript_biotype%22%20value%20%3D%20%22protein_coding%22%2F%3E%3CAttribute%20name%20%3D%20%22ensembl_gene_id%22%20%2F%3E%3CAttribute%20name%20%3D%20%22ensembl_gene_id_version%22%20%2F%3E%3CAttribute%20name%20%3D%20%22ensembl_transcript_id%22%20%2F%3E%3CAttribute%20name%20%3D%20%22ensembl_transcript_id_version%22%20%2F%3E%3CAttribute%20name%20%3D%20%22ensembl_peptide_id%22%20%2F%3E%3CAttribute%20name%20%3D%20%22ensembl_peptide_id_version%22%20%2F%3E%3CAttribute%20name%20%3D%20%22hgnc_id%22%20%2F%3E%3CAttribute%20name%20%3D%20%22refseq_mrna%22%20%2F%3E%3CAttribute%20name%20%3D%20%22description%22%20%2F%3E%3CAttribute%20name%20%3D%20%22external_gene_name%22%20%2F%3E%3CAttribute%20name%20%3D%20%22external_gene_source%22%20%2F%3E%3C%2FDataset%3E%3C%2FQuery%3E"
9 changes: 6 additions & 3 deletions package.json
@@ -12,21 +12,24 @@
"private": true,
"license": "GPL-3",
"dependencies": {
"@bcgsc-pori/graphkb-parser": "^1.1.1",
"@bcgsc-pori/graphkb-schema": "^3.14.3",
"@bcgsc-pori/graphkb-parser": "^2.0.0",
"@bcgsc-pori/graphkb-schema": "^3.16.0",
"ajv": "^6.10.0",
"argparse": "^2.0.1",
"csv-parse": "^4.6.5",
"expat": "^1.0.0",
"fast-csv": "^4.3.6",
"html-to-text": "^5.1.1",
"http-status-codes": "^1.3.2",
"json-cycle": "^1.3.0",
"json-stable-stringify": "^1.0.1",
"jsonpath": "^1.1.1",
"jsonwebtoken": "^8.5.1",
"libxmljs": "^1.0.11",
"lodash": "^4.17.21",
"node-expat": "^2.3.18",
"node-expat": "^2.4.1",
"node-fetch": "^2.6.7",
"node-gyp": "^10.2.0",
"p-limit": "^3.1.0",
"parse5": "^5.1.1",
"rdflib": "^2.2.15",
4 changes: 2 additions & 2 deletions src/PMC4232638/index.js
@@ -1,5 +1,5 @@
const readXlsxFile = require('read-excel-file/node');
const kbParser = require('@bcgsc-pori/graphkb-parser');
const {parseVariant, stringifyVariant, jsonifyVariant} = require('@bcgsc-pori/graphkb-parser');

const { logger } = require('../logging');
const { rid } = require('../graphkb');
@@ -134,7 +134,7 @@ const uploadFile = async ({ conn, filename }) => {
logger.info(`loading: ${row.Gene}:${row['Amino acid change']}`);

try {
const parsed = kbParser.variant.parse(`p.${row['Amino acid change']}`, false).toJSON();
const parsed = jsonifyVariant(parseVariant(`p.${row['Amino acid change']}`, false) );
const [gene] = await _entrezGene.fetchAndLoadBySymbol(conn, row.Gene);
const relevance = await conn.getVocabularyTerm(row.relevance);
const evidence = await _pubmed.fetchAndLoadByIds(conn, row.evidence);
4 changes: 2 additions & 2 deletions src/cancerhotspots/index.js
@@ -5,8 +5,8 @@ const fs = require('fs');

const csv = require('fast-csv');

const { variant: { parse: variantParser } } = require('@bcgsc-pori/graphkb-parser');

//const { variant: { parse: variantParser } } = require('@bcgsc-pori/graphkb-parser');
const {parseVariant, stringifyVariant, jsonifyVariant} = require('@bcgsc-pori/graphkb-parser');
const {
convertRowFields,
hashRecordToId,
4 changes: 2 additions & 2 deletions src/cgl/index.js
@@ -1,7 +1,7 @@
const fs = require('fs');

const { variant: { parse: variantParser } } = require('@bcgsc-pori/graphkb-parser');

//const { variant: { parse: variantParser } } = require('@bcgsc-pori/graphkb-parser');
const {parseVariant, stringifyVariant, jsonifyVariant} = require('@bcgsc-pori/graphkb-parser');
const {
loadDelimToJson,
hashRecordToId,
4 changes: 2 additions & 2 deletions src/civic/evidenceItem.js
@@ -3,8 +3,8 @@ const path = require('path');

const _ = require('lodash');
const Ajv = require('ajv');
const { error: { ErrorMixin } } = require('@bcgsc-pori/graphkb-parser');

//const { error: { ErrorMixin } } = require('@bcgsc-pori/graphkb-parser');
const { ParsingError, ErrorMixin, InputValidationError } = require('@bcgsc-pori/graphkb-parser');
const { checkSpec, request } = require('../util');
const { logger } = require('../logging');
const { civic: SOURCE_DEFN } = require('../sources');
4 changes: 2 additions & 2 deletions src/civic/profile.js
@@ -2,8 +2,8 @@
* Introducing Molecular Profiles with CIViC GraphQL API v2.2.0
* [EvidenceItem]--(many-to-one)--[MolecularProfile]--(many-to-many)--[Variant]
*/
const { error: { ErrorMixin } } = require('@bcgsc-pori/graphkb-parser');

//const { error: { ErrorMixin } } = require('@bcgsc-pori/graphkb-parser');
const { ParsingError, ErrorMixin, InputValidationError } = require('@bcgsc-pori/graphkb-parser');

class NotImplementedError extends ErrorMixin { }
const MOLECULAR_PROFILE_CACHE = new Map();
4 changes: 2 additions & 2 deletions src/civic/publication.js
@@ -1,5 +1,5 @@
const { error: { ErrorMixin } } = require('@bcgsc-pori/graphkb-parser');

//const { error: { ErrorMixin } } = require('@bcgsc-pori/graphkb-parser');
const { ParsingError, ErrorMixin, InputValidationError } = require('@bcgsc-pori/graphkb-parser');
const _asco = require('../asco');
const _pubmed = require('../entrez/pubmed');

3 changes: 2 additions & 1 deletion src/civic/relevance.js
@@ -1,4 +1,5 @@
const { error: { ErrorMixin } } = require('@bcgsc-pori/graphkb-parser');
//const { error: { ErrorMixin } } = require('@bcgsc-pori/graphkb-parser');
const {ParsingError, ErrorMixin, InputValidationError } = require('@bcgsc-pori/graphkb-parser');

class NotImplementedError extends ErrorMixin { }

3 changes: 2 additions & 1 deletion src/civic/variant.js
@@ -4,7 +4,8 @@ const _entrezGene = require('../entrez/gene');
const _snp = require('../entrez/snp');
const { civic: SOURCE_DEFN } = require('../sources');

const { error: { ErrorMixin, ParsingError } } = kbParser;
//const { error: { ErrorMixin, ParsingError } } = kbParser;
const { ParsingError, ErrorMixin, InputValidationError } = kbParser;
class NotImplementedError extends ErrorMixin { }

const VARIANT_CACHE = new Map();
4 changes: 2 additions & 2 deletions src/cosmic/resistance.js
@@ -3,8 +3,8 @@
*/
const fs = require('fs');

const { variant: { parse: variantParser } } = require('@bcgsc-pori/graphkb-parser');

//const { variant: { parse: variantParser } } = require('@bcgsc-pori/graphkb-parser');
const {parseVariant, stringifyVariant, jsonifyVariant} = require('@bcgsc-pori/graphkb-parser');
const {
loadDelimToJson,
convertRowFields,
4 changes: 2 additions & 2 deletions src/docm/index.js
@@ -6,8 +6,8 @@
const Ajv = require('ajv');
const fs = require('fs');

const { variant: { parse: variantParser } } = require('@bcgsc-pori/graphkb-parser');

//const { variant: { parse: variantParser } } = require('@bcgsc-pori/graphkb-parser');
const {parseVariant, stringifyVariant, jsonifyVariant} = require('@bcgsc-pori/graphkb-parser');
const { checkSpec, request } = require('../util');
const {
orderPreferredOntologyTerms, rid,
2 changes: 1 addition & 1 deletion src/entrez/snp.js
@@ -1,6 +1,6 @@
const Ajv = require('ajv');

const { variant: { parse: variantParser } } = require('@bcgsc-pori/graphkb-parser');
const {parseVariant, stringifyVariant, jsonifyVariant} = require('@bcgsc-pori/graphkb-parser');

const { checkSpec } = require('../util');
const {
4 changes: 2 additions & 2 deletions src/variants/index.js
@@ -1,7 +1,7 @@
const fs = require('fs');

const { variant: { parse: parseVariant } } = require('@bcgsc-pori/graphkb-parser');

//const { variant: { parse: parseVariant } } = require('@bcgsc-pori/graphkb-parser');
const {parseVariant, stringifyVariant, jsonifyVariant} = require('@bcgsc-pori/graphkb-parser');
const { logger } = require('../logging');
const { orderPreferredOntologyTerms, rid } = require('../graphkb');
const { fetchAndLoadBySymbol } = require('../entrez/gene');