Skip to content

Commit b1683c9

Browse files
committed
Merge branch 'gen-178-update-pandas-2' into gen-998-resolve-fill-na
2 parents 414ba8e + 8286d2c commit b1683c9

File tree

7 files changed

+22
-24
lines changed

7 files changed

+22
-24
lines changed

Dockerfile

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# TODO: update this
2-
FROM ubuntu:focal-20220113
2+
FROM ubuntu:jammy-20250714
33
ENV DEBIAN_FRONTEND=noninteractive
44

55
# Must install this because gpg not installed
@@ -24,8 +24,8 @@ RUN apt-get update && apt-get install -y --allow-unauthenticated --no-install-re
2424
python3-pip \
2525
python3-dev \
2626
git \
27-
r-base-core=4.3.3-1.2004.0 \
28-
r-base-dev=4.3.3-1.2004.0 \
27+
r-base-core=4.3.3-1.2204.0 \
28+
r-base-dev=4.3.3-1.2204.0 \
2929
cmake \
3030
curl \
3131
# synapser client dependencies
@@ -45,7 +45,7 @@ RUN apt-get update && apt-get install -y --allow-unauthenticated --no-install-re
4545
openjdk-11-jre \
4646
# This is for reticulate
4747
# TODO: update this
48-
python3.11-venv && \
48+
python3.10-venv && \
4949
apt-get clean && \
5050
rm -rf /var/lib/apt/lists/*
5151

R/install_packages.R

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,5 @@ library(testthat)
88
library(VariantAnnotation)
99
library(xtable)
1010
library(ggpubr)
11+
library(reticulate)
12+
reticulate::py_install(c("pandas>=1.5,<=2.0.3", "numpy<=1.24.4"))

genie/load.py

Lines changed: 12 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -226,16 +226,15 @@ def _reorder_new_dataset(
226226
def _generate_primary_key(
227227
dataset: pd.DataFrame, primary_key_cols: List[str], primary_key: str
228228
) -> pd.DataFrame:
229-
"""
230-
Generate primary key column a dataframe
229+
"""Generate primary key column for a dataframe
231230
232231
Args:
233-
dataset(pd.DataFrame): A dataframe
234-
new_dataset: The re-ordered new dataset
235-
primary_key_cols (list): Column(s) that make up the primary key
236-
primary_key: The column name of the primary_key
232+
dataset (pd.DataFrame): A dataframe
233+
primary_key_cols (List[str]): Column(s) that make up the primary key
234+
primary_key (str): The column name of the primary_key
235+
237236
Returns:
238-
The dataframe with primary_key column added
237+
pd.DataFrame: The dataframe with primary_key column added
239238
"""
240239
# replace NAs with empty string
241240
dataset = dataset.fillna("")
@@ -298,21 +297,18 @@ def store_database(
298297
syn: synapseclient.Synapse,
299298
database_synid: str,
300299
col_order: List[str],
301-
allupdates: pd.DataFrame,
300+
all_updates: pd.DataFrame,
302301
to_delete_rows: pd.DataFrame,
303-
):
302+
) -> None:
304303
"""
305304
Store changes to the database
306305
307306
Args:
308-
syn (synapseclient.Synaps): Synapse object
307+
syn (synapseclient.Synapse): Synapse object
309308
database_synid (str): Synapse Id of the Synapse table
310309
col_order (List[str]): The ordered column names to be saved
311-
allupdates (pd.DataFrame): rows to be appended and/or updated
310+
all_updates (pd.DataFrame): rows to be appended and/or updated
312311
to_delete_rows (pd.DataFrame): rows to be deleted
313-
314-
Returns:
315-
None
316312
"""
317313
storedatabase = False
318314
update_all_file = tempfile.NamedTemporaryFile(
@@ -321,14 +317,14 @@ def store_database(
321317
with open(update_all_file.name, "w") as updatefile:
322318
# Must write out the headers in case there are no appends or updates
323319
updatefile.write(",".join(col_order) + "\n")
324-
if not allupdates.empty:
320+
if not all_updates.empty:
325321
"""
326322
This is done because of pandas typing.
327323
An integer column with one NA/blank value
328324
will be cast as a double.
329325
"""
330326
updatefile.write(
331-
allupdates[col_order]
327+
all_updates[col_order]
332328
.to_csv(index=False, header=None)
333329
.replace(".0,", ",")
334330
.replace(".0\n", "\n")

genie_registry/clinical.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -583,8 +583,9 @@ def process_steps(
583583
newClinicalDf.to_csv(newPath, sep="\t", index=False)
584584
return newPath
585585

586+
@staticmethod
586587
def _validate_oncotree_code_mapping(
587-
self: "Clinical", clinicaldf: pd.DataFrame, oncotree_mapping: pd.DataFrame
588+
clinicaldf: pd.DataFrame, oncotree_mapping: pd.DataFrame
588589
) -> pd.Index:
589590
"""Checks that the oncotree codes in the input clinical
590591
data is a valid oncotree code from the official oncotree site
@@ -609,8 +610,8 @@ def _validate_oncotree_code_mapping(
609610
]
610611
return unmapped_oncotrees.index
611612

613+
@staticmethod
612614
def _validate_oncotree_code_mapping_message(
613-
self: "Clinical",
614615
clinicaldf: pd.DataFrame,
615616
unmapped_oncotree_indices: pd.DataFrame,
616617
) -> Tuple[str, str]:

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,4 @@ pyranges==0.1.4
66
# known working version 6.0
77
PyYAML>=5.1
88
synapseclient[pandas]>=4.0.0, <5.0.0
9+
opentelemetry-semantic-conventions==0.56b0

setup.cfg

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@ classifiers =
2121
Programming Language :: Python :: 3 :: Only
2222
Programming Language :: Python :: 3.10
2323
Programming Language :: Python :: 3.11
24-
Programming Language :: Python :: 3.12
2524
Topic :: Scientific/Engineering
2625
project_urls =
2726
Bug Tracker = https://github.com/Sage-Bionetworks/Genie/issues

tests/test_input_to_database.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -670,7 +670,6 @@ def asDataFrame(self):
670670

671671
class TestValidation:
672672
def setup_method(self):
673-
# TODO: Look into 1553428800000, locally it needs to be 1553454000000 to pass
674673
valid = [
675674
[
676675
sample_clinical_entity.id,

0 commit comments

Comments
 (0)