This repository was archived by the owner on Aug 31, 2022. It is now read-only.
-
Couldn't load subscription status.
- Fork 4
add script to edit grants and theme in project-split manifests #116
Open
brynnz22
wants to merge
7
commits into
main
Choose a base branch
from
grant-split
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from all commits
Commits
Show all changes
7 commits
Select commit
Hold shift + click to select a range
1ac6258
add script to edit grants and theme in project-split manifests
fd68e4a
adding dictionary associated with the split_manifest_grant_attributes…
d417e50
create updated script to split current manifests
ed97962
delete unnecessary comments in file
194a7c7
add code that created the dictionaries in grant_dicts.py
4395865
add minor edits
a4370d7
fix index in csvs
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,65 @@ | ||
| import synapseclient | ||
| import argparse | ||
| import pandas as pd | ||
|
|
||
|
|
||
| ### Login to Synapse ### | ||
def login():
    """Create a Synapse client, log it in, and hand it back.

    ``login()`` is called on the client without arguments, so no credentials
    are supplied by this function itself.

    Returns:
        The ``synapseclient.Synapse`` instance after ``login()`` was called.
    """
    client = synapseclient.Synapse()
    client.login()
    return client
|
|
||
|
|
||
| ### Get arguments ### | ||
def get_args():
    """Parse the command line of the grant-dictionary script.

    Returns:
        argparse.Namespace with a single attribute, ``table_id``, holding the
        required positional argument as a string.
    """
    cli = argparse.ArgumentParser(
        description='Get synapse grants table id')
    cli.add_argument(
        'table_id',
        type=str,
        help='Synapse grants merged table id',
    )
    namespace = cli.parse_args()
    return namespace
|
|
||
|
|
||
| ### Retrieve grants merged table and turn into data frame ### | ||
def get_grant_table(syn, table):
    """Query the grant columns of a table and return them as a data frame.

    Args:
        syn: Object providing ``tableQuery``; ``main`` passes the client
            returned by ``login``.
        table: Table id, interpolated verbatim into the query's FROM clause.

    Returns:
        Whatever ``asDataFrame()`` yields for the query result, restricted to
        the columns grantNumber, theme, consortium and grantInstitution.
    """
    columns = "grantNumber, theme, consortium, grantInstitution"
    query_result = syn.tableQuery(f"SELECT {columns} FROM {table}")
    return query_result.asDataFrame()
|
|
||
|
|
||
def _theme_to_string(value):
    """Flatten one ``theme`` cell into a comma-separated string."""
    # numpy arrays (and numpy scalars) expose ``tolist``; converting first
    # lets arrays share the list branch below instead of going through
    # ``str()``, whose array repr has no commas between elements.
    if hasattr(value, 'tolist'):
        value = value.tolist()
    if isinstance(value, (list, tuple, set, frozenset)):
        # Joining the elements keeps apostrophes that belong to a theme
        # name, which quote-stripping on the repr would delete.
        return ', '.join(str(theme) for theme in value)
    # Anything else (e.g. a cell that already holds a list repr as text)
    # keeps the historical clean-up.
    text = str(value)
    return text.strip('["').strip('"]').replace("'", "")


def grant_dictionary(grants_df):
    """Build and print grant-number lookup dictionaries.

    Args:
        grants_df: Data frame with the columns ``grantNumber``, ``theme``,
            ``consortium`` and ``grantInstitution``. ``theme`` cells may be
            list-like; they are flattened to ``"A, B"`` style strings.

    Returns:
        Tuple ``(consortium_dict, theme_dict, institution_dict)``, each keyed
        by grant number. The three dictionaries are also printed, in that
        order, so the output can be pasted into ``grant_dicts.py``.
    """
    grant_numbers = grants_df.grantNumber
    consortium_dict = dict(zip(grant_numbers, grants_df.consortium))
    theme_dict = {
        grant: _theme_to_string(theme)
        for grant, theme in zip(grant_numbers, grants_df.theme)
    }
    institution_dict = dict(zip(grant_numbers, grants_df.grantInstitution))

    print(consortium_dict)
    print(theme_dict)
    print(institution_dict)

    return consortium_dict, theme_dict, institution_dict
|
|
||
|
|
||
def main():
    """Print the grant lookup dictionaries for the table named on the CLI."""
    # Parse arguments before logging in so that ``--help`` or a missing
    # table id exits immediately instead of first contacting Synapse.
    args = get_args()
    syn = login()
    grants_df = get_grant_table(syn, args.table_id)

    grant_dictionary(grants_df)


if __name__ == "__main__":
    main()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,196 @@ | ||
# Grant number -> consortium name, used to fill the "<type> Consortium Name"
# column of the split manifests.
CONSORTIUM = {
    "CA209891": "CSBC",
    "CA225088": "CSBC",
    "CA217655": "PS-ON",
    "CA210152": "PS-ON",
    "CA214282": "PS-ON",
    "CA214354": "PS-ON",
    "CA215798": "CSBC",
    "CA215709": "CSBC",
    "CA215794": "CSBC",
    "CA215848": "CSBC",
    "CA220378": "CSBC",
    "CA227550": "CSBC",
    "CA232137": "CSBC",
    "CA232161": "CSBC",
    "CA232382": "CSBC",
    "CA217378": "CSBC",
    "CA217376": "CSBC",
    "CA217450": "CSBC",
    "CA231978": "CSBC",
    "CA238720": "CSBC",
    "CA232216": "CSBC",
    "CA243007": "CSBC",
    "CA227544": "CSBC",
    "CA243073": "CSBC",
    "CA238475": "CSBC",
    "CA209923": "CSBC",
    "CA209997": "CSBC",
    "CA209975": "CSBC",
    "CA209971": "CSBC",
    "CA209992": "CSBC",
    "CA193419": "PS-ON",
    "CA210190": "PS-ON",
    "CA193417": "PS-ON",
    "CA193489": "PS-ON",
    "CA193461": "PS-ON",
    "CA193313": "PS-ON",
    "CA210181": "PS-ON",
    "CA210180": "PS-ON",
    "CA210173": "PS-ON",
    "CA210184": "PS-ON",
    "CA202123": "PS-ON",
    "CA202177": "PS-ON",
    "CA202241": "PS-ON",
    "CA202144": "PS-ON",
    "CA202229": "PS-ON",
    "CA209978": "CSBC",
    "CA184898": "ICBP",
    "CA217377": "CSBC",
    "CA209988": "CSBC",
    "CA217297": "CSBC",
    "CA199315": "ICBP",
    "CA188388": "ICBP",
    "CA215845": "CSBC",
    "CA184897": "ICBP",
    "CA195469": "ICBP",
    "CA250046": "PS-ON",
    "CA250040": "PS-ON",
    "CA214297": "PS-ON",
    "CA244107": "PS-ON",
    "CA244101": "PS-ON",
    "CA244100": "PS-ON",
    "CA244109": "PS-ON",
    "CA225566": "PS-ON",
    "CA214292": "TEC",
    "CA214411": "TEC",
    "CA214300": "TEC",
    "CA214369": "TEC",
    "CA214381": "TEC",
    "CA227136": "TEC",
    "CA241927": "TEC",
    "CA240301": "TEC",
    "CA232209": "TEC",
    "CA232517": "TEC",
    "CA245313": "TEC",
    "CA243072": "CSBC",
    "CA243075": "CSBC",
    "CA238728": "CSBC",
    "CA243004": "CSBC",
    "CA253553": "CSBC",
    "CA253472": "CSBC",
    "CA253540": "CSBC",
    "CA253547": "CSBC",
    "CA228963": "PS-ON",
    "CA261842": "PS-ON",
    "CA234787": (
        "NCI Clinical and Translational Exploratory/Developmental Studies"
    ),
    "CA264610": "CSBC",
    "CA264620": "CSBC",
    "CA261822": "PS-ON",
    "CA250044": "PS-ON",
    "CA261841": "PS-ON",
    "CA254886": "PS-ON",
    "CA250481": "PS-ON",
    "CA260432": "CSBC",
}
|
|
||
# Grant number -> comma-separated theme names, used to fill the
# "<type> Theme Name" column of the split manifests.
THEME = {
    'CA209891': 'Heterogeneity, Evolution, Drug Resistance/Sensitivity',
    'CA225088': 'Drug Resistance/Sensitivity, Microenvironment',
    'CA217655': 'Drug Resistance/Sensitivity, Tumor-Immune, Metabolism',
    'CA210152': 'Metastasis, Microenvironment',
    'CA214282': 'Heterogeneity, Evolution',
    'CA214354': 'Tumor-Immune, Microenvironment',
    'CA215798': 'Drug Resistance/Sensitivity',
    'CA215709': 'Heterogeneity, Drug Resistance/Sensitivity',
    'CA215794': 'Heterogeneity, Evolution',
    'CA215848': 'Metabolism, Drug Resistance/Sensitivity',
    'CA220378': 'Heterogeneity, Evolution',
    'CA227550': 'Drug Resistance/Sensitivity, Heterogeneity',
    'CA232137': 'Metabolism, Heterogeneity, Microenvironment',
    'CA232161': 'Heterogeneity',
    'CA232382': 'Tumor-Immune, Drug Resistance/Sensitivity',
    'CA217378': 'Heterogeneity, Evolution, Drug Resistance/Sensitivity',
    'CA217376': 'Evolution, Heterogeneity, Epigenetics, Microenvironment',
    'CA217450': 'Heterogeneity, Drug Resistance/Sensitivity, Microenvironment',
    'CA231978': 'Drug Resistance/Sensitivity, Heterogeneity',
    'CA238720': 'Drug Resistance/Sensitivity',
    'CA232216': 'Tumor-Immune, Heterogeneity, Microenvironment',
    'CA243007': 'Drug Resistance/Sensitivity',
    'CA227544': 'Drug Resistance/Sensitivity, Heterogeneity',
    'CA243073': 'Heterogeneity, Evolution',
    'CA238475': 'Drug Resistance/Sensitivity, Heterogeneity',
    'CA209923': 'Computational Resource',
    'CA209997': 'Heterogeneity, Drug Resistance/Sensitivity',
    'CA209975':
    'Tumor-Immune, Microenvironment, Drug Resistance/Sensitivity, Metastasis',
    'CA209971': 'Tumor-Immune, Metastasis, Microenvironment',
    'CA209992': 'Metastasis, Microenvironment',
    'CA193419': 'Metabolism, Heterogeneity, Evolution',
    'CA210190': 'Microenvironment, Metastasis, Tumor-Immune',
    'CA193417': 'Microenvironment, Evolution, Metastasis',
    'CA193489':
    'Evolution, Drug Resistance/Sensitivity, Microenvironment, Tumor-Immune',
    'CA193461':
    'Evolution, Drug Resistance/Sensitivity, Microenvironment, Heterogeneity',
    'CA193313': 'Heterogeneity, Evolution, Metastasis',
    'CA210181': 'Heterogeneity, Tumor-Immune, Microenvironment',
    'CA210180': 'Heterogeneity, Microenvironment, Evolution',
    'CA210173': 'Microenvironment, Metastasis, Metabolism',
    'CA210184': 'Metabolism, Microenvironment, Metastasis',
    'CA202123': 'Metastasis, Heterogeneity',
    'CA202177': 'Microenvironment, Metastasis',
    'CA202241': 'Heterogeneity, Microenvironment',
    'CA202144': 'Heterogeneity, Evolution',
    'CA202229': 'Microenvironment, Heterogeneity',
    'CA209978':
    'Drug Resistance/Sensitivity, Heterogeneity, Evolution, Metastasis',
    'CA184898': 'Heterogeneity',
    'CA217377': 'Drug Resistance/Sensitivity, Microenvironment',
    'CA209988': 'Drug Resistance/Sensitivity, Heterogeneity, Microenvironment',
    'CA217297': 'Drug Resistance/Sensitivity, Epigenetics',
    'CA199315': 'Heterogeneity, Microenvironment',
    'CA188388': 'Heterogeneity, Microenvironment',
    'CA215845': 'Drug Resistance/Sensitivity, Heterogeneity',
    'CA184897': 'Metastasis, Heterogeneity',
    'CA195469': 'Heterogeneity, Evolution',
    'CA250046': 'Evolution, Heterogeneity, Epigenetics',
    'CA250040': 'Tumor-Immune',
    'CA214297': 'Metastasis, Microenvironment',
    'CA244107': 'Metastasis, Microenvironment',
    'CA244101': 'Microenvironment, Metastasis, Heterogeneity, Evolution',
    'CA244100': 'Tumor-Immune, Drug Resistance/Sensitivity',
    'CA244109': 'Heterogeneity, Microenvironment',
    'CA225566': 'Microenvironment',
    'CA214292': 'Metastasis, Microenvironment',
    'CA214411': 'Microenvironment, Drug Resistance/Sensitivity, Heterogeneity',
    'CA214300': 'Metastasis, Microenvironment, Tumor-Immune',
    'CA214369': 'Tumor-Immune, Microenvironment, Drug Resistance/Sensitivity',
    'CA214381': 'Tumor-Immune, Microenvironment, Drug Resistance/Sensitivity',
    'CA227136': 'Microenvironment, Drug Resistance/Sensitivity',
    'CA241927': 'Metastasis, Microenvironment',
    'CA240301': 'Metastasis, Microenvironment',
    'CA232209': 'Microenvironment, Heterogeneity',
    'CA232517': 'Metastasis, Microenvironment',
    'CA245313': 'Microenvironment, Metastasis, Drug Resistance/Sensitivity',
    'CA243072': 'Drug Resistance/Sensitivity',
    'CA243075': 'Drug Resistance/Sensitivity',
    'CA238728': 'Microenvironment, Tumor-Immune',
    'CA243004': 'Microenvironment, Metastasis',
    'CA253553': 'Metabolism, Tumor-Immune',
    # Was 'Drug Resistance/Sensitivit' (truncated); spelled like every other
    # entry so the theme name written to manifests is consistent.
    'CA253472': 'Tumor-Immune, Drug Resistance/Sensitivity',
    'CA253540': 'Drug Resistance/Sensitivity, Heterogeneity',
    'CA253547': 'Tumor-Immune',
    'CA228963': 'Tumor-Immune',
    'CA261842': 'Drug Resistance/Sensitivity',
    'CA234787': 'Drug Resistance/Sensitivity',
    'CA264610': 'Heterogeneity',
    'CA264620': 'Drug Resistance/Sensitivity',
    'CA261822': 'Drug Resistance/Sensitivity',
    'CA250044': 'Metastasis',
    'CA261841': 'Microenvironment',
    'CA254886': 'Mechano-genetics',
    'CA250481': 'Tumor-Immune',
    'CA260432': 'Heterogeneity',
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,69 @@ | ||
| # Preliminaries | ||
| import pandas as pd | ||
| import argparse | ||
| import os | ||
| import glob | ||
| from grant_dicts import CONSORTIUM, THEME | ||
|
|
||
|
|
||
| ### Get arguments ### | ||
def get_args():
    """Parse the command line of the manifest-splitting script.

    Returns:
        argparse.Namespace with a single attribute, ``directory_path``,
        holding the required positional argument as a string.
    """
    cli = argparse.ArgumentParser(
        description='Get file path of manifest csv',
    )
    cli.add_argument(
        'directory_path',
        type=str,
        help='Path to directory that houses the manifest csvs',
    )
    namespace = cli.parse_args()
    return namespace
|
|
||
|
|
||
| ### Get list of csv files ### | ||
def get_files(directory):
    """Return the paths of all ``.csv`` files under ``directory``.

    The pattern is assembled with ``os.path.join`` so the result does not
    depend on whether ``directory`` ends with a path separator, and sibling
    directories that merely share its prefix are never matched. The search is
    recursive: files directly inside ``directory`` and in any subdirectory
    are included.

    Args:
        directory: Path of the directory that houses the manifest csvs.

    Returns:
        Sorted list of matching file paths (empty when nothing matches).
    """
    pattern = os.path.join(directory, '**', '*.csv')
    # ``**`` only spans directory levels when recursive=True; without it the
    # token behaves like a plain ``*``.
    return sorted(glob.glob(pattern, recursive=True))
|
|
||
|
|
||
def split_manifests(files, directory):
    """Split each manifest csv into one csv per grant number.

    For every data type (Publication, Dataset, File, Tool) a
    ``<directory>/<type>sSplit`` directory is created. Each input file whose
    path contains the type name is read, rows listing several grants in
    ``"<type> Grant Number"`` are duplicated once per grant, the consortium
    and theme columns are overwritten from ``CONSORTIUM`` / ``THEME``, and
    the rows of each grant are written to ``<grant number>.csv`` in that
    directory.

    Args:
        files: Iterable of csv paths to consider.
        directory: Directory in which the ``<type>sSplit`` output directories
            are created.
    """
    data_types = ['Publication', 'Dataset', 'File', 'Tool']
    output_dirs = {item: f'{directory}/{item}sSplit' for item in data_types}

    # Output of a previous run lives under ``directory`` too; never feed it
    # back in as input.
    generated = {os.path.abspath(path) for path in output_dirs.values()}
    inputs = [
        file for file in files
        if os.path.abspath(os.path.dirname(file)) not in generated
    ]

    for item in data_types:
        out_dir = output_dirs[item]
        # exist_ok lets the script be re-run without deleting old output.
        os.makedirs(out_dir, exist_ok=True)
        for file in inputs:
            # NOTE(review): this matches the type name anywhere in the path,
            # including parent directory names - confirm that is intended.
            if item not in file:
                continue
            df = pd.read_csv(file, index_col=0, keep_default_na=False)
            grant_col = f'{item} Grant Number'
            consortium_col = f'{item} Consortium Name'
            theme_col = f'{item} Theme Name'
            # Turn "A, B" (or "A,B") into ['A', 'B'] so explode() can give
            # every grant its own row.
            df[grant_col] = df[grant_col].apply(
                lambda cell: [grant.strip() for grant in cell.split(',')])
            df = df.explode(grant_col)
            # Make consortium and themes match the (now single) grant.
            df[consortium_col] = df[grant_col].map(CONSORTIUM)
            df[theme_col] = df[grant_col].map(THEME)
            # Group by the column name itself (not a one-element list) so
            # group keys are plain grant numbers on every pandas version.
            grouped = df.groupby(grant_col)
            print(f"Found {grouped.ngroups} grant numbers in table "
                  "- splitting now...")
            for grant_number, group in grouped:
                group.to_csv(f'{out_dir}/{grant_number}.csv')
|
|
||
|
|
||
def main():
    """Split every manifest csv found under the directory given on the CLI."""
    cli_args = get_args()
    manifest_dir = cli_args.directory_path
    manifest_files = get_files(manifest_dir)

    split_manifests(manifest_files, manifest_dir)

    print("Done. Manifests split by grant number.")


if __name__ == "__main__":
    main()
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.