Skip to content

Commit 7a1c7ca

Browse files
authored
Initialise test resources (#1)
* add script to create test resources
* delete template components
* minor changes to the project config
1 parent 5951370 commit 7a1c7ca

15 files changed

Lines changed: 36 additions & 905 deletions

File tree

_viash.yaml

Lines changed: 7 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ viash_version: 0.9.0
22

33
# Step 1: Change the name of the task.
44
# example: task_name_of_this_task
5-
name: task_template
5+
name: task_foundation_models
66
organization: openproblems-bio
77
version: dev
88

@@ -11,8 +11,8 @@ license: MIT
1111
keywords: [single-cell, openproblems, benchmark]
1212
# Step 3: Update the `task_template` to the name of the task from step 1.
1313
links:
14-
issue_tracker: https://github.com/openproblems-bio/task_template/issues
15-
repository: https://github.com/openproblems-bio/task_template
14+
issue_tracker: https://github.com/openproblems-bio/task_foundation_models/issues
15+
repository: https://github.com/openproblems-bio/task_foundation_models
1616
docker_registry: ghcr.io
1717

1818

@@ -50,11 +50,8 @@ info:
5050
# Step 5: Replace `task_template` with the name of the task.
5151
test_resources:
5252
- type: s3
53-
path: s3://openproblems-data/resources_test/common/
54-
dest: resources_test/common
55-
- type: s3
56-
path: s3://openproblems-data/resources_test/task_template/
57-
dest: resources_test/task_template
53+
path: s3://openproblems-data/resources_test/task_foundation_models/
54+
dest: resources_test/task_foundation_models
5855

5956
# Step 6: Update the authors of the task.
6057
authors:
@@ -81,8 +78,7 @@ config_mods: |
8178
.runners[.type == "nextflow"].config.labels := { lowmem : "memory = 20.Gb", midmem : "memory = 50.Gb", highmem : "memory = 100.Gb", lowcpu : "cpus = 5", midcpu : "cpus = 15", highcpu : "cpus = 30", lowtime : "time = 1.h", midtime : "time = 4.h", hightime : "time = 8.h", veryhightime : "time = 24.h" }
8279
8380
repositories:
84-
- name: core
81+
- name: openproblems
8582
type: github
86-
repo: openproblems-bio/core
83+
repo: openproblems-bio/openproblems
8784
tag: build/main
88-
path: viash/core

scripts/create_resources/test_resources.sh

Lines changed: 29 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -6,48 +6,38 @@ REPO_ROOT=$(git rev-parse --show-toplevel)
66
# ensure that the command below is run from the root of the repository
77
cd "$REPO_ROOT"
88

9-
# # remove this when you have implemented the script
10-
# echo "TODO: replace the commands in this script with the sequence of components that you need to run to generate test_resources."
11-
# echo " Inside this script, you will need to place commands to generate example files for each of the 'src/api/file_*.yaml' files."
12-
# exit 1
13-
149
set -e
1510

16-
RAW_DATA=resources_test/common
17-
DATASET_DIR=resources_test/task_template
18-
19-
mkdir -p $DATASET_DIR
20-
21-
# process dataset
22-
viash run src/data_processors/process_dataset/config.vsh.yaml -- \
23-
--input $RAW_DATA/cxg_mouse_pancreas_atlas/dataset.h5ad \
24-
--output_train $DATASET_DIR/cxg_mouse_pancreas_atlas/train.h5ad \
25-
--output_test $DATASET_DIR/cxg_mouse_pancreas_atlas/test.h5ad \
26-
--output_solution $DATASET_DIR/cxg_mouse_pancreas_atlas/solution.h5ad
27-
28-
# run one method
29-
viash run src/methods/logistic_regression/config.vsh.yaml -- \
30-
--input_train $DATASET_DIR/cxg_mouse_pancreas_atlas/train.h5ad \
31-
--input_test $DATASET_DIR/cxg_mouse_pancreas_atlas/test.h5ad \
32-
--output $DATASET_DIR/cxg_mouse_pancreas_atlas/prediction.h5ad
33-
34-
# run one metric
35-
viash run src/metrics/accuracy/config.vsh.yaml -- \
36-
--input_prediction $DATASET_DIR/cxg_mouse_pancreas_atlas/prediction.h5ad \
37-
--input_solution $DATASET_DIR/cxg_mouse_pancreas_atlas/solution.h5ad \
38-
--output $DATASET_DIR/cxg_mouse_pancreas_atlas/score.h5ad
39-
40-
# write manual state.yaml. this is not actually necessary but you never know it might be useful
41-
cat > $DATASET_DIR/cxg_mouse_pancreas_atlas/state.yaml << HERE
42-
id: cxg_mouse_pancreas_atlas
43-
train: !file train.h5ad
44-
test: !file test.h5ad
45-
solution: !file solution.h5ad
46-
prediction: !file prediction.h5ad
47-
score: !file score.h5ad
48-
HERE
11+
OUT_DIR=resources_test/task_foundation_models/results
12+
13+
TASKS=(
14+
"task_label_projection"
15+
"task_batch_integration"
16+
)
17+
18+
if [ -d "$OUT_DIR" ]; then
19+
echo "Removing existing directory '$OUT_DIR'"
20+
rm -rf "$OUT_DIR"
21+
fi
22+
23+
mkdir -p "$OUT_DIR"
24+
25+
for TASK in "${TASKS[@]}"; do
26+
BASE_DIR="s3://openproblems-data/resources/$TASK/results"
27+
28+
# find subdir in bucket with latest date which has a 'task_info.yaml' file
29+
DATE=$(aws s3 ls "$BASE_DIR/" --recursive --no-sign-request | awk '{print $4}' | grep 'task_info.yaml' | sort -r | head -n 1 | sed 's#.*/run_\(.*\)/[^/]*$#\1#')
30+
31+
# abort if no run date was found, instead of syncing from a bogus 'run_' prefix
INPUT_DIR="$BASE_DIR/run_${DATE:?no run containing 'task_info.yaml' found under $BASE_DIR}"
32+
# strip the leading 'task_' with parameter expansion (no subshell, no word-splitting of $TASK)
TASK_STRIP_PREFIX="${TASK#task_}"
33+
OUTPUT_DIR="$OUT_DIR/$TASK_STRIP_PREFIX"
34+
35+
echo "Syncing '$INPUT_DIR' to '$OUTPUT_DIR'"
36+
aws s3 sync "$INPUT_DIR" "$OUTPUT_DIR" --delete --no-sign-request
37+
done
4938

5039
# only run this if you have access to the openproblems-data bucket
5140
aws s3 sync --profile op \
52-
"$DATASET_DIR" s3://openproblems-data/resources_test/task_template \
41+
"resources_test/task_foundation_models" \
42+
s3://openproblems-data/resources_test/task_foundation_models \
5343
--delete --dryrun

src/control_methods/true_labels/config.vsh.yaml

Lines changed: 0 additions & 59 deletions
This file was deleted.

src/control_methods/true_labels/script.py

Lines changed: 0 additions & 45 deletions
This file was deleted.

src/data_processors/process_dataset/config.vsh.yaml

Lines changed: 0 additions & 34 deletions
This file was deleted.

src/data_processors/process_dataset/script.py

Lines changed: 0 additions & 86 deletions
This file was deleted.

0 commit comments

Comments
 (0)