
Commit a0c7550

Write train test subset for from_mean analysis
1 parent 9c4c0af commit a0c7550

File tree

5 files changed: +77 -2 lines changed


postprocess.sh

Lines changed: 4 additions & 1 deletion
@@ -12,4 +12,7 @@ RESULTS="$BASE/results/auditory"
 
 g5k-setup-docker -t
 docker build . -t $TAG
-docker run -u root -v "$RESULTS:/results" $TAG python -u postprocess.py --results "/results"
+docker run -u root -v "$RESULTS:/results" $TAG python -u postprocess.py --results "/results"
+
+sudo-g5k chown -R ymerel:empenn $RESULTS/*.csv
+sudo-g5k chown -R ymerel:empenn $RESULTS/*.nii
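
Note: since the container runs as root (-u root), the files it writes under $RESULTS come out root-owned; the added sudo-g5k chown calls hand the generated CSV and NIfTI outputs back to the submitting user.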

postprocess/postprocess_service.py

Lines changed: 31 additions & 0 deletions
@@ -5,7 +5,11 @@
 import pandas as pd
 from typing import List
 
+from pandas import DataFrame
+
+from core.file_service import FileService
 from postprocess.correlation_service import CorrelationService
+from sklearn.model_selection import train_test_split
 
 
 class PostprocessService:
@@ -48,6 +52,7 @@ def get_all_correlations(self, path, ids: List[str]) -> pd.DataFrame:
             data.append((ids[i], 'mean', corr))
             data.append(('mean', ids[i], corr))
             print(f"Processed correlations for [{i+1} / {n}] result")
+        data.append(('mean', 'mean', 1.0))
         dataframe = pd.DataFrame(data, columns=['source', 'target', 'correlation'])
         return dataframe.sort_values(by='correlation', ascending=False)
@@ -84,3 +89,29 @@ def get_mean_image(self, inputs: list, batch_size: int) -> nib.Nifti1Image:
         mean_nifti = nib.Nifti1Image(mean_image, affine=nib.load(inputs[0]).affine)
         print("Mean image created.")
         return mean_nifti
+
+    def get_train_test(self, path: str, dataset: pd.DataFrame, train_size: float, iteration: int):
+        print(f"Iteration [{iteration}] - Training size [{train_size}]")
+        X = dataset['id']
+        y = dataset['id']
+        X_id_train, X_id_test, y_id_train, y_id_test = train_test_split(X, y, train_size=train_size)
+
+        self.write_subset(X_id_train, dataset, path, f'train_{iteration}')
+        self.write_subset(X_id_test, dataset, path, f'test_{iteration}')
+
+    def write_subset(self, ids: List[str], dataset: DataFrame, path: str, name: str):
+        size = len(ids)
+        ds_name = f'sub_dataset_{size}_{name}.csv'
+        mean_path = os.path.join(path, 'tmp_mean_result.nii')
+        files = []
+        for conf_id in ids:
+            files.append(os.path.join(path, conf_id, '_subject_id_01', 'result.nii'))
+        mean_img = self.get_mean_image(files, 10)
+        nib.save(mean_img, mean_path)
+        print(f"Computing correlations to mean image for [{size}] results...")
+        for index, row in dataset.iterrows():
+            img = os.path.join(path, row['id'], '_subject_id_01', 'result.nii')
+            dataset.at[index, 'from_mean'] = self.corr_srv.get_correlation_coefficient(mean_path, img, 'spearman')
+        dataset.to_csv(os.path.join(path, ds_name),
+                       index=False, sep=';')
+        print(f"Written to [{ds_name}].")

requirements.txt

Lines changed: 2 additions & 1 deletion
@@ -2,4 +2,5 @@ nipype~=1.8.6
 pandas~=2.0.3
 nibabel~=5.2.1
 numpy~=1.24.4
-scipy~=1.10.1
+scipy~=1.10.1
+scikit-learn~=1.3.2
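
The ~= operator is a PEP 440 compatible-release pin: scikit-learn~=1.3.2 allows >=1.3.2,<1.4, consistent with the existing pins.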

train_test.py

Lines changed: 23 additions & 0 deletions
@@ -0,0 +1,23 @@
+import os
+from argparse import ArgumentParser
+
+import numpy as np
+import pandas as pd
+
+from postprocess.postprocess_service import PostprocessService
+
+
+def train_test():
+    postproc_srv = PostprocessService()
+
+    parser = ArgumentParser(description='Post processing of results')
+    parser.add_argument('--results', required=True, type=str, help='path to results')
+    parser.add_argument('--iter', required=True, type=int, help='iteration number')
+    args = parser.parse_args()
+    basedir = args.results
+    iteration = args.iter
+    dataset = pd.read_csv(os.path.join(basedir, 'extended_dataset.csv'), delimiter=';').drop(columns=['mean_corr'])
+    train_sizes = np.linspace(0.1, 0.7, 7).tolist()
+    for train_size in train_sizes:
+        postproc_srv.get_train_test(basedir, dataset, train_size, iteration)
+train_test()
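
For reference, np.linspace(0.1, 0.7, 7) evaluates to [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7], so one run of the script sweeps seven training fractions and writes seven train/test CSV pairs, all tagged with the given --iter number.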

train_test.sh

Lines changed: 17 additions & 0 deletions
@@ -0,0 +1,17 @@
+#!/usr/bin/env bash
+
+#OAR -l walltime=5
+#OAR -O ./log/train_test_log_%jobid%.stdout
+#OAR -E ./log/train_test_log_%jobid%.stderr
+#OAR -q production
+
+TAG="fmri-confs-runner"
+
+BASE="/home/ymerel/empenn_group_storage/private/ymerel"
+RESULTS="$BASE/results/auditory"
+
+g5k-setup-docker -t
+docker build . -t $TAG
+docker run -u root -v "$RESULTS:/results" $TAG python -u train_test.py --results "/results" --iter 3
+
+sudo-g5k chown -R ymerel:empenn $RESULTS/*.csv
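
The #OAR lines are directives for OAR, the batch scheduler used on Grid'5000: -l walltime=5 requests the job's walltime (hours, in OAR's h[:m[:s]] format), -O and -E route stdout and stderr to per-job log files (OAR expands %jobid%), and -q production submits to the production queue.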
