Skip to content

Commit 1afbaf4

Browse files
authored
Merge pull request #1648 from nilchia/fix_header_flexy
change flexynesis input/output from csv to tabular
2 parents e466e3b + bf52e38 commit 1afbaf4

22 files changed

+17166
-16952
lines changed

tools/flexynesis/convert.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
#!/usr/bin/env python
2+
3+
import sys
4+
5+
import pandas as pd
6+
7+
8+
def tabular_to_csv(tabular_file, csv_file):
    """Rewrite a tab-separated table as a comma-separated file.

    The table at ``tabular_file`` is parsed by pandas with a tab delimiter
    (first row used as the header) and written to ``csv_file`` using the
    default comma separator. The DataFrame's positional index is not
    written to the output.
    """
    pd.read_csv(tabular_file, sep="\t").to_csv(csv_file, index=False)
12+
13+
14+
def csv_to_tabular(csv_file, tabular_file):
    """Rewrite a comma-separated table as a tab-separated file.

    The table at ``csv_file`` is parsed by pandas with its default comma
    delimiter (first row used as the header) and written to
    ``tabular_file`` with tabs between fields. The DataFrame's positional
    index is not written to the output.
    """
    pd.read_csv(csv_file).to_csv(tabular_file, sep="\t", index=False)
18+
19+
20+
def main(argv=None):
    """Convert between CSV and tabular based on the input file's suffix.

    Args:
        argv: Sequence ``[input_file, output_file, ...]``. When ``None``,
            the arguments are taken from ``sys.argv[1:]``. Arguments after
            the first two are ignored.

    Raises:
        SystemExit: With a usage message (non-zero exit status) when fewer
            than two arguments are supplied.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) < 2:
        # Fail with a readable message instead of an IndexError traceback.
        sys.exit("Usage: convert.py <input_file> <output_file>")

    input_file, output_file = args[0], args[1]

    # Direction is chosen purely from the input path: a '.csv' suffix means
    # CSV -> tabular; any other name is treated as tabular -> CSV.
    if input_file.endswith('.csv'):
        csv_to_tabular(input_file, output_file)
    else:
        tabular_to_csv(input_file, output_file)


if __name__ == "__main__":
    main()

tools/flexynesis/flexynesis.xml

Lines changed: 342 additions & 182 deletions
Large diffs are not rendered by default.

tools/flexynesis/flexynesis_plot.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -214,21 +214,25 @@ def plot_boxplot(categorical_x, numerical_y, title_x='Categories', title_y='Valu
214214
def generate_dimred_plots(embeddings, matched_labels, args, output_dir, output_name_base):
215215
"""Generate dimensionality reduction plots"""
216216

217-
# Parse target variables
218-
target_vars = [var.strip() for var in args.target_variables.split(',')]
217+
# Parse target values from comma-separated string
218+
if args.target_value:
219+
target_values = [val.strip() for val in args.target_value.split(',')]
220+
else:
221+
# If no target values specified, use all unique variables
222+
target_values = matched_labels['variable'].unique().tolist()
219223

220-
print(f"Generating {args.method.upper()} plots for {len(target_vars)} target variable(s): {', '.join(target_vars)}")
224+
print(f"Generating {args.method.upper()} plots for {len(target_values)} target variable(s): {', '.join(target_values)}")
221225

222226
# Check variables
223227
available_vars = matched_labels['variable'].unique()
224-
missing_vars = [var for var in target_vars if var not in available_vars]
228+
missing_vars = [var for var in target_values if var not in available_vars]
225229

226230
if missing_vars:
227231
print(f"Warning: The following target variables were not found in the data: {', '.join(missing_vars)}")
228232
print(f"Available variables: {', '.join(available_vars)}")
229233

230234
# Filter to only process available variables
231-
valid_vars = [var for var in target_vars if var in available_vars]
235+
valid_vars = [var for var in target_values if var in available_vars]
232236

233237
if not valid_vars:
234238
raise ValueError(f"None of the specified target variables were found in the data. Available: {', '.join(available_vars)}")
@@ -989,8 +993,6 @@ def main():
989993
help="Path to input data embeddings file (CSV or tabular format). Required for dimred plots.")
990994
parser.add_argument("--method", type=str, default='pca', choices=['pca', 'umap'],
991995
help="Transformation method ('pca' or 'umap'). Default is 'pca'. Used for dimred plots.")
992-
parser.add_argument("--target_variables", type=str, required=False,
993-
help="Comma-separated list of target variables to plot.")
994996

995997
# Arguments for Kaplan-Meier
996998
parser.add_argument("--survival_data", type=str,
@@ -1024,7 +1026,7 @@ def main():
10241026
parser.add_argument("--random_state", type=int, default=42,
10251027
help="Random seed for reproducibility. Default is 42")
10261028

1027-
# Arguments for scatter plot, heatmap, PR curves, ROC curves, and box plots
1029+
# Arguments for dimred, scatter plot, heatmap, PR curves, ROC curves, and box plots
10281030
parser.add_argument("--target_value", type=str, default=None,
10291031
help="Target value for scatter plot.")
10301032

@@ -1057,8 +1059,6 @@ def main():
10571059
raise ValueError("--labels is required for dimensionality reduction plots")
10581060
if not args.method:
10591061
raise ValueError("--method is required for dimensionality reduction plots")
1060-
if not args.target_variables:
1061-
raise ValueError("--target_variables is required for dimensionality reduction plots")
10621062

10631063
if args.plot_type in ['kaplan_meier']:
10641064
if not args.survival_data:

tools/flexynesis/flexynesis_plot.xml

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,7 @@ for class_label in classes:
152152
<when value="dimred">
153153
<expand macro="plots_common_param">
154154
<expand macro="plots_common_input"/>
155-
<param argument="--embeddings" type="data" format="tabular,csv" label="Embeddings" help="Generated by flexynesis"/>
155+
<param argument="--embeddings" type="data" format="tabular" label="Embeddings" help="Generated by flexynesis"/>
156156
<param argument="--label" type="data_column" data_ref="labels" label="Column in the labels file to use for coloring the points in the plot"/>
157157
<param name="method" type="select" label="Transformation method">
158158
<option value="pca" selected="true">PCA</option>
@@ -206,10 +206,10 @@ for class_label in classes:
206206
<param name="non_commercial_use" value="True"/>
207207
<conditional name="plot_conditional">
208208
<param name="plot_type" value="dimred"/>
209-
<param name="embeddings" value="embeddings.csv"/>
209+
<param name="embeddings" value="embeddings.tabular"/>
210210
<param name="label" value="6"/>
211211
<param name="method" value="pca"/>
212-
<param name="labels" value="labels.csv"/>
212+
<param name="labels" value="labels.tabular"/>
213213
<param name="format" value="jpg"/>
214214
<param name="dpi" value="300"/>
215215
</conditional>
@@ -227,7 +227,7 @@ for class_label in classes:
227227
<param name="non_commercial_use" value="True"/>
228228
<conditional name="plot_conditional">
229229
<param name="plot_type" value="scatter"/>
230-
<param name="labels" value="labels_scatter.csv"/>
230+
<param name="labels" value="labels_scatter.tabular"/>
231231
<param name="true_label" value="5"/>
232232
<param name="predicted_label" value="6"/>
233233
<param name="format" value="jpg"/>
@@ -247,7 +247,7 @@ for class_label in classes:
247247
<param name="non_commercial_use" value="True"/>
248248
<conditional name="plot_conditional">
249249
<param name="plot_type" value="concordance_heatmap"/>
250-
<param name="labels" value="labels.csv"/>
250+
<param name="labels" value="labels.tabular"/>
251251
<param name="true_label" value="5"/>
252252
<param name="predicted_label" value="6"/>
253253
<param name="format" value="jpg"/>
@@ -267,7 +267,7 @@ for class_label in classes:
267267
<param name="non_commercial_use" value="True"/>
268268
<conditional name="plot_conditional">
269269
<param name="plot_type" value="pr_curve"/>
270-
<param name="labels" value="labels_pr.csv"/>
270+
<param name="labels" value="labels_pr.tabular"/>
271271
<param name="format" value="jpg"/>
272272
<param name="dpi" value="300"/>
273273
</conditional>
@@ -285,7 +285,7 @@ for class_label in classes:
285285
<param name="non_commercial_use" value="True"/>
286286
<conditional name="plot_conditional">
287287
<param name="plot_type" value="roc_curve"/>
288-
<param name="labels" value="labels_pr.csv"/>
288+
<param name="labels" value="labels_pr.tabular"/>
289289
<param name="format" value="jpg"/>
290290
<param name="dpi" value="300"/>
291291
</conditional>
@@ -303,7 +303,7 @@ for class_label in classes:
303303
<param name="non_commercial_use" value="True"/>
304304
<conditional name="plot_conditional">
305305
<param name="plot_type" value="box_plot"/>
306-
<param name="labels" value="labels_pr.csv"/>
306+
<param name="labels" value="labels_pr.tabular"/>
307307
<param name="format" value="jpg"/>
308308
<param name="dpi" value="300"/>
309309
</conditional>

tools/flexynesis/flexynesis_utils.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -156,8 +156,8 @@ def split_and_save_data(data, ratio=0.7, output_dir='.'):
156156
# Save train and test data
157157
for key in data.keys():
158158
try:
159-
train_data[key].to_csv(os.path.join(output_dir, 'train', f'{key}.csv'))
160-
test_data[key].to_csv(os.path.join(output_dir, 'test', f'{key}.csv'))
159+
train_data[key].to_csv(os.path.join(output_dir, 'train', f'{key}.tabular'), sep='\t')
160+
test_data[key].to_csv(os.path.join(output_dir, 'test', f'{key}.tabular'), sep='\t')
161161
except Exception as e:
162162
print(f"Error saving {key}: {e}")
163163
continue
@@ -244,8 +244,8 @@ def main():
244244

245245
binarized_matrix = binarize_mutations(mutations_df, gene_idx=args.gene_idx, sample_idx=args.sample_idx)
246246
# Save binarized matrix
247-
output_file = os.path.join(args.out, 'binarized_mutations.csv')
248-
binarized_matrix.to_csv(output_file)
247+
output_file = os.path.join(args.out, 'binarized_mutations.tabular')
248+
binarized_matrix.to_csv(output_file, sep='\t')
249249
print(f"Binarized mutation matrix saved to {output_file}")
250250

251251
except Exception as e:

0 commit comments

Comments
 (0)