-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhopkins-main
executable file
·127 lines (99 loc) · 5.51 KB
/
hopkins-main
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
#!/usr/bin/env bash
set -eu

# Absolute path of the directory containing this script; every project path
# below is built from it.
PROJECT_HOME="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"

# Force GNU date syntax via coreutils on OS X
date=date
if [[ $OSTYPE == darwin* ]]; then
  date=gdate
fi

mef_lib=$PROJECT_HOME/mef_lib_2_1
offset=60000000 # one minute pre and post-ictal period offset in microseconds
frequency=1000 # number of samples / second in source data

# Default to the empty string: with `set -u` a bare `$1` aborts the script with
# "unbound variable" before the usage message below can be printed.
patient_id=${1:-}
[[ -n "$patient_id" ]] || { echo "Usage: ./eztrack <patient_id (e.g. PY12N008)>" ; exit 1 ; }

# Deliberately a plain string: it is expanded unquoted where MATLAB is invoked
# so that it word-splits into the command and its flags.
matlab_jvm="matlab -nodesktop -nosplash -r"
# `command -v` is the portable builtin replacement for `which`.
command -v matlab > /dev/null 2>&1 || \
  { echo "MATLAB not found on the PATH; please check the Getting Started section in the README" ; exit 1 ; }
# Use a fixed format string so characters in the patient id are never
# interpreted as printf directives.
printf '\n== Validating %s.csv ==\n' "$patient_id"
# Events should be marked up by the clinician as follows in, e.g. PY15N012.csv
#
# patient_id,date,onset_time,offset_time
# PY15N012,2015-08-17,16:16:57,16:21:57
patient_csv="$PROJECT_HOME/data/patients/$patient_id.csv"
[[ -r "$patient_csv" ]] || { echo "Cannot read patient file: $patient_csv" ; exit 1 ; }

# Read the csv: Assume only one entry for now, though we could process all to find all segments, start, and end marks.
# Only the second line (the first row after the header) is parsed.
IFS=',' read -r -a inputs <<< "$(head -n 2 "$patient_csv" | tail -n 1)"
# Fail with a clear message instead of an "unbound variable" error on a short row.
(( ${#inputs[@]} >= 4 )) || \
  { echo "Expected patient_id,date,onset_time,offset_time in $patient_csv" ; exit 1 ; }
dt="${inputs[1]}"
s_onset="${inputs[2]}"
# Drop a trailing carriage return in case the file was saved with CRLF line endings.
s_offset="${inputs[3]%$'\r'}"

# Convert to uUTC: epoch seconds followed by six digits of sub-second precision
# (GNU date's %6N), interpreting the clinician's times as America/New_York.
seizure_onset_time=$(TZ=America/New_York "$date" --date="$dt $s_onset" +%s%6N)
seizure_offset_time=$(TZ=America/New_York "$date" --date="$dt $s_offset" +%s%6N)

# Debug output: prints a fixed reference value next to each computed value.
# NOTE(review): the "old" numbers look like previously hard-coded times for one patient; confirm before relying on them.
printf 'old seizure_onset_time\n1439842617000000\n%s\n' "$seizure_onset_time"
printf 'old seizure_offset_time\n1439842917000000\n%s\n' "$seizure_offset_time"
printf '\n== Finding MEF files for %s ==\n' "$patient_id"
echo "TODO: mef_finder"
# Search the xml files in the segments for $patient_id to find the one containing the onset time.
# Use the containing folder as the source of our mef_data. Create a new mef_results data to hold the results of our processing,
# separating input from output.
# Until mef_finder exists the segment id is hard-coded.
segment_id=0077
mef_data=$PROJECT_HOME/data/mef/$patient_id

printf '\n== mef2eeg ==\n'
# Determine the interval of data to test in the segment.
# The -name pattern is quoted so the shell cannot expand it against the current
# directory before find sees it.
first_mef=$(find "$mef_data" -name '*.mef' | head -n 1)
[[ -n "$first_mef" ]] || { echo "No .mef files found under $mef_data" ; exit 1 ; }
# Third space-separated field of the header line containing "start".
recording_start_time=$("$mef_lib/read_mef_header" "$first_mef" | grep start | cut -d ' ' -f 3)
# Without this check an empty or malformed value surfaces later as an arithmetic syntax error.
[[ "$recording_start_time" =~ ^-?[0-9]+$ ]] || \
  { echo "Could not read a recording start time from $first_mef" ; exit 1 ; }
echo "Recording start time: $recording_start_time"

# Window = [onset - offset, offset_time + offset], expressed relative to the recording start.
start_in_micros=$(( seizure_onset_time - offset - recording_start_time ))
duration_in_micros=$(( seizure_offset_time + offset - seizure_onset_time ))
# Multiply before dividing: the former (1000000 / frequency) truncated for rates
# that do not divide one million and divided by zero for rates above it.
number_of_samples=$(( duration_in_micros * frequency / 1000000 ))
start_in_millis=$(( start_in_micros / 1000 ))
echo "Extracting $number_of_samples samples from records, starting at $start_in_millis."

# start mark must be greater than 60, so simply use 61s.
# This is a constant since we're trimming the signals below in time_filter.
start_mark=61
# end_mark must be at least 60s less than the duration in the file.
# Give the end mark offset a wider margin to avoid 'Matrix dimensions must agree' errors.
end_mark_offset=$(( offset + 5000000 ))
end_mark=$(( (duration_in_micros - end_mark_offset) / 1000000 ))
echo "start_mark: $start_mark"
echo "end_mark: $end_mark"
# Each stage fans out over the files in $mef_data with up to 8 parallel jobs.
# File names are passed NUL-delimited from a shell glob rather than parsed from
# `ls`, so paths containing whitespace stay intact.
printf '\n\t== mef2ascii ==\n'
# Clear text output from any previous run; :? guards against an empty path.
rm -f -- "${mef_data:?}"/*.txt
printf '%s\0' "$mef_data"/*.mef | xargs -0 -n 1 -P 8 -t "$mef_lib/mef2ascii"

printf '\n\t== downsample ==\n'
# NOTE(review): assumes mef2ascii wrote its .txt output into $mef_data; confirm.
printf '%s\0' "$mef_data"/*.txt | xargs -0 -n 1 -P 8 -t "$PROJECT_HOME/mef2eeg/downsample"

printf '\n\t== time_filter ==\n'
# -I already runs one command per input item; combining it with -n is rejected
# or ignored with a warning by xargs, so -n 1 is dropped here.
printf '%s\0' "$mef_data"/*downsample.txt \
  | xargs -0 -I% -P 8 -t "$PROJECT_HOME/mef2eeg/time_filter" % "$start_in_millis" "$number_of_samples"
printf '\n\t== channel_stats ==\n'
stats_file=$PROJECT_HOME/data/mef/$patient_id/channel_stats_all.csv
rm -f -- "$stats_file"

# Compute statistics for all channels
# The tool is addressed through $PROJECT_HOME like the other mef2eeg tools, so
# the script no longer depends on being launched from the project directory.
# -name patterns are quoted so the shell cannot expand them before find runs.
find "$mef_data" -name '*.mef' -print0 | xargs -0 -n 1 -P 8 "$PROJECT_HOME/mef2eeg/channel_stats"

# Write the channel stats header by grabbing the first line of the first channel stat file
first_stats=$(find "$mef_data" -name '*stats.csv' | head -n 1)
[[ -n "$first_stats" ]] || { echo "No channel stats files found under $mef_data" ; exit 1 ; }
head -n 1 -- "$first_stats" > "$stats_file"

# Merge all channel stats into one CSV for analysis
# Only the last line of each per-channel file is appended.
find "$mef_data" -name '*stats.csv' -print0 | xargs -0 -n 1 tail -n 1 >> "$stats_file"
printf '\nstats_file saved to: %s\n' "$stats_file"
# All expansions are quoted so a project path containing whitespace is passed
# to each tool as a single argument.
printf '\n\t== channel_filter ==\n'
eeg_data=$PROJECT_HOME/output/eeg/$patient_id
mkdir -p "$eeg_data"
"$PROJECT_HOME/channel_filter" "$patient_id" "$mef_data" "$eeg_data" "$segment_id"

printf '\n\t== signals2eeg ==\n'
# The channel filter defines the signals of interest. Combine these into one file:
# each channel file becomes one comma-separated column of the output CSV.
paste -d ',' "$mef_data"/channels/*.txt > "$eeg_data/${patient_id}_${segment_id}_eeg.csv"
printf '\n== eeg2fsv ==\n'
# Count the channel files with a glob instead of parsing `ls | wc -l`.
channel_files=("$mef_data"/channels/*.txt)
# An unmatched glob stays literal, so check that the first entry really exists.
[[ -e "${channel_files[0]}" ]] || { echo "No channel files found in $mef_data/channels" ; exit 1 ; }
num_channels=${#channel_files[@]}
included_channels="[1:$num_channels]" # PY12N008: "[1:4 7:89]". Convert the data to not require defining these gaps.
sizes="[$number_of_samples]" # PY12N008: "[640000, 672000, 737000, 729000]"

eeg2fsv_out=$eeg_data/adj_pwr
# Start from a clean output directory; :? guards against an empty path.
rm -rf -- "${eeg2fsv_out:?}"

# Build the MATLAB call once so the logged command is exactly the one executed.
eeg2fsv_call="eeg2fsv('${eeg_data}/', '$patient_id', $num_channels, $included_channels, $sizes)"
# Run each step as its own statement: inside an `a && b && c` list `set -e`
# ignores failures of every command except the last, so a failed cd was silent.
cd "$PROJECT_HOME/eeg2fsv" || { echo "Cannot cd to $PROJECT_HOME/eeg2fsv" ; exit 1 ; }
echo "Invoking matlab with $eeg2fsv_call"
# $matlab_jvm is intentionally unquoted so it splits into the command and its flags.
$matlab_jvm "$eeg2fsv_call; exit"

# Copy results to output dir used in fsv2heatmap computation.
# Create the directory first so cp cannot produce a plain file named "fsv".
mkdir -p "$PROJECT_HOME/output/fsv"
cp "$eeg2fsv_out/svd_vectors/fsv_pwr$patient_id.mat" "$PROJECT_HOME/output/fsv/"
printf '\n== fsv2heatmap ==\n'
# MATLAB's console output is captured here and parsed below.
temporal_out=/tmp/eztrack-temporal
# cd is checked on its own: inside an `a && b` list `set -e` ignores a failing
# first command, which let the script go on to parse a stale $temporal_out.
cd "$PROJECT_HOME/fsv2heatmap" || { echo "Cannot cd to $PROJECT_HOME/fsv2heatmap" ; exit 1 ; }
# $matlab_jvm is intentionally unquoted so it splits into the command and its flags.
$matlab_jvm "csv_file = temporal_ieeg_results('$PROJECT_HOME', '$patient_id', '${patient_id}_${segment_id}_labels.csv', $start_mark, $end_mark); display(csv_file); exit" > "$temporal_out"

# HACK: Have fsv2heatmap save this output path to a different file to avoid relying on position with tail.
# The result path is taken to be the third line from the end of MATLAB's output.
results=$(tail -n 3 "$temporal_out" | head -n 1)
# Strip surrounding whitespace, as the former unquoted `echo $results` did,
# without exposing the value to glob expansion.
read -r results_path <<< "$results" || true
printf '%s\n' "$results_path" > /tmp/eztrack.out
printf '\nEZTrack is finished processing %s. Results saved to:\n%s\nThis path is also in /tmp/eztrack.out\n' "$patient_id" "$results"