hopkins-main

#!/usr/bin/env bash

# Abort on the first failing command (-e) and on any reference to an unset
# variable (-u).
set -eu

# Absolute path of the directory that contains this script. All tool and data
# paths used below are resolved relative to it.
PROJECT_HOME="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Force GNU date syntax via coreutils on OS X
date=date
if [[ $OSTYPE == darwin* ]]; then
  date=gdate
fi
# Fail early with a clear message rather than much later, at the first
# timestamp conversion.
if ! command -v "$date" > /dev/null 2>&1; then
  echo "$date not found on the PATH; on OS X it is provided by GNU coreutils" >&2
  exit 1
fi

mef_lib=$PROJECT_HOME/mef_lib_2_1

offset=60000000  # one minute pre and post-ictal period offset in microseconds
frequency=1000   # number of samples / second in source data

# Default to the empty string: with `set -u` a bare $1 would abort with
# "unbound variable" before the usage message below could be printed.
patient_id=${1:-}
if [[ -z "$patient_id" ]]; then
  echo "Usage: ./eztrack <patient_id (e.g. PY12N008)>" >&2
  exit 1
fi

# Deliberately a plain string, not an array: later sections expand
# $matlab_jvm unquoted so that it word-splits into the command and its flags.
matlab_jvm="matlab -nodesktop -nosplash -r"
if ! command -v matlab > /dev/null 2>&1; then
  echo "MATLAB not found on the PATH; please check the Getting Started section in the README" >&2
  exit 1
fi

printf '\n== Validating %s.csv  ==\n' "$patient_id"
# Events should be marked up by the clinician as follows in, e.g. PY15N012.csv
#
#    patient_id,date,onset_time,offset_time
#    PY15N012,2015-08-17,16:16:57,16:21:57

events_csv=$PROJECT_HOME/data/patients/$patient_id.csv
if [[ ! -r "$events_csv" ]]; then
  echo "Cannot read event file: $events_csv" >&2
  exit 1
fi

# Read the csv: Assume only one entry for now, though we could process all to
# find all segments, start, and end marks.
# Line 1 is the header, so line 2 is the first (and only used) event. Taking
# exactly line 2 means a header-only file yields an empty row instead of the
# header being parsed as if it were data.
event_row=$(sed -n '2{p;q;}' "$events_csv")
event_row=${event_row%$'\r'}  # tolerate CRLF line endings
IFS=',' read -r -a inputs <<< "$event_row"

if (( ${#inputs[@]} < 4 )); then
  echo "Expected 'patient_id,date,onset_time,offset_time' on line 2 of $events_csv" >&2
  exit 1
fi

dt="${inputs[1]}"
s_onset="${inputs[2]}"
s_offset="${inputs[3]}"

# Convert to uUTC (microseconds since the epoch); the clinician's wall-clock
# times are interpreted in the America/New_York time zone. %6N is GNU date
# syntax, hence the $date indirection.
seizure_onset_time=$(TZ=America/New_York "$date" --date="$dt $s_onset" +%s%6N)
seizure_offset_time=$(TZ=America/New_York "$date" --date="$dt $s_offset" +%s%6N)

# Debug output: the hard-coded numbers are the uUTC values of the PY15N012
# example row above, printed next to the freshly computed ones for comparison.
printf 'old seizure_onset_time\n1439842617000000\n%s\n' "$seizure_onset_time"
printf 'old seizure_offset_time\n1439842917000000\n%s\n' "$seizure_offset_time"

# The patient id is passed as a printf argument rather than embedded in the
# format string, so '%' or '\' characters in it are printed literally.
printf '\n== Finding MEF files for %s  ==\n' "$patient_id"
echo "TODO: mef_finder"
# Search the xml files in the segments for $patient_id to find the one containing the onset time.
# Use the containing folder as the source of our mef_data. Create a new mef_results data to hold the results of our processing,
# separating input from output.
# Until the finder exists, the segment id is hard-coded and the MEF data is
# expected directly under data/mef/<patient_id>.
segment_id=0077
mef_data=$PROJECT_HOME/data/mef/$patient_id

printf "\n== mef2eeg ==\n"
# Determine the interval of data to test in the segment.
# The recording start time is read from the header of the first .mef file
# found under $mef_data. The -name pattern is quoted so that the shell cannot
# expand it against .mef files in the current directory before find sees it.
first_mef=$(find "$mef_data" -name '*.mef' | head -n 1)
if [[ -z "$first_mef" ]]; then
  echo "No .mef files found under $mef_data" >&2
  exit 1
fi
recording_start_time=$("$mef_lib/read_mef_header" "$first_mef" | grep start | cut -d ' ' -f 3)
# An empty value would silently evaluate to 0 in the arithmetic below and
# produce a nonsensical start offset, so insist on a plain integer.
if [[ ! "$recording_start_time" =~ ^[0-9]+$ ]]; then
  echo "Could not read a recording start time from the header of $first_mef (got '$recording_start_time')" >&2
  exit 1
fi
echo "Recording start time: $recording_start_time"

# Window to extract: from $offset before seizure onset to $offset after
# seizure offset, expressed relative to the start of the recording.
start_in_micros=$(( seizure_onset_time - offset - recording_start_time ))
duration_in_micros=$(( seizure_offset_time + offset - seizure_onset_time ))
# (1000000 / frequency) is the sample period in microseconds.
number_of_samples=$(( duration_in_micros / (1000000 / frequency) ))
start_in_millis=$(( start_in_micros / 1000 ))
echo "Extracting $number_of_samples samples from records, starting at $start_in_millis."

# start mark must be greater than 60, so simply use 61s.
# This is a constant since we're trimming the signals below in time_filter.
start_mark=61
# end_mark must be at least 60s less than the duration in the file.
# Give the end mark offset a wider margin to avoid 'Matrix dimensions must agree' errors.
end_mark_offset=$(( offset + 5000000 ))
end_mark=$(( (duration_in_micros - end_mark_offset) / 1000000 ))
echo "start_mark: $start_mark"
echo "end_mark: $end_mark"

# Three conversion stages over the files directly inside $mef_data. Each stage
# feeds its file list to xargs NUL-delimited (so paths containing whitespace
# survive), runs up to 8 processes in parallel (-P 8) and echoes every command
# before running it (-t).

printf "\n\t== mef2ascii ==\n"
# Remove .txt files left over from a previous run; :? guards against an empty
# $mef_data turning this into `rm -f /*.txt`.
rm -f -- "${mef_data:?}"/*.txt
mef_files=("$mef_data"/*.mef)
if [[ ! -e "${mef_files[0]}" ]]; then
  echo "No .mef files found in $mef_data" >&2
  exit 1
fi
printf '%s\0' "${mef_files[@]}" | xargs -0 -n 1 -P 8 -t "$mef_lib/mef2ascii"

printf "\n\t== downsample ==\n"
ascii_files=("$mef_data"/*.txt)
if [[ ! -e "${ascii_files[0]}" ]]; then
  echo "mef2ascii produced no .txt files in $mef_data" >&2
  exit 1
fi
printf '%s\0' "${ascii_files[@]}" | xargs -0 -n 1 -P 8 -t "$PROJECT_HOME/mef2eeg/downsample"

printf "\n\t== time_filter ==\n"
downsampled_files=("$mef_data"/*downsample.txt)
if [[ ! -e "${downsampled_files[0]}" ]]; then
  echo "downsample produced no *downsample.txt files in $mef_data" >&2
  exit 1
fi
# -I already runs one command per input item. It must not be combined with
# -n: the two options are mutually exclusive and GNU xargs honours only the
# last one given, which would drop the % substitution.
printf '%s\0' "${downsampled_files[@]}" \
  | xargs -0 -I% -P 8 -t "$PROJECT_HOME/mef2eeg/time_filter" % "$start_in_millis" "$number_of_samples"

printf "\n\t== channel_stats ==\n"
# Merged statistics file. Its name does not end in "stats.csv", so the
# '*stats.csv' searches below never pick it up.
stats_file=$PROJECT_HOME/data/mef/$patient_id/channel_stats_all.csv
rm -f -- "$stats_file"
# Compute statistics for all channels
# The tool is addressed through $PROJECT_HOME like the other mef2eeg tools, so
# the script no longer has to be started from the project directory. The
# -name patterns are quoted so the shell cannot expand them first.
find "$mef_data" -name '*.mef' -print0 \
  | xargs -0 -n 1 -P 8 "$PROJECT_HOME/mef2eeg/channel_stats"
# Write the channel stats header by grabbing the first line of the first channel stat file
first_stats=$(find "$mef_data" -name '*stats.csv' | head -n 1)
if [[ -z "$first_stats" ]]; then
  echo "channel_stats produced no *stats.csv files under $mef_data" >&2
  exit 1
fi
head -n 1 "$first_stats" > "$stats_file"
# Merge all channel stats into one CSV for analysis
# Only the last line of each per-channel file is appended.
find "$mef_data" -name '*stats.csv' -print0 \
  | xargs -0 -n 1 tail -n 1 >> "$stats_file"
printf '\nstats_file saved to: %s\n' "$stats_file"

printf "\n\t== channel_filter  ==\n"
# Output directory for this patient's EEG data. All path expansions in this
# section are quoted so that a project path containing spaces is not split
# into several arguments.
eeg_data=$PROJECT_HOME/output/eeg/$patient_id
mkdir -p -- "$eeg_data"

"$PROJECT_HOME/channel_filter" "$patient_id" "$mef_data" "$eeg_data" "$segment_id"

printf "\n\t== signals2eeg ==\n"
# The channel filter defines the signals of interest. Combine these into one file:
channel_files=("$mef_data"/channels/*.txt)
# Check before redirecting so that an empty channel set does not leave a
# truncated CSV behind.
if [[ ! -e "${channel_files[0]}" ]]; then
  echo "No channel files found in $mef_data/channels" >&2
  exit 1
fi
# One comma-separated column per channel file, in glob (sorted) order.
paste -d ',' "${channel_files[@]}" > "$eeg_data/${patient_id}_${segment_id}_eeg.csv"

printf "\n== eeg2fsv ==\n"
# Count the selected channel files with a glob instead of parsing `ls` output.
channel_files=("$mef_data"/channels/*.txt)
if [[ ! -e "${channel_files[0]}" ]]; then
  echo "No channel files found in $mef_data/channels" >&2
  exit 1
fi
num_channels=${#channel_files[@]}
included_channels="[1:$num_channels]"    # PY12N008: "[1:4 7:89]". Convert the data to not require defining these gaps.
sizes="[$number_of_samples]" # PY12N008: "[640000, 672000, 737000, 729000]"
eeg2fsv_out=$eeg_data/adj_pwr

# Start from a clean output directory; :? refuses to run if the path is empty.
rm -rf -- "${eeg2fsv_out:?}"

# Run each step as its own statement: inside an `a && b && c` list, `set -e`
# ignores a failure of anything but the last command, so a failed cd would
# otherwise go unnoticed.
cd "$PROJECT_HOME/eeg2fsv"
echo "Invoking matlab with eeg2fsv('$eeg_data', '$patient_id', $num_channels, $included_channels, $sizes)"
# $matlab_jvm is intentionally unquoted: it holds the command plus its flags.
# shellcheck disable=SC2086
$matlab_jvm "eeg2fsv('${eeg_data}/', '$patient_id', $num_channels, $included_channels, $sizes); exit"

# Copy results to output dir used in fsv2heatmap computation.
# Create the directory first; otherwise cp would write a plain file named "fsv".
fsv_dir=$PROJECT_HOME/output/fsv
mkdir -p -- "$fsv_dir"
cp -- "$eeg2fsv_out/svd_vectors/fsv_pwr$patient_id.mat" "$fsv_dir/"

printf "\n== fsv2heatmap ==\n"
# Capture MATLAB's console output in a private, unpredictably named temp file
# instead of a fixed path in the shared /tmp directory.
temporal_out=$(mktemp "${TMPDIR:-/tmp}/eztrack-temporal.XXXXXX")

# cd is its own statement so that `set -e` aborts the script if it fails.
cd "$PROJECT_HOME/fsv2heatmap"
# $matlab_jvm is intentionally unquoted: it holds the command plus its flags.
# shellcheck disable=SC2086
if ! $matlab_jvm "csv_file = temporal_ieeg_results('$PROJECT_HOME', '$patient_id', '${patient_id}_${segment_id}_labels.csv', $start_mark, $end_mark); display(csv_file); exit" > "$temporal_out"; then
  # Keep the captured output so the failure can be investigated.
  echo "fsv2heatmap failed; MATLAB output kept in $temporal_out" >&2
  exit 1
fi

# HACK: Have fsv2heatmap save this output path to a different file to avoid relying on position with tail.
# The result path is taken from the third-to-last line of MATLAB's output.
results=$(tail -n 3 "$temporal_out" | head -n 1)
rm -f -- "$temporal_out"

# Strip surrounding whitespace explicitly. The previous unquoted `echo` did
# this implicitly, but would also have split and glob-expanded the path.
results=${results#"${results%%[![:space:]]*}"}
results=${results%"${results##*[![:space:]]}"}
printf '%s\n' "$results" > /tmp/eztrack.out

printf '\nEZTrack is finished processing %s. Results saved to:\n%s\nThis path is also in /tmp/eztrack.out\n' "$patient_id" "$results"