lina-usc · scott-huberty · Jan 18, 2025 · Jan 10, 2025 · Jan 10, 2025 · Jan 10, 2025
diff --git a/eoglearn/datasets/eegeyenet.py b/eoglearn/datasets/eegeyenet.py
@@ -1,4 +1,5 @@
 from pathlib import Path
+from typing import Literal
 
 import pandas as pd
 
@@ -14,23 +15,31 @@ def _get_params(subject, run):
     row = df.loc[(df.subject == subject.upper()) & (df.run == int(run))]
     assert len(row) == 1
     row = row.T.squeeze()
+    task = row["task"]
     return dict(
                 url=row["url"],
-                archive_name=f"{subject}_DOTS{run}_EEG.mat",
-                folder_name=f"EEGEYENET-Data/dots/{subject}",
+                archive_name=f"{subject}_{task}{run}_EEG.mat",
+                folder_name=f"EEGEYENET-Data/{task}/{subject}",
                 hash=row["hash"],
                 dataset_name="EEGEYENET")
 
 
-def get_subjects_runs():
+def get_subjects_runs(task: Literal["DOTS", "AS"] = "DOTS"):
     """Get dictionary of {subject: [lists of runs]}.
 
+    Parameters
+    ----------
+    task : {"DOTS", "AS"}
+        Which EEGEYENET task to extract the subject IDs and runs for. Can be
+        ``"DOTS"`` or ``"AS"`` (antisaccade). Defaults to ``"DOTS"``.
+
     Returns
     -------
     dict
         Dictionary of subjects with the runs as values.
     """
     df = _get_urls_df()
+    df = df.loc[df["task"] == task].copy()
     return {subject: df.run.values[df.subject == subject]
             for subject in df.subject.unique()}
 
@@ -54,13 +63,14 @@ def fetch_eegeyenet(subject="EP10", run=1, fetch_dataset_kwargs=None):
     pathlib.Path
         Path to the downloaded file.
     """
+    task = _get_task_from_subject_id(subject)
     if not fetch_dataset_kwargs:
         fetch_dataset_kwargs = dict()
     run = int(run)
-    runs = get_subjects_runs()
+    runs = get_subjects_runs(task=task)
     if subject not in runs or run not in runs[subject]:
-        raise ValueError("subject and run not available. See "
-                         "get_subjects_runs() for information on "
+        raise ValueError(f"subject {subject} and run {run} not available. "
+                         "See get_subjects_runs() for information on "
                          "available subjects and runs.")
 
     fetch_dataset_kwargs["dataset_params"] = _get_params(subject, run)
@@ -72,5 +82,14 @@ def fetch_eegeyenet(subject="EP10", run=1, fetch_dataset_kwargs=None):
     if not fpath.exists():
         fetch_dataset_kwargs["force_update"] = True
         _fetch_dataset(fetch_dataset_kwargs=fetch_dataset_kwargs)
-
     return fpath
+
+
+def _get_task_from_subject_id(subject):
+    if  subject.startswith("EP"):
+        return "DOTS"
+    if subject.startswith(("A", "B")):
+        return "AS"
+    raise ValueError(
+        f"Can't determine task for {subject}. Is this subject in eegeyenet_urls.csv?"
+        )