Skip to content

Commit df8a987

Browse files
committed
fix: Colab notebook clones repo instead of pip install — benchmarks need repo files
1 parent bc95a6e commit df8a987

1 file changed

Lines changed: 2 additions & 66 deletions

File tree

benchmarks/colab_benchmark.ipynb

Lines changed: 2 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -51,46 +51,7 @@
5151
"execution_count": null,
5252
"metadata": {},
5353
"outputs": [],
54-
"source": [
55-
"# Install ALMA from GitHub (latest main branch)\n",
56-
"!pip install -q git+https://github.com/RBKunnela/ALMA-memory.git\n",
57-
"\n",
58-
"# Install GPU-accelerated FAISS if available, fall back to CPU\n",
59-
"import subprocess\n",
60-
"import sys\n",
61-
"\n",
62-
"try:\n",
63-
" subprocess.check_call(\n",
64-
" [sys.executable, \"-m\", \"pip\", \"install\", \"-q\", \"faiss-gpu\"],\n",
65-
" stdout=subprocess.DEVNULL,\n",
66-
" stderr=subprocess.DEVNULL,\n",
67-
" )\n",
68-
" print(\"Installed: faiss-gpu\")\n",
69-
"except subprocess.CalledProcessError:\n",
70-
" subprocess.check_call(\n",
71-
" [sys.executable, \"-m\", \"pip\", \"install\", \"-q\", \"faiss-cpu\"],\n",
72-
" stdout=subprocess.DEVNULL,\n",
73-
" stderr=subprocess.DEVNULL,\n",
74-
" )\n",
75-
" print(\"Installed: faiss-cpu (GPU not available)\")\n",
76-
"\n",
77-
"# Install visualization and embedding dependencies\n",
78-
"!pip install -q sentence-transformers matplotlib seaborn\n",
79-
"\n",
80-
"# Verify GPU\n",
81-
"import torch\n",
82-
"print(f\"\\nPyTorch version: {torch.__version__}\")\n",
83-
"print(f\"CUDA available: {torch.cuda.is_available()}\")\n",
84-
"if torch.cuda.is_available():\n",
85-
" print(f\"GPU device: {torch.cuda.get_device_name(0)}\")\n",
86-
" print(f\"GPU memory: {torch.cuda.get_device_properties(0).total_mem / 1e9:.1f} GB\")\n",
87-
"else:\n",
88-
" print(\"Running on CPU -- embeddings will be slower but benchmarks still work.\")\n",
89-
"\n",
90-
"# Verify ALMA installation\n",
91-
"import alma\n",
92-
"print(f\"\\nALMA version: {alma.__version__}\")"
93-
]
54+
"source": "# Clone the ALMA repo (benchmarks are not part of the pip package)\nimport os\n\nif not os.path.exists(\"/content/ALMA-memory\"):\n    !git clone https://github.com/RBKunnela/ALMA-memory.git /content/ALMA-memory\n    print(\"Cloned ALMA repo.\")\nelse:\n    !cd /content/ALMA-memory && git pull\n    print(\"Updated ALMA repo.\")\n\n# Install ALMA and dependencies from the cloned repo\nos.chdir(\"/content/ALMA-memory\")\n!pip install -q -e .\n\n# Install GPU-accelerated FAISS if available, fall back to CPU\nimport subprocess\nimport sys\n\ntry:\n    subprocess.check_call(\n        [sys.executable, \"-m\", \"pip\", \"install\", \"-q\", \"faiss-gpu\"],\n        stdout=subprocess.DEVNULL,\n        stderr=subprocess.DEVNULL,\n    )\n    print(\"Installed: faiss-gpu\")\nexcept subprocess.CalledProcessError:\n    subprocess.check_call(\n        [sys.executable, \"-m\", \"pip\", \"install\", \"-q\", \"faiss-cpu\"],\n        stdout=subprocess.DEVNULL,\n        stderr=subprocess.DEVNULL,\n    )\n    print(\"Installed: faiss-cpu (GPU not available)\")\n\n# Install visualization and embedding dependencies\n!pip install -q sentence-transformers matplotlib seaborn\n\n# Verify GPU\nimport torch\nprint(f\"\\nPyTorch version: {torch.__version__}\")\nprint(f\"CUDA available: {torch.cuda.is_available()}\")\nif torch.cuda.is_available():\n    print(f\"GPU device: {torch.cuda.get_device_name(0)}\")\n    # torch reports device capacity in bytes via the `total_memory` attribute\n    print(f\"GPU memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB\")\nelse:\n    print(\"Running on CPU -- embeddings will be slower but benchmarks still work.\")\n\n# Verify ALMA installation\nimport alma\nprint(f\"\\nALMA version: {alma.__version__}\")"
9455
},
9556
{
9657
"cell_type": "markdown",
@@ -110,32 +71,7 @@
11071
"execution_count": null,
11172
"metadata": {},
11273
"outputs": [],
113-
"source": [
114-
"import os\n",
115-
"import json\n",
116-
"\n",
117-
"DATA_DIR = \"/tmp/alma-benchmark-data\"\n",
118-
"DATA_FILE = os.path.join(DATA_DIR, \"longmemeval_s_cleaned.json\")\n",
119-
"RESULTS_DIR = \"/tmp/alma-benchmark-results\"\n",
120-
"\n",
121-
"os.makedirs(DATA_DIR, exist_ok=True)\n",
122-
"os.makedirs(RESULTS_DIR, exist_ok=True)\n",
123-
"\n",
124-
"if not os.path.exists(DATA_FILE):\n",
125-
" print(\"Downloading LongMemEval dataset from HuggingFace...\")\n",
126-
" !curl -fsSL -o {DATA_FILE} \\\n",
127-
" https://huggingface.co/datasets/xiaowu0162/longmemeval-cleaned/resolve/main/longmemeval_s_cleaned.json\n",
128-
" print(\"Download complete.\")\n",
129-
"else:\n",
130-
" print(\"Dataset already downloaded.\")\n",
131-
"\n",
132-
"# Quick sanity check\n",
133-
"with open(DATA_FILE) as f:\n",
134-
" data = json.load(f)\n",
135-
"print(f\"Questions loaded: {len(data)}\")\n",
136-
"print(f\"File size: {os.path.getsize(DATA_FILE) / 1e6:.1f} MB\")\n",
137-
"print(f\"Sample question: {data[0].get('question', data[0].get('query', 'N/A'))[:100]}...\")"
138-
]
74+
"source": "import os\nimport json\n\n# Ensure we're in the repo directory\nos.chdir(\"/content/ALMA-memory\")\n\nDATA_DIR = \"/tmp/alma-benchmark-data\"\nDATA_FILE = os.path.join(DATA_DIR, \"longmemeval_s_cleaned.json\")\nRESULTS_DIR = \"/tmp/alma-benchmark-results\"\n\nos.makedirs(DATA_DIR, exist_ok=True)\nos.makedirs(RESULTS_DIR, exist_ok=True)\n\n# Treat an empty file as missing so a previously failed download is retried\nif not os.path.exists(DATA_FILE) or os.path.getsize(DATA_FILE) == 0:\n    print(\"Downloading LongMemEval dataset from HuggingFace...\")\n    !curl -fsSL -o {DATA_FILE} \\\n        https://huggingface.co/datasets/xiaowu0162/longmemeval-cleaned/resolve/main/longmemeval_s_cleaned.json\n    # A failing shell command does not raise in Python, so check the result before claiming success\n    if not os.path.exists(DATA_FILE) or os.path.getsize(DATA_FILE) == 0:\n        raise RuntimeError(f\"Download failed: {DATA_FILE} is missing or empty\")\n    print(\"Download complete.\")\nelse:\n    print(\"Dataset already downloaded.\")\n\n# Quick sanity check\nwith open(DATA_FILE) as f:\n    data = json.load(f)\nprint(f\"Questions loaded: {len(data)}\")\nprint(f\"File size: {os.path.getsize(DATA_FILE) / 1e6:.1f} MB\")\n# Guard the sample lookup so an empty dataset reports 0 questions instead of raising IndexError\nif data:\n    print(f\"Sample question: {data[0].get('question', data[0].get('query', 'N/A'))[:100]}...\")"
13975
},
14076
{
14177
"cell_type": "markdown",

0 commit comments

Comments
 (0)