|
51 | 51 | "execution_count": null, |
52 | 52 | "metadata": {}, |
53 | 53 | "outputs": [], |
54 | | - "source": [ |
55 | | - "# Install ALMA from GitHub (latest main branch)\n", |
56 | | - "!pip install -q git+https://github.com/RBKunnela/ALMA-memory.git\n", |
57 | | - "\n", |
58 | | - "# Install GPU-accelerated FAISS if available, fall back to CPU\n", |
59 | | - "import subprocess\n", |
60 | | - "import sys\n", |
61 | | - "\n", |
62 | | - "try:\n", |
63 | | - " subprocess.check_call(\n", |
64 | | - " [sys.executable, \"-m\", \"pip\", \"install\", \"-q\", \"faiss-gpu\"],\n", |
65 | | - " stdout=subprocess.DEVNULL,\n", |
66 | | - " stderr=subprocess.DEVNULL,\n", |
67 | | - " )\n", |
68 | | - " print(\"Installed: faiss-gpu\")\n", |
69 | | - "except subprocess.CalledProcessError:\n", |
70 | | - " subprocess.check_call(\n", |
71 | | - " [sys.executable, \"-m\", \"pip\", \"install\", \"-q\", \"faiss-cpu\"],\n", |
72 | | - " stdout=subprocess.DEVNULL,\n", |
73 | | - " stderr=subprocess.DEVNULL,\n", |
74 | | - " )\n", |
75 | | - " print(\"Installed: faiss-cpu (GPU not available)\")\n", |
76 | | - "\n", |
77 | | - "# Install visualization and embedding dependencies\n", |
78 | | - "!pip install -q sentence-transformers matplotlib seaborn\n", |
79 | | - "\n", |
80 | | - "# Verify GPU\n", |
81 | | - "import torch\n", |
82 | | - "print(f\"\\nPyTorch version: {torch.__version__}\")\n", |
83 | | - "print(f\"CUDA available: {torch.cuda.is_available()}\")\n", |
84 | | - "if torch.cuda.is_available():\n", |
85 | | - " print(f\"GPU device: {torch.cuda.get_device_name(0)}\")\n", |
86 | | - " print(f\"GPU memory: {torch.cuda.get_device_properties(0).total_mem / 1e9:.1f} GB\")\n", |
87 | | - "else:\n", |
88 | | - " print(\"Running on CPU -- embeddings will be slower but benchmarks still work.\")\n", |
89 | | - "\n", |
90 | | - "# Verify ALMA installation\n", |
91 | | - "import alma\n", |
92 | | - "print(f\"\\nALMA version: {alma.__version__}\")" |
93 | | - ] |
| 54 | + "source": "# Clone the ALMA repo (benchmarks are not part of the pip package)\nimport os\n\nif not os.path.exists(\"/content/ALMA-memory\"):\n !git clone https://github.com/RBKunnela/ALMA-memory.git /content/ALMA-memory\n print(\"Cloned ALMA repo.\")\nelse:\n !cd /content/ALMA-memory && git pull\n print(\"Updated ALMA repo.\")\n\n# Install ALMA and dependencies from the cloned repo\nos.chdir(\"/content/ALMA-memory\")\n!pip install -q -e .\n\n# Install GPU-accelerated FAISS if available, fall back to CPU\nimport subprocess\nimport sys\n\ntry:\n subprocess.check_call(\n [sys.executable, \"-m\", \"pip\", \"install\", \"-q\", \"faiss-gpu\"],\n stdout=subprocess.DEVNULL,\n stderr=subprocess.DEVNULL,\n )\n print(\"Installed: faiss-gpu\")\nexcept subprocess.CalledProcessError:\n subprocess.check_call(\n [sys.executable, \"-m\", \"pip\", \"install\", \"-q\", \"faiss-cpu\"],\n stdout=subprocess.DEVNULL,\n stderr=subprocess.DEVNULL,\n )\n print(\"Installed: faiss-cpu (GPU not available)\")\n\n# Install visualization and embedding dependencies\n!pip install -q sentence-transformers matplotlib seaborn\n\n# Verify GPU\nimport torch\nprint(f\"\\nPyTorch version: {torch.__version__}\")\nprint(f\"CUDA available: {torch.cuda.is_available()}\")\nif torch.cuda.is_available():\n print(f\"GPU device: {torch.cuda.get_device_name(0)}\")\n print(f\"GPU memory: {torch.cuda.get_device_properties(0).total_mem / 1e9:.1f} GB\")\nelse:\n print(\"Running on CPU -- embeddings will be slower but benchmarks still work.\")\n\n# Verify ALMA installation\nimport alma\nprint(f\"\\nALMA version: {alma.__version__}\")" |
94 | 55 | }, |
95 | 56 | { |
96 | 57 | "cell_type": "markdown", |
|
110 | 71 | "execution_count": null, |
111 | 72 | "metadata": {}, |
112 | 73 | "outputs": [], |
113 | | - "source": [ |
114 | | - "import os\n", |
115 | | - "import json\n", |
116 | | - "\n", |
117 | | - "DATA_DIR = \"/tmp/alma-benchmark-data\"\n", |
118 | | - "DATA_FILE = os.path.join(DATA_DIR, \"longmemeval_s_cleaned.json\")\n", |
119 | | - "RESULTS_DIR = \"/tmp/alma-benchmark-results\"\n", |
120 | | - "\n", |
121 | | - "os.makedirs(DATA_DIR, exist_ok=True)\n", |
122 | | - "os.makedirs(RESULTS_DIR, exist_ok=True)\n", |
123 | | - "\n", |
124 | | - "if not os.path.exists(DATA_FILE):\n", |
125 | | - " print(\"Downloading LongMemEval dataset from HuggingFace...\")\n", |
126 | | - " !curl -fsSL -o {DATA_FILE} \\\n", |
127 | | - " https://huggingface.co/datasets/xiaowu0162/longmemeval-cleaned/resolve/main/longmemeval_s_cleaned.json\n", |
128 | | - " print(\"Download complete.\")\n", |
129 | | - "else:\n", |
130 | | - " print(\"Dataset already downloaded.\")\n", |
131 | | - "\n", |
132 | | - "# Quick sanity check\n", |
133 | | - "with open(DATA_FILE) as f:\n", |
134 | | - " data = json.load(f)\n", |
135 | | - "print(f\"Questions loaded: {len(data)}\")\n", |
136 | | - "print(f\"File size: {os.path.getsize(DATA_FILE) / 1e6:.1f} MB\")\n", |
137 | | - "print(f\"Sample question: {data[0].get('question', data[0].get('query', 'N/A'))[:100]}...\")" |
138 | | - ] |
| 74 | + "source": "import os\nimport json\n\n# Ensure we're in the repo directory\nos.chdir(\"/content/ALMA-memory\")\n\nDATA_DIR = \"/tmp/alma-benchmark-data\"\nDATA_FILE = os.path.join(DATA_DIR, \"longmemeval_s_cleaned.json\")\nRESULTS_DIR = \"/tmp/alma-benchmark-results\"\n\nos.makedirs(DATA_DIR, exist_ok=True)\nos.makedirs(RESULTS_DIR, exist_ok=True)\n\nif not os.path.exists(DATA_FILE):\n print(\"Downloading LongMemEval dataset from HuggingFace...\")\n !curl -fsSL -o {DATA_FILE} \\\n https://huggingface.co/datasets/xiaowu0162/longmemeval-cleaned/resolve/main/longmemeval_s_cleaned.json\n print(\"Download complete.\")\nelse:\n print(\"Dataset already downloaded.\")\n\n# Quick sanity check\nwith open(DATA_FILE) as f:\n data = json.load(f)\nprint(f\"Questions loaded: {len(data)}\")\nprint(f\"File size: {os.path.getsize(DATA_FILE) / 1e6:.1f} MB\")\nprint(f\"Sample question: {data[0].get('question', data[0].get('query', 'N/A'))[:100]}...\")" |
139 | 75 | }, |
140 | 76 | { |
141 | 77 | "cell_type": "markdown", |
|
0 commit comments