Review suggestions for PR #247

yellowcap · yellowcap · commit 08735391f851 · 2024-07-01T11:38:40.000+01:00
diff --git a/nbs/v1-inference-simsearch-naip-stacchip.ipynb b/nbs/v1-inference-simsearch-naip-stacchip.ipynb
@@ -36,8 +36,17 @@
    "source": [
     "import sys\n",
     "\n",
-    "sys.path.append(\"../model\")\n",
-    "sys.path.insert(0, \"../stacchip\")"
+    "sys.path.append(\"..\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "eabd5bef",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%pip install stacchip==0.1.31"
    ]
   },
   {
@@ -59,13 +68,12 @@
     "import numpy as np\n",
     "import pandas as pd\n",
     "import pystac_client\n",
-    "import requests\n",
+    "import rasterio\n",
     "import shapely\n",
     "import torch\n",
     "import yaml\n",
     "from box import Box\n",
     "from pyproj import Transformer\n",
-    "from rasterio.io import MemoryFile\n",
     "from stacchip.chipper import Chipper\n",
     "from stacchip.indexer import NoStatsChipIndexer\n",
     "from stacchip.processors.prechip import normalize_timestamp\n",
@@ -145,20 +153,19 @@
     "    Returns:\n",
     "    tuple: Bounds coordinates and centroid coordinates.\n",
     "    \"\"\"\n",
-    "    response = requests.get(url)\n",
-    "    response.raise_for_status()\n",
+    "    with rasterio.open(url) as rst:\n",
+    "        bounds = rst.bounds\n",
+    "        transformer = Transformer.from_crs(rst.crs, 4326)\n",
+    "\n",
+    "        centroid_x = (bounds.left + bounds.right) / 2\n",
+    "        centroid_y = (bounds.top + bounds.bottom) / 2\n",
+    "\n",
+    "        centroid_x, centroid_y = transformer.transform(centroid_x, centroid_y)\n",
     "\n",
-    "    with MemoryFile(response.content) as memfile:\n",
-    "        with memfile.open() as src:\n",
-    "            bounds = src.bounds\n",
-    "            transformer = Transformer.from_crs(src.crs, 4326)\n",
-    "            # Calculate centroid\n",
-    "            centroid_x = (bounds.left + bounds.right) / 2\n",
-    "            centroid_y = (bounds.top + bounds.bottom) / 2\n",
-    "            centroid_x, centroid_y = transformer.transform(centroid_x, centroid_y)\n",
-    "            bounds_b, bounds_l = transformer.transform(bounds.left, bounds.bottom)\n",
-    "            bounds_t, bounds_r = transformer.transform(bounds.right, bounds.top)\n",
-    "            return [bounds_b, bounds_l, bounds_t, bounds_r], centroid_x, centroid_y"
+    "        bounds_b, bounds_l = transformer.transform(bounds.left, bounds.bottom)\n",
+    "        bounds_t, bounds_r = transformer.transform(bounds.right, bounds.top)\n",
+    "\n",
+    "        return [bounds_b, bounds_l, bounds_t, bounds_r], [centroid_x, centroid_y]"
    ]
   },
   {
@@ -192,11 +199,13 @@
     "    print(f\"Bounds coordinates: {bounds}, centroid coordinates: {centroid}\")\n",
     "\n",
     "    # Instantiate the chipper\n",
-    "    chipper = Chipper(indexer, asset_blacklist=[\"metadata\"])\n",
+    "    chipper = Chipper(\n",
+    "        indexer, asset_blacklist=[\"thumbnail\", \"tilejson\", \"rendered_preview\"]\n",
+    "    )\n",
     "\n",
-    "    # Get 5 randomly sampled chips from the total\n",
+    "    # Get 25 randomly sampled chips from the total\n",
     "    # number of chips within this item's entire image\n",
-    "    for chip_id in random.sample(range(0, len(chipper)), 5):\n",
+    "    for chip_id in random.sample(range(0, len(chipper)), 25):\n",
     "        chips.append(chipper[chip_id])\n",
     "        chip_images.append(chipper[chip_id][\"image\"])\n",
     "        chip_bounds.append(bounds)\n",
@@ -205,7 +214,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "e1c80c88-b91a-474d-8f66-d830982e4e82",
+   "id": "b61aaad7",
    "metadata": {},
    "source": [
     "Visualize a generated image chip."
@@ -408,7 +417,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "outdir_embeddings = \"./embeddings/\"\n",
+    "outdir_embeddings = \"../data/embeddings/\"\n",
     "os.makedirs(outdir_embeddings, exist_ok=True)"
    ]
   },
@@ -427,11 +436,12 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Load the pretrained model\n",
+    "# Download the pretrained model from\n",
+    "# https://huggingface.co/made-with-clay/Clay/blob/main/clay-v1-base.ckpt\n",
+    "# and save it as ../checkpoints/clay-v1-base.ckpt (the path loaded below).\n",
     "model = load_model(\n",
-    "    # ckpt=\"s3://clay-model-ckpt/v0.5.7/mae_v0.5.7_epoch-13_val-loss-0.3098.ckpt\",\n",
-    "    ckpt=\"../checkpoints/v0.5.7/mae_v0.5.7_epoch-13_val-loss-0.3098.ckpt\",\n",
-    "    device=\"cuda\",\n",
+    "    ckpt=\"../checkpoints/clay-v1-base.ckpt\",\n",
+    "    device=torch.device(\"cuda\") if torch.cuda.is_available() else torch.device(\"cpu\"),\n",
     ")"
    ]
   },
@@ -495,17 +505,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "embeddings[0].shape"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "1bf9e1fc-b432-4ec3-a028-f85d2ff57469",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "len(embeddings)"
+    "print(f\"Created {len(embeddings)} embeddings of shape {embeddings[0].shape[1]}\")"
    ]
   },
   {
@@ -525,7 +525,7 @@
    "outputs": [],
    "source": [
     "# Connect to the embeddings database\n",
-    "db = lancedb.connect(\"embeddings\")"
+    "db = lancedb.connect(outdir_embeddings)"
    ]
   },
   {
@@ -583,7 +583,10 @@
    "outputs": [],
    "source": [
     "# Drop existing table if any\n",
-    "db.drop_table(\"clay-v001\")\n",
+    "try:\n",
+    "    db.drop_table(\"clay-v001\")\n",
+    "except FileNotFoundError:\n",
+    "    pass\n",
     "db.table_names()"
    ]
   },
@@ -618,7 +621,8 @@
    "outputs": [],
    "source": [
     "# Perform the search\n",
-    "result = tbl.search(query=v).limit(4).to_pandas()"
+    "search_x_images = 6\n",
+    "result = tbl.search(query=v).limit(search_x_images).to_pandas()"
    ]
   },
   {
@@ -638,7 +642,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "def plot(df, cols=4):\n",
+    "def plot(df, cols=4, save=False):\n",
     "    \"\"\"\n",
     "    Plot the top similar images.\n",
     "\n",
@@ -656,7 +660,8 @@
     "        ax.set_title(f\"{row['idx']}\")\n",
     "        i += 1\n",
     "    plt.tight_layout()\n",
-    "    fig.savefig(\"similar.png\")"
+    "    if save:\n",
+    "        fig.savefig(\"similar.png\")"
    ]
   },
   {
@@ -667,7 +672,7 @@
    "outputs": [],
    "source": [
     "# Plot the top similar images\n",
-    "plot(result)"
+    "plot(result, search_x_images)"
    ]
   },
   {
@@ -695,7 +700,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.0"
+   "version": "3.11.8"
   }
  },
  "nbformat": 4,