diff --git a/notebooks/search/08-learning-to-rank.ipynb b/notebooks/search/08-learning-to-rank.ipynb index 13868edb..2b68d14a 100644 --- a/notebooks/search/08-learning-to-rank.ipynb +++ b/notebooks/search/08-learning-to-rank.ipynb @@ -34,7 +34,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 1, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -44,9 +44,7 @@ }, "outputs": [], "source": [ - "# TODO: when eland 8.12.1 is released, we can avoid installing from github main:\n", - "!pip install -qU git+https://github.com/elastic/eland@main\n", - "!pip install -qU elasticsearch \"eland[scikit-learn]\" xgboost tqdm\n", + "!pip install -qU elasticsearch eland \"eland[scikit-learn]\" xgboost tqdm\n", "\n", "from tqdm import tqdm\n", "\n", @@ -67,7 +65,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 2, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -130,7 +128,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 3, "metadata": { "id": "gFm7i-b7mOpJ" }, @@ -179,7 +177,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 4, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -256,7 +254,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 5, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -418,7 +416,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 6, "metadata": { "id": "LjxAj4lQqEYJ" }, @@ -481,7 +479,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 7, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -508,219 +506,39 @@ "name": "stderr", "output_type": "stream", "text": [ - " 0%| | 0/16279 [00:0022\u001b[0m query_judgements_group[feature_name] \u001b[39m=\u001b[39m numpy\u001b[39m.\u001b[39marray([doc_features[doc_id][feature_index] \u001b[39mfor\u001b[39;00m doc_id \u001b[39min\u001b[39;00m doc_ids])\n\u001b[1;32m 24\u001b[0m \u001b[39mreturn\u001b[39;00m query_judgements_group\n\u001b[0;32m---> 27\u001b[0m judgments_with_features \u001b[39m=\u001b[39m judgments_df\u001b[39m.\u001b[39;49mgroupby(\u001b[39m\"\u001b[39;49m\u001b[39mquery_id\u001b[39;49m\u001b[39m\"\u001b[39;49m, group_keys\u001b[39m=\u001b[39;49m\u001b[39mFalse\u001b[39;49;00m)\u001b[39m.\u001b[39;49mprogress_apply(_extract_query_features)\n\u001b[1;32m 29\u001b[0m judgments_with_features\n", + "File \u001b[0;32m~/git/elasticsearch-labs/.venv/lib/python3.8/site-packages/tqdm/std.py:920\u001b[0m, in \u001b[0;36mtqdm.pandas..inner_generator..inner\u001b[0;34m(df, func, *args, **kwargs)\u001b[0m\n\u001b[1;32m 917\u001b[0m \u001b[39m# Apply the provided function (in **kwargs)\u001b[39;00m\n\u001b[1;32m 918\u001b[0m \u001b[39m# on the df using our wrapper (which provides bar updating)\u001b[39;00m\n\u001b[1;32m 919\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m--> 920\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mgetattr\u001b[39;49m(df, df_function)(wrapper, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[1;32m 921\u001b[0m \u001b[39mfinally\u001b[39;00m:\n\u001b[1;32m 922\u001b[0m t\u001b[39m.\u001b[39mclose()\n", + "File \u001b[0;32m~/git/elasticsearch-labs/.venv/lib/python3.8/site-packages/pandas/core/groupby/groupby.py:1567\u001b[0m, in \u001b[0;36mGroupBy.apply\u001b[0;34m(self, func, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1559\u001b[0m new_msg \u001b[39m=\u001b[39m (\n\u001b[1;32m 1560\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mThe operation \u001b[39m\u001b[39m{\u001b[39;00morig_func\u001b[39m}\u001b[39;00m\u001b[39m failed on a column. If any error is \u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 1561\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mraised, this will raise an exception in a future version \u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 1562\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mof pandas. Drop these columns to avoid this warning.\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 1563\u001b[0m )\n\u001b[1;32m 1564\u001b[0m \u001b[39mwith\u001b[39;00m rewrite_warning(\n\u001b[1;32m 1565\u001b[0m old_msg, \u001b[39mFutureWarning\u001b[39;00m, new_msg\n\u001b[1;32m 1566\u001b[0m ) \u001b[39mif\u001b[39;00m is_np_func \u001b[39melse\u001b[39;00m nullcontext():\n\u001b[0;32m-> 1567\u001b[0m result \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_python_apply_general(f, \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_selected_obj)\n\u001b[1;32m 1568\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mTypeError\u001b[39;00m:\n\u001b[1;32m 1569\u001b[0m \u001b[39m# gh-20949\u001b[39;00m\n\u001b[1;32m 1570\u001b[0m \u001b[39m# try again, with .apply acting as a filtering\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1574\u001b[0m \u001b[39m# fails on *some* columns, e.g. a numeric operation\u001b[39;00m\n\u001b[1;32m 1575\u001b[0m \u001b[39m# on a string grouper column\u001b[39;00m\n\u001b[1;32m 1577\u001b[0m \u001b[39mwith\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_group_selection_context():\n\u001b[1;32m 1578\u001b[0m \u001b[39m# GH#50538\u001b[39;00m\n", + "File \u001b[0;32m~/git/elasticsearch-labs/.venv/lib/python3.8/site-packages/pandas/core/groupby/groupby.py:1629\u001b[0m, in \u001b[0;36mGroupBy._python_apply_general\u001b[0;34m(self, f, data, not_indexed_same, is_transform, is_agg)\u001b[0m\n\u001b[1;32m 1592\u001b[0m \u001b[39m@final\u001b[39m\n\u001b[1;32m 1593\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m_python_apply_general\u001b[39m(\n\u001b[1;32m 1594\u001b[0m \u001b[39mself\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1599\u001b[0m is_agg: \u001b[39mbool\u001b[39m \u001b[39m=\u001b[39m \u001b[39mFalse\u001b[39;00m,\n\u001b[1;32m 1600\u001b[0m ) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m NDFrameT:\n\u001b[1;32m 1601\u001b[0m \u001b[39m \u001b[39m\u001b[39m\"\"\"\u001b[39;00m\n\u001b[1;32m 1602\u001b[0m \u001b[39m Apply function f in python space\u001b[39;00m\n\u001b[1;32m 1603\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1627\u001b[0m \u001b[39m data after applying f\u001b[39;00m\n\u001b[1;32m 1628\u001b[0m \u001b[39m \"\"\"\u001b[39;00m\n\u001b[0;32m-> 1629\u001b[0m values, mutated \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mgrouper\u001b[39m.\u001b[39;49mapply(f, data, \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49maxis)\n\u001b[1;32m 1630\u001b[0m \u001b[39mif\u001b[39;00m not_indexed_same \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[1;32m 1631\u001b[0m not_indexed_same \u001b[39m=\u001b[39m mutated \u001b[39mor\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mmutated\n", + "File \u001b[0;32m~/git/elasticsearch-labs/.venv/lib/python3.8/site-packages/pandas/core/groupby/ops.py:839\u001b[0m, in \u001b[0;36mBaseGrouper.apply\u001b[0;34m(self, f, data, axis)\u001b[0m\n\u001b[1;32m 837\u001b[0m \u001b[39m# group might be modified\u001b[39;00m\n\u001b[1;32m 838\u001b[0m group_axes \u001b[39m=\u001b[39m group\u001b[39m.\u001b[39maxes\n\u001b[0;32m--> 839\u001b[0m res \u001b[39m=\u001b[39m f(group)\n\u001b[1;32m 840\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m mutated \u001b[39mand\u001b[39;00m \u001b[39mnot\u001b[39;00m _is_indexed_like(res, group_axes, axis):\n\u001b[1;32m 841\u001b[0m mutated \u001b[39m=\u001b[39m \u001b[39mTrue\u001b[39;00m\n", + "File \u001b[0;32m~/git/elasticsearch-labs/.venv/lib/python3.8/site-packages/tqdm/std.py:915\u001b[0m, in \u001b[0;36mtqdm.pandas..inner_generator..inner..wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 909\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mwrapper\u001b[39m(\u001b[39m*\u001b[39margs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs):\n\u001b[1;32m 910\u001b[0m \u001b[39m# update tbar correctly\u001b[39;00m\n\u001b[1;32m 911\u001b[0m \u001b[39m# it seems `pandas apply` calls `func` twice\u001b[39;00m\n\u001b[1;32m 912\u001b[0m \u001b[39m# on the first column/row to decide whether it can\u001b[39;00m\n\u001b[1;32m 913\u001b[0m \u001b[39m# take a fast or slow code path; so stop when t.total==t.n\u001b[39;00m\n\u001b[1;32m 914\u001b[0m t\u001b[39m.\u001b[39mupdate(n\u001b[39m=\u001b[39m\u001b[39m1\u001b[39m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m t\u001b[39m.\u001b[39mtotal \u001b[39mor\u001b[39;00m t\u001b[39m.\u001b[39mn \u001b[39m<\u001b[39m t\u001b[39m.\u001b[39mtotal \u001b[39melse\u001b[39;00m \u001b[39m0\u001b[39m)\n\u001b[0;32m--> 915\u001b[0m \u001b[39mreturn\u001b[39;00m func(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n", + "\u001b[1;32m/Users/afoucret/git/elasticsearch-labs/notebooks/search/08-learning-to-rank.ipynb Cellule 15\u001b[0m line \u001b[0;36m1\n\u001b[1;32m 15\u001b[0m query_params \u001b[39m=\u001b[39m {\u001b[39m\"\u001b[39m\u001b[39mquery\u001b[39m\u001b[39m\"\u001b[39m: query_judgements_group[\u001b[39m\"\u001b[39m\u001b[39mquery\u001b[39m\u001b[39m\"\u001b[39m]\u001b[39m.\u001b[39miloc[\u001b[39m0\u001b[39m]}\n\u001b[1;32m 17\u001b[0m \u001b[39m# Extract the features for the documents in the query group:\u001b[39;00m\n\u001b[0;32m---> 18\u001b[0m doc_features \u001b[39m=\u001b[39m feature_logger\u001b[39m.\u001b[39;49mextract_features(query_params, doc_ids)\n\u001b[1;32m 20\u001b[0m \u001b[39m# Adding a column to the dataframe for each feature:\u001b[39;00m\n\u001b[1;32m 21\u001b[0m \u001b[39mfor\u001b[39;00m feature_index, feature_name \u001b[39min\u001b[39;00m \u001b[39menumerate\u001b[39m(ltr_config\u001b[39m.\u001b[39mfeature_names):\n", + "File \u001b[0;32m~/git/elasticsearch-labs/.venv/lib/python3.8/site-packages/eland/ml/ltr/feature_logger.py:102\u001b[0m, in \u001b[0;36mFeatureLogger.extract_features\u001b[0;34m(self, query_params, doc_ids)\u001b[0m\n\u001b[1;32m 60\u001b[0m \u001b[39m\u001b[39m\u001b[39m\"\"\"\u001b[39;00m\n\u001b[1;32m 61\u001b[0m \u001b[39mExtract document features.\u001b[39;00m\n\u001b[1;32m 62\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 94\u001b[0m \u001b[39m>>> doc_features = feature_logger.extract_features(query_params={\"query\": \"yosemite\"}, doc_ids=[\"park-yosemite\", \"park-everglade\"])\u001b[39;00m\n\u001b[1;32m 95\u001b[0m \u001b[39m\"\"\"\u001b[39;00m\n\u001b[1;32m 97\u001b[0m doc_features \u001b[39m=\u001b[39m {\n\u001b[1;32m 98\u001b[0m doc_id: [\u001b[39mfloat\u001b[39m(\u001b[39m\"\u001b[39m\u001b[39mnan\u001b[39m\u001b[39m\"\u001b[39m)] \u001b[39m*\u001b[39m \u001b[39mlen\u001b[39m(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_model_config\u001b[39m.\u001b[39mfeature_extractors)\n\u001b[1;32m 99\u001b[0m \u001b[39mfor\u001b[39;00m doc_id \u001b[39min\u001b[39;00m doc_ids\n\u001b[1;32m 100\u001b[0m }\n\u001b[0;32m--> 102\u001b[0m \u001b[39mfor\u001b[39;00m doc_id, query_features \u001b[39min\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_extract_query_features(\n\u001b[1;32m 103\u001b[0m query_params, doc_ids\n\u001b[1;32m 104\u001b[0m )\u001b[39m.\u001b[39mitems():\n\u001b[1;32m 105\u001b[0m \u001b[39mfor\u001b[39;00m feature_name, feature_value \u001b[39min\u001b[39;00m query_features\u001b[39m.\u001b[39mitems():\n\u001b[1;32m 106\u001b[0m doc_features[doc_id][\n\u001b[1;32m 107\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_model_config\u001b[39m.\u001b[39mfeature_index(feature_name)\n\u001b[1;32m 108\u001b[0m ] \u001b[39m=\u001b[39m feature_value\n", + "File \u001b[0;32m~/git/elasticsearch-labs/.venv/lib/python3.8/site-packages/eland/ml/ltr/feature_logger.py:159\u001b[0m, in \u001b[0;36mFeatureLogger._extract_query_features\u001b[0;34m(self, query_params, doc_ids)\u001b[0m\n\u001b[1;32m 151\u001b[0m __headers \u001b[39m=\u001b[39m {\u001b[39m\"\u001b[39m\u001b[39maccept\u001b[39m\u001b[39m\"\u001b[39m: \u001b[39m\"\u001b[39m\u001b[39mapplication/json\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39mcontent-type\u001b[39m\u001b[39m\"\u001b[39m: \u001b[39m\"\u001b[39m\u001b[39mapplication/json\u001b[39m\u001b[39m\"\u001b[39m}\n\u001b[1;32m 152\u001b[0m __body \u001b[39m=\u001b[39m {\n\u001b[1;32m 153\u001b[0m \u001b[39m\"\u001b[39m\u001b[39msource\u001b[39m\u001b[39m\"\u001b[39m: \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_script_source,\n\u001b[1;32m 154\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mparams\u001b[39m\u001b[39m\"\u001b[39m: {\u001b[39m*\u001b[39m\u001b[39m*\u001b[39mquery_params, \u001b[39m\"\u001b[39m\u001b[39m__doc_ids\u001b[39m\u001b[39m\"\u001b[39m: doc_ids, \u001b[39m\"\u001b[39m\u001b[39m__size\u001b[39m\u001b[39m\"\u001b[39m: \u001b[39mlen\u001b[39m(doc_ids)},\n\u001b[1;32m 155\u001b[0m }\n\u001b[1;32m 157\u001b[0m \u001b[39mreturn\u001b[39;00m {\n\u001b[1;32m 158\u001b[0m hit[\u001b[39m\"\u001b[39m\u001b[39m_id\u001b[39m\u001b[39m\"\u001b[39m]: hit[\u001b[39m\"\u001b[39m\u001b[39mmatched_queries\u001b[39m\u001b[39m\"\u001b[39m] \u001b[39mif\u001b[39;00m \u001b[39m\"\u001b[39m\u001b[39mmatched_queries\u001b[39m\u001b[39m\"\u001b[39m \u001b[39min\u001b[39;00m hit \u001b[39melse\u001b[39;00m {}\n\u001b[0;32m--> 159\u001b[0m \u001b[39mfor\u001b[39;00m hit \u001b[39min\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_client\u001b[39m.\u001b[39;49mperform_request(\n\u001b[1;32m 160\u001b[0m \u001b[39m\"\u001b[39;49m\u001b[39mGET\u001b[39;49m\u001b[39m\"\u001b[39;49m, __path, params\u001b[39m=\u001b[39;49m__query, headers\u001b[39m=\u001b[39;49m__headers, body\u001b[39m=\u001b[39;49m__body\n\u001b[1;32m 161\u001b[0m )[\u001b[39m\"\u001b[39m\u001b[39mhits\u001b[39m\u001b[39m\"\u001b[39m][\u001b[39m\"\u001b[39m\u001b[39mhits\u001b[39m\u001b[39m\"\u001b[39m]\n\u001b[1;32m 162\u001b[0m }\n", + "File \u001b[0;32m~/git/elasticsearch-labs/.venv/lib/python3.8/site-packages/elasticsearch/_sync/client/_base.py:285\u001b[0m, in \u001b[0;36mBaseClient.perform_request\u001b[0;34m(self, method, path, params, headers, body)\u001b[0m\n\u001b[1;32m 282\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 283\u001b[0m target \u001b[39m=\u001b[39m path\n\u001b[0;32m--> 285\u001b[0m meta, resp_body \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mtransport\u001b[39m.\u001b[39;49mperform_request(\n\u001b[1;32m 286\u001b[0m method,\n\u001b[1;32m 287\u001b[0m target,\n\u001b[1;32m 288\u001b[0m headers\u001b[39m=\u001b[39;49mrequest_headers,\n\u001b[1;32m 289\u001b[0m body\u001b[39m=\u001b[39;49mbody,\n\u001b[1;32m 290\u001b[0m request_timeout\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_request_timeout,\n\u001b[1;32m 291\u001b[0m max_retries\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_max_retries,\n\u001b[1;32m 292\u001b[0m retry_on_status\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_retry_on_status,\n\u001b[1;32m 293\u001b[0m retry_on_timeout\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_retry_on_timeout,\n\u001b[1;32m 294\u001b[0m client_meta\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_client_meta,\n\u001b[1;32m 295\u001b[0m )\n\u001b[1;32m 297\u001b[0m \u001b[39m# HEAD with a 404 is returned as a normal response\u001b[39;00m\n\u001b[1;32m 298\u001b[0m \u001b[39m# since this is used as an 'exists' functionality.\u001b[39;00m\n\u001b[1;32m 299\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m (method \u001b[39m==\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mHEAD\u001b[39m\u001b[39m\"\u001b[39m \u001b[39mand\u001b[39;00m meta\u001b[39m.\u001b[39mstatus \u001b[39m==\u001b[39m \u001b[39m404\u001b[39m) \u001b[39mand\u001b[39;00m (\n\u001b[1;32m 300\u001b[0m \u001b[39mnot\u001b[39;00m \u001b[39m200\u001b[39m \u001b[39m<\u001b[39m\u001b[39m=\u001b[39m meta\u001b[39m.\u001b[39mstatus \u001b[39m<\u001b[39m \u001b[39m299\u001b[39m\n\u001b[1;32m 301\u001b[0m \u001b[39mand\u001b[39;00m (\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 305\u001b[0m )\n\u001b[1;32m 306\u001b[0m ):\n", + "File \u001b[0;32m~/git/elasticsearch-labs/.venv/lib/python3.8/site-packages/elastic_transport/_transport.py:328\u001b[0m, in \u001b[0;36mTransport.perform_request\u001b[0;34m(self, method, target, body, headers, max_retries, retry_on_status, retry_on_timeout, request_timeout, client_meta)\u001b[0m\n\u001b[1;32m 326\u001b[0m start_time \u001b[39m=\u001b[39m time\u001b[39m.\u001b[39mtime()\n\u001b[1;32m 327\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m--> 328\u001b[0m meta, raw_data \u001b[39m=\u001b[39m node\u001b[39m.\u001b[39;49mperform_request(\n\u001b[1;32m 329\u001b[0m method,\n\u001b[1;32m 330\u001b[0m target,\n\u001b[1;32m 331\u001b[0m body\u001b[39m=\u001b[39;49mrequest_body,\n\u001b[1;32m 332\u001b[0m headers\u001b[39m=\u001b[39;49mrequest_headers,\n\u001b[1;32m 333\u001b[0m request_timeout\u001b[39m=\u001b[39;49mrequest_timeout,\n\u001b[1;32m 334\u001b[0m )\n\u001b[1;32m 335\u001b[0m _logger\u001b[39m.\u001b[39minfo(\n\u001b[1;32m 336\u001b[0m \u001b[39m\"\u001b[39m\u001b[39m%s\u001b[39;00m\u001b[39m \u001b[39m\u001b[39m%s\u001b[39;00m\u001b[39m%s\u001b[39;00m\u001b[39m [status:\u001b[39m\u001b[39m%s\u001b[39;00m\u001b[39m duration:\u001b[39m\u001b[39m%.3f\u001b[39;00m\u001b[39ms]\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 337\u001b[0m \u001b[39m%\u001b[39m (\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 343\u001b[0m )\n\u001b[1;32m 344\u001b[0m )\n\u001b[1;32m 346\u001b[0m \u001b[39mif\u001b[39;00m method \u001b[39m!=\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mHEAD\u001b[39m\u001b[39m\"\u001b[39m:\n", + "File \u001b[0;32m~/git/elasticsearch-labs/.venv/lib/python3.8/site-packages/elastic_transport/_node/_http_urllib3.py:167\u001b[0m, in \u001b[0;36mUrllib3HttpNode.perform_request\u001b[0;34m(self, method, target, body, headers, request_timeout)\u001b[0m\n\u001b[1;32m 164\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 165\u001b[0m body_to_send \u001b[39m=\u001b[39m \u001b[39mNone\u001b[39;00m\n\u001b[0;32m--> 167\u001b[0m response \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mpool\u001b[39m.\u001b[39;49murlopen(\n\u001b[1;32m 168\u001b[0m method,\n\u001b[1;32m 169\u001b[0m target,\n\u001b[1;32m 170\u001b[0m body\u001b[39m=\u001b[39;49mbody_to_send,\n\u001b[1;32m 171\u001b[0m retries\u001b[39m=\u001b[39;49mRetry(\u001b[39mFalse\u001b[39;49;00m),\n\u001b[1;32m 172\u001b[0m headers\u001b[39m=\u001b[39;49mrequest_headers,\n\u001b[1;32m 173\u001b[0m \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkw, \u001b[39m# type: ignore[arg-type]\u001b[39;49;00m\n\u001b[1;32m 174\u001b[0m )\n\u001b[1;32m 175\u001b[0m response_headers \u001b[39m=\u001b[39m HttpHeaders(response\u001b[39m.\u001b[39mheaders)\n\u001b[1;32m 176\u001b[0m data \u001b[39m=\u001b[39m response\u001b[39m.\u001b[39mdata\n", + "File \u001b[0;32m~/git/elasticsearch-labs/.venv/lib/python3.8/site-packages/urllib3/connectionpool.py:793\u001b[0m, in \u001b[0;36mHTTPConnectionPool.urlopen\u001b[0;34m(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, preload_content, decode_content, **response_kw)\u001b[0m\n\u001b[1;32m 790\u001b[0m response_conn \u001b[39m=\u001b[39m conn \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m release_conn \u001b[39melse\u001b[39;00m \u001b[39mNone\u001b[39;00m\n\u001b[1;32m 792\u001b[0m \u001b[39m# Make the request on the HTTPConnection object\u001b[39;00m\n\u001b[0;32m--> 793\u001b[0m response \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_make_request(\n\u001b[1;32m 794\u001b[0m conn,\n\u001b[1;32m 795\u001b[0m method,\n\u001b[1;32m 796\u001b[0m url,\n\u001b[1;32m 797\u001b[0m timeout\u001b[39m=\u001b[39;49mtimeout_obj,\n\u001b[1;32m 798\u001b[0m body\u001b[39m=\u001b[39;49mbody,\n\u001b[1;32m 799\u001b[0m headers\u001b[39m=\u001b[39;49mheaders,\n\u001b[1;32m 800\u001b[0m chunked\u001b[39m=\u001b[39;49mchunked,\n\u001b[1;32m 801\u001b[0m retries\u001b[39m=\u001b[39;49mretries,\n\u001b[1;32m 802\u001b[0m response_conn\u001b[39m=\u001b[39;49mresponse_conn,\n\u001b[1;32m 803\u001b[0m preload_content\u001b[39m=\u001b[39;49mpreload_content,\n\u001b[1;32m 804\u001b[0m decode_content\u001b[39m=\u001b[39;49mdecode_content,\n\u001b[1;32m 805\u001b[0m \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mresponse_kw,\n\u001b[1;32m 806\u001b[0m )\n\u001b[1;32m 808\u001b[0m \u001b[39m# Everything went great!\u001b[39;00m\n\u001b[1;32m 809\u001b[0m clean_exit \u001b[39m=\u001b[39m \u001b[39mTrue\u001b[39;00m\n", + "File \u001b[0;32m~/git/elasticsearch-labs/.venv/lib/python3.8/site-packages/urllib3/connectionpool.py:537\u001b[0m, in \u001b[0;36mHTTPConnectionPool._make_request\u001b[0;34m(self, conn, method, url, body, headers, retries, timeout, chunked, response_conn, preload_content, decode_content, enforce_content_length)\u001b[0m\n\u001b[1;32m 535\u001b[0m \u001b[39m# Receive the response from the server\u001b[39;00m\n\u001b[1;32m 536\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m--> 537\u001b[0m response \u001b[39m=\u001b[39m conn\u001b[39m.\u001b[39;49mgetresponse()\n\u001b[1;32m 538\u001b[0m \u001b[39mexcept\u001b[39;00m (BaseSSLError, \u001b[39mOSError\u001b[39;00m) \u001b[39mas\u001b[39;00m e:\n\u001b[1;32m 539\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_raise_timeout(err\u001b[39m=\u001b[39me, url\u001b[39m=\u001b[39murl, timeout_value\u001b[39m=\u001b[39mread_timeout)\n", + "File \u001b[0;32m~/git/elasticsearch-labs/.venv/lib/python3.8/site-packages/urllib3/connection.py:466\u001b[0m, in \u001b[0;36mHTTPConnection.getresponse\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 463\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39m.\u001b[39;00m\u001b[39mresponse\u001b[39;00m \u001b[39mimport\u001b[39;00m HTTPResponse\n\u001b[1;32m 465\u001b[0m \u001b[39m# Get the response from http.client.HTTPConnection\u001b[39;00m\n\u001b[0;32m--> 466\u001b[0m httplib_response \u001b[39m=\u001b[39m \u001b[39msuper\u001b[39;49m()\u001b[39m.\u001b[39;49mgetresponse()\n\u001b[1;32m 468\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m 469\u001b[0m assert_header_parsing(httplib_response\u001b[39m.\u001b[39mmsg)\n", + "File \u001b[0;32m~/.pyenv/versions/3.8.13/lib/python3.8/http/client.py:1348\u001b[0m, in \u001b[0;36mHTTPConnection.getresponse\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1346\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m 1347\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m-> 1348\u001b[0m response\u001b[39m.\u001b[39;49mbegin()\n\u001b[1;32m 1349\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mConnectionError\u001b[39;00m:\n\u001b[1;32m 1350\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mclose()\n", + "File \u001b[0;32m~/.pyenv/versions/3.8.13/lib/python3.8/http/client.py:316\u001b[0m, in \u001b[0;36mHTTPResponse.begin\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 314\u001b[0m \u001b[39m# read until we get a non-100 response\u001b[39;00m\n\u001b[1;32m 315\u001b[0m \u001b[39mwhile\u001b[39;00m \u001b[39mTrue\u001b[39;00m:\n\u001b[0;32m--> 316\u001b[0m version, status, reason \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_read_status()\n\u001b[1;32m 317\u001b[0m \u001b[39mif\u001b[39;00m status \u001b[39m!=\u001b[39m CONTINUE:\n\u001b[1;32m 318\u001b[0m \u001b[39mbreak\u001b[39;00m\n", + "File \u001b[0;32m~/.pyenv/versions/3.8.13/lib/python3.8/http/client.py:277\u001b[0m, in \u001b[0;36mHTTPResponse._read_status\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 276\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m_read_status\u001b[39m(\u001b[39mself\u001b[39m):\n\u001b[0;32m--> 277\u001b[0m line \u001b[39m=\u001b[39m \u001b[39mstr\u001b[39m(\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mfp\u001b[39m.\u001b[39;49mreadline(_MAXLINE \u001b[39m+\u001b[39;49m \u001b[39m1\u001b[39;49m), \u001b[39m\"\u001b[39m\u001b[39miso-8859-1\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m 278\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mlen\u001b[39m(line) \u001b[39m>\u001b[39m _MAXLINE:\n\u001b[1;32m 279\u001b[0m \u001b[39mraise\u001b[39;00m LineTooLong(\u001b[39m\"\u001b[39m\u001b[39mstatus line\u001b[39m\u001b[39m\"\u001b[39m)\n", + "File \u001b[0;32m~/.pyenv/versions/3.8.13/lib/python3.8/socket.py:669\u001b[0m, in \u001b[0;36mSocketIO.readinto\u001b[0;34m(self, b)\u001b[0m\n\u001b[1;32m 667\u001b[0m \u001b[39mwhile\u001b[39;00m \u001b[39mTrue\u001b[39;00m:\n\u001b[1;32m 668\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m--> 669\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_sock\u001b[39m.\u001b[39;49mrecv_into(b)\n\u001b[1;32m 670\u001b[0m \u001b[39mexcept\u001b[39;00m timeout:\n\u001b[1;32m 671\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_timeout_occurred \u001b[39m=\u001b[39m \u001b[39mTrue\u001b[39;00m\n", + "File \u001b[0;32m~/.pyenv/versions/3.8.13/lib/python3.8/ssl.py:1241\u001b[0m, in \u001b[0;36mSSLSocket.recv_into\u001b[0;34m(self, buffer, nbytes, flags)\u001b[0m\n\u001b[1;32m 1237\u001b[0m \u001b[39mif\u001b[39;00m flags \u001b[39m!=\u001b[39m \u001b[39m0\u001b[39m:\n\u001b[1;32m 1238\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\n\u001b[1;32m 1239\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mnon-zero flags not allowed in calls to recv_into() on \u001b[39m\u001b[39m%s\u001b[39;00m\u001b[39m\"\u001b[39m \u001b[39m%\u001b[39m\n\u001b[1;32m 1240\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m\u001b[39m__class__\u001b[39m)\n\u001b[0;32m-> 1241\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mread(nbytes, buffer)\n\u001b[1;32m 1242\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 1243\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39msuper\u001b[39m()\u001b[39m.\u001b[39mrecv_into(buffer, nbytes, flags)\n", + "File \u001b[0;32m~/.pyenv/versions/3.8.13/lib/python3.8/ssl.py:1099\u001b[0m, in \u001b[0;36mSSLSocket.read\u001b[0;34m(self, len, buffer)\u001b[0m\n\u001b[1;32m 1097\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m 1098\u001b[0m \u001b[39mif\u001b[39;00m buffer \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m-> 1099\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_sslobj\u001b[39m.\u001b[39;49mread(\u001b[39mlen\u001b[39;49m, buffer)\n\u001b[1;32m 1100\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 1101\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_sslobj\u001b[39m.\u001b[39mread(\u001b[39mlen\u001b[39m)\n", + "\u001b[0;31mKeyboardInterrupt\u001b[0m: " ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
query_idquerydoc_idgradetitle_bm25actors_bm25title_all_terms_bm25actors_all_terms_bm25popularity
0qid:5141insidious 2 netflix8464330NaN9.555246NaNNaN13.628
1qid:5141insidious 2 netflix4901819.857118NaNNaNNaN64.003
2qid:5141insidious 2 netflix382340NaNNaNNaNNaN143.211
3qid:5141insidious 2 netflix5676040NaNNaNNaNNaN32.913
4qid:5141insidious 2 netflix26979503.813253NaNNaNNaN21.058
..............................
384750qid:33832013 the wolverine2631150NaNNaNNaNNaN68.287
384751qid:33832013 the wolverine259130NaNNaNNaNNaN21.026
384752qid:33832013 the wolverine5676040NaNNaNNaNNaN32.913
384753qid:33832013 the wolverine5335350NaNNaNNaNNaN34.773
384754qid:33832013 the wolverine8763270NaNNaNNaNNaN25.920
\n", - "

384755 rows × 9 columns

\n", - "
" - ], - "text/plain": [ - " query_id query doc_id grade title_bm25 actors_bm25 \\\n", - "0 qid:5141 insidious 2 netflix 846433 0 NaN 9.555246 \n", - "1 qid:5141 insidious 2 netflix 49018 1 9.857118 NaN \n", - "2 qid:5141 insidious 2 netflix 38234 0 NaN NaN \n", - "3 qid:5141 insidious 2 netflix 567604 0 NaN NaN \n", - "4 qid:5141 insidious 2 netflix 269795 0 3.813253 NaN \n", - "... ... ... ... ... ... ... \n", - "384750 qid:3383 2013 the wolverine 263115 0 NaN NaN \n", - "384751 qid:3383 2013 the wolverine 25913 0 NaN NaN \n", - "384752 qid:3383 2013 the wolverine 567604 0 NaN NaN \n", - "384753 qid:3383 2013 the wolverine 533535 0 NaN NaN \n", - "384754 qid:3383 2013 the wolverine 876327 0 NaN NaN \n", - "\n", - " title_all_terms_bm25 actors_all_terms_bm25 popularity \n", - "0 NaN NaN 13.628 \n", - "1 NaN NaN 64.003 \n", - "2 NaN NaN 143.211 \n", - "3 NaN NaN 32.913 \n", - "4 NaN NaN 21.058 \n", - "... ... ... ... \n", - "384750 NaN NaN 68.287 \n", - "384751 NaN NaN 21.026 \n", - "384752 NaN NaN 32.913 \n", - "384753 NaN NaN 34.773 \n", - "384754 NaN NaN 25.920 \n", - "\n", - "[384755 rows x 9 columns]" - ] - }, - "execution_count": 36, - "metadata": {}, - "output_type": "execute_result" } ], "source": [ @@ -768,7 +586,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -956,7 +774,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -996,7 +814,7 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -1062,7 +880,7 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -1108,7 +926,7 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -1159,7 +977,7 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -1230,7 +1048,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.6" + "version": "3.8.13" }, "widgets": { "application/vnd.jupyter.widget-state+json": {