From 229f604360a13b34ba86068c8f162c310d413155 Mon Sep 17 00:00:00 2001
From: Aurelien FOUCRET
Date: Thu, 1 Feb 2024 14:41:21 +0100
Subject: [PATCH] Changing link to point to the main repo.

---
 notebooks/search/08-learning-to-rank.ipynb | 67 ++--------------------
 1 file changed, 6 insertions(+), 61 deletions(-)

diff --git a/notebooks/search/08-learning-to-rank.ipynb b/notebooks/search/08-learning-to-rank.ipynb
index ccf94470..9b2e95f0 100644
--- a/notebooks/search/08-learning-to-rank.ipynb
+++ b/notebooks/search/08-learning-to-rank.ipynb
@@ -10,7 +10,7 @@
     "\n",
     "TODO: udpate the link to elastic/elasticsearch-labs instead of my fork before merging.\n",
     "\n",
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/afoucret/elasticsearch-labs/blob/ltr-notebook/notebooks/search/08-learning-to-rank.ipynb)\n",
+    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/elastic/elasticsearch-labs/blob/ltr-notebook/notebooks/search/08-learning-to-rank.ipynb)\n",
     "\n",
     "In this notebook we will see an example on how to train a Learning To Rank model using [XGBoost](https://xgboost.ai/) and how to deploy it to be used as a rescorer in Elasticsearch.\n",
     "\n",
@@ -136,9 +136,7 @@
    "source": [
     "from urllib.parse import urljoin\n",
     "\n",
-    "# TODO: use elastic/elasticsearch-labs instead of afoucret/elasticsearch-labs before merging the PR.\n",
-    "\n",
-    "DATASET_BASE_URL = \"https://raw.githubusercontent.com/afoucret/elasticsearch-labs/ltr-notebook/notebooks/search/sample_data/learning-to-rank/\"\n",
+    "DATASET_BASE_URL = \"https://raw.githubusercontent.com/elastic/elasticsearch-labs/ltr-notebook/notebooks/search/sample_data/learning-to-rank/\"\n",
     "\n",
     "CORPUS_URL = urljoin(DATASET_BASE_URL, \"movies-corpus.jsonl.gz\")\n",
     "JUDGEMENTS_FILE_URL = urljoin(DATASET_BASE_URL, \"movies-judgments.tsv.gz\")\n",
@@ -177,7 +175,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 33,
+   "execution_count": 8,
    "metadata": {
     "colab": {
      "base_uri": "https://localhost:8080/"
@@ -192,7 +190,7 @@
      "text": [
       "Deleting index if it already exists: movies\n",
       "Creating index: movies\n",
-      "Loading the corpus from https://raw.githubusercontent.com/afoucret/elasticsearch-labs/ltr-notebook/notebooks/search/sample_data/learning-to-rank/movies-corpus.jsonl.gz\n",
+      "Loading the corpus from https://raw.githubusercontent.com/elastic/elasticsearch-labs/ltr-notebook/notebooks/search/sample_data/learning-to-rank/movies-corpus.jsonl.gz\n",
       "Indexing the corpus into movies ...\n",
       "Indexed 9750 documents into movies\n"
      ]
@@ -1037,12 +1035,12 @@
     "Once the model is uploaded to Elasticsearch, you will be able to use it as a rescorer in the _search API, as shown in this example:\n",
     "\n",
     "```\n",
-    "POST /_search\n",
+    "GET /movies/_search\n",
     "{\n",
     "  \"query\" : {\n",
     "    \"multi_match\" : {\n",
     "      \"query\": \"star wars\",\n",
-    "      \"field\": [\"title\", \"overview\", \"actors\", \"director\", \"tags\", \"characters\"]\n",
+    "      \"fields\": [\"title\", \"overview\", \"actors\", \"director\", \"tags\", \"characters\"]\n",
     "    }\n",
     "  },\n",
     "  \"rescore\" : {\n",
@@ -1154,59 +1152,6 @@
    "source": [
     "We saw above that the title and popularity fields are important ranking feature in our model. Here we can see that now all results contain the query terms in the title. Moreover, more popular movies rank higher, for example `Star Wars: Episode I - The Phantom Menace` is now in third position."
    ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 42,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "XGBRanker(base_score=None, booster=None, callbacks=None, colsample_bylevel=None,\n",
-       "          colsample_bynode=None, colsample_bytree=None, device=None,\n",
-       "          early_stopping_rounds=20, enable_categorical=False,\n",
-       "          eval_metric=['ndcg@10'], feature_types=None, gamma=None,\n",
-       "          grow_policy=None, importance_type=None, interaction_constraints=None,\n",
-       "          learning_rate=None, max_bin=None, max_cat_threshold=None,\n",
-       "          max_cat_to_onehot=None, max_delta_step=None, max_depth=None,\n",
-       "          max_leaves=None, min_child_weight=None, missing=nan,\n",
-       "          monotone_constraints=None, multi_strategy=None, n_estimators=None,\n",
-       "          n_jobs=None, num_parallel_tree=None, random_state=None, ...)"
-      ]
-     },
-     "execution_count": 42,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "from sklearn.metrics import get_scorer_names\n",
-    "get_scorer_names()\n",
-    "\n",
-    "ranker"
-   ]
   }
  ],
  "metadata": {