Skip to content

Commit

Permalink
Review suggestions + typos fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
afoucret committed Jan 30, 2024
1 parent 43dcb4e commit 8e84eb9
Showing 1 changed file with 11 additions and 4 deletions.
15 changes: 11 additions & 4 deletions notebooks/search/08-learning-to-rank.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -713,18 +713,18 @@
"feature_logger = FeatureLogger(es_client, MOVIE_INDEX, ltr_config)\n",
"\n",
"\n",
"# This method will be applied for each group of query in the judgment log:\n",
"# This method will be applied for each query group in the judgment log:\n",
"def _extract_query_features(query_judgements_group):\n",
" # Retrieve document ids in the query group as strings.\n",
" doc_ids = query_judgements_group[\"doc_id\"].astype(\"str\").to_list()\n",
"\n",
" # Resolve query paras for the current query group (e.g.: {\"query\": \"batman\"}).\n",
" # Resolve query params for the current query group (e.g.: {\"query\": \"batman\"}).\n",
" query_params = {\"query\": query_judgements_group[\"query\"].iloc[0]}\n",
"\n",
" # Extract the features for the documents in the query group:\n",
" doc_features = feature_logger.extract_features(query_params, doc_ids)\n",
"\n",
" # Adding a column to the dataframe for each features:\n",
" # Adding a column to the dataframe for each feature:\n",
" for feature_index, feature_name in enumerate(ltr_config.feature_names):\n",
" query_judgements_group[feature_name] = numpy.array([doc_features[doc_id][feature_index] for doc_id in doc_ids])\n",
"\n",
Expand Down Expand Up @@ -919,8 +919,8 @@
"# Split the dataset into two parts, used respectively for training and evaluation of the model.\n",
"group_preserving_splitter = GroupShuffleSplit(n_splits=1, train_size=0.7).split(X, y, groups)\n",
"train_idx, eval_idx = next(group_preserving_splitter)\n",
"train_features, eval_features = X.loc[train_idx], X.loc[eval_idx]\n",
"\n",
"train_features, eval_features = X.loc[train_idx], X.loc[eval_idx]\n",
"train_target, eval_target = y.loc[train_idx], y.loc[eval_idx]\n",
"train_query_groups, eval_query_groups = groups.loc[train_idx], groups.loc[eval_idx]\n",
"\n",
Expand Down Expand Up @@ -1138,6 +1138,13 @@
" for movie in rescored_search_response[\"hits\"][\"hits\"]\n",
"]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We saw above that the title and popularity fields are important ranking features in our model. Here we can see that all results now contain the query terms in the title. Moreover, more popular movies rank higher; for example, `Star Wars: Episode I - The Phantom Menace` is now in third position."
]
}
],
"metadata": {
Expand Down

0 comments on commit 8e84eb9

Please sign in to comment.