Skip to content

Commit 8e84eb9

Browse files
committed
Review suggestions + typos fixes
1 parent 43dcb4e commit 8e84eb9

File tree

1 file changed

+11
-4
lines changed

1 file changed

+11
-4
lines changed

notebooks/search/08-learning-to-rank.ipynb

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -713,18 +713,18 @@
713713
"feature_logger = FeatureLogger(es_client, MOVIE_INDEX, ltr_config)\n",
714714
"\n",
715715
"\n",
716-
"# This method will be applied for each group of query in the judgment log:\n",
716+
"# This method will be applied for each query group in the judgment log:\n",
717717
"def _extract_query_features(query_judgements_group):\n",
718718
" # Retrieve document ids in the query group as strings.\n",
719719
" doc_ids = query_judgements_group[\"doc_id\"].astype(\"str\").to_list()\n",
720720
"\n",
721-
" # Resolve query paras for the current query group (e.g.: {\"query\": \"batman\"}).\n",
721+
" # Resolve query params for the current query group (e.g.: {\"query\": \"batman\"}).\n",
722722
" query_params = {\"query\": query_judgements_group[\"query\"].iloc[0]}\n",
723723
"\n",
724724
" # Extract the features for the documents in the query group:\n",
725725
" doc_features = feature_logger.extract_features(query_params, doc_ids)\n",
726726
"\n",
727-
" # Adding a column to the dataframe for each features:\n",
727+
" # Adding a column to the dataframe for each feature:\n",
728728
" for feature_index, feature_name in enumerate(ltr_config.feature_names):\n",
729729
" query_judgements_group[feature_name] = numpy.array([doc_features[doc_id][feature_index] for doc_id in doc_ids])\n",
730730
"\n",
@@ -919,8 +919,8 @@
919919
"# Split the dataset in two parts respectively used for training and evaluation of the model.\n",
920920
"group_preserving_splitter = GroupShuffleSplit(n_splits=1, train_size=0.7).split(X, y, groups)\n",
921921
"train_idx, eval_idx = next(group_preserving_splitter)\n",
922-
"train_features, eval_features = X.loc[train_idx], X.loc[eval_idx]\n",
923922
"\n",
923+
"train_features, eval_features = X.loc[train_idx], X.loc[eval_idx]\n",
924924
"train_target, eval_target = y.loc[train_idx], y.loc[eval_idx]\n",
925925
"train_query_groups, eval_query_groups = groups.loc[train_idx], groups.loc[eval_idx]\n",
926926
"\n",
@@ -1138,6 +1138,13 @@
11381138
" for movie in rescored_search_response[\"hits\"][\"hits\"]\n",
11391139
"]"
11401140
]
1141+
},
1142+
{
1143+
"cell_type": "markdown",
1144+
"metadata": {},
1145+
"source": [
1146+
"We saw above that the title and popularity fields are important ranking features in our model. Here we can see that now all results contain the query terms in the title. Moreover, more popular movies rank higher; for example, `Star Wars: Episode I - The Phantom Menace` is now in third position."
1147+
]
11411148
}
11421149
],
11431150
"metadata": {

0 commit comments

Comments
 (0)