From 7f477c67203cfa4e7dd234c2fee9f2a2cbe7b0cb Mon Sep 17 00:00:00 2001 From: Yufeng He <40085740+he-yufeng@users.noreply.github.com> Date: Sun, 14 Jun 2026 05:19:31 +0800 Subject: [PATCH] docs: label MAP@k notebook metrics Signed-off-by: Yufeng He <40085740+he-yufeng@users.noreply.github.com> --- examples/00_quick_start/als_movielens.ipynb | 4 ++-- .../als_deep_dive.ipynb | 4 ++-- .../surprise_svd_deep_dive.ipynb | 6 +++--- .../als_movielens_diversity_metrics.ipynb | 12 ++++++------ examples/05_operationalize/als_movie_o16n.ipynb | 8 ++++---- 5 files changed, 17 insertions(+), 17 deletions(-) diff --git a/examples/00_quick_start/als_movielens.ipynb b/examples/00_quick_start/als_movielens.ipynb index e485a216d6..b30e2167bc 100644 --- a/examples/00_quick_start/als_movielens.ipynb +++ b/examples/00_quick_start/als_movielens.ipynb @@ -416,7 +416,7 @@ "text": [ "Model:\tALS\n", "Top K:\t10\n", - "MAP:\t0.006527\n", + "MAP@K:\t0.006527\n", "NDCG:\t0.051718\n", "Precision@K:\t0.051274\n", "Recall@K:\t0.018840\n" @@ -434,7 +434,7 @@ "source": [ "print(\"Model:\\tALS\",\n", " \"Top K:\\t%d\" % rank_eval.k,\n", - " \"MAP:\\t%f\" % rank_eval.map_at_k(),\n", + " \"MAP@K:\\t%f\" % rank_eval.map_at_k(),\n", " \"NDCG:\\t%f\" % rank_eval.ndcg_at_k(),\n", " \"Precision@K:\\t%f\" % rank_eval.precision_at_k(),\n", " \"Recall@K:\\t%f\" % rank_eval.recall_at_k(), sep='\\n')" diff --git a/examples/02_model_collaborative_filtering/als_deep_dive.ipynb b/examples/02_model_collaborative_filtering/als_deep_dive.ipynb index dceff16a67..e643aea3b4 100644 --- a/examples/02_model_collaborative_filtering/als_deep_dive.ipynb +++ b/examples/02_model_collaborative_filtering/als_deep_dive.ipynb @@ -502,7 +502,7 @@ "Precision@k = 0.04379639448568401\n", "Recall@k = 0.014286194686756822\n", "NDCG@k = 0.03730295615527768\n", - "Mean average precision = 0.0034619726118607337\n" + "MAP@k = 0.0034619726118607337\n" ] }, { @@ -529,7 +529,7 @@ " \"Precision@k = {}\".format(evaluations.precision_at_k()),\n", 
" \"Recall@k = {}\".format(evaluations.recall_at_k()),\n", " \"NDCG@k = {}\".format(evaluations.ndcg_at_k()),\n", - " \"Mean average precision = {}\".format(evaluations.map_at_k()),\n", + " \"MAP@k = {}\".format(evaluations.map_at_k()),\n", " sep=\"\\n\"\n", ")" ] diff --git a/examples/02_model_collaborative_filtering/surprise_svd_deep_dive.ipynb b/examples/02_model_collaborative_filtering/surprise_svd_deep_dive.ipynb index 0d5deef5a8..e0fecf537b 100644 --- a/examples/02_model_collaborative_filtering/surprise_svd_deep_dive.ipynb +++ b/examples/02_model_collaborative_filtering/surprise_svd_deep_dive.ipynb @@ -587,7 +587,7 @@ "\n", "As we can see, the RMSE and MAE (Mean Absolute Error) are pretty low (i.e. good), indicating that on average the error in the predicted ratings is less than 1. The RMSE is of course a bit higher, because high errors are penalized much more.\n", "\n", - "For comparison with other models, we also display Top-k and ranking metrics (MAP, NDCG, etc.). Note however that the SVD algorithm was designed for achieving high accuracy, not for top-rank predictions." + "For comparison with other models, we also display Top-k and ranking metrics (MAP@K, NDCG, etc.). Note however that the SVD algorithm was designed for achieving high accuracy, not for top-rank predictions." 
] }, { @@ -604,7 +604,7 @@ "rsquared:\t0.288045\n", "exp var:\t0.288157\n", "----\n", - "MAP:\t\t0.051501\n", + "MAP@K:\t\t0.051501\n", "NDCG:\t\t0.110465\n", "Precision@K:\t0.100425\n", "Recall@K:\t0.035267\n" @@ -636,7 +636,7 @@ "print(\"----\")\n", "\n", "print(\n", - " \"MAP:\\t\\t%f\" % eval_map,\n", + " \"MAP@K:\\t\\t%f\" % eval_map,\n", " \"NDCG:\\t\\t%f\" % eval_ndcg,\n", " \"Precision@K:\\t%f\" % eval_precision,\n", " \"Recall@K:\\t%f\" % eval_recall,\n", diff --git a/examples/03_evaluate/als_movielens_diversity_metrics.ipynb b/examples/03_evaluate/als_movielens_diversity_metrics.ipynb index 517f97afd2..4a511cb0fb 100644 --- a/examples/03_evaluate/als_movielens_diversity_metrics.ipynb +++ b/examples/03_evaluate/als_movielens_diversity_metrics.ipynb @@ -38,7 +38,7 @@ " - We also define a process which randomly recommends unseen items to each user. \n", " - We show two options to calculate item-item similarity: (1) based on item co-occurrence count; and (2) based on item feature vectors.\n", " \n", - "The comparision results show that the ALS recommender outperforms the random recommender on ranking metrics (Precision@k, Recall@k, NDCG@k, and\tMean average precision), while the random recommender outperforms ALS recommender on diversity metrics. This is because ALS is optimized for estimating the item rating as accurate as possible, therefore it performs well on accuracy metrics including rating and ranking metrics. As a side effect, the items being recommended tend to be popular items, which are the items mostly sold or viewed. It leaves the [long-tail items](https://github.com/microsoft/recommenders/blob/main/GLOSSARY.md) having less chance to get introduced to the users. This is the reason why ALS is not performing as well as a random recommender on diversity metrics. 
\n", + "The comparison results show that the ALS recommender outperforms the random recommender on ranking metrics (Precision@k, Recall@k, NDCG@k, and MAP@k), while the random recommender outperforms the ALS recommender on diversity metrics. This is because ALS is optimized for estimating the item rating as accurately as possible, therefore it performs well on accuracy metrics including rating and ranking metrics. As a side effect, the items being recommended tend to be popular items, which are the items mostly sold or viewed. It leaves the [long-tail items](https://github.com/microsoft/recommenders/blob/main/GLOSSARY.md) having less chance to get introduced to the users. This is the reason why ALS is not performing as well as a random recommender on diversity metrics. \n", "\n", "From the algorithmic point of view, items in the tail suffer from the cold-start problem, making them hard for recommendation systems to use. However, from the business point of view, oftentimes the items in the tail can be highly profitable, since, depending on supply, business can apply a higher margin to them. Recommendation systems that optimize metrics like novelty and diversity, can help to find users willing to get these long tail items. Usually there is a trade-off between one type of metric vs. another. One should decide which set of metrics to optimize based on business scenarios." 
] @@ -576,7 +576,7 @@ " \"Precision@k\": ranking_eval.precision_at_k(),\n", " \"Recall@k\": ranking_eval.recall_at_k(),\n", " \"NDCG@k\": ranking_eval.ndcg_at_k(),\n", - " \"Mean average precision\": ranking_eval.map_at_k()\n", + " \"MAP@k\": ranking_eval.map_at_k()\n", " \n", " }\n", " return metrics \n", @@ -761,7 +761,7 @@ "metadata": {}, "outputs": [], "source": [ - "cols = [\"Data\", \"Algo\", \"K\", \"Precision@k\", \"Recall@k\", \"NDCG@k\", \"Mean average precision\",\"catalog_coverage\", \"distributional_coverage\",\"novelty\", \"diversity\", \"serendipity\" ]\n", + "cols = [\"Data\", \"Algo\", \"K\", \"Precision@k\", \"Recall@k\", \"NDCG@k\", \"MAP@k\",\"catalog_coverage\", \"distributional_coverage\",\"novelty\", \"diversity\", \"serendipity\" ]\n", "df_results = pd.DataFrame(columns=cols)\n", "\n", "df_results.loc[1] = als_results \n", @@ -800,7 +800,7 @@ "