From 1450aebb74fe4ae5ed28913910cb425eb348166c Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Mon, 11 Mar 2024 14:54:10 +0800 Subject: [PATCH] Fix pairwise objective with NDCG metric along with custom gain. (#10100) * Fix pairwise objective with NDCG metric. - Allow setting `ndcg_exp_gain` for `rank:pairwise`. This is useful when using pairwise for objective but ndcg for metric. --- src/objective/lambdarank_obj.cc | 9 +++++++-- tests/cpp/common/test_parameter.cc | 5 +++++ tests/python/test_ranking.py | 14 ++++++++++++++ 3 files changed, 26 insertions(+), 2 deletions(-) diff --git a/src/objective/lambdarank_obj.cc b/src/objective/lambdarank_obj.cc index b7e290d416e1..36495d0caa88 100644 --- a/src/objective/lambdarank_obj.cc +++ b/src/objective/lambdarank_obj.cc @@ -474,7 +474,6 @@ class LambdaRankMAP : public LambdaRankObj { public: void GetGradientImpl(std::int32_t iter, const HostDeviceVector& predt, const MetaInfo& info, linalg::Matrix* out_gpair) { - CHECK(param_.ndcg_exp_gain) << "NDCG gain can not be set for the MAP objective."; if (ctx_->IsCUDA()) { return cuda_impl::LambdaRankGetGradientMAP( ctx_, iter, predt, info, GetCache(), ti_plus_.View(ctx_->Device()), @@ -564,7 +563,6 @@ class LambdaRankPairwise : public LambdaRankObj& predt, const MetaInfo& info, linalg::Matrix* out_gpair) { - CHECK(param_.ndcg_exp_gain) << "NDCG gain can not be set for the pairwise objective."; if (ctx_->IsCUDA()) { return cuda_impl::LambdaRankGetGradientPairwise( ctx_, iter, predt, info, GetCache(), ti_plus_.View(ctx_->Device()), @@ -610,6 +608,13 @@ class LambdaRankPairwise : public LambdaRankObjRankEvalMetric("ndcg"); } + + [[nodiscard]] Json DefaultMetricConfig() const override { + Json config{Object{}}; + config["name"] = String{DefaultEvalMetric()}; + config["lambdarank_param"] = ToJson(param_); + return config; + } }; #if !defined(XGBOOST_USE_CUDA) diff --git a/tests/cpp/common/test_parameter.cc b/tests/cpp/common/test_parameter.cc index 5e8021a1e7ba..5288366f8831 100644 
--- a/tests/cpp/common/test_parameter.cc +++ b/tests/cpp/common/test_parameter.cc @@ -97,4 +97,9 @@ TEST(XGBoostParameter, Update) { ASSERT_NEAR(p.f, 2.71828f, kRtEps); ASSERT_NEAR(p.d, 2.71828, kRtEps); // default } + + // Just in case dmlc's use of global memory has any impact on parameters. + UpdatableParam a, b; + a.UpdateAllowUnknown(xgboost::Args{{"f", "2.71828"}}); + ASSERT_NE(a.f, b.f); } diff --git a/tests/python/test_ranking.py b/tests/python/test_ranking.py index f09ceceac99c..49508f594c52 100644 --- a/tests/python/test_ranking.py +++ b/tests/python/test_ranking.py @@ -54,6 +54,20 @@ def ndcg_gain(y: np.ndarray) -> np.ndarray: assert byxgb.evals_result() == bynp.evals_result() assert byxgb_json == bynp_json + # test that pairwise can handle max_rel > 31 while the ndcg metric is using a custom gain + X, y, q, w = tm.make_ltr(n_samples=1024, n_features=4, n_query_groups=3, max_rel=33) + ranknet = xgboost.XGBRanker( + tree_method="hist", + ndcg_exp_gain=False, + n_estimators=10, + objective="rank:pairwise", + ) + ranknet.fit(X, y, qid=q, eval_set=[(X, y)], eval_qid=[q]) + history = ranknet.evals_result() + assert ( + history["validation_0"]["ndcg@32"][0] < history["validation_0"]["ndcg@32"][-1] + ) + def test_ranking_with_unweighted_data(): Xrow = np.array([1, 2, 6, 8, 11, 14, 16, 17])