From 1450aebb74fe4ae5ed28913910cb425eb348166c Mon Sep 17 00:00:00 2001
From: Jiaming Yuan <jm.yuan@outlook.com>
Date: Mon, 11 Mar 2024 14:54:10 +0800
Subject: [PATCH] Fix pairwise objective with NDCG metric along with custom
 gain. (#10100)

* Fix pairwise objective with NDCG metric.

- Allow setting `ndcg_exp_gain` for `rank:pairwise`.

This is useful when using pairwise as the objective but NDCG as the metric.
---
 src/objective/lambdarank_obj.cc    |  9 +++++++--
 tests/cpp/common/test_parameter.cc |  5 +++++
 tests/python/test_ranking.py       | 14 ++++++++++++++
 3 files changed, 26 insertions(+), 2 deletions(-)
diff --git a/src/objective/lambdarank_obj.cc b/src/objective/lambdarank_obj.cc
index b7e290d416e1..36495d0caa88 100644
--- a/src/objective/lambdarank_obj.cc
+++ b/src/objective/lambdarank_obj.cc
@@ -474,7 +474,6 @@ class LambdaRankMAP : public LambdaRankObj<LambdaRankMAP, ltr::MAPCache> {
  public:
   void GetGradientImpl(std::int32_t iter, const HostDeviceVector<float>& predt,
                        const MetaInfo& info, linalg::Matrix<GradientPair>* out_gpair) {
-    CHECK(param_.ndcg_exp_gain) << "NDCG gain can not be set for the MAP objective.";
     if (ctx_->IsCUDA()) {
       return cuda_impl::LambdaRankGetGradientMAP(
           ctx_, iter, predt, info, GetCache(), ti_plus_.View(ctx_->Device()),
@@ -564,7 +563,6 @@ class LambdaRankPairwise : public LambdaRankObj<LambdaRankPairwise, ltr::Ranking
  public:
   void GetGradientImpl(std::int32_t iter, const HostDeviceVector<float>& predt,
                        const MetaInfo& info, linalg::Matrix<GradientPair>* out_gpair) {
-    CHECK(param_.ndcg_exp_gain) << "NDCG gain can not be set for the pairwise objective.";
     if (ctx_->IsCUDA()) {
       return cuda_impl::LambdaRankGetGradientPairwise(
           ctx_, iter, predt, info, GetCache(), ti_plus_.View(ctx_->Device()),
@@ -610,6 +608,13 @@ class LambdaRankPairwise : public LambdaRankObj<LambdaRankPairwise, ltr::Ranking
   [[nodiscard]] const char* DefaultEvalMetric() const override {
     return this->RankEvalMetric("ndcg");
   }
+
+  [[nodiscard]] Json DefaultMetricConfig() const override {
+    Json config{Object{}};  // JSON object: default metric name + this objective's parameters
+    config["name"] = String{DefaultEvalMetric()};
+    config["lambdarank_param"] = ToJson(param_);  // carries e.g. `ndcg_exp_gain` with the config
+    return config;
+  }
 };
 
 #if !defined(XGBOOST_USE_CUDA)
diff --git a/tests/cpp/common/test_parameter.cc b/tests/cpp/common/test_parameter.cc
index 5e8021a1e7ba..5288366f8831 100644
--- a/tests/cpp/common/test_parameter.cc
+++ b/tests/cpp/common/test_parameter.cc
@@ -97,4 +97,9 @@ TEST(XGBoostParameter, Update) {
     ASSERT_NEAR(p.f, 2.71828f, kRtEps);
     ASSERT_NEAR(p.d, 2.71828, kRtEps);  // default
   }
+
+  // Just in case dmlc's use of global memory has any impact on parameters.
+  UpdatableParam a, b;
+  a.UpdateAllowUnknown(xgboost::Args{{"f", "2.71828"}});
+  ASSERT_NE(a.f, b.f);
 }
diff --git a/tests/python/test_ranking.py b/tests/python/test_ranking.py
index f09ceceac99c..49508f594c52 100644
--- a/tests/python/test_ranking.py
+++ b/tests/python/test_ranking.py
@@ -54,6 +54,20 @@ def ndcg_gain(y: np.ndarray) -> np.ndarray:
     assert byxgb.evals_result() == bynp.evals_result()
     assert byxgb_json == bynp_json
 
+    # test pairwise can handle max_rel > 31, while ndcg metric is using custom gain
+    X, y, q, w = tm.make_ltr(n_samples=1024, n_features=4, n_query_groups=3, max_rel=33)
+    ranknet = xgboost.XGBRanker(
+        tree_method="hist",
+        ndcg_exp_gain=False,
+        n_estimators=10,
+        objective="rank:pairwise",
+    )
+    ranknet.fit(X, y, qid=q, eval_set=[(X, y)], eval_qid=[q])
+    history = ranknet.evals_result()
+    assert (
+        history["validation_0"]["ndcg@32"][0] < history["validation_0"]["ndcg@32"][-1]
+    )
+
 
 def test_ranking_with_unweighted_data():
     Xrow = np.array([1, 2, 6, 8, 11, 14, 16, 17])