From 0983c945143d5fb86a1f4c93c4bf1aa1e6d0376f Mon Sep 17 00:00:00 2001 From: Mikhail Pravilov Date: Fri, 23 Jun 2023 17:53:08 +0200 Subject: [PATCH] Add more candidates if l0/linf have fewer candidates than requested. (#460) --- analysis/parameter_tuning.py | 14 ++++++ analysis/tests/parameter_tuning_test.py | 58 +++++++++++++++++++++++++ 2 files changed, 72 insertions(+) diff --git a/analysis/parameter_tuning.py b/analysis/parameter_tuning.py index 279b23f8..00ad2d88 100644 --- a/analysis/parameter_tuning.py +++ b/analysis/parameter_tuning.py @@ -157,6 +157,20 @@ def _find_candidate_parameters( linf_candidates = find_candidates_func( hist.linf_contributions_histogram, max_candidates_per_parameter) l0_bounds, linf_bounds = [], [] + + # if linf or l0 has fewer candidates than requested then we can add more + # candidates for the other parameter. + if (len(linf_candidates) < max_candidates_per_parameter and + len(l0_candidates) == max_candidates_per_parameter): + l0_candidates = find_candidates_func( + hist.l0_contributions_histogram, + int(max_candidates / len(linf_candidates))) + elif (len(l0_candidates) < max_candidates_per_parameter and + len(linf_candidates) == max_candidates_per_parameter): + linf_candidates = find_candidates_func( + hist.linf_contributions_histogram, + int(max_candidates / len(l0_candidates))) + for l0 in l0_candidates: for linf in linf_candidates: l0_bounds.append(l0) diff --git a/analysis/tests/parameter_tuning_test.py b/analysis/tests/parameter_tuning_test.py index e9dabdbf..73fd3f3f 100644 --- a/analysis/tests/parameter_tuning_test.py +++ b/analysis/tests/parameter_tuning_test.py @@ -111,6 +111,64 @@ def test_find_candidate_parameters_maximum_number_of_candidates_is_respected_whe self.assertEqual([4, 5, 4, 5], candidates.max_contributions_per_partition) + def test_find_candidate_parameters_more_candidates_for_l_0_when_not_so_many_l_inf_candidates( + self): + mock_l0_histogram = histograms.Histogram(None, None) + mock_l0_histogram.quantiles = mock.Mock(return_value=[1, 2, 3, 4, 5]) + setattr(mock_l0_histogram.__class__, 'max_value', 6) + mock_linf_histogram = histograms.Histogram(None, None) + mock_linf_histogram.quantiles = mock.Mock(return_value=[6, 7]) + + mock_histograms = histograms.DatasetHistograms(mock_l0_histogram, None, + mock_linf_histogram, + None, None) + parameters_to_tune = parameter_tuning.ParametersToTune( + max_partitions_contributed=True, + max_contributions_per_partition=True) + + candidates = parameter_tuning._find_candidate_parameters( + mock_histograms, + parameters_to_tune, + pipeline_dp.Metrics.COUNT, + ParametersSearchStrategy.QUANTILES, + max_candidates=9) + # sqrt(9) = 3, but l_inf has only 2 quantiles, therefore for l_0 we can + # take 9 / 2 = 4 quantiles, we take first 4 quantiles (1, 2, 3, 4). + # Addition of max_value (6) to l_inf does not change anything because + # l_inf set already contains 6. + self.assertEqual([1, 1, 2, 2, 3, 3, 4, 4], + candidates.max_partitions_contributed) + self.assertEqual([6, 7, 6, 7, 6, 7, 6, 7], + candidates.max_contributions_per_partition) + + def test_find_candidate_parameters_more_candidates_for_l_inf_when_not_so_many_l_0_candidates( + self): + mock_l0_histogram = histograms.Histogram(None, None) + mock_l0_histogram.quantiles = mock.Mock(return_value=[1]) + setattr(mock_l0_histogram.__class__, 'max_value', 8) + mock_linf_histogram = histograms.Histogram(None, None) + mock_linf_histogram.quantiles = mock.Mock(return_value=[3, 4, 5, 6, 7]) + + mock_histograms = histograms.DatasetHistograms(mock_l0_histogram, None, + mock_linf_histogram, + None, None) + parameters_to_tune = parameter_tuning.ParametersToTune( + max_partitions_contributed=True, + max_contributions_per_partition=True) + + candidates = parameter_tuning._find_candidate_parameters( + mock_histograms, + parameters_to_tune, + pipeline_dp.Metrics.COUNT, + ParametersSearchStrategy.QUANTILES, + max_candidates=10) + # sqrt(10) = 3, but l_0 has only 2 quantiles (1 and 8 -- max_value), + # therefore for l_inf we can take 10 / 2 = 5 quantiles. + self.assertEqual([1, 1, 1, 1, 1, 8, 8, 8, 8, 8], + candidates.max_partitions_contributed) + self.assertEqual([3, 4, 5, 6, 7, 3, 4, 5, 6, 7], + candidates.max_contributions_per_partition) + def test_find_candidate_parameters_constant_relative_step_strategy_big_n_max( self): mock_l0_histogram = histograms.Histogram(None, None)