Commit: tests
dvadym committed Jul 19, 2023
1 parent cb278ef commit ef0e655
Showing 2 changed files with 116 additions and 14 deletions.
34 changes: 21 additions & 13 deletions analysis/parameter_tuning.py
@@ -138,7 +138,8 @@ def _find_candidate_parameters(
Args:
hist: dataset contribution histogram.
parameters_to_tune: which parameters to tune.
metric: dp aggregation for which candidates are computed.
metric: dp aggregation for which candidates are computed. If metric is
None, no metrics are computed, i.e. only partition selection is performed.
strategy: determines the strategy how to select candidates, see comments
to enum values for full description of the respective strategies.
max_candidates: how many candidates ((l0, linf) pairs) can be in the
@@ -243,15 +244,18 @@ def tune(col,
"""Tunes parameters.
It works in the following way:
1. Based on quantiles of privacy id contributions, candidates for
contribution bounding parameters chosen.
1. Candidates for contribution bounding parameters are chosen based on
options.parameters_search_strategy.
2. Utility analysis run for those parameters.
3. The best parameter set is chosen according to
options.minimizing_function.
The result contains output metrics for all utility analyses which were
performed.
To tune parameters for DPEngine.select_partitions, set
options.aggregate_params.metrics to an empty list.
Args:
col: collection where all elements are of the same type.
contribution_histograms:
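A minimal usage sketch of tune() for partition-selection tuning (empty metrics), assembled from the calls that appear in the tests added by this commit; the import paths are assumptions, and the TuneOptions construction is taken as a parameter because it is not shown in this diff:

```python
import pipeline_dp
from analysis import parameter_tuning
# Assumed import path; the tests below refer to this module as
# `computing_histograms`.
from analysis import computing_histograms


def tune_for_select_partitions(col, data_extractors, tune_options):
    """Tunes contribution bounds for DPEngine.select_partitions on `col`.

    `tune_options` is a parameter_tuning.TuneOptions; its construction is not
    shown in this diff, so it is passed in here.
    """
    backend = pipeline_dp.LocalBackend()
    contribution_histograms = list(
        computing_histograms.compute_dataset_histograms(
            col, data_extractors, backend))[0]
    # Empty metrics is the convention for tuning partition selection only.
    tune_options.aggregate_params.metrics = []
    return parameter_tuning.tune(col, backend, contribution_histograms,
                                 tune_options, data_extractors)
```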
Expand Down Expand Up @@ -341,19 +345,23 @@ def _convert_utility_analysis_to_tune_result(
def _check_tune_args(options: TuneOptions, is_public_partitions: bool):
# Check metrics to tune.
metrics = options.aggregate_params.metrics
if len(metrics) > 1:
if not metrics:
# Empty metrics means tuning for select_partitions.
if is_public_partitions:
# Empty metrics means that partition selection tuning is performed.
raise ValueError("empty metrics means tuning of partition selection"
" but public partitions were provided")
elif len(metrics) > 1:
raise NotImplementedError(
f"Tuning supports only one metrics, but {metrics} given.")
if len(metrics) == 1 and metrics[0] not in [
pipeline_dp.Metrics.COUNT, pipeline_dp.Metrics.PRIVACY_ID_COUNT
]:
raise NotImplementedError(
f"Tuning is supported only for Count and Privacy id count, but {metrics[0]} given."
)
else: # len(metrics) == 1
if metrics[0] not in [
pipeline_dp.Metrics.COUNT, pipeline_dp.Metrics.PRIVACY_ID_COUNT
]:
raise NotImplementedError(
f"Tuning is supported only for Count and Privacy id count, but {metrics[0]} given."
)

if is_public_partitions and not metrics:
raise ValueError("empty metrics means private partition selection "
"but public partitions were provided")
if options.function_to_minimize != MinimizingFunction.ABSOLUTE_ERROR:
raise NotImplementedError(
f"Only {MinimizingFunction.ABSOLUTE_ERROR} is implemented.")
96 changes: 95 additions & 1 deletion analysis/tests/parameter_tuning_test.py
@@ -16,6 +16,7 @@
from absl.testing import absltest
from absl.testing import parameterized
from unittest import mock
from unittest.mock import patch
from typing import List

import pipeline_dp
@@ -206,7 +207,7 @@ def test_find_candidate_parameters_more_candidates_for_l_inf_when_not_so_many_l_
max_candidates=5,
# ceil(1000^(i / 4)), where i in [0, 1, 2, 3, 4]
expected_candidates=[1, 6, 32, 178, 1000]))
def test_find_candidate_parameters_constant_relative_ste_strategy(
def test_find_candidate_parameters_constant_relative_step_strategy(
self, max_value, max_candidates, expected_candidates):
mock_l0_histogram = histograms.Histogram(None, None)
mock_l0_histogram.max_value = mock.Mock(return_value=max_value)
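The expected candidates in the parameterized case above can be reproduced directly from the formula in its comment (a sketch of the arithmetic only, not the library's candidate-generation function):

```python
import math

# ceil(1000^(i / 4)) for i in [0, 1, 2, 3, 4], as stated in the comment above.
candidates = [math.ceil(1000**(i / 4)) for i in range(5)]
assert candidates == [1, 6, 32, 178, 1000]
```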
@@ -227,6 +228,60 @@ def test_find_candidate_parameters_constant_relative_ste_strategy(
self.assertEqual(expected_candidates,
candidates.max_partitions_contributed)

@parameterized.named_parameters(
dict(
testcase_name='COUNT',
metric=pipeline_dp.Metrics.COUNT,
expected_generate_linf=True,
),
dict(
testcase_name='PRIVACY_ID_COUNT',
metric=pipeline_dp.Metrics.PRIVACY_ID_COUNT,
expected_generate_linf=False,
),
dict(
testcase_name='No metric (select partition)',
metric=None,
expected_generate_linf=False,
))
@patch('analysis.parameter_tuning._find_candidates_constant_relative_step')
def test_find_candidate_parameters_generate_linf(
self, mock_find_candidate_from_histogram, metric,
expected_generate_linf):
mock_l0_histogram = histograms.Histogram(
histograms.HistogramType.L0_CONTRIBUTIONS, None)
mock_linf_histogram = histograms.Histogram(
histograms.HistogramType.LINF_CONTRIBUTIONS, None)
mock_histograms = histograms.DatasetHistograms(mock_l0_histogram, None,
mock_linf_histogram,
None, None)

mock_find_candidate_from_histogram.return_value = [1, 2]

parameters_to_tune = parameter_tuning.ParametersToTune(
max_partitions_contributed=True,
max_contributions_per_partition=True)

candidates = parameter_tuning._find_candidate_parameters(
mock_histograms,
parameters_to_tune,
metric,
ParametersSearchStrategy.CONSTANT_RELATIVE_STEP,
max_candidates=100)

mock_find_candidate_from_histogram.assert_any_call(
mock_l0_histogram, mock.ANY)
if expected_generate_linf:
self.assertEqual(candidates.max_partitions_contributed,
[1, 1, 2, 2])
self.assertEqual(candidates.max_contributions_per_partition,
[1, 2, 1, 2])
mock_find_candidate_from_histogram.assert_any_call(
mock_linf_histogram, mock.ANY)
else:
self.assertEqual(candidates.max_partitions_contributed, [1, 2])
self.assertIsNone(candidates.max_contributions_per_partition)

def test_tune_count(self):
# Arrange.
input = [(i % 10, f"pk{i/10}") for i in range(10)]
@@ -264,6 +319,42 @@ def test_tune_count(self):
self.assertEqual(utility_reports[0].metric_errors[0].metric,
pipeline_dp.Metrics.COUNT)

def test_select_partitions(self):
# Arrange.
input = [(i % 10, f"pk{i/10}") for i in range(10)]
data_extractors = pipeline_dp.DataExtractors(
privacy_id_extractor=lambda x: x[0],
partition_extractor=lambda x: x[1],
value_extractor=lambda x: 0)

contribution_histograms = list(
computing_histograms.compute_dataset_histograms(
input, data_extractors, pipeline_dp.LocalBackend()))[0]

tune_options = _get_tune_options()
# Setting metrics to an empty list means only partition selection is run.
tune_options.aggregate_params.metrics = []

# Act.
result = parameter_tuning.tune(input, pipeline_dp.LocalBackend(),
contribution_histograms, tune_options,
data_extractors)

# Assert.
tune_result, per_partition_utility_analysis = result
per_partition_utility_analysis = list(per_partition_utility_analysis)
self.assertLen(per_partition_utility_analysis, 10)

tune_result = list(tune_result)[0]

self.assertEqual(tune_options, tune_result.options)
self.assertEqual(contribution_histograms,
tune_result.contribution_histograms)
utility_reports = tune_result.utility_reports
self.assertLen(utility_reports, 1)
self.assertIsInstance(utility_reports[0], metrics.UtilityReport)
self.assertIsNone(utility_reports[0].metric_errors)

def test_tune_privacy_id_count(self):
# Arrange.
input = [(i % 10, f"pk{i/10}") for i in range(10)]
@@ -299,6 +390,9 @@ def test_tune_privacy_id_count(self):
self.assertEqual(utility_reports[0].metric_errors[0].metric,
pipeline_dp.Metrics.PRIVACY_ID_COUNT)

def test_tune_params_validation(self):
pass


if __name__ == '__main__':
absltest.main()
