Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

(WIP) Support to PLD composition #259 #405

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 1 addition & 8 deletions pipeline_dp/budget_accounting.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,14 +48,7 @@ class MechanismSpec:

@property
def noise_standard_deviation(self):
"""Noise value for the mechanism.

Raises:
AssertionError: The noise value is not calculated yet.
"""
if self._noise_standard_deviation is None:
raise AssertionError(
"Noise standard deviation is not calculated yet.")
"""Noise value for the mechanism. It can be None before budget is computed."""
return self._noise_standard_deviation

@property
Expand Down
6 changes: 5 additions & 1 deletion pipeline_dp/combiners.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,10 @@ def eps(self):
def delta(self):
return self._mechanism_spec.delta

@property
def noise_standard_deviation(self):
return self._mechanism_spec.noise_standard_deviation

@property
def scalar_noise_params(self):
return dp_computations.ScalarNoiseParams(
Expand All @@ -159,7 +163,7 @@ def scalar_noise_params(self):
self.aggregate_params.max_sum_per_partition,
self.aggregate_params.max_partitions_contributed,
self.aggregate_params.max_contributions_per_partition,
self.aggregate_params.noise_kind)
self.aggregate_params.noise_kind, self.noise_standard_deviation)

@property
def additive_vector_noise_params(
Expand Down
85 changes: 69 additions & 16 deletions pipeline_dp/dp_computations.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ class ScalarNoiseParams:
max_partitions_contributed: int
max_contributions_per_partition: Optional[int]
noise_kind: pipeline_dp.NoiseKind # Laplace or Gaussian
noise_standard_deviation: Optional[float] = None

def __post_init__(self):
assert (self.min_value is None) == (
Expand Down Expand Up @@ -103,50 +104,70 @@ def compute_sigma(eps: float, delta: float, l2_sensitivity: float):
delta: The delta value.
l2_sensitivity: The L2 sensitivity.
"""
# TODO: use named arguments, when argument names are added in PyDP on PR
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for fixing this TODO

# https://github.com/OpenMined/PyDP/pull/398.
return dp_mechanisms.GaussianMechanism(eps, delta, l2_sensitivity).std
return dp_mechanisms.GaussianMechanism(epsilon=eps,
delta=delta,
sensitivity=l2_sensitivity).std


def apply_laplace_mechanism(value: float, eps: float, l1_sensitivity: float):
def apply_laplace_mechanism(value: float,
eps: float,
l1_sensitivity: float,
noise_standard_deviation: Optional[float] = None):
"""Applies the Laplace mechanism to the value.
If noise_standard_deviation is set, it is used and eps is ignored.

Args:
value: The initial value.
eps: The epsilon value.
l1_sensitivity: The L1 sensitivity.
noise_standard_deviation: The standard deviation for the noise.

Returns:
The value resulted after adding the noise.
"""
mechanism = dp_mechanisms.LaplaceMechanism(epsilon=eps,
sensitivity=l1_sensitivity)
if noise_standard_deviation is not None:
mechanism = dp_mechanisms.LaplaceMechanism(epsilon=1 /
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add a comment explaining that a workaround is used here, since we can't set the Laplace parameter directly.

noise_standard_deviation,
sensitivity=l1_sensitivity)
else:
mechanism = dp_mechanisms.LaplaceMechanism(epsilon=eps,
sensitivity=l1_sensitivity)
return mechanism.add_noise(1.0 * value)


def apply_gaussian_mechanism(value: float, eps: float, delta: float,
l2_sensitivity: float):
def apply_gaussian_mechanism(value: float,
eps: float,
delta: float,
l2_sensitivity: float,
noise_standard_deviation: Optional[float] = None):
"""Applies the Gaussian mechanism to the value.
If noise_standard_deviation is set, it is used and eps&delta are ignored.

Args:
value: The initial value.
eps: The epsilon value.
delta: The delta value.
l2_sensitivity: The L2 sensitivity.
noise_standard_deviation: The standard deviation for the noise.

Returns:
The value resulted after adding the noise.
"""
# TODO: use named arguments, when argument names are added in PyDP on PR
# https://github.com/OpenMined/PyDP/pull/398.
mechanism = dp_mechanisms.GaussianMechanism(eps, delta, l2_sensitivity)
if noise_standard_deviation is not None:
mechanism = dp_mechanisms.GaussianMechanism.create_from_standard_deviation(
std=l2_sensitivity * noise_standard_deviation)
else:
mechanism = dp_mechanisms.GaussianMechanism(epsilon=eps,
delta=delta,
sensitivity=l2_sensitivity)
return mechanism.add_noise(1.0 * value)


def _add_random_noise(
value: float,
eps: float,
delta: float,
noise_standard_deviation: float,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Optional[float]?

l0_sensitivity: float,
linf_sensitivity: float,
noise_kind: pipeline_dp.NoiseKind,
Expand All @@ -164,14 +185,19 @@ def _add_random_noise(
Returns:
The value resulted after adding the random noise.
"""

if noise_kind == pipeline_dp.NoiseKind.LAPLACE:
l1_sensitivity = compute_l1_sensitivity(l0_sensitivity,
linf_sensitivity)
return apply_laplace_mechanism(value, eps, l1_sensitivity)
return apply_laplace_mechanism(value, eps, l1_sensitivity,
noise_standard_deviation)

if noise_kind == pipeline_dp.NoiseKind.GAUSSIAN:
l2_sensitivity = compute_l2_sensitivity(l0_sensitivity,
linf_sensitivity)
return apply_gaussian_mechanism(value, eps, delta, l2_sensitivity)
return apply_gaussian_mechanism(value, eps, delta, l2_sensitivity,
noise_standard_deviation)

raise ValueError("Noise kind must be either Laplace or Gaussian.")


Expand Down Expand Up @@ -213,6 +239,7 @@ def add_noise_vector(vec: np.ndarray, noise_params: AdditiveVectorNoiseParams):
s,
noise_params.eps_per_coordinate,
noise_params.delta_per_coordinate,
None, # TODO: Add noise_standard_deviation for vector sum computation
noise_params.l0_sensitivity,
noise_params.linf_sensitivity,
noise_params.noise_kind,
Expand Down Expand Up @@ -269,6 +296,7 @@ def compute_dp_count(count: int, dp_params: ScalarNoiseParams):
count,
dp_params.eps,
dp_params.delta,
dp_params.noise_standard_deviation,
l0_sensitivity,
linf_sensitivity,
dp_params.noise_kind,
Expand Down Expand Up @@ -301,6 +329,7 @@ def compute_dp_sum(sum: float, dp_params: ScalarNoiseParams):
sum,
dp_params.eps,
dp_params.delta,
dp_params.noise_standard_deviation,
l0_sensitivity,
linf_sensitivity,
dp_params.noise_kind,
Expand All @@ -314,6 +343,7 @@ def _compute_mean_for_normalized_sum(
max_value: float,
eps: float,
delta: float,
noise_standard_deviation: float,
l0_sensitivity: float,
max_contributions_per_partition: float,
noise_kind: pipeline_dp.NoiseKind,
Expand All @@ -325,6 +355,7 @@ def _compute_mean_for_normalized_sum(
sum: Non-DP normalized sum.
min_value, max_value: The lowest/highest contribution of the non-normalized values.
eps, delta: The budget allocated.
noise_standard_deviation: The standard deviation for the noise.
l0_sensitivity: The L0 sensitivity.
max_contributions_per_partition: The maximum number of contributions
per partition.
Expand All @@ -341,8 +372,10 @@ def _compute_mean_for_normalized_sum(
middle = compute_middle(min_value, max_value)
linf_sensitivity = max_contributions_per_partition * abs(middle - min_value)

dp_normalized_sum = _add_random_noise(sum, eps, delta, l0_sensitivity,
linf_sensitivity, noise_kind)
dp_normalized_sum = _add_random_noise(sum, eps, delta,
noise_standard_deviation,
l0_sensitivity, linf_sensitivity,
noise_kind)
# Clamps dp_count to 1.0. We know that actual count > 1 except when the
# input set is empty, in which case it shouldn't matter much what the
# denominator is.
Expand Down Expand Up @@ -370,10 +403,18 @@ def compute_dp_mean(count: int, normalized_sum: float,
dp_params.eps, dp_params.delta, 2)
l0_sensitivity = dp_params.l0_sensitivity()

# Increases noise std.dev. equally due to multiple computations
count_noise_standard_deviation = \
sum_noise_standard_deviation = \
2*dp_params.noise_standard_deviation \
if dp_params.noise_standard_deviation is not None \
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: maybe something like this:

noise_std = None
if dp_params.noise_standard_deviation is not None:
  noise_std = 2*dp_params.noise_standard_deviation
count_noise_standard_deviation = sum_noise_standard_deviation = noise_std

else None

dp_count = _add_random_noise(
count,
count_eps,
count_delta,
count_noise_standard_deviation,
l0_sensitivity,
dp_params.max_contributions_per_partition,
dp_params.noise_kind,
Expand All @@ -386,6 +427,7 @@ def compute_dp_mean(count: int, normalized_sum: float,
dp_params.max_value,
sum_eps,
sum_delta,
sum_noise_standard_deviation,
l0_sensitivity,
dp_params.max_contributions_per_partition,
dp_params.noise_kind,
Expand Down Expand Up @@ -421,10 +463,19 @@ def compute_dp_var(count: int, normalized_sum: float,
) = equally_split_budget(dp_params.eps, dp_params.delta, 3)
l0_sensitivity = dp_params.l0_sensitivity()

# Increases noise std.dev. equally due to multiple computations
count_noise_standard_deviation = \
sum_noise_standard_deviation = \
sum_squares_noise_standard_deviation = \
3*dp_params.noise_standard_deviation \
if dp_params.noise_standard_deviation is not None \
else None

dp_count = _add_random_noise(
count,
count_eps,
count_delta,
count_noise_standard_deviation,
l0_sensitivity,
dp_params.max_contributions_per_partition,
dp_params.noise_kind,
Expand All @@ -438,6 +489,7 @@ def compute_dp_var(count: int, normalized_sum: float,
dp_params.max_value,
sum_eps,
sum_delta,
sum_noise_standard_deviation,
l0_sensitivity,
dp_params.max_contributions_per_partition,
dp_params.noise_kind,
Expand All @@ -449,7 +501,8 @@ def compute_dp_var(count: int, normalized_sum: float,
# Computes and adds noise to the mean of squares.
dp_mean_squares = _compute_mean_for_normalized_sum(
dp_count, normalized_sum_squares, squares_min_value, squares_max_value,
sum_squares_eps, sum_squares_delta, l0_sensitivity,
sum_squares_eps, sum_squares_delta,
sum_squares_noise_standard_deviation, l0_sensitivity,
dp_params.max_contributions_per_partition, dp_params.noise_kind)

dp_var = dp_mean_squares - dp_mean**2
Expand Down
7 changes: 3 additions & 4 deletions tests/budget_accounting_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,9 +198,8 @@ def test_not_enough_aggregations(self, use_num_aggregations):
class PLDBudgetAccountantTest(unittest.TestCase):

def test_noise_not_calculated(self):
with self.assertRaises(AssertionError):
mechanism = MechanismSpec(MechanismType.LAPLACE)
print(mechanism.noise_standard_deviation())
mechanism = MechanismSpec(MechanismType.LAPLACE)
self.assertEqual(None, mechanism.noise_standard_deviation)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: self.assertIsNone(


def test_invalid_epsilon(self):
with self.assertRaises(ValueError):
Expand Down Expand Up @@ -257,7 +256,7 @@ class ComputeBudgetTestCase:
epsilon: float
delta: float
expected_pipeline_noise_std: float
mechanisms: []
mechanisms: list

testcases = [
ComputeBudgetTestCase(
Expand Down
4 changes: 3 additions & 1 deletion tests/dp_computations_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,9 @@ def test_secure_gaussian_noise_is_used(self, gaussian_mechanism):
value=20, eps=0.5, delta=1e-10, l2_sensitivity=3)

# Assert
gaussian_mechanism.assert_called_with(0.5, 1e-10, 3)
gaussian_mechanism.assert_called_with(epsilon=0.5,
delta=1e-10,
sensitivity=3)
mock_gaussian_mechanism.add_noise.assert_called_with(20)
self.assertEqual("value_with_noise", anonymized_value)

Expand Down