Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 14 additions & 16 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -323,7 +323,6 @@ jobs:
- checkout
- pip_dev_install
- unit_tests
- coveralls_upload_parallel

unittest_py38_torch_release:
docker:
Expand All @@ -332,7 +331,6 @@ jobs:
- checkout
- pip_dev_install
- unit_tests
- coveralls_upload_parallel

unittest_py39_torch_release:
docker:
Expand All @@ -341,7 +339,6 @@ jobs:
- checkout
- pip_dev_install
- unit_tests
- coveralls_upload_parallel

unittest_py39_torch_nightly:
docker:
Expand All @@ -351,7 +348,19 @@ jobs:
- pip_dev_install:
args: "-n"
- unit_tests
- coveralls_upload_parallel

prv_accountant_values:
docker:
- image: cimg/python:3.7.5
steps:
- checkout
- py_3_7_setup
- pip_dev_install
- run:
name: "Unit test prv accountant"
no_output_timeout: 1h
command: |
python -m unittest opacus.tests.prv_accountant

integrationtest_py37_torch_release_cpu:
docker:
Expand Down Expand Up @@ -477,7 +486,6 @@ jobs:
- pip_dev_install
- run_nvidia_smi
- command_unit_tests_multi_gpu
- coveralls_upload_parallel


auto_deploy_site:
Expand Down Expand Up @@ -537,14 +545,8 @@ workflows:
filters: *exclude_ghpages
- integrationtest_py37_torch_release_cuda:
filters: *exclude_ghpages
- finish_coveralls_parallel:
- prv_accountant_values:
filters: *exclude_ghpages
requires:
- unittest_py37_torch_release
- unittest_py38_torch_release
- unittest_py39_torch_release
- unittest_py39_torch_nightly
- unittest_multi_gpu

nightly:
when:
Expand All @@ -560,10 +562,6 @@ workflows:
filters: *exclude_ghpages
- micro_benchmarks_py37_torch_release_cuda:
filters: *exclude_ghpages
- finish_coveralls_parallel:
filters: *exclude_ghpages
requires:
- unittest_py39_torch_nightly

website_deployment:
when:
Expand Down
12 changes: 4 additions & 8 deletions examples/char-lstm-classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -332,10 +332,8 @@ def train(
f"\t Epoch {epoch}. Accuracy: {mean(accs):.6f} | Loss: {mean(losses):.6f}"
)
try:
epsilon, best_alpha = privacy_engine.accountant.get_privacy_spent(
delta=target_delta
)
printstr += f" | (ε = {epsilon:.2f}, δ = {target_delta}) for α = {best_alpha}"
epsilon = privacy_engine.accountant.get_epsilon(delta=target_delta)
printstr += f" | (ε = {epsilon:.2f}, δ = {target_delta})"
except AttributeError:
pass
print(printstr)
Expand All @@ -359,10 +357,8 @@ def test(model, test_loader, privacy_engine, target_delta, device="cuda:0"):
mean_acc = mean(accs)
printstr = "\n----------------------------\n" f"Test Accuracy: {mean_acc:.6f}"
if privacy_engine:
epsilon, best_alpha = privacy_engine.accountant.get_privacy_spent(
delta=target_delta
)
printstr += f" (ε = {epsilon:.2f}, δ = {target_delta}) for α = {best_alpha}"
epsilon = privacy_engine.accountant.get_epsilon(delta=target_delta)
printstr += f" (ε = {epsilon:.2f}, δ = {target_delta})"
print(printstr + "\n----------------------------\n")
return mean_acc

Expand Down
7 changes: 2 additions & 5 deletions examples/cifar10.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,15 +195,12 @@ def compute_loss_stateless_model(params, sample, target):

if i % args.print_freq == 0:
if not args.disable_dp:
epsilon, best_alpha = privacy_engine.accountant.get_privacy_spent(
delta=args.delta,
alphas=[1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)),
)
epsilon = privacy_engine.accountant.get_epsilon(delta=args.delta)
print(
f"\tTrain Epoch: {epoch} \t"
f"Loss: {np.mean(losses):.6f} "
f"Acc@1: {np.mean(top1_acc):.6f} "
f"(ε = {epsilon:.2f}, δ = {args.delta}) for α = {best_alpha}"
f"(ε = {epsilon:.2f}, δ = {args.delta})"
)
else:
print(
Expand Down
6 changes: 2 additions & 4 deletions examples/dcgan.py
Original file line number Diff line number Diff line change
Expand Up @@ -333,14 +333,12 @@ def forward(self, input):
optimizerG.step()

if not opt.disable_dp:
epsilon, best_alpha = privacy_engine.accountant.get_privacy_spent(
delta=opt.delta
)
epsilon = privacy_engine.accountant.get_epsilon(delta=opt.delta)
data_bar.set_description(
f"epoch: {epoch}, Loss_D: {errD.item()} "
f"Loss_G: {errG.item()} D(x): {D_x} "
f"D(G(z)): {D_G_z1}/{D_G_z2}"
"(ε = %.2f, δ = %.2f) for α = %.2f" % (epsilon, opt.delta, best_alpha)
"(ε = %.2f, δ = %.2f)" % (epsilon, opt.delta)
)
else:
data_bar.set_description(
Expand Down
6 changes: 2 additions & 4 deletions examples/imdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,14 +95,12 @@ def train(args, model, train_loader, optimizer, privacy_engine, epoch):
accuracies.append(acc.item())

if not args.disable_dp:
epsilon, best_alpha = privacy_engine.accountant.get_privacy_spent(
delta=args.delta
)
epsilon = privacy_engine.accountant.get_epsilon(delta=args.delta)
print(
f"Train Epoch: {epoch} \t"
f"Train Loss: {np.mean(losses):.6f} "
f"Train Accuracy: {np.mean(accuracies):.6f} "
f"(ε = {epsilon:.2f}, δ = {args.delta}) for α = {best_alpha}"
f"(ε = {epsilon:.2f}, δ = {args.delta})"
)
else:
print(
Expand Down
6 changes: 2 additions & 4 deletions examples/mnist.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,13 +72,11 @@ def train(args, model, device, train_loader, optimizer, privacy_engine, epoch):
losses.append(loss.item())

if not args.disable_dp:
epsilon, best_alpha = privacy_engine.accountant.get_privacy_spent(
delta=args.delta
)
epsilon = privacy_engine.accountant.get_epsilon(delta=args.delta)
print(
f"Train Epoch: {epoch} \t"
f"Loss: {np.mean(losses):.6f} "
f"(ε = {epsilon:.2f}, δ = {args.delta}) for α = {best_alpha}"
f"(ε = {epsilon:.2f}, δ = {args.delta})"
)
else:
print(f"Train Epoch: {epoch} \t Loss: {np.mean(losses):.6f}")
Expand Down
3 changes: 3 additions & 0 deletions opacus/accountants/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

from .accountant import IAccountant
from .gdp import GaussianAccountant
from .prv import PRVAccountant
from .rdp import RDPAccountant


Expand All @@ -29,5 +30,7 @@ def create_accountant(mechanism: str) -> IAccountant:
return RDPAccountant()
elif mechanism == "gdp":
return GaussianAccountant()
elif mechanism == "prv":
return PRVAccountant()

raise ValueError(f"Unexpected accounting mechanism: {mechanism}")
19 changes: 19 additions & 0 deletions opacus/accountants/analysis/prv/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
from .compose import compose_heterogeneous
from .domain import Domain, compute_safe_domain_size
from .prvs import (
DiscretePRV,
PoissonSubsampledGaussianPRV,
TruncatedPrivacyRandomVariable,
discretize,
)


# Names exported by this package; each one is imported from a submodule above.
__all__ = [
"DiscretePRV",
"Domain",
"PoissonSubsampledGaussianPRV",
"TruncatedPrivacyRandomVariable",
"compose_heterogeneous",
"compute_safe_domain_size",
"discretize",
]
62 changes: 62 additions & 0 deletions opacus/accountants/analysis/prv/compose.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
from typing import List

import numpy as np
from scipy.fft import irfft, rfft
from scipy.signal import convolve

from .prvs import DiscretePRV


def _compose_fourier(dprv: DiscretePRV, num_self_composition: int) -> DiscretePRV:
    """
    Compose a discrete PRV with itself ``num_self_composition`` times using FFT.

    The pmf is transformed with ``rfft``, raised to the requested power and
    transformed back with ``irfft``; the result is then rotated with ``np.roll``
    and paired with a correspondingly shifted domain.

    Raises:
        ValueError: if ``len(dprv)`` is odd.
    """
    if len(dprv) % 2:
        raise ValueError("Can only compose evenly sized discrete PRVs")

    spectrum = rfft(dprv.pmf)
    composed_pmf = irfft(spectrum**num_self_composition)

    # Rotation applied to the circular result: one slot per extra composition,
    # plus half the array length whenever the number of compositions is even
    # (presumably to re-centre the wrapped-around pmf -- confirm against the
    # reference implementation).
    extra_compositions = num_self_composition - 1
    if num_self_composition % 2 == 0:
        rotation = extra_compositions + len(composed_pmf) // 2
    else:
        rotation = extra_compositions
    composed_pmf = np.roll(composed_pmf, rotation)

    # The accumulated shift of the input domain is applied once more per
    # extra composition.
    shifted_domain = dprv.domain.shift_right(dprv.domain.shifts * extra_compositions)

    return DiscretePRV(pmf=composed_pmf, domain=shifted_domain)


def _compose_two(dprv_left: DiscretePRV, dprv_right: DiscretePRV) -> DiscretePRV:
    """
    Compose two discrete PRVs by convolving their pmfs.

    The convolution uses ``mode="same"``; the resulting domain is the left
    operand's domain shifted right by the right operand's accumulated shifts.
    """
    combined_pmf = convolve(dprv_left.pmf, dprv_right.pmf, mode="same")
    right_shifts = dprv_right.domain.shifts
    combined_domain = dprv_left.domain.shift_right(right_shifts)
    return DiscretePRV(pmf=combined_pmf, domain=combined_domain)


def _compose_convolution_tree(dprvs: List[DiscretePRV]) -> DiscretePRV:
    """
    Reduce a list of discrete PRVs to a single one by pairwise convolution.

    In every round neighbouring PRVs are composed with ``_compose_two``; when
    the number of PRVs is odd, the last one is carried over unchanged to the
    next round. The input list is not modified.

    Raises:
        IndexError: if ``dprvs`` is empty.
    """
    # Work on a shallow copy: the odd-length case pops from the list, which
    # must not be visible to the caller.
    pending = list(dprvs)

    # repeatedly convolve neighbouring PRVs until we only have one left
    while len(pending) > 1:
        next_round = []
        if len(pending) % 2 == 1:
            # carry the unpaired trailing PRV over to the next round
            next_round.append(pending.pop())

        for dprv_left, dprv_right in zip(pending[:-1:2], pending[1::2]):
            next_round.append(_compose_two(dprv_left, dprv_right))

        pending = next_round
    return pending[0]


def compose_heterogeneous(
    dprvs: List[DiscretePRV], num_self_compositions: List[int]
) -> DiscretePRV:
    r"""
    Compose a heterogeneous list of PRVs with multiplicity.

    Each PRV is first composed with itself the requested number of times using
    FFT; the resulting PRVs are then convolved pairwise until one remains.

    This is the approach taken in https://github.com/microsoft/prv_accountant

    Args:
        dprvs: discrete PRVs to compose.
        num_self_compositions: for each entry of ``dprvs``, how many times it
            is composed with itself.

    Raises:
        ValueError: if the two lists differ in length.
    """
    if len(dprvs) != len(num_self_compositions):
        raise ValueError("dprvs and num_self_compositions must have the same length")

    self_composed = []
    for dprv, multiplicity in zip(dprvs, num_self_compositions):
        self_composed.append(_compose_fourier(dprv, multiplicity))

    return _compose_convolution_tree(self_composed)
99 changes: 99 additions & 0 deletions opacus/accountants/analysis/prv/domain.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
from dataclasses import dataclass
from typing import Sequence

import numpy as np

from ...rdp import RDPAccountant


@dataclass
class Domain:
    r"""
    Stores relevant information about the domain on which PRVs are discretized, and
    includes a few convenience methods for manipulating it.

    Attributes:
        t_min: value of the first grid point.
        t_max: value of the last grid point.
        size: number of grid points; must be a positive, even ``int``.
        shifts: running total of the offsets applied through ``shift_right``.
    """

    t_min: float
    t_max: float
    size: int
    shifts: float = 0.0

    def __post_init__(self):
        if not isinstance(self.size, int):
            raise TypeError("`size` must be an integer")
        if self.size % 2 != 0:
            raise ValueError("`size` must be even")
        # `dt` divides by `size - 1` and `ts` builds `size` points, so a zero
        # or negative size can never describe a usable grid.
        if self.size <= 0:
            raise ValueError("`size` must be positive")

    @classmethod
    def create_aligned(cls, t_min: float, t_max: float, dt: float) -> "Domain":
        """
        Build a domain with spacing ``dt`` whose end points are multiples of ``dt``.

        ``t_min`` is rounded down and ``t_max`` rounded up to the nearest
        multiple of ``dt``; one extra point is appended on the right if needed
        to make the number of points even.

        Raises:
            ValueError: if ``dt`` is not strictly positive, or if the resulting
                number of points is not positive (e.g. ``t_max`` well below
                ``t_min``).
            RuntimeError: if the spacing of the constructed domain deviates
                from ``dt`` by a relative error of ``1e-8`` or more.
        """
        # `not dt > 0` also rejects NaN, which would otherwise slip through.
        if not dt > 0:
            raise ValueError("`dt` must be positive")

        t_min = np.floor(t_min / dt) * dt
        t_max = np.ceil(t_max / dt) * dt

        size = int(np.round((t_max - t_min) / dt)) + 1

        if size % 2 == 1:
            # extend by one grid point to satisfy the even-size requirement
            size += 1
            t_max += dt

        domain = cls(t_min, t_max, size)

        if np.abs(domain.dt - dt) / dt >= 1e-8:
            raise RuntimeError(
                f"Failed to create a domain aligned to dt={dt}: "
                f"resulting spacing is {domain.dt}"
            )

        return domain

    def shift_right(self, dt: float) -> "Domain":
        """
        Return a new domain with both end points moved by ``dt``.

        The offset is also added to ``shifts``; this instance is left unchanged.
        """
        return Domain(
            t_min=self.t_min + dt,
            t_max=self.t_max + dt,
            size=self.size,
            shifts=self.shifts + dt,
        )

    @property
    def dt(self) -> float:
        """Spacing between two neighbouring grid points."""
        return (self.t_max - self.t_min) / (self.size - 1)

    @property
    def ts(self) -> np.ndarray:
        """All ``size`` grid points, evenly spaced from ``t_min`` to ``t_max``."""
        return np.linspace(self.t_min, self.t_max, self.size)

    def __getitem__(self, i: int) -> float:
        """Return ``t_min + i * dt``; the index is not bounds-checked."""
        return self.t_min + i * self.dt


def compute_safe_domain_size(
    prvs,
    max_self_compositions: Sequence[int],
    eps_error: float,
    delta_error: float,
) -> float:
    """
    Compute safe domain size for the discretization of the PRVs.

    For details about this algorithm, see remark 5.6 in
    https://www.microsoft.com/en-us/research/publication/numerical-composition-of-differential-privacy/

    Args:
        prvs: objects exposing ``noise_multiplier`` and ``sample_rate``.
        max_self_compositions: number of self-compositions paired with each
            entry of ``prvs``.
        eps_error: lower bound applied to the epsilon estimate before the
            final margin is added.
        delta_error: delta budget from which the deltas passed to the RDP
            accountants are derived.

    Returns:
        The largest RDP epsilon estimate (or ``eps_error`` if larger), plus 3.
    """
    n_total = sum(max_self_compositions)

    # Epsilon of the whole composition: one accountant fed with every PRV
    # and its multiplicity.
    joint_accountant = RDPAccountant()
    for prv, n_compositions in zip(prvs, max_self_compositions):
        step = (prv.noise_multiplier, prv.sample_rate, n_compositions)
        joint_accountant.history.append(step)

    L_max = joint_accountant.get_epsilon(delta_error / 4)

    # Epsilon of a single application of each PRV, at a much smaller delta;
    # keep the largest value seen.
    for prv, _ in zip(prvs, max_self_compositions):
        single_accountant = RDPAccountant()
        single_accountant.history = [(prv.noise_multiplier, prv.sample_rate, 1)]
        single_eps = single_accountant.get_epsilon(
            delta=delta_error / (8 * n_total)
        )
        L_max = max(L_max, single_eps)

    # FIXME: this implementation is adapted from the code accompanying the paper, but
    # disagrees subtly with the theory from remark 5.6. It's not immediately clear this
    # gives the right guarantees in all cases, though it's fine for eps_error < 1 and
    # hence generic cases.
    # cf. https://github.com/microsoft/prv_accountant/discussions/34
    return max(L_max, eps_error) + 3
Comment on lines +76 to +99
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm in two minds about whether to keep this as it is, following the implementation from microsoft/prv_accountant, or to follow the paper more precisely. It's not immediately clear to me that this implementation always satisfies the conditions required in the paper without an additional assumption such as eps_error < 1.

The changes required are minor and would look something like the following:

-    L_max = rdp_accountant.get_epsilon(delta_error / 4)
+    L_max = rdp_accountant.get_epsilon(delta_error / 4) + eps_error
-    return max(L_max, eps_error) + 3
+    return L_max + 2

Loading