Skip to content

Commit fb9c26c

Browse files
Hyuckchul Jung authored and meta-codesync[bot] committed
LIA use one-sided CI upper-bound (#129)
Summary:
Pull Request resolved: #129

* Added a choice between a one-sided CI upper bound and a two-sided CI upper bound when computing epsilons.
* Minor changes in `AnalysisNode._compute_ci()`, with a related change in `FactoredLiAAnalysisNode.merge_results()`.
* Controlled by a new argument `use_one_sided_ci_ub` (default `False`; that is, the default behavior is the same as before: two-sided).

Reviewed By: knottb

Differential Revision: D104239997

fbshipit-source-id: 570254a9eb73bdbf0b7fb20a1730e3ee84a4b2a7
1 parent 564bad1 commit fb9c26c

2 files changed

Lines changed: 75 additions & 3 deletions

File tree

privacy_guard/analysis/mia/analysis_node.py

Lines changed: 9 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -235,14 +235,20 @@ def _calculate_one_off_eps(self) -> float:
235235
return eps_cp
236236

237237
@staticmethod
238-
def _compute_ci(array: NDArray[float], axis: int = 0) -> tuple[NDArray, NDArray]:
238+
def _compute_ci(
239+
array: NDArray[float], axis: int = 0, use_one_sided_ci_ub: bool = False
240+
) -> tuple[NDArray, NDArray]:
239241
"""Compute confidence intervals (used for eps, auc, accuracy)"""
240242
# Sort along the specified axis
241243
sorted_array = np.sort(array, axis=axis)
242244
axis_length = sorted_array.shape[axis]
243245

244-
lower_idx = max(int(0.025 * axis_length) - 1, 0)
245-
upper_idx = int(0.975 * axis_length)
246+
if use_one_sided_ci_ub:
247+
lower_idx = 0
248+
upper_idx = int(0.95 * axis_length) # at 95% confidence level
249+
else:
250+
lower_idx = max(int(0.025 * axis_length) - 1, 0)
251+
upper_idx = int(0.975 * axis_length)
246252

247253
# Index into the sorted array at the percentile positions
248254
lower_bound = np.take(sorted_array, lower_idx, axis=axis)

privacy_guard/analysis/tests/test_analysis_node.py

Lines changed: 66 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -231,6 +231,72 @@ def test_compute_ci_with_2d_array(self) -> None:
231231
# Check that all lower bounds are less than upper bounds
232232
self.assertTrue(np.all(lower_bound <= upper_bound))
233233

234+
def test_compute_ci_two_sided_default(self) -> None:
    """Test that the default _compute_ci behavior is the two-sided 95% CI.

    The call is made WITHOUT ``use_one_sided_ci_ub`` so that the parameter's
    default value is what is actually exercised, and the result is then
    compared against an explicit ``use_one_sided_ci_ub=False`` call.
    """
    # Create test data ranging from 1 to 100
    test_data = np.arange(1, 101)

    # Omit the flag on purpose: this is the call that checks the default.
    lower_bound, upper_bound = AnalysisNode._compute_ci(test_data)

    # Two-sided: lower_idx = max(int(0.025 * 100) - 1, 0) = 1 -> sorted[1] = 2
    #            upper_idx = int(0.975 * 100) = 97 -> sorted[97] = 98
    self.assertEqual(lower_bound[0], 2)
    self.assertEqual(upper_bound[0], 98)

    # Passing the flag explicitly as False must match the default result.
    explicit_lb, explicit_ub = AnalysisNode._compute_ci(
        test_data, use_one_sided_ci_ub=False
    )
    self.assertEqual(lower_bound[0], explicit_lb[0])
    self.assertEqual(upper_bound[0], explicit_ub[0])
247+
248+
def test_compute_ci_one_sided_upper_bound(self) -> None:
    """Check the bounds returned when use_one_sided_ci_ub=True on 1..100."""
    # Integers 1 through 100, so sorted[i] == i + 1
    samples = np.arange(1, 101)

    ci_low, ci_high = AnalysisNode._compute_ci(samples, use_one_sided_ci_ub=True)

    # One-sided branch for 100 samples:
    #   lower_idx = 0                   -> sorted[0]  = 1
    #   upper_idx = int(0.95 * 100) = 95 -> sorted[95] = 96
    self.assertEqual(ci_low[0], 1)
    self.assertEqual(ci_high[0], 96)
261+
262+
def test_compute_ci_one_sided_vs_two_sided(self) -> None:
    """Compare one-sided and two-sided bounds computed on the same data.

    On 1..1000 the one-sided upper bound must be strictly below the
    two-sided upper bound, and the one-sided lower bound must not exceed
    the two-sided lower bound.
    """
    # 1000 points so the 95% and 97.5% indices land on different elements
    samples = np.arange(1, 1001)

    lb_two, ub_two = AnalysisNode._compute_ci(samples, use_one_sided_ci_ub=False)
    lb_one, ub_one = AnalysisNode._compute_ci(samples, use_one_sided_ci_ub=True)

    # Upper bounds: index int(0.95 * n) sits below index int(0.975 * n)
    self.assertLess(ub_one[0], ub_two[0])
    # Lower bounds: index 0 (one-sided) vs. max(int(0.025 * n) - 1, 0) (two-sided)
    self.assertLessEqual(lb_one[0], lb_two[0])
280+
281+
def test_compute_ci_one_sided_with_2d_array(self) -> None:
    """Test one-sided CI upper bound computation with 2D arrays."""
    # Create 2D test data (100 samples, 5 features). A seeded generator keeps
    # the input identical on every run, so any failure can be reproduced.
    rng = np.random.default_rng(seed=0)
    test_data_2d = rng.random((100, 5))

    lower_bound, upper_bound = AnalysisNode._compute_ci(
        test_data_2d, axis=0, use_one_sided_ci_ub=True
    )

    # Check shapes: one bound per feature column
    self.assertEqual(lower_bound.shape, (5,))
    self.assertEqual(upper_bound.shape, (5,))

    # Check that all lower bounds are less than or equal to upper bounds
    self.assertTrue(np.all(lower_bound <= upper_bound))

    # Lower bound should equal min along axis (lower_idx = 0 after sort)
    self.assertTrue(np.allclose(lower_bound, np.min(test_data_2d, axis=0)))
299+
234300
def test_compute_output_types(self) -> None:
235301
analysis_outputs = self.analysis_node.run_analysis()
236302
self.assertIsInstance(analysis_outputs, AnalysisNodeOutput)

0 commit comments

Comments
 (0)