Skip to content

Commit 732c1f9

Browse files
cicichen01facebook-github-bot
authored and committed
Split the get_random_model_and_data() method [1/n]
Summary: As titled. The get_random_model_and_data() method is used to construct test data for influence, and flake8 reports it as too complex (https://www.flake8rules.com/rules/C901.html). This series of diffs will split the method and abstract the common parts. This diff isolates the model part for the different GPU usage settings. It also eliminates the mixed use of bool and str. Differential Revision: D55153967
1 parent fabac35 commit 732c1f9

5 files changed

+94
-85
lines changed

tests/influence/_core/test_arnoldi_influence.py

+18-16
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import tempfile
2-
from typing import Callable, List, Tuple, Union
2+
from typing import Callable, List, Optional, Tuple
33

44
import torch
55

@@ -27,8 +27,9 @@
2727
generate_assymetric_matrix_given_eigenvalues,
2828
generate_symmetric_matrix_given_eigenvalues,
2929
get_random_model_and_data,
30+
GPU_SETTING_LIST,
31+
is_gpu,
3032
UnpackDataset,
31-
USE_GPU_LIST,
3233
)
3334
from torch import Tensor
3435
from torch.utils.data import DataLoader
@@ -237,17 +238,17 @@ def _param_matmul(params: Tuple[Tensor]):
237238
delta,
238239
mode,
239240
unpack_inputs,
240-
use_gpu,
241+
gpu_setting,
241242
)
242-
for use_gpu in USE_GPU_LIST
243+
for gpu_setting in GPU_SETTING_LIST
243244
for (influence_constructor_1, influence_constructor_2, delta) in [
244245
# compare implementations, when considering only 1 layer
245246
(
246247
DataInfluenceConstructor(
247248
NaiveInfluenceFunction,
248249
layers=(
249250
["module.linear1"]
250-
if use_gpu == "cuda_dataparallel"
251+
if gpu_setting == "cuda_dataparallel"
251252
else ["linear1"]
252253
),
253254
projection_dim=5,
@@ -258,7 +259,7 @@ def _param_matmul(params: Tuple[Tensor]):
258259
ArnoldiInfluenceFunction,
259260
layers=(
260261
["module.linear1"]
261-
if use_gpu == "cuda_dataparallel"
262+
if gpu_setting == "cuda_dataparallel"
262263
else ["linear1"]
263264
),
264265
arnoldi_dim=50,
@@ -314,7 +315,7 @@ def test_compare_implementations_trained_NN_model_and_data(
314315
delta: float,
315316
mode: str,
316317
unpack_inputs: bool,
317-
use_gpu: Union[bool, str],
318+
gpu_setting: Optional[str],
318319
) -> None:
319320
"""
320321
this compares 2 influence implementations on a trained 2-layer NN model.
@@ -329,7 +330,7 @@ def test_compare_implementations_trained_NN_model_and_data(
329330
delta,
330331
mode,
331332
unpack_inputs,
332-
use_gpu,
333+
gpu_setting,
333334
)
334335

335336
# this compares `ArnoldiInfluenceFunction` and `NaiveInfluenceFunction` on randomly
@@ -345,16 +346,16 @@ def test_compare_implementations_trained_NN_model_and_data(
345346
delta,
346347
mode,
347348
unpack_inputs,
348-
use_gpu,
349+
gpu_setting,
349350
)
350-
for use_gpu in USE_GPU_LIST
351+
for gpu_setting in GPU_SETTING_LIST
351352
for (influence_constructor_1, influence_constructor_2, delta) in [
352353
(
353354
DataInfluenceConstructor(
354355
NaiveInfluenceFunction,
355356
layers=(
356357
["module.linear1"]
357-
if use_gpu == "cuda_dataparallel"
358+
if gpu_setting == "cuda_dataparallel"
358359
else ["linear1"]
359360
),
360361
show_progress=False,
@@ -364,7 +365,7 @@ def test_compare_implementations_trained_NN_model_and_data(
364365
ArnoldiInfluenceFunction,
365366
layers=(
366367
["module.linear1"]
367-
if use_gpu == "cuda_dataparallel"
368+
if gpu_setting == "cuda_dataparallel"
368369
else ["linear1"]
369370
),
370371
show_progress=False,
@@ -397,7 +398,7 @@ def test_compare_implementations_random_model_and_data(
397398
delta: float,
398399
mode: str,
399400
unpack_inputs: bool,
400-
use_gpu: Union[bool, str],
401+
gpu_setting: Optional[str],
401402
) -> None:
402403
"""
403404
this compares 2 influence implementations on a trained 2-layer NN model.
@@ -412,7 +413,7 @@ def test_compare_implementations_random_model_and_data(
412413
delta,
413414
mode,
414415
unpack_inputs,
415-
use_gpu,
416+
gpu_setting,
416417
)
417418

418419
def _test_compare_implementations(
@@ -423,7 +424,7 @@ def _test_compare_implementations(
423424
delta: float,
424425
mode: str,
425426
unpack_inputs: bool,
426-
use_gpu: Union[bool, str],
427+
gpu_setting: Optional[str],
427428
) -> None:
428429
"""
429430
checks that 2 implementations of `InfluenceFunctionBase` return the same
@@ -444,13 +445,14 @@ def _test_compare_implementations(
444445
tmpdir,
445446
unpack_inputs,
446447
return_test_data=True,
447-
use_gpu=use_gpu,
448+
gpu_setting=gpu_setting,
448449
return_hessian_data=True,
449450
model_type=model_type,
450451
)
451452

452453
train_dataset = DataLoader(train_dataset, batch_size=5)
453454

455+
use_gpu = is_gpu(gpu_setting)
454456
hessian_dataset = (
455457
ExplicitDataset(hessian_samples, hessian_labels, use_gpu)
456458
if not unpack_inputs

tests/influence/_core/test_naive_influence.py

+9-7
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import tempfile
2-
from typing import Callable, List, Tuple, Union
2+
from typing import Callable, List, Optional, Tuple
33

44
import torch
55

@@ -21,9 +21,10 @@
2121
DataInfluenceConstructor,
2222
ExplicitDataset,
2323
get_random_model_and_data,
24+
GPU_SETTING_LIST,
25+
is_gpu,
2426
Linear,
2527
UnpackDataset,
26-
USE_GPU_LIST,
2728
)
2829
from torch.utils.data import DataLoader
2930

@@ -59,17 +60,17 @@ def test_flatten_unflattener(self, param_shapes: List[Tuple[int, ...]]) -> None:
5960
delta,
6061
mode,
6162
unpack_inputs,
62-
use_gpu,
63+
gpu_setting,
6364
)
6465
for reduction in ["none", "sum", "mean"]
65-
for use_gpu in USE_GPU_LIST
66+
for gpu_setting in GPU_SETTING_LIST
6667
for (influence_constructor, delta) in [
6768
(
6869
DataInfluenceConstructor(
6970
NaiveInfluenceFunction,
7071
layers=(
7172
["module.linear"]
72-
if use_gpu == "cuda_dataparallel"
73+
if gpu_setting == "cuda_dataparallel"
7374
else ["linear"]
7475
),
7576
projection_dim=None,
@@ -109,7 +110,7 @@ def test_matches_linear_regression(
109110
delta: float,
110111
mode: str,
111112
unpack_inputs: bool,
112-
use_gpu: Union[bool, str],
113+
gpu_setting: Optional[str],
113114
) -> None:
114115
"""
115116
this tests that `NaiveInfluence`, the simplest implementation, agree with the
@@ -129,13 +130,14 @@ def test_matches_linear_regression(
129130
tmpdir,
130131
unpack_inputs,
131132
return_test_data=True,
132-
use_gpu=use_gpu,
133+
gpu_setting=gpu_setting,
133134
return_hessian_data=True,
134135
model_type="trained_linear",
135136
)
136137

137138
train_dataset = DataLoader(train_dataset, batch_size=5)
138139

140+
use_gpu = is_gpu(gpu_setting)
139141
hessian_dataset = (
140142
ExplicitDataset(hessian_samples, hessian_labels, use_gpu)
141143
if not unpack_inputs

tests/influence/_core/test_tracin_k_most_influential.py

+9-14
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import tempfile
2-
from typing import Callable, Union
2+
from typing import Callable, Optional
33

44
import torch
55
import torch.nn as nn
@@ -13,22 +13,17 @@
1313
build_test_name_func,
1414
DataInfluenceConstructor,
1515
get_random_model_and_data,
16+
GPU_SETTING_LIST,
17+
is_gpu,
1618
)
1719

1820

1921
class TestTracInGetKMostInfluential(BaseTest):
20-
21-
use_gpu_list = (
22-
[False, "cuda", "cuda_data_parallel"]
23-
if torch.cuda.is_available() and torch.cuda.device_count() != 0
24-
else [False]
25-
)
26-
2722
param_list = []
2823
for batch_size, k in [(4, 7), (7, 4), (40, 5), (5, 40), (40, 45)]:
2924
for unpack_inputs in [True, False]:
3025
for proponents in [True, False]:
31-
for use_gpu in use_gpu_list:
26+
for gpu_setting in GPU_SETTING_LIST:
3227
for reduction, constr, aggregate in [
3328
(
3429
"none",
@@ -51,7 +46,7 @@ class TestTracInGetKMostInfluential(BaseTest):
5146
name="linear2",
5247
layers=(
5348
["module.linear2"]
54-
if use_gpu == "cuda_data_parallel"
49+
if gpu_setting == "cuda_data_parallel"
5550
else ["linear2"]
5651
),
5752
),
@@ -61,7 +56,7 @@ class TestTracInGetKMostInfluential(BaseTest):
6156
if not (
6257
"sample_wise_grads_per_batch" in constr.kwargs
6358
and constr.kwargs["sample_wise_grads_per_batch"]
64-
and use_gpu
59+
and is_gpu(gpu_setting)
6560
):
6661
param_list.append(
6762
(
@@ -71,7 +66,7 @@ class TestTracInGetKMostInfluential(BaseTest):
7166
proponents,
7267
batch_size,
7368
k,
74-
use_gpu,
69+
gpu_setting,
7570
aggregate,
7671
)
7772
)
@@ -88,7 +83,7 @@ def test_tracin_k_most_influential(
8883
proponents: bool,
8984
batch_size: int,
9085
k: int,
91-
use_gpu: Union[bool, str],
86+
gpu_setting: Optional[str],
9287
aggregate: bool,
9388
) -> None:
9489
"""
@@ -107,7 +102,7 @@ def test_tracin_k_most_influential(
107102
tmpdir,
108103
unpack_inputs,
109104
True,
110-
use_gpu,
105+
gpu_setting,
111106
)
112107

113108
self.assertTrue(isinstance(reduction, str))

0 commit comments

Comments
 (0)