-
Notifications
You must be signed in to change notification settings - Fork 1.3k
Closed
Description
Describe the bug
The success rate of ProjectedGradientDescentPyTorch turns out to be fairly low (around 50%) on an almost linearly separable dataset with a simple classifier.
To Reproduce
Relevant dependencies:
Package Version Editable project location
------------------------------ -------------- -------------------------------------------------------------------
adversarial-robustness-toolbox 1.19.0
numpy 2.2.1
scikit-learn 1.6.0
scipy 1.15.0
torch 2.5.1
torchvision 0.20.1
Code
import logging
import numpy as np
import matplotlib.pyplot as plt
import torch
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from art.estimators.classification import PyTorchClassifier
from art.attacks.evasion import ProjectedGradientDescentPyTorch
class MLP(torch.nn.Module):
    """Perceptron with one hidden layer and a tanh hidden activation.

    Args:
        input_dim: Number of input features.
        hidden_dim: Number of units in the hidden layer.
        output_dim: Number of output units.
        output_activation: Optional callable applied to the output of the
            last linear layer (for example ``torch.sigmoid``). When ``None``
            the output of ``fc2`` is returned unchanged.
    """

    def __init__(
        self, input_dim, hidden_dim, output_dim, output_activation=None
    ):
        super().__init__()
        self.fc1 = torch.nn.Linear(input_dim, hidden_dim)
        # NOTE: despite its name this attribute holds a Tanh, not a ReLU.
        # The name is kept so existing attribute access keeps working.
        self.relu1 = torch.nn.Tanh()
        self.fc2 = torch.nn.Linear(hidden_dim, output_dim)
        self.output_activation = output_activation

    def forward(self, x):
        """Return ``fc2(tanh(fc1(x)))``, passed through the optional output activation."""
        out = self.fc1(x)
        out = self.relu1(out)
        out = self.fc2(out)
        if self.output_activation is not None:
            out = self.output_activation(out)
        return out
def train(
    n_epochs,
    model,
    optimizer,
    criterion,
    train_dataloader,
    device,
):
    """Train ``model`` in place for ``n_epochs`` passes over ``train_dataloader``.

    Args:
        n_epochs: Number of passes over ``train_dataloader``.
        model: Module to optimise; moved to ``device`` and put in train mode.
        optimizer: Optimizer stepped once per batch.
        criterion: Callable ``criterion(outputs, labels)`` returning the loss.
        train_dataloader: Iterable yielding ``(inputs, labels)`` batches.
        device: Device the model and every batch are moved to.

    Returns:
        The same ``model`` object after training.
    """
    model.to(device)
    model.train()
    for epoch in range(n_epochs):
        total_loss = 0.0
        # Count batches while iterating instead of calling len(): this avoids
        # a ZeroDivisionError on an empty loader and works for iterables
        # that do not define __len__.
        n_batches = 0
        for inputs, labels in train_dataloader:
            optimizer.zero_grad()
            outputs = model(inputs.to(device))
            loss = criterion(outputs, labels.to(device))
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            n_batches += 1
        if n_batches == 0:
            logging.warning("Epoch %d: train_dataloader yielded no batches", epoch)
            continue
        logging.info("Epoch %d Loss %f", epoch, total_loss / n_batches)
    return model
# --- Reproducibility -------------------------------------------------------
np.random.seed(123)
torch.manual_seed(123)

device = 'cpu'

# --- Synthetic two-feature, two-class dataset ------------------------------
x, y = make_classification(
    n_samples=1000,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_classes=2,
    n_clusters_per_class=1,
    random_state=37,
)
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=123
)

# The scaler is fitted on the training split only and then reused for the
# test split; features are cast to float32 afterwards.
scaler = MinMaxScaler()
x_train = scaler.fit_transform(x_train).astype("float32")
x_test = scaler.transform(x_test).astype("float32")

# Labels become column vectors of shape (n_samples, 1).
y_train = y_train[:, None]
y_test = y_test[:, None]

# --- Convert every array to a tensor on the chosen device ------------------
x_train = torch.Tensor(x_train).to(device)
y_train = torch.Tensor(y_train).to(device)
x_test = torch.Tensor(x_test).to(device)
y_test = torch.Tensor(y_test).to(device)

train_dataloader = torch.utils.data.DataLoader(
    torch.utils.data.TensorDataset(x_train, y_train),
    batch_size=32,
    shuffle=True,
)

# Scatter plot of the training split, coloured by label.
plt.scatter(x_train[:, 0], x_train[:, 1], c=y_train, alpha=.5)
plt.show()
# Binary classifier: one sigmoid output unit trained with binary cross-entropy.
model = MLP(input_dim=2, hidden_dim=5, output_dim=1, output_activation=torch.sigmoid)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = torch.nn.BCELoss()
model = train(
    10,
    model,
    optimizer,
    criterion,
    train_dataloader,
    device,
)

# Clean test accuracy, thresholding the sigmoid output at 0.5.
# No autograd graph is needed for evaluation.
with torch.no_grad():
    pred = model(x_test)
test_accuracy = (
    y_test.numpy().reshape(-1) == (pred.numpy().reshape(-1) > .5)
).mean()
# The original bare expression discarded this value when run as a script.
print("Clean test accuracy:", test_accuracy)
# Attack budget: L2 radius, per-iteration step size, and iteration count.
epsilon = 0.05
alpha = 0.001
steps = 1000

classifier = PyTorchClassifier(
    model=model,
    clip_values=(0, 1),
    loss=criterion,
    optimizer=optimizer,
    input_shape=(2,),
    nb_classes=2,
    device_type=device,
)
attack = ProjectedGradientDescentPyTorch(
    estimator=classifier,
    norm=2,  # numeric norm order rather than the string '2'
    eps=epsilon,
    eps_step=alpha,
    max_iter=steps,
    targeted=False,
    batch_size=8,
)

# Success is counted when the attack increases the loss with respect to the
# TRUE label, so the attack must be driven by that same label.
# NOTE(review): without `y`, ART derives the labels from the classifier's own
# predictions. With a single-output model those presumably cannot represent
# both classes, which would explain the ~50% success rate. Confirm against
# the ART `generate` documentation.
success = []
for _ in range(100):
    sample_idx = np.random.choice(x_test.shape[0], 1)
    # Index with the (1,)-shaped array directly; results have shape (1, 2)
    # and (1, 1).
    sample_x = x_test[sample_idx]
    sample_y = y_test[sample_idx]
    benchmark_adv_x = attack.generate(x=sample_x.numpy(), y=sample_y.numpy())
    with torch.no_grad():
        adv_loss = criterion(
            model(torch.tensor(benchmark_adv_x, device=device)), sample_y
        )
        clean_loss = criterion(model(sample_x), sample_y)
    success.append(bool(adv_loss > clean_loss))
print(np.array(success).mean())
Expected behavior
Even when untargeted, PGD should, in my opinion, achieve a success rate well above 0.5. Since a vanilla implementation achieved 1.0 with exactly the same configuration, I suspect that I am not using the API correctly.
I would very much appreciate any investigation, explanation, or support.
System information (please complete the following information):
- OS: Ubuntu
- Python version: 3.11.5
- ART version or commit number: 1.19.0
- PyTorch: 2.5.1
Metadata
Metadata
Assignees
Labels
No labels