Poor performance of ProjectedGradientDescentPyTorch on a simple model and dataset #2590

@Tuyki

Description

Describe the bug
The success rate of ProjectedGradientDescentPyTorch turns out to be fairly low (around 50%) on an almost linearly separable dataset with a simple classifier. Success here means that the attack increases the loss on the perturbed sample (see the loop at the end of the script below).

To Reproduce
Relevant dependencies:

Package                        Version
------------------------------ --------------
adversarial-robustness-toolbox 1.19.0
numpy                          2.2.1
scikit-learn                   1.6.0
scipy                          1.15.0
torch                          2.5.1
torchvision                    0.20.1

Code

import logging

import numpy as np
import matplotlib.pyplot as plt
import torch
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

from art.estimators.classification import PyTorchClassifier
from art.attacks.evasion import ProjectedGradientDescentPyTorch

class MLP(torch.nn.Module):
    def __init__(
        self, input_dim, hidden_dim, output_dim, output_activation=None
    ):
        super().__init__()
        self.fc1 = torch.nn.Linear(input_dim, hidden_dim)
        self.relu1 = torch.nn.Tanh()  # note: named "relu1" but actually a Tanh activation
        self.fc2 = torch.nn.Linear(hidden_dim, output_dim)
        self.output_activation = output_activation

    def forward(self, x):
        out = self.fc1(x)
        out = self.relu1(out)
        out = self.fc2(out)
        if self.output_activation is not None:
            out = self.output_activation(out)
        return out

def train(
    n_epochs,
    model,
    optimizer,
    criterion,
    train_dataloader,
    device,
):
    model.to(device)
    model.train()

    for epoch in range(n_epochs):
        epoch_loss = 0.0
        for _, data in enumerate(train_dataloader):
            inputs, labels = data
            optimizer.zero_grad()
            outputs = model(inputs.to(device))
            loss = criterion(outputs, labels.to(device))
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()

        epoch_loss = epoch_loss / len(train_dataloader)
        logging.info("Epoch %d Loss %f", epoch, epoch_loss)

    return model

np.random.seed(123)
torch.manual_seed(123)

x, y = make_classification(
    n_samples=1000,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_classes=2,
    n_clusters_per_class=1,
    random_state=37,
)

device = 'cpu'

x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=123
)
scaler = MinMaxScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

x_train = x_train.astype("float32")
x_test = x_test.astype("float32")

y_train = y_train[:, None]
y_test = y_test[:, None]

x_train, y_train, x_test, y_test = (
    torch.Tensor(z).to(device) for z in [x_train, y_train, x_test, y_test]
)

train_dataloader = torch.utils.data.DataLoader(
    torch.utils.data.TensorDataset(x_train, y_train),
    batch_size=32,
    shuffle=True,
)

plt.scatter(x_train[:, 0], x_train[:, 1], c=y_train.numpy().ravel(), alpha=0.5)
plt.show()

model = MLP(input_dim=2, hidden_dim=5, output_dim=1, output_activation=torch.sigmoid)

optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = torch.nn.BCELoss()

model = train(
    10,
    model,
    optimizer,
    criterion,
    train_dataloader,
    device
)
pred = model(x_test)

# clean accuracy of the trained model on the test set
print((y_test.numpy().reshape(-1) == (pred.detach().numpy().reshape(-1) > 0.5)).mean())

epsilon = 0.05
alpha = 0.001
steps = 1000

classifier = PyTorchClassifier(
    model=model,
    clip_values=(0, 1),
    loss=criterion,
    optimizer=optimizer,
    input_shape=(2,),
    nb_classes=2,
    device_type=device
)

attack = ProjectedGradientDescentPyTorch(
    estimator=classifier,
    norm=2,  # numeric 2; ART documents 1, 2, np.inf or "inf" rather than the string '2'
    eps=epsilon,
    eps_step=alpha,
    max_iter=steps,
    targeted=False,
    batch_size=8,
)
success = []

for _ in range(100):

    # draw one random test sample; single-bracket indexing keeps shape (1, 2)
    sample_idx = np.random.choice(x_test.shape[0], 1)

    sample_x = x_test[sample_idx]
    sample_y = y_test[sample_idx]

    benchmark_adv_x = attack.generate(x=sample_x.numpy())

    # count the attack as successful if it increased the loss on this sample
    success.append(
        (
            criterion(model(torch.tensor(benchmark_adv_x)), sample_y)
            > criterion(model(sample_x), sample_y)
        ).item()
    )

print(np.array(success).mean())
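
If success is instead defined as a flipped prediction (arguably the more common criterion for an evasion attack), the check inside the loop could be swapped for something like this sketch, reusing sample_x and benchmark_adv_x from above:

# alternative success criterion: count only predictions that actually flip
with torch.no_grad():
    clean_label = (model(sample_x) > 0.5).float()
    adv_label = (model(torch.tensor(benchmark_adv_x)) > 0.5).float()
success.append(bool((adv_label != clean_label).item()))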

Expected behavior
Even in the untargeted setting, PGD should in my opinion achieve a success rate well above 0.5. Since a vanilla PGD implementation achieved 1.0 with exactly the same configuration (a sketch of what I compared against follows below), I believe I am probably not using the API correctly.
I would very much appreciate any investigation, explanation, or support.
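
For reference, here is a minimal sketch of the kind of vanilla L2 PGD loop I compared against (my own reconstruction rather than the exact script; model, criterion, epsilon, alpha, and steps are assumed to be defined as above):

import torch

def vanilla_l2_pgd(model, criterion, x, y, eps, alpha, steps):
    """Untargeted L2 PGD: ascend the loss, then project back into the eps-ball."""
    x_orig = x.detach()
    x_adv = x_orig.clone()
    for _ in range(steps):
        x_adv.requires_grad_(True)
        loss = criterion(model(x_adv), y)
        grad = torch.autograd.grad(loss, x_adv)[0]
        with torch.no_grad():
            # step along the L2-normalized gradient
            grad_norm = grad.flatten(1).norm(dim=1).clamp_min(1e-12).view(-1, 1)
            x_adv = x_adv + alpha * grad / grad_norm
            # project back onto the L2 ball of radius eps around the original input
            delta = x_adv - x_orig
            delta_norm = delta.flatten(1).norm(dim=1).clamp_min(1e-12).view(-1, 1)
            x_adv = x_orig + delta * (eps / delta_norm).clamp(max=1.0)
            x_adv = x_adv.clamp(0, 1)  # respect the same clip_values as the classifier
    return x_adv.detach()

Something along these lines, used in place of attack.generate, reached the success rate of 1.0 in my comparison.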

System information (please complete the following information):

  • OS: Ubuntu
  • Python version: 3.11.5
  • ART version or commit number: 1.19.0
  • PyTorch: 2.5.1
