Skip to content

Commit 4ad17ec

Browse files
committed
distribute train
1 parent 30a50e4 commit 4ad17ec

1 file changed

Lines changed: 4 additions & 1 deletion

File tree

src/ml.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -441,6 +441,10 @@ class LSTMModel(TorchBaseModel):
441441
device = DEVICE
442442
# torch.nn.CrossEntropyLoss()
443443

444+
def __init__(self, dataset: pd.DataFrame):
445+
super().__init__(dataset)
446+
self.dataset = DistributedSampler(self.dataset)
447+
444448
@property
445449
def get_metrics(self) -> dict:
446450
for k, v in self.model.state_dict().items():
@@ -482,7 +486,6 @@ def load_checkpoint(self):
482486

483487
self.model = self.model.cuda(local_rank)
484488
self.model = nn.parallel.DistributedDataParallel(self.model, device_ids=[local_rank], output_device=local_rank,find_unused_parameters=True)
485-
self.dataset = DistributedSampler(self.dataset)
486489
self.optimizer = torch.optim.Adam(self.model.parameters(), lr=self.lr)
487490
self.scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
488491
self.optimizer, mode="min", factor=0.5, patience=2

0 commit comments

Comments (0)