diff --git a/main.py b/main.py
index 21e81352..413e5e29 100644
--- a/main.py
+++ b/main.py
@@ -1,4 +1,4 @@
-import argparse, os, sys, datetime, glob, importlib, csv
+import argparse, os, sys, datetime, glob
 import numpy as np
 import time
 import torch
@@ -7,13 +7,13 @@
 
 from packaging import version
 from omegaconf import OmegaConf
-from torch.utils.data import random_split, DataLoader, Dataset, Subset
+from torch.utils.data import DataLoader, Dataset
 from functools import partial
 from PIL import Image
 
 from pytorch_lightning import seed_everything
 from pytorch_lightning.trainer import Trainer
-from pytorch_lightning.callbacks import ModelCheckpoint, Callback, LearningRateMonitor
+from pytorch_lightning.callbacks import Callback
 from pytorch_lightning.utilities.distributed import rank_zero_only
 from pytorch_lightning.utilities import rank_zero_info
 
diff --git a/scripts/checker.py b/scripts/checker.py
index 8b94a5ba..884096ef 100644
--- a/scripts/checker.py
+++ b/scripts/checker.py
@@ -1,6 +1,5 @@
 import os
 import glob
-import subprocess
 import time
 
 import fire
diff --git a/scripts/img2img.py b/scripts/img2img.py
index 413b0e46..e81779ff 100644
--- a/scripts/img2img.py
+++ b/scripts/img2img.py
@@ -1,6 +1,6 @@
 """make variations of input image"""
 
-import argparse, os, sys, glob
+import argparse, os
 import PIL
 import torch
 import numpy as np
diff --git a/scripts/inpaint.py b/scripts/inpaint.py
index d6e6387a..ad280e45 100644
--- a/scripts/inpaint.py
+++ b/scripts/inpaint.py
@@ -1,4 +1,4 @@
-import argparse, os, sys, glob
+import argparse, os, glob
 from omegaconf import OmegaConf
 from PIL import Image
 from tqdm import tqdm
diff --git a/scripts/inpaint_sd.py b/scripts/inpaint_sd.py
index 568a0bc5..72af7a3f 100644
--- a/scripts/inpaint_sd.py
+++ b/scripts/inpaint_sd.py
@@ -1,4 +1,4 @@
-import argparse, os, sys, glob
+import argparse, os, glob
 from omegaconf import OmegaConf
 from PIL import Image
 from tqdm import tqdm
diff --git a/scripts/logging_template.py b/scripts/logging_template.py
index 2199e48d..67675048 100644
--- a/scripts/logging_template.py
+++ b/scripts/logging_template.py
@@ -3,14 +3,8 @@
 import numpy as np
 from omegaconf import OmegaConf
 import streamlit as st
-from streamlit import caching
-from PIL import Image
-from torch.utils.data import DataLoader
 from torch.utils.data.dataloader import default_collate
-import pytorch_lightning as pl
 from pytorch_lightning import seed_everything
-from pytorch_lightning.callbacks import Callback
-from pytorch_lightning.utilities.distributed import rank_zero_only
 from tqdm import tqdm
 import datetime
 
diff --git a/scripts/mnist-distributed.py b/scripts/mnist-distributed.py
index 2a28f78a..b3f72aaf 100644
--- a/scripts/mnist-distributed.py
+++ b/scripts/mnist-distributed.py
@@ -7,7 +7,6 @@
 import torch
 import torch.nn as nn
 import torch.distributed as dist
-from apex.parallel import DistributedDataParallel as DDP
 from apex import amp
 
 
diff --git a/scripts/test_gpu.py b/scripts/test_gpu.py
index 36d4e781..1570911e 100644
--- a/scripts/test_gpu.py
+++ b/scripts/test_gpu.py
@@ -1,5 +1,7 @@
 import socket
-try:
+
+
+def main() -> int:
     import torch
     n_gpus = torch.cuda.device_count()
     print(f"checking {n_gpus} gpus.")
@@ -16,7 +18,10 @@
     out = net(data)
     out.backward(torch.randn_like(out))
     torch.cuda.synchronize()
-except RuntimeError as err:
+    return 1
+
+
+def runtime_error_case(err: RuntimeError) -> None:
     import requests
     import datetime
     import os
@@ -26,5 +31,12 @@
     resp = requests.get('http://169.254.169.254/latest/meta-data/instance-id')
     print(f'ERROR at {ts} on {hostname}/{resp.text} (CUDA_VISIBLE_DEVICES={device}): {type(err).__name__}: {err}', flush=True)
     raise err
-else:
-    print(f"checked {socket.gethostname()}")
+
+
+if __name__ == '__main__':
+    try:
+        main()
+    except RuntimeError as err:
+        runtime_error_case(err)
+    else:
+        print(f"checked {socket.gethostname()}")
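Note on the scripts/test_gpu.py refactor: moving the body of the old except block into a module-level helper means the exception object is no longer implicitly in scope there, so the caller has to hand it over explicitly (hence runtime_error_case(err)). A minimal, self-contained sketch of that pattern, using hypothetical check_gpus/report_failure names rather than the patched file itself:

import socket


def check_gpus() -> None:
    # Hypothetical stand-in for the real GPU check in scripts/test_gpu.py;
    # raise RuntimeError here to exercise the failure path.
    pass


def report_failure(err: RuntimeError) -> None:
    # The exception is received as a parameter: a module-level helper cannot
    # see the `err` name bound by the caller's `except` clause, so the caller
    # must pass it in (mirroring runtime_error_case(err) in the patch).
    print(f"{socket.gethostname()}: {type(err).__name__}: {err}", flush=True)
    raise err


if __name__ == "__main__":
    try:
        check_gpus()
    except RuntimeError as err:
        report_failure(err)
    else:
        print(f"checked {socket.gethostname()}")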