Skip to content

In MoSI, training on HMDB51 based on the pre-trained checkpoint you provided cannot reproduce the results #21

@TJQdoIt9527

Description

@TJQdoIt9527

The backbone is r2p1d, and the results after training, evaluated on the test list you provided, are as follows (decord=0.4.0, with TEST_SCALE=112 added, using the pre-trained checkpoint you provided):

[09/08 15:44:59][INFO] tadaconv.utils.checkpoint: 492: Load from the last checkpoint file: output/r2p1d_mosi_ft_hmdb_autor/checkpoints/checkpoint_epoch_00300.pyth
[09/08 15:44:59][INFO] tadaconv.datasets.base.hmdb51: 37: Reading video list from file: hmdb51_test_list.txt
[09/08 15:44:59][INFO] tadaconv.datasets.base.base_dataset: 172: Loading HMDB51 dataset list for split 'test'...
[09/08 15:44:59][INFO] tadaconv.datasets.base.base_dataset: 197: Dataset HMDB51 split test loaded. Length 15300.
[09/08 15:44:59][INFO] test: 215: Testing model for 55 iterations
[09/08 15:45:55][INFO] tadaconv.utils.logging: 89: {"cur_iter": "5", "eta": "0:00:27", "split": "test_iter", "time_diff": 0.534744}
[09/08 15:45:57][INFO] tadaconv.utils.logging: 89: {"cur_iter": "10", "eta": "0:00:22", "split": "test_iter", "time_diff": 0.498246}
[09/08 15:46:23][INFO] tadaconv.utils.logging: 89: {"cur_iter": "15", "eta": "0:00:21", "split": "test_iter", "time_diff": 0.513669}
[09/08 15:46:26][INFO] tadaconv.utils.logging: 89: {"cur_iter": "20", "eta": "0:00:18", "split": "test_iter", "time_diff": 0.519889}
[09/08 15:46:45][INFO] tadaconv.utils.logging: 89: {"cur_iter": "25", "eta": "0:08:40", "split": "test_iter", "time_diff": 16.778454}
[09/08 15:46:52][INFO] tadaconv.utils.logging: 89: {"cur_iter": "30", "eta": "0:00:12", "split": "test_iter", "time_diff": 0.497483}
[09/08 15:46:54][INFO] tadaconv.utils.logging: 89: {"cur_iter": "35", "eta": "0:00:11", "split": "test_iter", "time_diff": 0.560603}
[09/08 15:47:17][INFO] tadaconv.utils.logging: 89: {"cur_iter": "40", "eta": "0:00:08", "split": "test_iter", "time_diff": 0.508289}
[09/08 15:47:20][INFO] tadaconv.utils.logging: 89: {"cur_iter": "45", "eta": "0:00:05", "split": "test_iter", "time_diff": 0.509557}
[09/08 15:47:40][INFO] tadaconv.utils.logging: 89: {"cur_iter": "50", "eta": "0:00:08", "split": "test_iter", "time_diff": 1.383271}
[09/08 15:47:43][INFO] tadaconv.utils.logging: 89: {"cur_iter": "55", "eta": "0:00:00", "split": "test_iter", "time_diff": 0.338351}
[09/08 15:47:44][INFO] tadaconv.utils.logging: 89: {"split": "test_final", "top1_acc": "40.00", "top5_acc": "69.87"}

I have found that both the pre-train checkpoint and the fine-tuned checkpoint I trained myself are much larger than the checkpoint files you provided: my two files are 165M each, while the two files you provided are both just over 50M.

Here are my training configurations:

[09/08 12:41:12][INFO] train: 336: Train with config:
[09/08 12:41:12][INFO] train: 337: {
"TASK_TYPE": "classification",
"PRETRAIN": {
"ENABLE": false
},
"LOCALIZATION": {
"ENABLE": false
},
"TRAIN": {
"ENABLE": true,
"DATASET": "HMDB51",
"BATCH_SIZE": 280,
"LOG_FILE": "training_log.log",
"EVAL_PERIOD": 5,
"NUM_FOLDS": 30,
"AUTO_RESUME": true,
"CHECKPOINT_PERIOD": 10,
"INIT": "",
"CHECKPOINT_FILE_PATH": "/home/lzh/2022/tjq/TAdaConv/checkpoint/r2p1d_pt_hmdb_mosi_public.pyth",
"CHECKPOINT_TYPE": "pytorch",
"CHECKPOINT_INFLATE": false,
"CHECKPOINT_PRE_PROCESS": {
"ENABLE": false
},
"FINE_TUNE": true,
"ONLY_LINEAR": false,
"LR_REDUCE": false,
"TRAIN_VAL_COMBINE": false,
"LOSS_FUNC": "cross_entropy"
},
"TEST": {
"ENABLE": true,
"DATASET": "HMDB51",
"BATCH_SIZE": 280,
"NUM_SPATIAL_CROPS": 1,
"SPATIAL_CROPS": "cc",
"NUM_ENSEMBLE_VIEWS": 1,
"LOG_FILE": "val.log",
"CHECKPOINT_FILE_PATH": "",
"CHECKPOINT_TYPE": "pytorch",
"AUTOMATIC_MULTI_SCALE_TEST": true
},
"VISUALIZATION": {
"ENABLE": false,
"NAME": "",
"FEATURE_MAPS": {
"ENABLE": false,
"BASE_OUTPUT_DIR": ""
}
},
"SUBMISSION": {
"ENABLE": false,
"SAVE_RESULTS_PATH": "test.json"
},
"DATA": {
"DATA_ROOT_DIR": "/data/hmdb51/videos/",
"ANNO_DIR": "/data1/hmdb51_annotations/hmdb51/",
"NUM_INPUT_FRAMES": 16,
"NUM_INPUT_CHANNELS": 3,
"SAMPLING_MODE": "interval_based",
"SAMPLING_RATE": 4,
"TRAIN_JITTER_SCALES": [
168,
224
],
"TRAIN_CROP_SIZE": 112,
"TEST_SCALE": 112,
"TEST_CROP_SIZE": 112,
"MEAN": [
0.45,
0.45,
0.45
],
"STD": [
0.225,
0.225,
0.225
],
"MULTI_LABEL": false,
"ENSEMBLE_METHOD": "sum",
"TARGET_FPS": 30,
"MINUS_INTERVAL": false,
"FPS": 30
},
"MODEL": {
"NAME": "R2Plus1D",
"EMA": {
"ENABLE": false,
"DECAY": 0.99996
}
},
"VIDEO": {
"BACKBONE": {
"DEPTH": 10,
"META_ARCH": "ResNet3D",
"NUM_FILTERS": [
64,
64,
128,
256,
512
],
"NUM_INPUT_CHANNELS": 3,
"NUM_OUT_FEATURES": 512,
"KERNEL_SIZE": [
[
3,
7,
7
],
[
3,
3,
3
],
[
3,
3,
3
],
[
3,
3,
3
],
[
3,
3,
3
]
],
"DOWNSAMPLING": [
true,
false,
true,
true,
true
],
"DOWNSAMPLING_TEMPORAL": [
false,
false,
true,
true,
true
],
"NUM_STREAMS": 1,
"EXPANSION_RATIO": 2,
"BRANCH": {
"NAME": "R2Plus1DBranch"
},
"STEM": {
"NAME": "R2Plus1DStem"
},
"NONLOCAL": {
"ENABLE": false,
"STAGES": [
5
],
"MASK_ENABLE": false
},
"INITIALIZATION": null
},
"HEAD": {
"NAME": "BaseHead",
"ACTIVATION": "softmax",
"DROPOUT_RATE": 0.5,
"NUM_CLASSES": 51
}
},
"OPTIMIZER": {
"ADJUST_LR": false,
"BASE_LR": 0.00075,
"LR_POLICY": "cosine",
"MAX_EPOCH": 300,
"MOMENTUM": 0.9,
"WEIGHT_DECAY": "1e-3",
"WARMUP_EPOCHS": 10,
"WARMUP_START_LR": 7.5e-05,
"OPTIM_METHOD": "adam",
"DAMPENING": 0.0,
"NESTEROV": true,
"BIAS_DOUBLE": false,
"NEW_PARAMS": [],
"NEW_PARAMS_MULT": 10,
"NEW_PARAMS_WD_MULT": 1,
"LAYER_WISE_LR_DECAY": 1.0,
"COSINE_AFTER_WARMUP": false,
"COSINE_END_LR": "1e-6"
},
"BN": {
"WB_LOCK": false,
"FREEZE": false,
"WEIGHT_DECAY": 0.0,
"MOMENTUM": 0.1,
"EPS": "1e-3",
"SYNC": false
},
"DATA_LOADER": {
"NUM_WORKERS": 12,
"PIN_MEMORY": false,
"ENABLE_MULTI_THREAD_DECODE": true,
"COLLATE_FN": null
},
"NUM_GPUS": 4,
"SHARD_ID": 0,
"NUM_SHARDS": 1,
"RANDOM_SEED": 0,
"OUTPUT_DIR": "output/r2p1d_mosi_ft_hmdb_autor",
"OUTPUT_CFG_FILE": "configuration.log",
"LOG_PERIOD": 10,
"DIST_BACKEND": "nccl",
"LOG_MODEL_INFO": true,
"LOG_CONFIG_INFO": true,
"OSS": {
"ENABLE": false,
"KEY": null,
"SECRET": null,
"ENDPOINT": null,
"CHECKPOINT_OUTPUT_PATH": null,
"SECONDARY_DATA_OSS": {
"ENABLE": false,
"KEY": null,
"SECRET": null,
"ENDPOINT": null,
"BUCKETS": [
""
]
}
},
"AUGMENTATION": {
"COLOR_AUG": true,
"BRIGHTNESS": 0.5,
"CONTRAST": 0.5,
"SATURATION": 0.5,
"HUE": 0.25,
"GRAYSCALE": 0.3,
"CONSISTENT": true,
"SHUFFLE": true,
"GRAY_FIRST": true,
"RATIO": [
0.857142857142857,
1.1666666666666667
],
"USE_GPU": false,
"MIXUP": {
"ENABLE": false,
"ALPHA": 0.0,
"PROB": 1.0,
"MODE": "batch",
"SWITCH_PROB": 0.5
},
"CUTMIX": {
"ENABLE": false,
"ALPHA": 0.0,
"MINMAX": null
},
"RANDOM_ERASING": {
"ENABLE": false,
"PROB": 0.25,
"MODE": "const",
"COUNT": [
1,
1
],
"NUM_SPLITS": 0,
"AREA_RANGE": [
0.02,
0.33
],
"MIN_ASPECT": 0.3
},
"LABEL_SMOOTHING": 0.0,
"SSV2_FLIP": false,
"COLOR_P": 0.0,
"AUTOAUGMENT": {
"ENABLE": true,
"BEFORE_CROP": true,
"TYPE": "rand-m9-n4-mstd0.5-inc1"
}
},
"PAI": false,
"USE_MULTISEG_VAL_DIST": false
}
I can't find the problem in my training configuration. Could you provide your configuration file, or give me a hint?
Looking forward to your reply — thank you very much.

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions