diff --git a/browse_out/video_0.mp4 b/browse_out/video_0.mp4
new file mode 100644
index 0000000000..ff1c6aedce
Binary files /dev/null and b/browse_out/video_0.mp4 differ
diff --git a/browse_out/video_1.mp4 b/browse_out/video_1.mp4
new file mode 100644
index 0000000000..b6604a2c8f
Binary files /dev/null and b/browse_out/video_1.mp4 differ
diff --git a/browse_out/video_10.mp4 b/browse_out/video_10.mp4
new file mode 100644
index 0000000000..1e434a7701
Binary files /dev/null and b/browse_out/video_10.mp4 differ
diff --git a/browse_out/video_11.mp4 b/browse_out/video_11.mp4
new file mode 100644
index 0000000000..bfae69987d
Binary files /dev/null and b/browse_out/video_11.mp4 differ
diff --git a/browse_out/video_12.mp4 b/browse_out/video_12.mp4
new file mode 100644
index 0000000000..9af6811832
Binary files /dev/null and b/browse_out/video_12.mp4 differ
diff --git a/browse_out/video_13.mp4 b/browse_out/video_13.mp4
new file mode 100644
index 0000000000..bdef59fa6b
Binary files /dev/null and b/browse_out/video_13.mp4 differ
diff --git a/browse_out/video_14.mp4 b/browse_out/video_14.mp4
new file mode 100644
index 0000000000..0f931e47fc
Binary files /dev/null and b/browse_out/video_14.mp4 differ
diff --git a/browse_out/video_15.mp4 b/browse_out/video_15.mp4
new file mode 100644
index 0000000000..bc3d295394
Binary files /dev/null and b/browse_out/video_15.mp4 differ
diff --git a/browse_out/video_16.mp4 b/browse_out/video_16.mp4
new file mode 100644
index 0000000000..7ee8fb1d42
Binary files /dev/null and b/browse_out/video_16.mp4 differ
diff --git a/browse_out/video_17.mp4 b/browse_out/video_17.mp4
new file mode 100644
index 0000000000..f14b46df87
Binary files /dev/null and b/browse_out/video_17.mp4 differ
diff --git a/browse_out/video_18.mp4 b/browse_out/video_18.mp4
new file mode 100644
index 0000000000..2c3f26d5d5
Binary files /dev/null and b/browse_out/video_18.mp4 differ
diff --git a/browse_out/video_19.mp4 b/browse_out/video_19.mp4
new file mode 100644
index 0000000000..3b187f0292
Binary files /dev/null and b/browse_out/video_19.mp4 differ
diff --git a/browse_out/video_2.mp4 b/browse_out/video_2.mp4
new file mode 100644
index 0000000000..9b54bc6f0e
Binary files /dev/null and b/browse_out/video_2.mp4 differ
diff --git a/browse_out/video_20.mp4 b/browse_out/video_20.mp4
new file mode 100644
index 0000000000..4e75b2da8a
Binary files /dev/null and b/browse_out/video_20.mp4 differ
diff --git a/browse_out/video_21.mp4 b/browse_out/video_21.mp4
new file mode 100644
index 0000000000..03d0197138
Binary files /dev/null and b/browse_out/video_21.mp4 differ
diff --git a/browse_out/video_22.mp4 b/browse_out/video_22.mp4
new file mode 100644
index 0000000000..4e83561570
Binary files /dev/null and b/browse_out/video_22.mp4 differ
diff --git a/browse_out/video_23.mp4 b/browse_out/video_23.mp4
new file mode 100644
index 0000000000..6a028660cb
Binary files /dev/null and b/browse_out/video_23.mp4 differ
diff --git a/browse_out/video_24.mp4 b/browse_out/video_24.mp4
new file mode 100644
index 0000000000..8e5d178a5b
Binary files /dev/null and b/browse_out/video_24.mp4 differ
diff --git a/browse_out/video_25.mp4 b/browse_out/video_25.mp4
new file mode 100644
index 0000000000..d631eb8489
Binary files /dev/null and b/browse_out/video_25.mp4 differ
diff --git a/browse_out/video_26.mp4 b/browse_out/video_26.mp4
new file mode 100644
index 0000000000..b99c5f9101
Binary files /dev/null and b/browse_out/video_26.mp4 differ
diff --git a/browse_out/video_27.mp4 b/browse_out/video_27.mp4
new file mode 100644
index 0000000000..cecc786915
Binary files /dev/null and b/browse_out/video_27.mp4 differ
diff --git a/browse_out/video_28.mp4 b/browse_out/video_28.mp4
new file mode 100644
index 0000000000..511aa09f4f
Binary files /dev/null and b/browse_out/video_28.mp4 differ
diff --git a/browse_out/video_29.mp4 b/browse_out/video_29.mp4
new file mode 100644
index 0000000000..b339257db2
Binary files /dev/null and b/browse_out/video_29.mp4 differ
diff --git a/browse_out/video_3.mp4 b/browse_out/video_3.mp4
new file mode 100644
index 0000000000..91388cfd15
Binary files /dev/null and b/browse_out/video_3.mp4 differ
diff --git a/browse_out/video_4.mp4 b/browse_out/video_4.mp4
new file mode 100644
index 0000000000..73e748b818
Binary files /dev/null and b/browse_out/video_4.mp4 differ
diff --git a/browse_out/video_5.mp4 b/browse_out/video_5.mp4
new file mode 100644
index 0000000000..317fc8b092
Binary files /dev/null and b/browse_out/video_5.mp4 differ
diff --git a/browse_out/video_6.mp4 b/browse_out/video_6.mp4
new file mode 100644
index 0000000000..fe105eb569
Binary files /dev/null and b/browse_out/video_6.mp4 differ
diff --git a/browse_out/video_7.mp4 b/browse_out/video_7.mp4
new file mode 100644
index 0000000000..826e436c5a
Binary files /dev/null and b/browse_out/video_7.mp4 differ
diff --git a/browse_out/video_8.mp4 b/browse_out/video_8.mp4
new file mode 100644
index 0000000000..ca153b3bca
Binary files /dev/null and b/browse_out/video_8.mp4 differ
diff --git a/browse_out/video_9.mp4 b/browse_out/video_9.mp4
new file mode 100644
index 0000000000..c651724370
Binary files /dev/null and b/browse_out/video_9.mp4 differ
diff --git a/configs/recognition/tsn/tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb.py b/configs/recognition/tsn/tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb.py
index 3bea4b9ca7..af20912f7f 100644
--- a/configs/recognition/tsn/tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb.py
+++ b/configs/recognition/tsn/tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb.py
@@ -5,10 +5,10 @@
 
 # dataset settings
 dataset_type = 'VideoDataset'
-data_root = 'data/kinetics400/videos_train'
-data_root_val = 'data/kinetics400/videos_val'
-ann_file_train = 'data/kinetics400/kinetics400_train_list_videos.txt'
-ann_file_val = 'data/kinetics400/kinetics400_val_list_videos.txt'
+data_root = 'data/kinetics400_tiny/train'
+data_root_val = 'data/kinetics400_tiny/val'
+ann_file_train = 'data/kinetics400_tiny/kinetics_tiny_train_video.txt'
+ann_file_val = 'data/kinetics400_tiny/kinetics_tiny_val_video.txt'
 
 file_client_args = dict(io_backend='disk')
 
@@ -93,8 +93,26 @@
 val_evaluator = dict(type='AccMetric')
 test_evaluator = val_evaluator
 
-default_hooks = dict(checkpoint=dict(interval=3, max_keep_ckpts=3))
-
+# set training batch size to 4
+train_dataloader['batch_size'] = 4
+# Save checkpoints every epoch, and only keep the latest checkpoint
+default_hooks = dict(
+    checkpoint=dict(type='CheckpointHook', interval=1, max_keep_ckpts=1))
+# Set the maximum number of epochs to 10, and validate the model every 1 epochs
+train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=10, val_interval=1)
+# adjust learning rate schedule according to 10 epochs
+param_scheduler = [
+    dict(
+        type='MultiStepLR',
+        begin=0,
+        end=10,
+        by_epoch=True,
+        milestones=[4, 8],
+        gamma=0.1)
+]
+model = dict(
+    cls_head=dict(num_classes=2))
+load_from = 'https://download.openmmlab.com/mmaction/v1.0/recognition/tsn/tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb/tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb_20220906-cd10898e.pth'
 # Default setting for scaling LR automatically
 #   - `enable` means enable scaling LR automatically
 #       or not by default.
diff --git a/kinetics400_tiny.zip b/kinetics400_tiny.zip
new file mode 100644
index 0000000000..19cb4d3799
Binary files /dev/null and b/kinetics400_tiny.zip differ
diff --git a/tsn_imagenet-pretrained-r50_8xb32-1x1x8-100e_kinetics400-rgb.py b/tsn_imagenet-pretrained-r50_8xb32-1x1x8-100e_kinetics400-rgb.py
new file mode 100644
index 0000000000..c7b2089dc7
--- /dev/null
+++ b/tsn_imagenet-pretrained-r50_8xb32-1x1x8-100e_kinetics400-rgb.py
@@ -0,0 +1,237 @@
+ann_file_train = 'data/kinetics400/kinetics400_train_list_videos.txt'
+ann_file_val = 'data/kinetics400/kinetics400_val_list_videos.txt'
+auto_scale_lr = dict(base_batch_size=256, enable=False)
+data_root = 'data/kinetics400/videos_train'
+data_root_val = 'data/kinetics400/videos_val'
+dataset_type = 'VideoDataset'
+default_hooks = dict(
+    checkpoint=dict(
+        interval=3, max_keep_ckpts=3, save_best='auto', type='CheckpointHook'),
+    logger=dict(ignore_last=False, interval=20, type='LoggerHook'),
+    param_scheduler=dict(type='ParamSchedulerHook'),
+    runtime_info=dict(type='RuntimeInfoHook'),
+    sampler_seed=dict(type='DistSamplerSeedHook'),
+    sync_buffers=dict(type='SyncBuffersHook'),
+    timer=dict(type='IterTimerHook'))
+default_scope = 'mmaction'
+env_cfg = dict(
+    cudnn_benchmark=False,
+    dist_cfg=dict(backend='nccl'),
+    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0))
+file_client_args = dict(io_backend='disk')
+load_from = None
+log_level = 'INFO'
+log_processor = dict(by_epoch=True, type='LogProcessor', window_size=20)
+model = dict(
+    backbone=dict(
+        depth=50,
+        norm_eval=False,
+        pretrained='https://download.pytorch.org/models/resnet50-11ad3fa6.pth',
+        type='ResNet'),
+    cls_head=dict(
+        average_clips='prob',
+        consensus=dict(dim=1, type='AvgConsensus'),
+        dropout_ratio=0.4,
+        in_channels=2048,
+        init_std=0.01,
+        num_classes=400,
+        spatial_type='avg',
+        type='TSNHead'),
+    data_preprocessor=dict(
+        format_shape='NCHW',
+        mean=[
+            123.675,
+            116.28,
+            103.53,
+        ],
+        std=[
+            58.395,
+            57.12,
+            57.375,
+        ],
+        type='ActionDataPreprocessor'),
+    test_cfg=None,
+    train_cfg=None,
+    type='Recognizer2D')
+optim_wrapper = dict(
+    clip_grad=dict(max_norm=40, norm_type=2),
+    optimizer=dict(lr=0.01, momentum=0.9, type='SGD', weight_decay=0.0001))
+param_scheduler = [
+    dict(
+        begin=0,
+        by_epoch=True,
+        end=100,
+        gamma=0.1,
+        milestones=[
+            40,
+            80,
+        ],
+        type='MultiStepLR'),
+]
+resume = False
+test_cfg = dict(type='TestLoop')
+test_dataloader = dict(
+    batch_size=1,
+    dataset=dict(
+        ann_file='data/kinetics400/kinetics400_val_list_videos.txt',
+        data_prefix=dict(video='data/kinetics400/videos_val'),
+        pipeline=[
+            dict(io_backend='disk', type='DecordInit'),
+            dict(
+                clip_len=1,
+                frame_interval=1,
+                num_clips=25,
+                test_mode=True,
+                type='SampleFrames'),
+            dict(type='DecordDecode'),
+            dict(scale=(
+                -1,
+                256,
+            ), type='Resize'),
+            dict(crop_size=224, type='TenCrop'),
+            dict(input_format='NCHW', type='FormatShape'),
+            dict(type='PackActionInputs'),
+        ],
+        test_mode=True,
+        type='VideoDataset'),
+    num_workers=8,
+    persistent_workers=True,
+    sampler=dict(shuffle=False, type='DefaultSampler'))
+test_evaluator = dict(type='AccMetric')
+test_pipeline = [
+    dict(io_backend='disk', type='DecordInit'),
+    dict(
+        clip_len=1,
+        frame_interval=1,
+        num_clips=25,
+        test_mode=True,
+        type='SampleFrames'),
+    dict(type='DecordDecode'),
+    dict(scale=(
+        -1,
+        256,
+    ), type='Resize'),
+    dict(crop_size=224, type='TenCrop'),
+    dict(input_format='NCHW', type='FormatShape'),
+    dict(type='PackActionInputs'),
+]
+train_cfg = dict(
+    max_epochs=100, type='EpochBasedTrainLoop', val_begin=1, val_interval=1)
+train_dataloader = dict(
+    batch_size=32,
+    dataset=dict(
+        ann_file='data/kinetics400/kinetics400_train_list_videos.txt',
+        data_prefix=dict(video='data/kinetics400/videos_train'),
+        pipeline=[
+            dict(io_backend='disk', type='DecordInit'),
+            dict(
+                clip_len=1, frame_interval=1, num_clips=8,
+                type='SampleFrames'),
+            dict(type='DecordDecode'),
+            dict(scale=(
+                -1,
+                256,
+            ), type='Resize'),
+            dict(
+                input_size=224,
+                max_wh_scale_gap=1,
+                random_crop=False,
+                scales=(
+                    1,
+                    0.875,
+                    0.75,
+                    0.66,
+                ),
+                type='MultiScaleCrop'),
+            dict(keep_ratio=False, scale=(
+                224,
+                224,
+            ), type='Resize'),
+            dict(flip_ratio=0.5, type='Flip'),
+            dict(input_format='NCHW', type='FormatShape'),
+            dict(type='PackActionInputs'),
+        ],
+        type='VideoDataset'),
+    num_workers=8,
+    persistent_workers=True,
+    sampler=dict(shuffle=True, type='DefaultSampler'))
+train_pipeline = [
+    dict(io_backend='disk', type='DecordInit'),
+    dict(clip_len=1, frame_interval=1, num_clips=8, type='SampleFrames'),
+    dict(type='DecordDecode'),
+    dict(scale=(
+        -1,
+        256,
+    ), type='Resize'),
+    dict(
+        input_size=224,
+        max_wh_scale_gap=1,
+        random_crop=False,
+        scales=(
+            1,
+            0.875,
+            0.75,
+            0.66,
+        ),
+        type='MultiScaleCrop'),
+    dict(keep_ratio=False, scale=(
+        224,
+        224,
+    ), type='Resize'),
+    dict(flip_ratio=0.5, type='Flip'),
+    dict(input_format='NCHW', type='FormatShape'),
+    dict(type='PackActionInputs'),
+]
+val_cfg = dict(type='ValLoop')
+val_dataloader = dict(
+    batch_size=32,
+    dataset=dict(
+        ann_file='data/kinetics400/kinetics400_val_list_videos.txt',
+        data_prefix=dict(video='data/kinetics400/videos_val'),
+        pipeline=[
+            dict(io_backend='disk', type='DecordInit'),
+            dict(
+                clip_len=1,
+                frame_interval=1,
+                num_clips=8,
+                test_mode=True,
+                type='SampleFrames'),
+            dict(type='DecordDecode'),
+            dict(scale=(
+                -1,
+                256,
+            ), type='Resize'),
+            dict(crop_size=224, type='CenterCrop'),
+            dict(input_format='NCHW', type='FormatShape'),
+            dict(type='PackActionInputs'),
+        ],
+        test_mode=True,
+        type='VideoDataset'),
+    num_workers=8,
+    persistent_workers=True,
+    sampler=dict(shuffle=False, type='DefaultSampler'))
+val_evaluator = dict(type='AccMetric')
+val_pipeline = [
+    dict(io_backend='disk', type='DecordInit'),
+    dict(
+        clip_len=1,
+        frame_interval=1,
+        num_clips=8,
+        test_mode=True,
+        type='SampleFrames'),
+    dict(type='DecordDecode'),
+    dict(scale=(
+        -1,
+        256,
+    ), type='Resize'),
+    dict(crop_size=224, type='CenterCrop'),
+    dict(input_format='NCHW', type='FormatShape'),
+    dict(type='PackActionInputs'),
+]
+vis_backends = [
+    dict(type='LocalVisBackend'),
+]
+visualizer = dict(
+    type='ActionVisualizer', vis_backends=[
+        dict(type='LocalVisBackend'),
+    ])
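
Note (not part of the diff): a minimal sketch of how the modified kinetics400_tiny config above might be launched for fine-tuning, assuming a standard MMAction2 + MMEngine installation, the kinetics400_tiny.zip archive extracted under data/, and a hypothetical work_dir path:

from mmengine.config import Config
from mmengine.runner import Runner

# Load the edited config; _base_ inheritance is resolved by Config.fromfile.
cfg = Config.fromfile(
    'configs/recognition/tsn/'
    'tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb.py')
# Hypothetical output directory for checkpoints and logs.
cfg.work_dir = './work_dirs/tsn_kinetics400_tiny'
runner = Runner.from_cfg(cfg)
runner.train()

The same config file can also be passed to MMAction2's tools/train.py entry point on the command line instead of building the Runner by hand.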
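The new top-level file tsn_imagenet-pretrained-r50_8xb32-1x1x8-100e_kinetics400-rgb.py is a fully expanded copy of the stock 1x1x8 TSN config, with values from its _base_ files inlined and keys sorted. One plausible way such a flattened dump could be regenerated (an assumption, not taken from this diff; exact formatting may differ) is MMEngine's Config.dump:

from mmengine.config import Config

# Resolve _base_ inheritance, then write a self-contained copy of the config.
cfg = Config.fromfile(
    'configs/recognition/tsn/'
    'tsn_imagenet-pretrained-r50_8xb32-1x1x8-100e_kinetics400-rgb.py')
cfg.dump('tsn_imagenet-pretrained-r50_8xb32-1x1x8-100e_kinetics400-rgb.py')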