Skip to content

Commit 42d5e27

Browse files
authored
Merge pull request #292 from microsoft/siyu/mdv5_release
Update pytorch_detector.py and instructions to set up env for MDv5.
2 parents 518ab74 + 83e9a98 commit 42d5e27

22 files changed

+485
-4441
lines changed

1.9.0

Whitespace-only changes.

api/batch_processing/data_preparation/manage_local_batch.py

+168-5
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222

2323
from api.batch_processing.postprocessing.postprocess_batch_results import (
2424
PostProcessingOptions, process_batch_results)
25+
from detection.run_detector import get_detector_version_from_filename
2526

2627
max_task_name_length = 92
2728

@@ -44,21 +45,30 @@
4445
#%% Constants I set per script
4546

4647
input_path = os.path.expanduser('~/data/organization/2021-12-24')
48+
4749
organization_name_short = 'organization'
50+
job_date = date.today().strftime('%Y-%m-%d')
51+
# job_date = '2022-01-01'
52+
53+
model_file = os.path.expanduser('~/models/camera_traps/megadetector/md_v5.0.0/md_v5a.0.0.pt')
54+
# model_file = os.path.expanduser('~/models/camera_traps/megadetector/md_v5.0.0/md_v5b.0.0.pt')
55+
# model_file = os.path.expanduser('~/models/camera_traps/megadetector/md_v4.1.0/md_v4.1.0.pb')
4856

49-
model_file = os.path.expanduser('~/models/camera_traps/megadetector/md_v4.1.0/md_v4.1.0.pb')
5057
postprocessing_base = os.path.expanduser('~/postprocessing')
5158

5259
# Number of jobs to split data into, typically equal to the number of available GPUs
5360
n_jobs = 2
5461
n_gpus = 2
5562

5663
# Only used to print out a time estimate
57-
gpu_images_per_second = 2.9
64+
if ('v5') in model_file:
65+
gpu_images_per_second = 10
66+
else:
67+
gpu_images_per_second = 2.9
5868

5969
checkpoint_frequency = 10000
6070

61-
base_task_name = organization_name_short + '-' + date.today().strftime('%Y-%m-%d')
71+
base_task_name = organization_name_short + '-' + job_date + '-' + get_detector_version_from_filename(model_file)
6272
base_output_folder_name = os.path.join(postprocessing_base,organization_name_short)
6373
os.makedirs(base_output_folder_name,exist_ok=True)
6474

@@ -596,10 +606,10 @@ def remove_overflow_folders(relativePath):
596606
image_size_str = '300'
597607
batch_size_str = '64'
598608
num_workers_str = '8'
599-
logdir = filename_base
600-
601609
classification_threshold_str = '0.05'
602610

611+
logdir = filename_base
612+
603613
# This is just passed along to the metadata in the output file; it has no impact
604614
# on how the classification scripts run.
605615
typical_classification_threshold_str = '0.75'
@@ -754,6 +764,159 @@ def remove_overflow_folders(relativePath):
754764
os.chmod(output_file, st.st_mode | stat.S_IEXEC)
755765

756766

767+
#%% Run a non-MegaClassifier classifier (i.e., a classifier with no output mapping)
768+
769+
classifier_name_short = 'idfgclassifier'
770+
threshold_str = '0.1' # 0.6
771+
classifier_name = 'idfg_classifier_ckpt_14_compiled'
772+
773+
organization_name = organization_name_short
774+
job_name = base_task_name
775+
input_filename = filtered_output_filename # combined_api_output_file
776+
input_files = [input_filename]
777+
image_base = input_path
778+
crop_path = os.path.join(os.path.expanduser('~/crops'),job_name + '_crops')
779+
output_base = combined_api_output_folder
780+
device_id = 0
781+
782+
output_file = os.path.join(filename_base,'run_{}_'.format(classifier_name_short) + job_name + '.sh')
783+
784+
classifier_base = os.path.expanduser('~/models/camera_traps/idfg_classifier/idfg_classifier_20200905_042558')
785+
assert os.path.isdir(classifier_base)
786+
787+
checkpoint_path = os.path.join(classifier_base,'idfg_classifier_ckpt_14_compiled.pt')
788+
assert os.path.isfile(checkpoint_path)
789+
790+
classifier_categories_path = os.path.join(classifier_base,'label_index.json')
791+
assert os.path.isfile(classifier_categories_path)
792+
793+
classifier_output_suffix = '_{}_output.csv.gz'.format(classifier_name_short)
794+
final_output_suffix = '_{}.json'.format(classifier_name_short)
795+
796+
threshold_str = '0.65'
797+
n_threads_str = '50'
798+
image_size_str = '300'
799+
batch_size_str = '64'
800+
num_workers_str = '8'
801+
logdir = filename_base
802+
803+
classification_threshold_str = '0.05'
804+
805+
# This is just passed along to the metadata in the output file; it has no impact
806+
# on how the classification scripts run.
807+
typical_classification_threshold_str = '0.75'
808+
809+
810+
##%% Set up environment
811+
812+
commands = []
813+
814+
815+
##%% Crop images
816+
817+
commands.append('\n### Cropping ###\n')
818+
819+
# fn = input_files[0]
820+
for fn in input_files:
821+
822+
input_file_path = fn
823+
crop_cmd = ''
824+
825+
crop_comment = '\n# Cropping {}\n'.format(fn)
826+
crop_cmd += crop_comment
827+
828+
crop_cmd += "python crop_detections.py \\\n" + \
829+
input_file_path + ' \\\n' + \
830+
crop_path + ' \\\n' + \
831+
'--images-dir "' + image_base + '"' + ' \\\n' + \
832+
'--threshold "' + threshold_str + '"' + ' \\\n' + \
833+
'--square-crops ' + ' \\\n' + \
834+
'--threads "' + n_threads_str + '"' + ' \\\n' + \
835+
'--logdir "' + logdir + '"' + ' \\\n' + \
836+
'\n'
837+
crop_cmd = '{}'.format(crop_cmd)
838+
commands.append(crop_cmd)
839+
840+
841+
##%% Run classifier
842+
843+
commands.append('\n### Classifying ###\n')
844+
845+
# fn = input_files[0]
846+
for fn in input_files:
847+
848+
input_file_path = fn
849+
classifier_output_path = crop_path + classifier_output_suffix
850+
851+
classify_cmd = ''
852+
853+
classify_comment = '\n# Classifying {}\n'.format(fn)
854+
classify_cmd += classify_comment
855+
856+
classify_cmd += "python run_classifier.py \\\n" + \
857+
checkpoint_path + ' \\\n' + \
858+
crop_path + ' \\\n' + \
859+
classifier_output_path + ' \\\n' + \
860+
'--detections-json "' + input_file_path + '"' + ' \\\n' + \
861+
'--classifier-categories "' + classifier_categories_path + '"' + ' \\\n' + \
862+
'--image-size "' + image_size_str + '"' + ' \\\n' + \
863+
'--batch-size "' + batch_size_str + '"' + ' \\\n' + \
864+
'--num-workers "' + num_workers_str + '"' + ' \\\n'
865+
866+
if device_id is not None:
867+
classify_cmd += '--device {}'.format(device_id)
868+
869+
classify_cmd += '\n\n'
870+
classify_cmd = '{}'.format(classify_cmd)
871+
commands.append(classify_cmd)
872+
873+
874+
##%% Merge classification and detection outputs
875+
876+
commands.append('\n### Merging ###\n')
877+
878+
# fn = input_files[0]
879+
for fn in input_files:
880+
881+
input_file_path = fn
882+
classifier_output_path = crop_path + classifier_output_suffix
883+
final_output_path = os.path.join(output_base,
884+
os.path.basename(classifier_output_path)).\
885+
replace(classifier_output_suffix,
886+
final_output_suffix)
887+
final_output_path = final_output_path.replace('_detections','')
888+
final_output_path = final_output_path.replace('_crops','')
889+
final_output_path_ic = final_output_path
890+
891+
merge_cmd = ''
892+
893+
merge_comment = '\n# Merging {}\n'.format(fn)
894+
merge_cmd += merge_comment
895+
896+
merge_cmd += "python merge_classification_detection_output.py \\\n" + \
897+
classifier_output_path + ' \\\n' + \
898+
classifier_categories_path + ' \\\n' + \
899+
'--output-json "' + final_output_path_ic + '"' + ' \\\n' + \
900+
'--detection-json "' + input_file_path + '"' + ' \\\n' + \
901+
'--classifier-name "' + classifier_name + '"' + ' \\\n' + \
902+
'--threshold "' + classification_threshold_str + '"' + ' \\\n' + \
903+
'--typical-confidence-threshold "' + typical_classification_threshold_str + '"' + ' \\\n' + \
904+
'\n'
905+
merge_cmd = '{}'.format(merge_cmd)
906+
commands.append(merge_cmd)
907+
908+
909+
##%% Write everything out
910+
911+
with open(output_file,'w') as f:
912+
for s in commands:
913+
f.write('{}'.format(s))
914+
915+
import stat
916+
st = os.stat(output_file)
917+
os.chmod(output_file, st.st_mode | stat.S_IEXEC)
918+
919+
757920
#%% Create a new category for large boxes
758921

759922
from api.batch_processing.postprocessing import categorize_detections_by_size

benchmark/README.md

-4
This file was deleted.

0 commit comments

Comments
 (0)