-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathincremental_clarif_expt.py
executable file
·68 lines (56 loc) · 3.83 KB
/
incremental_clarif_expt.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# -*- coding: utf-8 -*-
"""
Experiments with uploading very small amounts of data for learning.
Created on Fri Jan 19 22:51:54 2018
@author: lambert.wixson
"""
from clarifai import rest
from clarifai.rest import ClarifaiApp
from clarifai.rest import Image as ClImage
from skimage.io import imread_collection, imread
#%%
# Module-level Clarifai client handed to upload() by the experiment calls in this file.
# The key below is a placeholder; substitute your own API key before running.
app = ClarifaiApp(api_key="REPLACETHISWITHYOUROWN")
#%%
def upload(folder: str, num_to_skip, num_to_load, app, concepts):
    """Upload a contiguous run of the files matching a pattern, tagged with concepts.

    Files are taken in the order given by the collection's ``files`` list.
    Each uploaded file is echoed to stdout as ``"<index>: <filename>"``, where
    the index is the file's position in that list.

    Args:
        folder: Load pattern passed straight to ``imread_collection``. The
            callers in this file pass a glob such as ``".../yaw01/*.png"``.
        num_to_skip: Number of leading files to skip before uploading.
        num_to_load: Maximum number of files to upload after the skipped
            ones. Fewer are uploaded if the collection runs out first.
        app: Client object exposing ``inputs.create_image_from_filename``
            (a ``ClarifaiApp`` instance in this file).
        concepts: Concept names attached to every uploaded image.

    Returns:
        The image collection created from ``folder``.
    """
    coll = imread_collection(folder, conserve_memory=True, check_files=False)

    # Only the file names are needed. Iterating the collection itself would
    # read and decode every image from disk -- including all the skipped
    # ones -- so walk the name list instead.
    # The clamping keeps negative arguments from turning into "count from the
    # end" slice semantics: a negative skip starts at 0, and a non-positive
    # range uploads nothing.
    first = max(num_to_skip, 0)
    last = max(num_to_skip + num_to_load, first)
    for i, filename in enumerate(coll.files[first:last], start=first):
        print("{0}: {1}".format(i, filename))
        app.inputs.create_image_from_filename(filename, concepts=concepts)
    return coll
#%%
if False:
    # Lab notebook: learning from very small amounts of uploaded data.
    # The block is disabled so that importing/running the file does not
    # re-upload anything; the calls are kept as a record of what was done.

    # Round 1: the first 100 images for each of the four yaw classes.
    coll0 = upload(
        "C:/nobak/kaggle_carvana/project_front_vs_back/train-color/yaw01/*.png",
        0, 100, app, ["yaw-0"],
    )
    coll180 = upload(
        "C:/nobak/kaggle_carvana/project_front_vs_back/train-color/yaw09/*.png",
        0, 100, app, ["yaw-180"],
    )
    coll90 = upload(
        "C:/nobak/kaggle_carvana/project_front_vs_back/train-color/yaw05/*.png",
        0, 100, app, ["yaw-90"],
    )
    coll270 = upload(
        "C:/nobak/kaggle_carvana/project_front_vs_back/train-color/yaw13/*.png",
        0, 100, app, ["yaw-270"],
    )
    # Result: after training, 0 and 180 were classified well, but 90 vs 270
    # was little better than chance. ROC AUC was .858 for 270 and .845 for 90.

    # Round 2: upload 400 more images each for 90 and 270 (files 100-499),
    # bringing the total to 500 per class.
    coll270 = upload(
        "C:/nobak/kaggle_carvana/project_front_vs_back/train-color/yaw13/*.png",
        100, 400, app, ["yaw-270"],
    )
    coll90 = upload(
        "C:/nobak/kaggle_carvana/project_front_vs_back/train-color/yaw05/*.png",
        100, 400, app, ["yaw-90"],
    )
    # Result: the stats improved -- AUC was .903 for 270 and .900 for 90 --
    # but there are still plenty of errors.

    # Round 3: another 500 each (files 500-999), for 1000 per class.
    coll90 = upload(
        "C:/nobak/kaggle_carvana/project_front_vs_back/train-color/yaw05/*.png",
        500, 500, app, ["yaw-90"],
    )
    coll270 = upload(
        "C:/nobak/kaggle_carvana/project_front_vs_back/train-color/yaw13/*.png",
        500, 500, app, ["yaw-270"],
    )
    # Result: with 1000 examples each for 90 and 270, ROC improved to .937 and
    # .936 respectively. Still, the probability of a wrong classification
    # given that yaw-270 is predicted is .282, and given that yaw-90 is
    # predicted it is .214. So the predictor still has roughly a 25% chance
    # of being wrong. That's not great.

    # Round 4: try adding another 1000 each (files 1000-1999), for 2000 per class.
    coll90 = upload(
        "C:/nobak/kaggle_carvana/project_front_vs_back/train-color/yaw05/*.png",
        1000, 1000, app, ["yaw-90"],
    )
    coll270 = upload(
        "C:/nobak/kaggle_carvana/project_front_vs_back/train-color/yaw13/*.png",
        1000, 1000, app, ["yaw-270"],
    )
    # Result: with 2000 samples of each, the ROC is .953 for both 90 and 270.
    # The probability of a wrong classification given that yaw-270 is
    # predicted is .229, and given that yaw-90 is predicted it is .163.
    # Some improvement, but still not great. Needs more data still.

    # Round 5: try loading another 2000 samples each (files 2000-3999).
    # The uploads failed on image 2160 of coll90, because I exceeded my total
    # of 5000 operations for the month. Apparently each image upload counts
    # as an operation. I'll have to try again after 2/11/18, I guess.
    coll90 = upload(
        "C:/nobak/kaggle_carvana/project_front_vs_back/train-color/yaw05/*.png",
        2000, 2000, app, ["yaw-90"],
    )
    coll270 = upload(
        "C:/nobak/kaggle_carvana/project_front_vs_back/train-color/yaw13/*.png",
        2000, 2000, app, ["yaw-270"],
    )
    # foo = app.inputs.create_image_from_filename(coll0.files[0], concepts=["yaw-0"])