Skip to content

update previous py file #24

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions tour_model_eval/data_preprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,7 @@ def read_data(user):

# - trips: all trips read from database
# - filter_trips: valid trips that have user labels and are not points
def filter_data(user,radius):
trips = read_data(user)
def filter_data(trips,radius):
non_empty_trips = [t for t in trips if t["data"]["user_input"] != {}]
non_empty_trips_df = pd.DataFrame(t["data"]["user_input"] for t in non_empty_trips)
valid_trips_df = non_empty_trips_df.dropna(axis=0, how='any', thresh=None, subset=None, inplace=False)
Expand All @@ -23,7 +22,7 @@ def filter_data(user,radius):

# the similarity module filters out trips in valid_trips that are too short (i.e., points)
filter_trips = similarity.filter_too_short(valid_trips, radius)
return filter_trips,trips
return filter_trips


# use KFold (n_splits=5) to split the data into 5 models (5 training sets, 5 test sets)
Expand Down
1 change: 1 addition & 0 deletions tour_model_eval/get_request_percentage.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,4 +131,5 @@ def get_req_pct(new_labels,track,filter_trips,sim):
new_bins = label_pro.group_similar_trips(new_labels,track)
req_trips = get_requested_trips(new_bins,filter_trips,sim)
pct = len(req_trips)/len(filter_trips)
pct = float('%.3f' % pct)
return pct
1 change: 1 addition & 0 deletions tour_model_eval/get_scores.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,4 +68,5 @@ def score(bin_trips, labels_pred):

labels_pred = labels_pred
homo_score = skm.homogeneity_score(labels_true, labels_pred)
homo_score = float('%.3f' % homo_score)
return homo_score
3 changes: 2 additions & 1 deletion tour_model_eval/get_users.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ def get_user_ls(all_users,radius):
for i in range(len(all_users)):
curr_user = 'user' + str(i + 1)
user = all_users[i]
filter_trips,trips = preprocess.filter_data(user,radius)
trips = preprocess.read_data(user)
filter_trips = preprocess.filter_data(trips,radius)
if valid_user(filter_trips,trips):
valid_user_ls.append(curr_user)
user_ls.append(curr_user)
Expand Down
4 changes: 2 additions & 2 deletions tour_model_eval/label_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ def map_labels(user_input_df):
# - dist_pct: the percentage of the last distance in the dendrogram
# - sch.fcluster: form clusters from the hierarchical clustering defined by the given linkage matrix
# e.g., if last_d = 10000, dist_pct = 0.4, max_d = 4000, clusters will be assigned at the distance of 4000
# - clusters: the labels from the second round clustering
def get_second_labels(x,method,low,dist_pct):
z = sch.linkage(x, method=method, metric='euclidean')
last_d = z[-1][2]
Expand All @@ -80,8 +81,7 @@ def get_second_labels(x,method,low,dist_pct):
# (e.g. label from first round is 1, label from second round is 2, the new label will be 12)
# - second_round_idx_labels: a list to store the indices and labels from the first round.
# - second_labels: labels from the second round of clustering
def get_new_labels(x,low,dist_pct,second_round_idx_labels,new_labels,method=None):
second_labels = get_second_labels(x,method,low,dist_pct)
def get_new_labels(second_labels,second_round_idx_labels,new_labels):
for i in range(len(second_labels)):
first_index = second_round_idx_labels[i][0]
new_label = second_round_idx_labels[i][1]
Expand Down