Skip to content

Commit 1d03478

Browse files
Scriptify spurious class flag alerts (#102)
* create spurious_classifications_alerter script that finds potentially affected projects * update parameters for flagging projects with potentially spurious classifications; for each flagged project, keep track of the days that had high classification counts * Update spurious_classifications_alerter.rb * Update spurious_classifications_alerter.rb * Update spurious_classifications_alerter.rb * Update spurious_classifications_alerter.rb
1 parent ac37b40 commit 1d03478

1 file changed

Lines changed: 58 additions & 0 deletions

File tree

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
# rubocop:disable Layout/LineLength
2+
# frozen_string_literal: true
3+
4+
require '../config/environment'
5+
require 'json'
6+
7+
# Project-level thresholds. A pair of days for a project is flagged as potentially containing
# spurious classifications when BOTH of the following hold:
#   - classification_rate (absolute difference in classification counts / days apart) is at least
#     5000 classifications per day, and
#   - percentage_diff (absolute difference relative to the earlier day's count) is at least 50%.
8+
PROJECT_SPURIOUS_CLASSIFICATION_RATE_LOWER_BOUND = 5_000
9+
PERCENTAGE_DIFF_THRESHOLD = 50
10+
11+
# User-level thresholds. A row of daily_user_classification_count_and_time_per_project is flagged
# when its rate (classification_count / total_session_time) is at least
# USER_CLASSIFICATION_RATE_LOWER_BOUND and its classification_count is at least
# USER_CLASSIFICATION_COUNT_THRESHOLD.
# NOTE(review): the unit of total_session_time is not visible in this script (presumably seconds,
# making the rate "classifications per second") - confirm against the table definition.
USER_CLASSIFICATION_RATE_LOWER_BOUND = 3
12+
USER_CLASSIFICATION_COUNT_THRESHOLD = 1_000
13+
14+
puts 'Querying diffs to flag potential affected projects...'
15+
# Self-joins daily_classification_count_per_project on project_id so that every row pairs an
# earlier day (record1, no older than CURRENT_DATE - 7 days) with a later day (record2, on or after
# CURRENT_DATE - 2 days and strictly before CURRENT_DATE) of the same project. Only pairs where
# both daily counts are non-NULL and greater than 1000 are returned.
#
# Columns consumed later in this script:
#   day1 / day_compare             - the earlier / later day of the pair
#   project_id                     - selected from both aliases; the join makes the values equal
#   day1_count / day_compare_count - classification counts on those days
#   percentage_diff                - |day_compare_count - day1_count| / day1_count * 100
#   classification_rate            - |day_compare_count - day1_count| / days between the two days
#
# Rows come back ordered by classification_rate, highest first.
# NOTE(review): `extract(day from record2.day - record1.day)` presumably relies on `day` being a
# timestamp (so the subtraction yields an interval) - confirm the column type.
projects_weekly_classifications_history = ActiveRecord::Base.connection.exec_query("SELECT
16+
record1.day as day1,
17+
record2.day as day_compare,
18+
record1.project_id,
19+
record2.project_id,
20+
record1.classification_count as day1_count,
21+
record2.classification_count as day_compare_count,
22+
abs(cast(record2.classification_count - record1.classification_count as float) / record1.classification_count) * 100 as percentage_diff,
23+
abs(cast(record2.classification_count - record1.classification_count as float) / extract(day from record2.day - record1.day)) as classification_rate
24+
FROM
25+
daily_classification_count_per_project AS record1
26+
INNER JOIN
27+
daily_classification_count_per_project AS record2 ON record1.project_id = record2.project_id
28+
WHERE
29+
record1.classification_count IS NOT NULL AND record2.classification_count IS NOT NULL and record1.day < record2.day and record1.day >= (CURRENT_DATE - INTERVAL '7 days') and record2.day >= CURRENT_DATE - INTERVAL '2 days' and record2.day < CURRENT_DATE and record1.classification_count > 1000 and record2.classification_count > 1000 order by classification_rate desc;")
30+
31+
# Maps each flagged project id to the dates ('YYYY-MM-DD') of its high-classification days.
# A day pair is considered only when it meets both project-level thresholds; the date recorded is
# that of whichever day in the pair has the larger classification count.
flagged_project_id_to_high_classifying_dates = Hash.new { |dates_by_project, project_id| dates_by_project[project_id] = [] }

projects_weekly_classifications_history.each do |row|
  meets_thresholds = row['classification_rate'] >= PROJECT_SPURIOUS_CLASSIFICATION_RATE_LOWER_BOUND &&
                     row['percentage_diff'] >= PERCENTAGE_DIFF_THRESHOLD
  next unless meets_thresholds

  # Pick the column holding the busier day; equal counts single out neither day.
  high_day_column =
    case row['day1_count'] <=> row['day_compare_count']
    when 1 then 'day1'
    when -1 then 'day_compare'
    end
  next unless high_day_column

  flagged_project_id_to_high_classifying_dates[row['project_id']] << row[high_day_column].strftime('%Y-%m-%d')
end

puts 'Potential Affected Project IDs...'
puts flagged_project_id_to_high_classifying_dates.keys
44+
45+
puts 'Finding Potential Spurious Classifiers for each Project...'

# Per-user classification rate for one project ($1) on a set of days ($2, a Postgres array literal).
# NULLIF turns a zero total_session_time into NULL so the division yields a NULL rate instead of
# raising a "division by zero" error that would abort the whole script; NULLS LAST keeps those
# unusable rows from sorting ahead of real rates.
user_rates_sql = <<~SQL
  SELECT *, cast(classification_count as float) / NULLIF(total_session_time, 0) AS rate
  FROM daily_user_classification_count_and_time_per_project
  WHERE project_id = $1 AND day = ANY($2)
  ORDER BY rate DESC NULLS LAST
SQL

# For every flagged project, collect the users whose rate and classification count on the
# project's high-classification days both reach the user-level thresholds.
flagged_user_ids = flagged_project_id_to_high_classifying_dates.flat_map do |proj_id, dates|
  # The same date can be recorded several times for a project; send each one only once.
  date_array_literal = "{#{dates.uniq.join(',')}}"
  user_rates_for_proj = ActiveRecord::Base.connection.exec_query(user_rates_sql, 'SQL', [proj_id, date_array_literal])

  suspicious_rows = user_rates_for_proj.select do |user_rate|
    rate = user_rate['rate']
    # A nil rate means total_session_time was NULL or 0, so no rate can be computed; skip the row
    # rather than crash on `nil >= ...`.
    rate && rate >= USER_CLASSIFICATION_RATE_LOWER_BOUND && user_rate['classification_count'] >= USER_CLASSIFICATION_COUNT_THRESHOLD
  end

  suspicious_rows.map { |user_rate| user_rate['user_id'] }
end

# A user can qualify on several days and/or projects; report each id once, in first-seen order.
users_to_flag = flagged_user_ids.uniq

puts 'Flagged Users...'
puts users_to_flag

0 commit comments

Comments
 (0)