Skip to content

Commit a8777c6

Browse files
committed
Make backups in CleanInconsistencies migration
1 parent a57e571 commit a8777c6

1 file changed

Lines changed: 74 additions & 44 deletions

File tree

db/migrate/20160720185407_clean_inconsistencies.rb

Lines changed: 74 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,34 @@
1+
require 'fileutils'
2+
13
class CleanInconsistencies < ActiveRecord::Migration
2-
def self.up
4+
# Returns the directory that backup CSV files are written to, creating it on
# first use. The path is db/backup/<timestamp> under Rails.root and is
# memoized, so every backup made through this migration instance lands in
# the same directory.
#
# Always returns the Pathname itself. FileUtils.mkdir_p returns an Array of
# String paths, so its result must not be used as this method's return value
# (otherwise the first caller gets an Array instead of a Pathname).
def backup_dir
  @backup_dir ||= begin
    dir = Rails.root.join('db', 'backup', Time.now.strftime('%Y-%m-%d_%H-%M-%S'))
    FileUtils.mkdir_p(dir)
    dir
  end
end
9+
10+
# Streams the result of +query+ as CSV into "<name>.csv" inside backup_dir,
# using a "COPY (...) TO STDOUT" statement on the raw database connection.
#
# name   - base name of the CSV file (without extension).
# query  - SQL SELECT whose rows are exported.
# header - when true (the default), the COPY statement includes HEADER.
#
# The file is opened in append mode, so calling this again with the same
# name adds rows to the existing file instead of overwriting it.
def backup(name, query, header: true)
  say_with_time("backup(#{name.inspect}, #{query.inspect})") do
    header_clause = header ? 'HEADER' : ''
    copy_sql = "COPY (#{query}) TO STDOUT WITH DELIMITER ',' CSV #{header_clause}"
    csv_path = backup_dir.join("#{name}.csv")
    raw = connection.raw_connection

    File.open(csv_path, 'a') do |csv|
      raw.copy_data(copy_sql) do
        # Pull chunks until the connection signals there is no more data.
        loop do
          chunk = raw.get_copy_data
          break unless chunk

          csv.write(chunk)
        end
      end
    end
  end
end
23+
24+
# Backs up, then deletes, every row of +table+ for which +exists_query+
# returns no rows.
#
# table        - name of the table to clean (it is wrapped in double quotes in
#                the generated SQL).
# exists_query - SQL sub-select placed inside NOT EXISTS(...); it may refer
#                to columns of +table+ by the table's name.
#
# The same FROM/WHERE clause is used for both statements, so the rows that
# get backed up are selected by the same condition as the rows deleted.
def backup_and_delete_missing(table, exists_query)
  missing_clause = %(FROM "#{table}" WHERE NOT EXISTS(#{exists_query}))

  backup(table, "SELECT * #{missing_clause}")
  execute("DELETE #{missing_clause}")
end
28+
29+
def up
30+
say "WARNING: destructive migration necessary. Deleted data will be backed up to #{backup_dir}"
31+
332
# Unset project reference for repositories with non-existing projects
433
execute <<-SQL
534
UPDATE repositories AS r
@@ -10,61 +39,62 @@ def self.up
1039
SQL
1140

1241
# Delete processings with non-existing repositories
13-
execute <<-SQL
14-
DELETE FROM processings AS p
15-
WHERE NOT EXISTS(
16-
SELECT 1 FROM repositories AS r WHERE r.id = p.repository_id
17-
)
18-
SQL
42+
backup_and_delete_missing("processings",
43+
"SELECT 1 FROM repositories AS r WHERE r.id = processings.repository_id")
1944

2045
# Delete process times with non-existing processings
21-
execute <<-SQL
22-
DELETE FROM process_times AS t
23-
WHERE NOT EXISTS (
24-
SELECT 1 FROM processings AS p WHERE p.id = t.processing_id
25-
)
26-
SQL
46+
backup_and_delete_missing("process_times",
47+
"SELECT 1 FROM processings AS p WHERE p.id = process_times.processing_id")
2748

2849
# Delete module results with non-existing processings
29-
execute <<-SQL
30-
DELETE FROM module_results AS m
31-
WHERE NOT EXISTS (
32-
SELECT 1 FROM processings AS p WHERE p.id = m.processing_id
33-
)
34-
SQL
50+
backup_and_delete_missing("module_results",
51+
"SELECT 1 FROM processings AS p WHERE p.id = module_results.processing_id")
3552

3653
# Delete kalibro modules with non-existing module results
54+
backup_and_delete_missing("kalibro_modules",
55+
"SELECT 1 FROM module_results AS m WHERE m.id = kalibro_modules.module_result_id")
56+
57+
# Fix up metric results type, even before backing up so the backup is cleaner
3758
execute <<-SQL
38-
DELETE FROM kalibro_modules AS k
39-
WHERE NOT EXISTS (
40-
SELECT 1 FROM module_results AS m WHERE m.id = k.module_result_id
41-
)
59+
UPDATE metric_results SET "type" = 'TreeMetricResult' WHERE "type" = 'MetricResult'
4260
SQL
4361

4462
# Delete metric results with non-existing module results
45-
execute <<-SQL
46-
DELETE FROM metric_results AS met
47-
WHERE NOT EXISTS (
48-
SELECT 1 FROM module_results AS mod WHERE mod.id = met.module_result_id
49-
)
50-
SQL
63+
backup_and_delete_missing("metric_results",
64+
"SELECT 1 FROM module_results AS m WHERE m.id = metric_results.module_result_id")
5165

52-
# Delete duplicate metric_results. Group them by (module_result, metric_configuration),
53-
# then delete all but the one with the highest ID
54-
# The double wrapping on the inner query is necessary because window functions
55-
# cannot be used in WHERE in PostgreSQL.
56-
execute <<-SQL
57-
DELETE FROM metric_results
58-
WHERE id IN (
59-
SELECT t.id FROM (
60-
SELECT id, ROW_NUMBER() OVER (PARTITION BY module_result_id, metric_configuration_id, "type"
61-
ORDER BY id DESC) AS rnum
62-
FROM metric_results
63-
WHERE "type" = 'TreeMetricResult'
64-
) AS t
65-
WHERE t.rnum > 1
66-
)
66+
# Delete duplicate metric_results. Group them by (module_result_id, metric_configuration_id),
67+
# then delete all but the one with the highest ID. The double wrapping on the inner query is
68+
# necessary because window functions cannot be used in WHERE in PostgreSQL.
69+
repeated_metric_result_query = exec_query <<-SQL
70+
SELECT t.id FROM (
71+
SELECT metric_results.*, ROW_NUMBER() OVER (
72+
PARTITION BY module_result_id, metric_configuration_id, "type"
73+
ORDER BY id DESC) AS rnum
74+
FROM metric_results
75+
WHERE "type" = 'TreeMetricResult'
76+
) AS t
77+
WHERE t.rnum > 1
6778
SQL
79+
80+
unless repeated_metric_result_query.empty?
81+
repeated_metric_result_ids = repeated_metric_result_query.rows.flat_map(&:first).join(',')
82+
83+
# Replace default messages with custom ones to avoid flooding the screen with the huge query
84+
say_with_time('backup("metric_results", "SELECT * metric_results WHERE id IN (...)")') do
85+
suppress_messages do
86+
backup('metric_results',
87+
"SELECT * FROM metric_results WHERE id IN (#{repeated_metric_result_ids})",
88+
header: false)
89+
end
90+
end
91+
92+
say_with_time('execute("DELETE FROM metric_results WHERE id IN (...)")') do
93+
suppress_messages do
94+
execute "DELETE FROM metric_results WHERE id IN (#{repeated_metric_result_ids})"
95+
end
96+
end
97+
end
6898
end
6999

70100
def self.down

0 commit comments

Comments
 (0)