1+ require 'fileutils'
2+
13class CleanInconsistencies < ActiveRecord ::Migration
2- def self . up
# Returns the directory that receives the CSV backups written by this
# migration, creating it on first use.
#
# The path is db/backup/<timestamp> below Rails.root. It is memoized in
# @backup_dir, so every call on the same migration instance yields the same
# directory even though the timestamp is taken from Time.now.
#
# @return [Pathname] the backup directory (assuming Rails.root is a Pathname,
#   as the `.join` calls on the result in this file expect)
def backup_dir
  return @backup_dir if @backup_dir

  @backup_dir = Rails.root.join('db', 'backup', Time.now.strftime('%Y-%m-%d_%H-%M-%S'))
  FileUtils.mkdir_p(@backup_dir)
  # FileUtils.mkdir_p returns its list of paths, not the Pathname, so return
  # the memoized value explicitly to keep the first call consistent with
  # every later one.
  @backup_dir
end
9+
# Dumps the rows returned by +query+ as CSV into "<name>.csv" inside
# backup_dir, streaming the data through PostgreSQL's COPY ... TO STDOUT.
#
# The file is opened in append mode, so several calls with the same +name+
# accumulate in a single file.
#
# @param name [String] base name of the CSV file (without extension)
# @param query [String] SQL query whose result set is exported; it is
#   interpolated verbatim into the COPY statement
# @param header [Boolean] whether COPY should emit a CSV header row
def backup(name, query, header: true)
  say_with_time("backup(#{name.inspect}, #{query.inspect})") do
    copy_query = "COPY (#{query}) TO STDOUT WITH DELIMITER ',' CSV #{header ? 'HEADER' : ''}"
    # Look the raw driver connection up once instead of on every loop pass.
    raw = connection.raw_connection

    File.open(backup_dir.join("#{name}.csv"), 'a') do |f|
      raw.copy_data(copy_query) do
        # get_copy_data yields one chunk per call and a falsy value when the
        # COPY stream is exhausted.
        while (line = raw.get_copy_data)
          f.write(line)
        end
      end
    end
  end
end
23+
# Backs up, then deletes, every row of +table+ for which +exists_query+
# finds no match.
#
# @param table [String] table name; interpolated as a double-quoted SQL
#   identifier and also used as the backup file name
# @param exists_query [String] SQL subquery placed inside NOT EXISTS(...);
#   callers in this file reference the outer row through the table's own name
def backup_and_delete_missing(table, exists_query)
  # Build the condition once so the backup and the delete always target
  # exactly the same set of rows.
  orphaned = "\"#{table}\" WHERE NOT EXISTS(#{exists_query})"

  backup(table, "SELECT * FROM #{orphaned}")
  execute "DELETE FROM #{orphaned}"
end
28+
29+ def up
30+ say "WARNING: destructive migration necessary. Deleted data will be backed up to #{ backup_dir } "
31+
332 # Unset project reference for repositories with non-existing projects
433 execute <<-SQL
534 UPDATE repositories AS r
@@ -10,61 +39,62 @@ def self.up
1039 SQL
1140
1241 # Delete processings with non-existing repositories
13- execute <<-SQL
14- DELETE FROM processings AS p
15- WHERE NOT EXISTS(
16- SELECT 1 FROM repositories AS r WHERE r.id = p.repository_id
17- )
18- SQL
42+ backup_and_delete_missing ( "processings" ,
43+ "SELECT 1 FROM repositories AS r WHERE r.id = processings.repository_id" )
1944
2045 # Delete process times with non-existing processings
21- execute <<-SQL
22- DELETE FROM process_times AS t
23- WHERE NOT EXISTS (
24- SELECT 1 FROM processings AS p WHERE p.id = t.processing_id
25- )
26- SQL
46+ backup_and_delete_missing ( "process_times" ,
47+ "SELECT 1 FROM processings AS p WHERE p.id = process_times.processing_id" )
2748
2849 # Delete module results with non-existing processings
29- execute <<-SQL
30- DELETE FROM module_results AS m
31- WHERE NOT EXISTS (
32- SELECT 1 FROM processings AS p WHERE p.id = m.processing_id
33- )
34- SQL
50+ backup_and_delete_missing ( "module_results" ,
51+ "SELECT 1 FROM processings AS p WHERE p.id = module_results.processing_id" )
3552
3653 # Delete kalibro modules with non-existing module results
54+ backup_and_delete_missing ( "kalibro_modules" ,
55+ "SELECT 1 FROM module_results AS m WHERE m.id = kalibro_modules.module_result_id" )
56+
57+ # Fix up metric results type, even before backing up so the backup is cleaner
3758 execute <<-SQL
38- DELETE FROM kalibro_modules AS k
39- WHERE NOT EXISTS (
40- SELECT 1 FROM module_results AS m WHERE m.id = k.module_result_id
41- )
59+ UPDATE metric_results SET "type" = 'TreeMetricResult' WHERE "type" = 'MetricResult'
4260 SQL
4361
4462 # Delete metric results with non-existing module results
45- execute <<-SQL
46- DELETE FROM metric_results AS met
47- WHERE NOT EXISTS (
48- SELECT 1 FROM module_results AS mod WHERE mod.id = met.module_result_id
49- )
50- SQL
63+ backup_and_delete_missing ( "metric_results" ,
64+ "SELECT 1 FROM module_results AS m WHERE m.id = metric_results.module_result_id" )
5165
52- # Delete duplicate metric_results. Group them by (module_result, metric_configuration),
53- # then delete all but the one with the highest ID
54- # The double wrapping on the inner query is necessary because window functions
55- # cannot be used in WHERE in PostgreSQL.
56- execute <<-SQL
57- DELETE FROM metric_results
58- WHERE id IN (
59- SELECT t.id FROM (
60- SELECT id, ROW_NUMBER() OVER (PARTITION BY module_result_id, metric_configuration_id, "type"
61- ORDER BY id DESC) AS rnum
62- FROM metric_results
63- WHERE "type" = 'TreeMetricResult'
64- ) AS t
65- WHERE t.rnum > 1
66- )
# Delete duplicate metric_results. Group them by (module_result_id, metric_configuration_id),
# then delete all but the one with the highest ID. The double wrapping on the inner query is
# necessary because window functions cannot be used in WHERE in PostgreSQL.
# Only the id column is consumed below, so the inner query selects just that.
duplicate_rows = exec_query(<<-SQL)
  SELECT t.id FROM (
    SELECT id, ROW_NUMBER() OVER (
      PARTITION BY module_result_id, metric_configuration_id, "type"
      ORDER BY id DESC) AS rnum
    FROM metric_results
    WHERE "type" = 'TreeMetricResult'
  ) AS t
  WHERE t.rnum > 1
SQL

unless duplicate_rows.empty?
  # Collect the ids once so the backup and the delete act on exactly the same rows.
  duplicate_ids = duplicate_rows.rows.map(&:first).join(',')

  # Replace default messages with custom ones to avoid flooding the screen with the huge query
  say_with_time('backup("metric_results", "SELECT * FROM metric_results WHERE id IN (...)")') do
    suppress_messages do
      # No header row: this call appends to the metric_results.csv file that the
      # earlier metric_results backup in this migration writes.
      backup('metric_results',
             "SELECT * FROM metric_results WHERE id IN (#{duplicate_ids})",
             header: false)
    end
  end

  say_with_time('execute("DELETE FROM metric_results WHERE id IN (...)")') do
    suppress_messages do
      execute "DELETE FROM metric_results WHERE id IN (#{duplicate_ids})"
    end
  end
end
6898 end
6999
70100 def self . down
0 commit comments