[WIP]

abelsiqueira · abelsiqueira · commit 94e903d20100 · 2025-04-17T16:19:58.000+02:00
diff --git a/src/convenience.jl b/src/convenience.jl
@@ -42,6 +42,15 @@ function cluster!(
     find_representative_periods(df, num_rps; drop_incomplete_last_period, method, distance)
   fit_rep_period_weights!(clusters; weight_type, tol, niters, learning_rate, adaptive_grad)
 
+  for table_name in (
+    "cluster_rep_periods_data",
+    "cluster_rep_periods_mapping",
+    "cluster_profiles_rep_periods",
+    "cluster_timeframe_data",
+  )
+    DuckDB.query(connection, "DROP VIEW IF EXISTS $table_name")
+    DuckDB.query(connection, "DROP TABLE IF EXISTS $table_name")
+  end
   write_clustering_result_to_tables(connection, clusters)
 
   # enrich the cluster_ data with year information because TulipaClustering
@@ -54,23 +63,12 @@ function cluster!(
   for table_name in ("cluster_rep_periods_data", "cluster_rep_periods_mapping")
     DuckDB.query(
       connection,
-      "CREATE OR REPLACE TEMP TABLE t_new_$table_name AS
+      "CREATE OR REPLACE TABLE t_new_$table_name AS
       SELECT unnest([$years_str]) AS year, $table_name.*
       FROM $table_name",
     )
-    # DROP TABLE OR VIEW
-    is_table =
-      only([
-        row.count for row in DuckDB.query(
-          connection,
-          "SELECT COUNT(*) AS count FROM duckdb_tables WHERE table_name='$table_name'",
-        )
-      ]) > 0
-    if is_table
-      DuckDB.query(connection, "DROP TABLE $table_name")
-    else
-      DuckDB.query(connection, "DROP VIEW $table_name")
-    end
+    DuckDB.query(connection, "DROP VIEW IF EXISTS $table_name")
+    DuckDB.query(connection, "DROP TABLE IF EXISTS $table_name")
     DuckDB.query(
       connection,
       "ALTER TABLE t_new_$table_name
@@ -79,29 +77,31 @@ function cluster!(
   end
 
   table_name = "cluster_profiles_rep_periods"
-  DuckDB.query(
-    connection,
-    "CREATE OR REPLACE TEMP TABLE t_new_$table_name AS FROM $table_name",
-  )
+  DuckDB.query(connection, "CREATE OR REPLACE TABLE t_new_$table_name AS FROM $table_name")
   # DROP TABLE OR VIEW
-  is_table =
-    only([
-      row.count for row in DuckDB.query(
-        connection,
-        "SELECT COUNT(*) AS count FROM duckdb_tables WHERE table_name='$table_name'",
-      )
-    ]) > 0
-  if is_table
-    DuckDB.query(connection, "DROP TABLE $table_name")
-  else
-    DuckDB.query(connection, "DROP VIEW $table_name")
-  end
+  DuckDB.query(connection, "DROP VIEW IF EXISTS $table_name")
+  DuckDB.query(connection, "DROP TABLE IF EXISTS $table_name")
   DuckDB.query(
     connection,
     "ALTER TABLE t_new_$table_name
     RENAME TO $table_name",
   )
 
+  DuckDB.query(
+    connection,
+    "CREATE OR REPLACE TABLE cluster_timeframe_data AS
+    SELECT DISTINCT
+      rep_periods_mapping.year,
+      rep_periods_mapping.period,
+      rep_periods_data.num_timesteps,
+    FROM cluster_rep_periods_mapping AS rep_periods_mapping
+    LEFT JOIN cluster_rep_periods_data AS rep_periods_data
+      ON rep_periods_mapping.year = rep_periods_data.year
+      AND rep_periods_mapping.rep_period = rep_periods_data.rep_period
+    ORDER BY rep_periods_mapping.year, rep_periods_mapping.period
+    ",
+  )
+
   return clusters
 end
 
diff --git a/test/test-convenience.jl b/test/test-convenience.jl
@@ -39,49 +39,73 @@ end
 
   clusters = cluster!(connection, period_duration, num_rps)
 
-  df_rep_periods_data =
-    DuckDB.query(
-      connection,
-      "FROM cluster_rep_periods_data
-      ORDER BY year, rep_period",
-    ) |> DataFrame
-  df_rep_periods_mapping =
-    DuckDB.query(
-      connection,
-      "FROM cluster_rep_periods_mapping
-      ORDER BY year, period, rep_period",
-    ) |> DataFrame
-  df_profiles_rep_periods =
-    DuckDB.query(
+  @testset "rep_periods_data" begin
+    df_rep_periods_data =
+      DuckDB.query(
+        connection,
+        "FROM cluster_rep_periods_data
+        ORDER BY year, rep_period",
+      ) |> DataFrame
+
+    @test sort(names(df_rep_periods_data)) ==
+          ["num_timesteps", "rep_period", "resolution", "year"]
+
+    @test df_rep_periods_data.year == repeat(years; inner = num_rps)
+    @test df_rep_periods_data.rep_period == repeat(1:num_rps; outer = length(years))
+    @test all(df_rep_periods_data.resolution .== 1.0)
+    @test all(df_rep_periods_data.num_timesteps .== period_duration)
+  end
+
+  @testset "rep_periods_mapping" begin
+    df_rep_periods_mapping =
+      DuckDB.query(
+        connection,
+        "FROM cluster_rep_periods_mapping
+        ORDER BY year, period, rep_period",
+      ) |> DataFrame
+
+    @test sort(names(df_rep_periods_mapping)) == ["period", "rep_period", "weight", "year"]
+
+    @test size(df_rep_periods_mapping, 1) ≥ length(years) * num_periods
+  end
+
+  @testset "timeframe_data" begin
+    df_timeframe_data = DuckDB.query(
       connection,
-      "FROM cluster_profiles_rep_periods
-      ORDER BY profile_name, year, rep_period, timestep",
+      "FROM cluster_timeframe_data
+      ORDER BY year, period",
     ) |> DataFrame
 
-  @test sort(names(df_rep_periods_data)) ==
-        ["num_timesteps", "rep_period", "resolution", "year"]
-  @test sort(names(df_rep_periods_mapping)) == ["period", "rep_period", "weight", "year"]
-  @test sort(names(df_profiles_rep_periods)) ==
-        ["profile_name", "rep_period", "timestep", "value", "year"]
-
-  @test df_rep_periods_data.year == repeat(years; inner = num_rps)
-  @test df_rep_periods_data.rep_period == repeat(1:num_rps; outer = length(years))
-  @test all(df_rep_periods_data.resolution .== 1.0)
-  @test all(df_rep_periods_data.num_timesteps .== period_duration)
-
-  @test size(df_rep_periods_mapping, 1) ≥ length(years) * num_periods
-
-  @test df_profiles_rep_periods.profile_name ==
-        repeat(profile_names; inner = period_duration * num_rps * length(years))
-  @test df_profiles_rep_periods.year ==
-        repeat(years; inner = period_duration * num_rps, outer = length(profile_names))
-  @test df_profiles_rep_periods.rep_period == repeat(
-    1:num_rps;
-    inner = period_duration,
-    outer = length(profile_names) * length(years),
-  )
-  @test df_profiles_rep_periods.timestep ==
-        repeat(1:period_duration; outer = length(profile_names) * length(years) * num_rps)
+    @test sort(names(df_timeframe_data)) == ["num_timesteps", "period", "year"]
+
+    @test df_timeframe_data.year == repeat(years; inner = num_periods)
+    @test df_timeframe_data.period == repeat(1:num_periods; outer = length(years))
+    @test all(df_timeframe_data.num_timesteps .== period_duration)
+  end
+
+  @testset "profiles_rep_periods" begin
+    df_profiles_rep_periods =
+      DuckDB.query(
+        connection,
+        "FROM cluster_profiles_rep_periods
+        ORDER BY profile_name, year, rep_period, timestep",
+      ) |> DataFrame
+
+    @test sort(names(df_profiles_rep_periods)) ==
+          ["profile_name", "rep_period", "timestep", "value", "year"]
+
+    @test df_profiles_rep_periods.profile_name ==
+          repeat(profile_names; inner = period_duration * num_rps * length(years))
+    @test df_profiles_rep_periods.year ==
+          repeat(years; inner = period_duration * num_rps, outer = length(profile_names))
+    @test df_profiles_rep_periods.rep_period == repeat(
+      1:num_rps;
+      inner = period_duration,
+      outer = length(profile_names) * length(years),
+    )
+    @test df_profiles_rep_periods.timestep ==
+          repeat(1:period_duration; outer = length(profile_names) * length(years) * num_rps)
+  end
 end
 
 @testset "dummy_cluster!" begin