Skip to content

Commit 94e903d

Browse files
committed
[WIP]
1 parent ba9d848 commit 94e903d

File tree

2 files changed

+94
-70
lines changed

2 files changed

+94
-70
lines changed

src/convenience.jl

+30-30
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,15 @@ function cluster!(
4242
find_representative_periods(df, num_rps; drop_incomplete_last_period, method, distance)
4343
fit_rep_period_weights!(clusters; weight_type, tol, niters, learning_rate, adaptive_grad)
4444

45+
for table_name in (
46+
"cluster_rep_periods_data",
47+
"cluster_rep_periods_mapping",
48+
"cluster_profiles_rep_periods",
49+
"cluster_timeframe_data",
50+
)
51+
DuckDB.query(connection, "DROP VIEW IF EXISTS $table_name")
52+
DuckDB.query(connection, "DROP TABLE IF EXISTS $table_name")
53+
end
4554
write_clustering_result_to_tables(connection, clusters)
4655

4756
# enrich the cluster_ data with year information because TulipaClustering
@@ -54,23 +63,12 @@ function cluster!(
5463
for table_name in ("cluster_rep_periods_data", "cluster_rep_periods_mapping")
5564
DuckDB.query(
5665
connection,
57-
"CREATE OR REPLACE TEMP TABLE t_new_$table_name AS
66+
"CREATE OR REPLACE TABLE t_new_$table_name AS
5867
SELECT unnest([$years_str]) AS year, $table_name.*
5968
FROM $table_name",
6069
)
61-
# DROP TABLE OR VIEW
62-
is_table =
63-
only([
64-
row.count for row in DuckDB.query(
65-
connection,
66-
"SELECT COUNT(*) AS count FROM duckdb_tables WHERE table_name='$table_name'",
67-
)
68-
]) > 0
69-
if is_table
70-
DuckDB.query(connection, "DROP TABLE $table_name")
71-
else
72-
DuckDB.query(connection, "DROP VIEW $table_name")
73-
end
70+
DuckDB.query(connection, "DROP VIEW IF EXISTS $table_name")
71+
DuckDB.query(connection, "DROP TABLE IF EXISTS $table_name")
7472
DuckDB.query(
7573
connection,
7674
"ALTER TABLE t_new_$table_name
@@ -79,29 +77,31 @@ function cluster!(
7977
end
8078

8179
table_name = "cluster_profiles_rep_periods"
82-
DuckDB.query(
83-
connection,
84-
"CREATE OR REPLACE TEMP TABLE t_new_$table_name AS FROM $table_name",
85-
)
80+
DuckDB.query(connection, "CREATE OR REPLACE TABLE t_new_$table_name AS FROM $table_name")
8681
# DROP TABLE OR VIEW
87-
is_table =
88-
only([
89-
row.count for row in DuckDB.query(
90-
connection,
91-
"SELECT COUNT(*) AS count FROM duckdb_tables WHERE table_name='$table_name'",
92-
)
93-
]) > 0
94-
if is_table
95-
DuckDB.query(connection, "DROP TABLE $table_name")
96-
else
97-
DuckDB.query(connection, "DROP VIEW $table_name")
98-
end
82+
DuckDB.query(connection, "DROP VIEW IF EXISTS $table_name")
83+
DuckDB.query(connection, "DROP TABLE IF EXISTS $table_name")
9984
DuckDB.query(
10085
connection,
10186
"ALTER TABLE t_new_$table_name
10287
RENAME TO $table_name",
10388
)
10489

90+
DuckDB.query(
91+
connection,
92+
"CREATE OR REPLACE TABLE cluster_timeframe_data AS
93+
SELECT DISTINCT
94+
rep_periods_mapping.year,
95+
rep_periods_mapping.period,
96+
rep_periods_data.num_timesteps,
97+
FROM cluster_rep_periods_mapping AS rep_periods_mapping
98+
LEFT JOIN cluster_rep_periods_data AS rep_periods_data
99+
ON rep_periods_mapping.year = rep_periods_data.year
100+
AND rep_periods_mapping.rep_period = rep_periods_data.rep_period
101+
ORDER BY rep_periods_mapping.year, rep_periods_mapping.period
102+
",
103+
)
104+
105105
return clusters
106106
end
107107

test/test-convenience.jl

+64-40
Original file line numberDiff line numberDiff line change
@@ -39,49 +39,73 @@ end
3939

4040
clusters = cluster!(connection, period_duration, num_rps)
4141

42-
df_rep_periods_data =
43-
DuckDB.query(
44-
connection,
45-
"FROM cluster_rep_periods_data
46-
ORDER BY year, rep_period",
47-
) |> DataFrame
48-
df_rep_periods_mapping =
49-
DuckDB.query(
50-
connection,
51-
"FROM cluster_rep_periods_mapping
52-
ORDER BY year, period, rep_period",
53-
) |> DataFrame
54-
df_profiles_rep_periods =
55-
DuckDB.query(
42+
@testset "rep_periods_data" begin
43+
df_rep_periods_data =
44+
DuckDB.query(
45+
connection,
46+
"FROM cluster_rep_periods_data
47+
ORDER BY year, rep_period",
48+
) |> DataFrame
49+
50+
@test sort(names(df_rep_periods_data)) ==
51+
["num_timesteps", "rep_period", "resolution", "year"]
52+
53+
@test df_rep_periods_data.year == repeat(years; inner = num_rps)
54+
@test df_rep_periods_data.rep_period == repeat(1:num_rps; outer = length(years))
55+
@test all(df_rep_periods_data.resolution .== 1.0)
56+
@test all(df_rep_periods_data.num_timesteps .== period_duration)
57+
end
58+
59+
@testset "rep_periods_mapping" begin
60+
df_rep_periods_mapping =
61+
DuckDB.query(
62+
connection,
63+
"FROM cluster_rep_periods_mapping
64+
ORDER BY year, period, rep_period",
65+
) |> DataFrame
66+
67+
@test sort(names(df_rep_periods_mapping)) == ["period", "rep_period", "weight", "year"]
68+
69+
@test size(df_rep_periods_mapping, 1) ≥ length(years) * num_periods
70+
end
71+
72+
@testset "timeframe_data" begin
73+
df_timeframe_data = DuckDB.query(
5674
connection,
57-
"FROM cluster_profiles_rep_periods
58-
ORDER BY profile_name, year, rep_period, timestep",
75+
"FROM cluster_timeframe_data
76+
ORDER BY year, period",
5977
) |> DataFrame
6078

61-
@test sort(names(df_rep_periods_data)) ==
62-
["num_timesteps", "rep_period", "resolution", "year"]
63-
@test sort(names(df_rep_periods_mapping)) == ["period", "rep_period", "weight", "year"]
64-
@test sort(names(df_profiles_rep_periods)) ==
65-
["profile_name", "rep_period", "timestep", "value", "year"]
66-
67-
@test df_rep_periods_data.year == repeat(years; inner = num_rps)
68-
@test df_rep_periods_data.rep_period == repeat(1:num_rps; outer = length(years))
69-
@test all(df_rep_periods_data.resolution .== 1.0)
70-
@test all(df_rep_periods_data.num_timesteps .== period_duration)
71-
72-
@test size(df_rep_periods_mapping, 1) ≥ length(years) * num_periods
73-
74-
@test df_profiles_rep_periods.profile_name ==
75-
repeat(profile_names; inner = period_duration * num_rps * length(years))
76-
@test df_profiles_rep_periods.year ==
77-
repeat(years; inner = period_duration * num_rps, outer = length(profile_names))
78-
@test df_profiles_rep_periods.rep_period == repeat(
79-
1:num_rps;
80-
inner = period_duration,
81-
outer = length(profile_names) * length(years),
82-
)
83-
@test df_profiles_rep_periods.timestep ==
84-
repeat(1:period_duration; outer = length(profile_names) * length(years) * num_rps)
79+
@test sort(names(df_timeframe_data)) == ["num_timesteps", "period", "year"]
80+
81+
@test df_timeframe_data.year == repeat(years; inner = num_periods)
82+
@test df_timeframe_data.period == repeat(1:num_periods; outer = length(years))
83+
@test all(df_timeframe_data.num_timesteps .== period_duration)
84+
end
85+
86+
@testset "profiles_rep_periods" begin
87+
df_profiles_rep_periods =
88+
DuckDB.query(
89+
connection,
90+
"FROM cluster_profiles_rep_periods
91+
ORDER BY profile_name, year, rep_period, timestep",
92+
) |> DataFrame
93+
94+
@test sort(names(df_profiles_rep_periods)) ==
95+
["profile_name", "rep_period", "timestep", "value", "year"]
96+
97+
@test df_profiles_rep_periods.profile_name ==
98+
repeat(profile_names; inner = period_duration * num_rps * length(years))
99+
@test df_profiles_rep_periods.year ==
100+
repeat(years; inner = period_duration * num_rps, outer = length(profile_names))
101+
@test df_profiles_rep_periods.rep_period == repeat(
102+
1:num_rps;
103+
inner = period_duration,
104+
outer = length(profile_names) * length(years),
105+
)
106+
@test df_profiles_rep_periods.timestep ==
107+
repeat(1:period_duration; outer = length(profile_names) * length(years) * num_rps)
108+
end
85109
end
86110

87111
@testset "dummy_cluster!" begin

0 commit comments

Comments
 (0)