1- using CSV, DataFrames, DuckDB
2-
3- TIO = TulipaIO
1+ using CSV: CSV
2+ using DataFrames : DataFrames, DataFrame
3+ using DuckDB : DuckDB, DBInterface
44
"""
    shape(df::DataFrame)

Return the dimensions of `df` as a `(number_of_rows, number_of_columns)` tuple.
"""
function shape(df::DataFrame)
    return (DataFrames.nrow(df), DataFrames.ncol(df))
end
88
99function tmp_tbls (con:: DuckDB.DB )
When row order is different, do a join to determine equality; use the
columns `cols`, join on `on` (often `:name`). The resulting DataFrame
is returned. It uniquifies columns with clashing names (see
`?DataFrames.leftjoin`), and stores a "source" under the `:source` column.
2121
2222"""
function join_cmp(df1, df2, cols; on::Union{Symbol,Vector{Symbol}})
    # Only the columns in `cols` take part in the comparison. `makeunique`
    # renames clashing non-key columns from `df2`, and `source` adds a
    # `:source` column describing where each row was found.
    left = df1[!, cols]
    right = df2[!, cols]
    return DataFrames.leftjoin(left, right; on = on, makeunique = true, source = :source)
end
2626
# Tests for the small helper functions of TulipaIO: table-name derivation,
# file/table existence checks, and formatting a source as an SQL fragment.
@testset "Utilities" begin
    csv_path = joinpath(DATA, "Norse/assets-data.csv")

    @testset "get_tbl_name(source, tmp)" begin
        # (expected table name, `tmp` flag) pairs for the same source path
        for (name, tmp) in [("my_file", false), ("t_my_file", true)]
            @test name == TulipaIO.get_tbl_name("path/my-file.csv", tmp)
        end
    end

    # redundant for the current implementation, needed when we support globs
    @testset "check_file(source)" begin
        @test TulipaIO.check_file(csv_path)
        @test !TulipaIO.check_file("not-there")
    end

    # Connection and table shared by the remaining testsets in this block.
    con = DBInterface.connect(DuckDB.DB)
    tbl_name = "mytbl"

    @testset "check_tbl(con, source)" begin
        DBInterface.execute(con, "CREATE TABLE $tbl_name AS SELECT * FROM range(5)")
        @test TulipaIO.check_tbl(con, tbl_name)
        @test !TulipaIO.check_tbl(con, "not_there")
    end

    @testset "Conditionally format source as SQL" begin
        read_ = TulipaIO.fmt_source(con, csv_path)
        @test occursin("read_csv", read_)
        @test occursin(csv_path, read_)
        @test TulipaIO.fmt_source(con, tbl_name) == tbl_name
        @test_throws TulipaIO.NeitherTableNorFileError TulipaIO.fmt_source(con, "not-there")
        # Matching on the error message with `@test_throws` needs Julia 1.8+.
        # Compare whole version numbers so that later major releases qualify.
        if VERSION >= v"1.8"
            msg_re = r"not-there.+"
            msg_re *= "$con"
            @test_throws msg_re TulipaIO.fmt_source(con, "not-there")
        end
    end
end
7171
7272 @testset " CSV -> DataFrame" begin
7373 con = DBInterface. connect (DuckDB. DB)
74- df_res = TIO . create_tbl (con, csv_path; show = true )
74+ df_res = TulipaIO . create_tbl (con, csv_path; show = true )
7575 @test shape (df_org) == shape (df_res)
76- @test_throws TIO . FileNotFoundError TIO . create_tbl (con, " not-there" )
76+ @test_throws TulipaIO . FileNotFoundError TulipaIO . create_tbl (con, " not-there" )
7777 if (VERSION . major >= 1 ) && (VERSION . minor >= 8 )
78- @test_throws r" not-there" TIO . create_tbl (con, " not-there" )
78+ @test_throws r" not-there" TulipaIO . create_tbl (con, " not-there" )
7979 end
8080 end
8181
8282 @testset " CSV -> DataFrame w/ a schema" begin
8383 con = DBInterface. connect (DuckDB. DB)
8484 mapping_csv_path = joinpath (DATA, " Norse/rep-periods-mapping.csv" )
8585 col_schema = Dict (:period => " INT" , :rep_period => " VARCHAR" , :weight => " DOUBLE" )
86- TIO . create_tbl (con, mapping_csv_path; types = col_schema)
86+ TulipaIO . create_tbl (con, mapping_csv_path; types = col_schema)
8787 df_types = DuckDB. query (con, " DESCRIBE rep_periods_mapping" ) |> DataFrame
8888 @test df_types. column_name == [" period" , " rep_period" , " weight" ]
8989 @test df_types. column_type == [" INTEGER" , " VARCHAR" , " DOUBLE" ]
9292 opts = Dict (:on => [:name ], :cols => [:investable ], :show => true )
9393 @testset " CSV w/ alternatives -> DataFrame" begin
9494 con = DBInterface. connect (DuckDB. DB)
95- df_res = TIO . create_tbl (con, csv_path, csv_copy; opts... , fill = false )
95+ df_res = TulipaIO . create_tbl (con, csv_path, csv_copy; opts... , fill = false )
9696 df_exp = DataFrame (CSV. File (csv_copy; header = 2 ))
9797 @test df_exp. investable == df_res. investable
9898 @test df_org. investable != df_res. investable
9999 end
100100
101101 @testset " no filling for missing rows" begin
102102 con = DBInterface. connect (DuckDB. DB)
103- df_res = TIO . create_tbl (con, csv_path, csv_fill; opts... , fill = false )
103+ df_res = TulipaIO . create_tbl (con, csv_path, csv_fill; opts... , fill = false )
104104 df_ref = DataFrame (CSV. File (csv_fill; header = 2 ))
105105 # NOTE: row order is different, join to determine equality
106106 cmp = join_cmp (df_res, df_ref, [" name" , " investable" ]; on = :name )
107- @test (subset (cmp, :investable_1 => ByRow (ismissing)). source .== " left_only" ) |> all
108- @test (subset (cmp, :investable_1 => ByRow (! ismissing)). source .== " both" ) |> all
107+ @test (
108+ DataFrames. subset (cmp, :investable_1 => DataFrames. ByRow (ismissing)). source .==
109+ " left_only"
110+ ) |> all
111+ @test (
112+ DataFrames. subset (cmp, :investable_1 => DataFrames. ByRow (! ismissing)). source .== " both"
113+ ) |> all
109114 end
110115
111116 @testset " back-filling missing rows" begin
112117 con = DBInterface. connect (DuckDB. DB)
113- df_res = TIO . create_tbl (con, csv_path, csv_fill; opts... , fill = true )
118+ df_res = TulipaIO . create_tbl (con, csv_path, csv_fill; opts... , fill = true )
114119 df_exp = DataFrame (CSV. File (csv_copy; header = 2 ))
115120 cmp = join_cmp (df_exp, df_res, [" name" , " investable" ]; on = :name )
116121 @test all (cmp. investable .== cmp. investable_1)
119124
120125 @testset " back-filling missing rows w/ alternate values" begin
121126 con = DBInterface. connect (DuckDB. DB)
122- df_res = TIO . create_tbl (
127+ df_res = TulipaIO . create_tbl (
123128 con,
124129 csv_path,
125130 csv_fill;
@@ -129,22 +134,23 @@ end
129134 )
130135 df_ref = DataFrame (CSV. File (csv_fill; header = 2 ))
131136 cmp = join_cmp (df_res, df_ref, [" name" , " investable" ]; on = :name )
132- @test (subset (cmp, :investable_1 => ByRow (ismissing)). investable) |> all
137+ @test (DataFrames. subset (cmp, :investable_1 => DataFrames. ByRow (ismissing)). investable) |>
138+ all
133139 end
134140
135141 @testset " temporary tables" begin
136142 con = DBInterface. connect (DuckDB. DB)
137- tbl_name = TIO . create_tbl (con, csv_path; name = " tmp_assets" , tmp = true )
143+ tbl_name = TulipaIO . create_tbl (con, csv_path; name = " tmp_assets" , tmp = true )
138144 @test tbl_name in tmp_tbls (con)[! , :name ]
139145
140- tbl_name = TIO . create_tbl (con, csv_path; tmp = true )
146+ tbl_name = TulipaIO . create_tbl (con, csv_path; tmp = true )
141147 @test tbl_name == " t_assets_data" # t_<cleaned up filename>
142148 @test tbl_name in tmp_tbls (con)[! , :name ]
143149 end
144150
145151 @testset " CSV -> table" begin
146152 con = DBInterface. connect (DuckDB. DB)
147- tbl_name = TIO . create_tbl (con, csv_path; name = " no_assets" )
153+ tbl_name = TulipaIO . create_tbl (con, csv_path; name = " no_assets" )
148154 df_res = DataFrame (DBInterface. execute (con, " SELECT * FROM $tbl_name " ))
149155 @test shape (df_org) == shape (df_res)
150156 # @show df_org[1:3, 1:5] df_res[1:3, 1:5]
@@ -168,18 +174,24 @@ end
168174 @testset " table + CSV w/ alternatives -> table" begin
169175 # test setup
170176 con = DBInterface. connect (DuckDB. DB)
171- TIO . create_tbl (con, csv_path; name = " no_assets" )
177+ TulipaIO . create_tbl (con, csv_path; name = " no_assets" )
172178
173179 opts = Dict (:on => [:name ], :cols => [:investable ])
174- tbl_name =
175- TIO. create_tbl (con, " no_assets" , csv_copy; name = " alt_assets" , opts... , fill = false )
180+ tbl_name = TulipaIO. create_tbl (
181+ con,
182+ " no_assets" ,
183+ csv_copy;
184+ name = " alt_assets" ,
185+ opts... ,
186+ fill = false ,
187+ )
176188 df_res = DataFrame (DBInterface. execute (con, " SELECT * FROM $tbl_name " ))
177189 df_exp = DataFrame (CSV. File (csv_copy; header = 2 ))
178190 @test df_exp. investable == df_res. investable
179191 @test df_org. investable != df_res. investable
180192
181193 @testset " back-filling missing rows" begin
182- tbl_name = TIO . create_tbl (
194+ tbl_name = TulipaIO . create_tbl (
183195 con,
184196 " no_assets" ,
185197 csv_fill;
196208 end
197209
198210 @testset " back-filling missing rows w/ alternate values" begin
199- tbl_name = TIO . create_tbl (
211+ tbl_name = TulipaIO . create_tbl (
200212 con,
201213 " no_assets" ,
202214 csv_fill;
208220 df_res = DataFrame (DBInterface. execute (con, " SELECT * FROM $tbl_name " ))
209221 df_ref = DataFrame (CSV. File (csv_fill; header = 2 ))
210222 cmp = join_cmp (df_res, df_ref, [" name" , " investable" ]; on = :name )
211- @test (subset (cmp, :investable_1 => ByRow (ismissing)). investable) |> all
223+ @test (
224+ DataFrames. subset (cmp, :investable_1 => DataFrames. ByRow (ismissing)). investable
225+ ) |> all
212226 end
213227 end
214228end
@@ -224,35 +238,40 @@ end
224238 @testset " w/ vector" begin
225239 con = DBInterface. connect (DuckDB. DB)
226240 df_exp = DataFrame (CSV. File (csv_copy; header = 2 ))
227- df_res = TIO . create_tbl (con, csv_path, Dict (:investable => df_exp. investable); opts... )
241+ df_res = TulipaIO . create_tbl (con, csv_path, Dict (:investable => df_exp. investable); opts... )
228242 # NOTE: row order is different, join to determine equality
229243 cmp = join_cmp (df_exp, df_res, [" name" , " investable" ]; on = :name )
230244 investable = cmp[! , [c for c in propertynames (cmp) if occursin (" investable" , String (c))]]
231245 @test isequal .(investable[! , 1 ], investable[! , 2 ]) |> all
232246
233247 # stupid Julia! grow up!
234248 args = [con, csv_path, Dict (:investable => df_exp. investable[2 : end ])]
235- @test_throws DimensionMismatch TIO . create_tbl (args... ; opts... )
249+ @test_throws DimensionMismatch TulipaIO . create_tbl (args... ; opts... )
236250 if (VERSION . major >= 1 ) && (VERSION . minor >= 8 )
237- @test_throws r" Length.+different" TIO . create_tbl (args... ; opts... )
238- @test_throws r" index.+value" TIO . create_tbl (args... ; opts... )
251+ @test_throws r" Length.+different" TulipaIO . create_tbl (args... ; opts... )
252+ @test_throws r" index.+value" TulipaIO . create_tbl (args... ; opts... )
239253 end
240254 end
241255
242256 @testset " w/ constant" begin
243257 con = DBInterface. connect (DuckDB. DB)
244- df_res = TIO . create_tbl (con, csv_path, Dict (:investable => true ); opts... )
258+ df_res = TulipaIO . create_tbl (con, csv_path, Dict (:investable => true ); opts... )
245259 @test df_res. investable |> all
246260
247- table_name = TIO . create_tbl (con, csv_path, Dict (:investable => true ); on = :name )
261+ table_name = TulipaIO . create_tbl (con, csv_path, Dict (:investable => true ); on = :name )
248262 @test " assets_data" == table_name
249263 end
250264
251265 @testset " w/ constant after filtering" begin
252266 con = DBInterface. connect (DuckDB. DB)
253- where_clause = TIO. FmtSQL. @where_ (lifetime in 25 : 50 , name % " Valhalla_%" )
254- df_res =
255- TIO. create_tbl (con, csv_path, Dict (:investable => true ); opts... , where_ = where_clause)
267+ where_clause = TulipaIO. FmtSQL. @where_ (lifetime in 25 : 50 , name % " Valhalla_%" )
268+ df_res = TulipaIO. create_tbl (
269+ con,
270+ csv_path,
271+ Dict (:investable => true );
272+ opts... ,
273+ where_ = where_clause,
274+ )
256275 @test shape (df_res) == shape (df_org)
257276 df_res =
258277 filter (row -> 25 <= row. lifetime <= 50 && startswith (row. name, " Valhalla_" ), df_res)
0 commit comments