@@ -181,3 +181,128 @@ test "property: well-formed SELECT always extracts a table" {
181181 .{ .num_runs = 300 , .seed = 42 },
182182 );
183183}
184+
185+ // ============================================================================
186+ // Property: normalization never crashes and always produces output
187+ // ============================================================================
188+
/// Property: `extract.normalizeSql` survives arbitrary input.
///
/// Normalizes `input` into a 4096-byte stack scratch buffer. The value
/// returned by `normalizeSql` is discarded on purpose: nothing about the
/// output is asserted here, so this property only fails if the call itself
/// crashes.
fn prop_normalize_does_not_crash(input: []const u8) !void {
    var scratch: [4096]u8 = undefined;
    _ = extract.normalizeSql(input, &scratch);
}
194+
// Drives `prop_normalize_does_not_crash` with 500 generated strings using a
// fixed seed (42).
test "property: normalization handles arbitrary input" {
    try minish.check(
        testing.allocator,
        // min_len = 0, so the empty string is a possible input.
        gen.string(.{ .min_len = 0, .max_len = 512 }),
        prop_normalize_does_not_crash,
        .{ .num_runs = 500, .seed = 42 },
    );
}
203+
204+ // ============================================================================
205+ // Property: extraction result predicates always reference known tables
206+ // ============================================================================
207+
/// Property: every predicate that names a table refers to an extracted table.
///
/// Runs `extract.extractFromSql` on `input`. For each predicate whose
/// `table_name` is non-empty, the name must equal either the `name` or the
/// `alias` of one of the extracted tables. Nothing is checked when no tables
/// were extracted.
fn prop_predicates_reference_extracted_tables(input: []const u8) !void {
    const result = extract.extractFromSql(input);
    const tables = result.tableSlice();
    const predicates = result.predicateSlice();

    // Without any extracted table there is nothing to match a predicate
    // against, so the property holds vacuously.
    if (tables.len == 0) return;

    for (predicates) |pred| {
        // Predicates without a table name are not subject to the check.
        if (pred.table_name.len == 0) continue;

        // True as soon as one table matches by name or by alias.
        const is_known = for (tables) |table| {
            if (std.mem.eql(u8, pred.table_name, table.name)) break true;
            if (table.alias) |alias| {
                if (std.mem.eql(u8, pred.table_name, alias)) break true;
            }
        } else false;

        try testing.expect(is_known);
    }
}
233+
// Drives `prop_predicates_reference_extracted_tables` with 500 generated
// strings using a fixed seed (42).
test "property: predicates reference extracted tables" {
    try minish.check(
        testing.allocator,
        // Generated lengths range from 10 to 256.
        gen.string(.{ .min_len = 10, .max_len = 256 }),
        prop_predicates_reference_extracted_tables,
        .{ .num_runs = 500, .seed = 42 },
    );
}
242+
243+ // ============================================================================
244+ // Property: hash distribution (no constant output)
245+ // ============================================================================
246+
/// Property: dropping the first byte of a query changes its hash.
///
/// Hashes `input` and `input[1..]` with `capture.hashQuery` and expects the
/// two results to differ. Inputs shorter than two bytes are skipped.
///
/// This is a probabilistic check: two different inputs can collide, but the
/// chance is expected to be negligible over a few hundred runs.
/// NOTE(review): this assumes `hashQuery` yields a wide (e.g. 64-bit) hash
/// and hashes the raw bytes; if it normalizes the query first (for example by
/// ignoring leading whitespace), this property can fail spuriously. Confirm
/// against `capture.hashQuery`.
fn prop_hash_not_constant(input: []const u8) !void {
    if (input.len < 2) return;

    const full_hash = capture.hashQuery(input);
    const tail_hash = capture.hashQuery(input[1..]);

    // `input` and `input[1..]` always differ in length, so they can never be
    // byte-equal; no equality guard is needed before comparing the hashes.
    try testing.expect(full_hash != tail_hash);
}
258+
// Drives `prop_hash_not_constant` with 500 generated strings using a fixed
// seed (42).
test "property: hash varies for different inputs" {
    try minish.check(
        testing.allocator,
        // min_len = 2 matches the property's own minimum useful input length.
        gen.string(.{ .min_len = 2, .max_len = 128 }),
        prop_hash_not_constant,
        .{ .num_runs = 500, .seed = 42 },
    );
}
267+
268+ // ============================================================================
269+ // Property: extraction of JOIN queries always finds at least 2 tables
270+ // ============================================================================
271+
/// Copies the ASCII-alphabetic bytes of `src` into `dest`, in order, stopping
/// once `dest` is full. Returns the filled prefix of `dest`.
fn collectAlphabeticPrefix(src: []const u8, dest: []u8) []const u8 {
    var len: usize = 0;
    for (src) |byte| {
        if (len == dest.len) break;
        if (!std.ascii.isAlphabetic(byte)) continue;
        dest[len] = byte;
        len += 1;
    }
    return dest[0..len];
}

/// Property: a two-table JOIN query yields at least two extracted tables.
///
/// Derives two table names from `input`: the first from the alphabetic bytes
/// of the whole input, the second from the alphabetic bytes of its second
/// half, each capped at 31 bytes. The case is skipped when either name is
/// empty, when both names are equal, when the query does not fit the SQL
/// buffer, or when either name is reported as a keyword by
/// `extract.isKeyword`. Otherwise `extract.extractFromSql` must report a
/// `table_count` of at least 2.
fn prop_join_extracts_multiple_tables(input: []const u8) !void {
    const max_name_len = 31;
    var first_storage: [max_name_len]u8 = undefined;
    var second_storage: [max_name_len]u8 = undefined;

    const t1 = collectAlphabeticPrefix(input, &first_storage);
    const t2 = collectAlphabeticPrefix(input[input.len / 2 ..], &second_storage);

    if (t1.len == 0 or t2.len == 0) return;
    if (std.mem.eql(u8, t1, t2)) return;

    var sql_storage: [256]u8 = undefined;
    const sql = std.fmt.bufPrint(
        &sql_storage,
        "select * from {s} join {s} on {s}.id = {s}.id",
        .{ t1, t2, t1, t2 },
    ) catch return;

    // Extraction runs before the keyword check, so keyword-named queries are
    // still fed through the extractor even though their result is not checked.
    const result = extract.extractFromSql(sql);

    if (extract.isKeyword(t1) or extract.isKeyword(t2)) return;

    try testing.expect(result.table_count >= 2);
}
0 commit comments