|
2 | 2 | from databricks.labs.dqx.geo.check_funcs import ( |
3 | 3 | has_area_greater_than, |
4 | 4 | has_area_less_than, |
| 5 | + has_num_points_greater_than, |
| 6 | + has_num_points_less_than, |
5 | 7 | has_dimension, |
6 | 8 | has_x_coordinate_between, |
7 | 9 | has_y_coordinate_between, |
|
18 | 20 | is_point, |
19 | 21 | is_polygon, |
20 | 22 | is_ogc_valid, |
21 | | - has_num_points_greater_than, |
22 | | - has_num_points_less_than, |
23 | 23 | ) |
24 | 24 |
|
25 | 25 |
|
@@ -404,133 +404,125 @@ def test_has_y_coordinate_between(skip_if_runtime_not_geo_compatible, spark): |
404 | 404 | assert_df_equality(actual, expected, ignore_nullable=True) |
405 | 405 |
|
406 | 406 |
|
407 | | -def test_area_not_greater_than(skip_if_runtime_not_geo_compatible, spark): |
| 407 | +def test_has_area_less_than(skip_if_runtime_not_geo_compatible, spark): |
408 | 408 | test_df = spark.sql( |
409 | 409 | """ |
410 | 410 | SELECT geom FROM VALUES |
411 | 411 | ('POINT(0 0)'), -- Point has area 0 |
412 | | - ('POLYGON((0 0, 1 0, 1 1, 0 1, 0 0))'), -- Unit square has area 1 |
413 | | - ('POLYGON((0 0, 2 0, 2 2, 0 2, 0 0))'), -- 2x2 square has area 4 |
| 412 | + ('POLYGON((0 0, 1 0, 1 1, 0 1, 0 0))'), -- Unit square has area 1 |
| 413 | + ('POLYGON((0 0, 2 0, 2 2, 0 2, 0 0))'), -- 2x2 square has area 4 |
414 | 414 | ('invalid-geometry'), -- Invalid geometry |
415 | 415 | (null) -- Null geometry |
416 | 416 | AS data(geom) |
417 | | - """ |
| 417 | + """ |
418 | 418 | ) |
419 | 419 |
|
420 | | - actual = test_df.select(has_area_greater_than("geom", 2.0)) |
| 420 | + actual = test_df.select(has_area_less_than("geom", 2.0)) |
421 | 421 |
|
422 | 422 | checked_schema = "geom_area_greater_than_limit: string" |
423 | 423 | expected = spark.createDataFrame( |
424 | 424 | [ |
425 | | - [None], # Point area (0) <= 2.0, so no error |
426 | | - [None], # Square area (1) <= 2.0, so no error |
427 | | - [ |
428 | | - "value `POLYGON((0 0, 2 0, 2 2, 0 2, 0 0))` in column `geom` has area greater than limit: 2.0" |
429 | | - ], # Area (4) > 2.0 |
430 | | - ["value `invalid-geometry` in column `geom` has area greater than limit: 2.0"], # Invalid geometry |
431 | | - [None], # Null geometry |
| 425 | + [None], |
| 426 | + [None], |
| 427 | + ["value `POLYGON((0 0, 2 0, 2 2, 0 2, 0 0))` in column `geom` has area greater than limit: 2.0"], |
| 428 | + ["value `invalid-geometry` in column `geom` is not a valid geometry"], |
| 429 | + [None], |
432 | 430 | ], |
433 | 431 | checked_schema, |
434 | 432 | ) |
435 | 433 |
|
436 | 434 | assert_df_equality(actual, expected, ignore_nullable=True) |
437 | 435 |
|
438 | 436 |
|
439 | | -def test_area_not_less_than(skip_if_runtime_not_geo_compatible, spark): |
| 437 | +def test_has_area_greater_than(skip_if_runtime_not_geo_compatible, spark): |
440 | 438 | test_df = spark.sql( |
441 | 439 | """ |
442 | 440 | SELECT geom FROM VALUES |
443 | 441 | ('POINT(0 0)'), -- Point has area 0 |
444 | | - ('POLYGON((0 0, 1 0, 1 1, 0 1, 0 0))'), -- Unit square has area 1 |
445 | | - ('POLYGON((0 0, 2 0, 2 2, 0 2, 0 0))'), -- 2x2 square has area 4 |
| 442 | + ('POLYGON((0 0, 1 0, 1 1, 0 1, 0 0))'), -- Unit square has area 1 |
| 443 | + ('POLYGON((0 0, 2 0, 2 2, 0 2, 0 0))'), -- 2x2 square has area 4 |
446 | 444 | ('invalid-geometry'), -- Invalid geometry |
447 | 445 | (null) -- Null geometry |
448 | 446 | AS data(geom) |
449 | | - """ |
| 447 | + """ |
450 | 448 | ) |
451 | 449 |
|
452 | | - actual = test_df.select(has_area_less_than("geom", 1.0)) |
| 450 | + actual = test_df.select(has_area_greater_than("geom", 1.0)) |
453 | 451 |
|
454 | 452 | checked_schema = "geom_area_less_than_limit: string" |
455 | 453 | expected = spark.createDataFrame( |
456 | 454 | [ |
457 | | - ["value `POINT(0 0)` in column `geom` has area less than limit: 1.0"], # Point area (0) < 1.0 |
458 | | - [None], # Square area (1) >= 1.0, so no error |
459 | | - [None], # Square area (4) >= 1.0, so no error |
460 | | - ["value `invalid-geometry` in column `geom` has area less than limit: 1.0"], # Invalid geometry |
461 | | - [None], # Null geometry |
| 455 | + ["value `POINT(0 0)` in column `geom` has area less than limit: 1.0"], |
| 456 | + [None], |
| 457 | + [None], |
| 458 | + ["value `invalid-geometry` in column `geom` is not a valid geometry"], |
| 459 | + [None], |
462 | 460 | ], |
463 | 461 | checked_schema, |
464 | 462 | ) |
465 | 463 |
|
466 | 464 | assert_df_equality(actual, expected, ignore_nullable=True) |
467 | 465 |
|
468 | 466 |
|
469 | | -def test_num_points_not_greater_than(skip_if_runtime_not_geo_compatible, spark): |
| 467 | +def test_has_num_points_less_than(skip_if_runtime_not_geo_compatible, spark): |
470 | 468 | test_df = spark.sql( |
471 | 469 | """ |
472 | 470 | SELECT geom FROM VALUES |
473 | 471 | ('POINT(0 0)'), -- 1 point |
474 | | - ('LINESTRING(0 0, 1 1)'), -- 2 points |
475 | | - ('LINESTRING(0 0, 1 1, 2 2)'), -- 3 points |
476 | | - ('POLYGON((0 0, 1 0, 1 1, 0 1, 0 0))'), -- 5 points (including closing point) |
| 472 | + ('LINESTRING(0 0, 1 1)'), -- 2 points |
| 473 | + ('LINESTRING(0 0, 1 1, 2 2)'), -- 3 points |
| 474 | + ('POLYGON((0 0, 1 0, 1 1, 0 1, 0 0))'), -- 5 points (including closing point) |
477 | 475 | ('invalid-geometry'), -- Invalid geometry |
478 | 476 | (null) -- Null geometry |
479 | 477 | AS data(geom) |
480 | | - """ |
| 478 | + """ |
481 | 479 | ) |
482 | 480 |
|
483 | 481 | actual = test_df.select(has_num_points_less_than("geom", 3)) |
484 | 482 |
|
485 | 483 | checked_schema = "geom_num_points_greater_than_limit: string" |
486 | 484 | expected = spark.createDataFrame( |
487 | 485 | [ |
488 | | - [None], # Point (1 point) <= 3, so no error |
489 | | - [None], # LineString (2 points) <= 3, so no error |
490 | | - [None], # LineString (3 points) <= 3, so no error |
| 486 | + [None], |
| 487 | + [None], |
| 488 | + [None], |
491 | 489 | [ |
492 | 490 | "value `POLYGON((0 0, 1 0, 1 1, 0 1, 0 0))` in column `geom` has number of coordinates greater than limit: 3" |
493 | | - ], # Polygon (5 points) > 3 |
494 | | - [ |
495 | | - "value `invalid-geometry` in column `geom` has number of coordinates greater than limit: 3" |
496 | | - ], # Invalid geometry |
497 | | - [None], # Null geometry |
| 491 | + ], |
| 492 | + ["value `invalid-geometry` in column `geom` is not a valid geometry"], |
| 493 | + [None], |
498 | 494 | ], |
499 | 495 | checked_schema, |
500 | 496 | ) |
501 | 497 |
|
502 | 498 | assert_df_equality(actual, expected, ignore_nullable=True) |
503 | 499 |
|
504 | 500 |
|
505 | | -def test_num_points_not_less_than(skip_if_runtime_not_geo_compatible, spark): |
| 501 | +def test_has_num_points_greater_than(skip_if_runtime_not_geo_compatible, spark): |
506 | 502 | test_df = spark.sql( |
507 | 503 | """ |
508 | 504 | SELECT geom FROM VALUES |
509 | 505 | ('POINT(0 0)'), -- 1 point |
510 | | - ('LINESTRING(0 0, 1 1)'), -- 2 points |
511 | | - ('LINESTRING(0 0, 1 1, 2 2)'), -- 3 points |
512 | | - ('POLYGON((0 0, 1 0, 1 1, 0 1, 0 0))'), -- 5 points (including closing point) |
| 506 | + ('LINESTRING(0 0, 1 1)'), -- 2 points |
| 507 | + ('LINESTRING(0 0, 1 1, 2 2)'), -- 3 points |
| 508 | + ('POLYGON((0 0, 1 0, 1 1, 0 1, 0 0))'), -- 5 points (including closing point) |
513 | 509 | ('invalid-geometry'), -- Invalid geometry |
514 | 510 | (null) -- Null geometry |
515 | 511 | AS data(geom) |
516 | | - """ |
| 512 | + """ |
517 | 513 | ) |
518 | 514 |
|
519 | 515 | actual = test_df.select(has_num_points_greater_than("geom", 3)) |
520 | 516 |
|
521 | 517 | checked_schema = "geom_num_points_less_than_limit: string" |
522 | 518 | expected = spark.createDataFrame( |
523 | 519 | [ |
524 | | - ["value `POINT(0 0)` in column `geom` has number of coordinates less than limit: 3"], # Point (1 point) < 3 |
525 | | - [ |
526 | | - "value `LINESTRING(0 0, 1 1)` in column `geom` has number of coordinates less than limit: 3" |
527 | | - ], # LineString (2 points) < 3 |
528 | | - [None], # LineString (3 points) >= 3, so no error |
529 | | - [None], # Polygon (5 points) >= 3, so no error |
530 | | - [ |
531 | | - "value `invalid-geometry` in column `geom` has number of coordinates less than limit: 3" |
532 | | - ], # Invalid geometry |
533 | | - [None], # Null geometry |
| 520 | + ["value `POINT(0 0)` in column `geom` has number of coordinates less than limit: 3"], |
| 521 | + ["value `LINESTRING(0 0, 1 1)` in column `geom` has number of coordinates less than limit: 3"], |
| 522 | + [None], |
| 523 | + [None], |
| 524 | + ["value `invalid-geometry` in column `geom` is not a valid geometry"], |
| 525 | + [None], |
534 | 526 | ], |
535 | 527 | checked_schema, |
536 | 528 | ) |
|
0 commit comments