Skip to content

Commit da09e63

Browse files
committed
Restrict POSIX character classes to 7-bit ASCII, i.e. POSIX locale (#4)
Includes the example from the original issue description, as well as many extra test cases for the "cntrl" POSIX class, as that one was the trickiest to express.
1 parent f0c2ae3 commit da09e63

3 files changed

Lines changed: 36 additions & 15 deletions

File tree

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ function%mikmatch {|/ "hello" / u|} -> "matched" (* matches "hello world" *)
6363

6464
Available in patterns:
6565

66-
**POSIX character classes:** `lower`, `upper`, `alpha`, `digit`, `alnum`, `punct`, `graph`, `print`, `blank`, `space`, `cntrl`, `xdigit`
66+
**POSIX character classes:** `lower`, `upper`, `alpha`, `digit`, `alnum`, `punct`, `graph`, `print`, `blank`, `space`, `cntrl`, `xdigit` (7-bit ASCII only, i.e. the POSIX locale)
6767

6868
**Control sequences:**
6969
- `bos` - beginning of string (`^`)

lib/mik_lexer.mll

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -6,19 +6,19 @@ let new_line lexbuf =
66
Lexing.new_line lexbuf
77

88
let predefined_classes = [
9-
("lower", {|[[:lower:]]|});
10-
("upper", {|[[:upper:]]|});
11-
("alpha", {|[[:alpha:]]|});
12-
("digit", {|[[:digit:]]|});
13-
("alnum", {|[[:alnum:]]|});
14-
("punct", {|[[:punct:]]|});
15-
("graph", {|[[:graph:]]|});
16-
("print", {|[[:print:]]|});
17-
("blank", {|[[:blank:]]|});
18-
("cntrl", {|[[:cntrl:]]|});
19-
("xdigit", {|[[:xdigit:]]|});
20-
("space", {|[[:space:]]|});
21-
(* ("word", {|[[:word:]]|}); *)
9+
("lower", {|[a-z]|});
10+
("upper", {|[A-Z]|});
11+
("alpha", {|[A-Za-z]|});
12+
("digit", {|[0-9]|});
13+
("alnum", {|[0-9A-Za-z]|});
14+
("punct", {|[!-/:-@[-`{-~]|});
15+
("graph", {|[!-~]|});
16+
("print", {|[ -~]|});
17+
("blank", {|[\t ]|});
18+
("cntrl", "[\x00-\x1f\x7f]");
19+
("xdigit", {|[0-9A-Fa-f]|}); (* ASCII hex digits; exactly one closing bracket, since a second ']' would fall outside the bracket expression *)
20+
("space", {|[\t-\r ]|});
21+
(* ("word", {|[0-9A-Za-z_]|}); *)
2222
("eos", {|$|});
2323
("eol", {|$|[\n]|});
2424
("bnd", {|\b|});

tests/test_ppx_mikmatch.ml

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,27 @@ let test_basic_matching _ =
88
assert_equal "matched hello" (match_hello "hello");
99
assert_equal "no match" (match_hello "world");
1010

11+
let match_lower = function%mikmatch {| lower |} -> "lower" | _ -> "not lower" in
12+
assert_equal "lower" (match_lower "a");
13+
assert_equal "not lower" (match_lower "A");
14+
assert_equal "not lower" (match_lower "\xb5");
15+
16+
let match_cntrl = function%mikmatch {| cntrl |} -> "control character" | _ -> "not a control character" in
17+
assert_equal "control character" (match_cntrl "\x00");
18+
assert_equal "control character" (match_cntrl "\x01");
19+
assert_equal "control character" (match_cntrl "\t");
20+
assert_equal "control character" (match_cntrl "\n");
21+
assert_equal "control character" (match_cntrl "\x1f");
22+
assert_equal "control character" (match_cntrl "\x7f");
23+
assert_equal "not a control character" (match_cntrl "");
24+
assert_equal "not a control character" (match_cntrl "\x00\x00");
25+
assert_equal "not a control character" (match_cntrl " ");
26+
assert_equal "not a control character" (match_cntrl "~");
27+
assert_equal "not a control character" (match_cntrl "\x80");
28+
assert_equal "not a control character" (match_cntrl "\x81");
29+
assert_equal "not a control character" (match_cntrl "\x9f");
30+
assert_equal "not a control character" (match_cntrl "\xff");
31+
1132
let match_digit = function%mikmatch {| digit |} -> "single digit" | _ -> "not a digit" in
1233
assert_equal "single digit" (match_digit "5");
1334
assert_equal "not a digit" (match_digit "a");
@@ -422,7 +443,7 @@ let test_mixed_matching _ =
422443

423444
assert_equal "got a" (no_default_case "a");
424445
assert_equal "got b" (no_default_case "b");
425-
assert_raises (Failure "File tests/test_ppx_mikmatch.ml, lines 417-419, characters 24-33: String did not match any mikmatch cases.")
446+
assert_raises (Failure "File tests/test_ppx_mikmatch.ml, lines 438-440, characters 24-33: String did not match any mikmatch cases.")
426447
(fun () -> no_default_case "c")
427448

428449
type mode =

0 commit comments

Comments
 (0)