|
1 | 1 | /- |
2 | 2 | Copyright (c) 2016 Microsoft Corporation. All rights reserved. |
3 | 3 | Released under Apache 2.0 license as described in the file LICENSE. |
4 | | -Author: Leonardo de Moura, Mario Carneiro |
| 4 | +Author: Leonardo de Moura, Mario Carneiro, Markus Himmel |
5 | 5 | -/ |
6 | 6 | module |
7 | 7 |
|
@@ -2479,98 +2479,6 @@ where |
2479 | 2479 | def extract : (@& String) → (@& Pos.Raw) → (@& Pos.Raw) → String |
2480 | 2480 | | s, b, e => Pos.Raw.extract s b e |
2481 | 2481 |
|
2482 | | -@[specialize] def splitAux (s : String) (p : Char → Bool) (b : Pos.Raw) (i : Pos.Raw) (r : List String) : List String := |
2483 | | - if h : i.atEnd s then |
2484 | | - let r := (b.extract s i)::r |
2485 | | - r.reverse |
2486 | | - else |
2487 | | - have := Nat.sub_lt_sub_left (Nat.gt_of_not_le (mt decide_eq_true h)) (Pos.Raw.lt_next s _) |
2488 | | - if p (i.get s) then |
2489 | | - let i' := i.next s |
2490 | | - splitAux s p i' i' (b.extract s i :: r) |
2491 | | - else |
2492 | | - splitAux s p b (i.next s) r |
2493 | | -termination_by s.rawEndPos.1 - i.1 |
2494 | | - |
2495 | | -/-- |
2496 | | -Splits a string at each character for which `p` returns `true`. |
2497 | | -
|
2498 | | -The characters that satisfy `p` are not included in any of the resulting strings. If multiple |
2499 | | -characters in a row satisfy `p`, then the resulting list will contain empty strings. |
2500 | | -
|
2501 | | -Examples: |
2502 | | -* `"coffee tea water".split (·.isWhitespace) = ["coffee", "tea", "water"]` |
2503 | | -* `"coffee tea water".split (·.isWhitespace) = ["coffee", "", "tea", "", "water"]` |
2504 | | -* `"fun x =>\n x + 1\n".split (· == '\n') = ["fun x =>", " x + 1", ""]` |
2505 | | --/ |
2506 | | -@[inline] def splitToList (s : String) (p : Char → Bool) : List String := |
2507 | | - splitAux s p 0 0 [] |
2508 | | - |
2509 | | -@[inline, deprecated splitToList (since := "2025-10-17")] |
2510 | | -def split (s : String) (p : Char → Bool) : List String := |
2511 | | - splitToList s p |
2512 | | - |
2513 | | -/-- |
2514 | | -Auxiliary for `splitOn`. Preconditions: |
2515 | | -* `sep` is not empty |
2516 | | -* `b <= i` are indexes into `s` |
2517 | | -* `j` is an index into `sep`, and not at the end |
2518 | | -
|
2519 | | -It represents the state where we have currently parsed some split parts into `r` (in reverse order), |
2520 | | -`b` is the beginning of the string / the end of the previous match of `sep`, and the first `j` bytes |
2521 | | -of `sep` match the bytes `i-j .. i` of `s`. |
2522 | | --/ |
2523 | | -def splitOnAux (s sep : String) (b : Pos.Raw) (i : Pos.Raw) (j : Pos.Raw) (r : List String) : List String := |
2524 | | - if i.atEnd s then |
2525 | | - let r := (b.extract s i)::r |
2526 | | - r.reverse |
2527 | | - else |
2528 | | - if i.get s == j.get sep then |
2529 | | - let i := i.next s |
2530 | | - let j := j.next sep |
2531 | | - if j.atEnd sep then |
2532 | | - splitOnAux s sep i i 0 (b.extract s (i.unoffsetBy j)::r) |
2533 | | - else |
2534 | | - splitOnAux s sep b i j r |
2535 | | - else |
2536 | | - splitOnAux s sep b ((i.unoffsetBy j).next s) 0 r |
2537 | | -termination_by (s.rawEndPos.1 - (j.byteDistance i), sep.rawEndPos.1 - j.1) |
2538 | | -decreasing_by |
2539 | | - focus |
2540 | | - rename_i h _ _ |
2541 | | - left; exact Nat.sub_lt_sub_left |
2542 | | - (Nat.lt_of_le_of_lt (Nat.sub_le ..) (Nat.gt_of_not_le (mt decide_eq_true h))) |
2543 | | - (Nat.lt_of_le_of_lt (Nat.sub_le ..) (Pos.Raw.lt_next s _)) |
2544 | | - focus |
2545 | | - rename_i i₀ j₀ _ eq h' |
2546 | | - rw [show (j₀.next sep).byteDistance (i₀.next s) = j₀.byteDistance i₀ by |
2547 | | - change (_ + Char.utf8Size _) - (_ + Char.utf8Size _) = _ |
2548 | | - rw [(beq_iff_eq ..).1 eq, Nat.add_sub_add_right]; rfl] |
2549 | | - right; exact Nat.sub_lt_sub_left |
2550 | | - (Nat.lt_of_le_of_lt (Nat.le_add_right ..) (Nat.gt_of_not_le (mt decide_eq_true h'))) |
2551 | | - (Pos.Raw.lt_next sep _) |
2552 | | - focus |
2553 | | - rename_i h _ |
2554 | | - left; exact Nat.sub_lt_sub_left |
2555 | | - (Nat.lt_of_le_of_lt (Nat.sub_le ..) (Nat.gt_of_not_le (mt decide_eq_true h))) |
2556 | | - (Pos.Raw.lt_next s _) |
2557 | | - |
2558 | | -/-- |
2559 | | -Splits a string `s` on occurrences of the separator string `sep`. The default separator is `" "`. |
2560 | | -
|
2561 | | -When `sep` is empty, the result is `[s]`. When `sep` occurs in overlapping patterns, the first match |
2562 | | -is taken. There will always be exactly `n+1` elements in the returned list if there were `n` |
2563 | | -non-overlapping matches of `sep` in the string. The separators are not included in the returned |
2564 | | -substrings. |
2565 | | -
|
2566 | | -Examples: |
2567 | | -* `"here is some text ".splitOn = ["here", "is", "some", "text", ""]` |
2568 | | -* `"here is some text ".splitOn "some" = ["here is ", " text "]` |
2569 | | -* `"here is some text ".splitOn "" = ["here is some text "]` |
2570 | | -* `"ababacabac".splitOn "aba" = ["", "bac", "c"]` |
2571 | | --/ |
2572 | | -@[inline] def splitOn (s : String) (sep : String := " ") : List String := |
2573 | | - if sep == "" then [s] else splitOnAux s sep 0 0 0 [] |
2574 | 2482 |
|
2575 | 2483 |
|
2576 | 2484 | def Pos.Raw.offsetOfPosAux (s : String) (pos : Pos.Raw) (i : Pos.Raw) (offset : Nat) : Nat := |
|
0 commit comments