sounio-benchmarks/045_word_break.sio at main · Sounio-lang/sounio-benchmarks · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
//@ run-pass
// HumanEval 045: Word Break (Simplified DP)
//
// Given a string (as byte array) and a dictionary of words (as byte arrays
// with lengths), determine if the string can be segmented into a sequence
// of dictionary words. Uses bottom-up DP.
// dp[i] = 1 if s[0..i] can be segmented, 0 otherwise.
//
// Simplified: dictionary stored as flat array of words with lengths.

struct DpBuf { data: [i64; 256] }
struct WordDict {
    // Store up to 16 words, each up to 16 bytes
    words: [i64; 256]
    word_lens: [i64; 16]
    num_words: i64
}

fn match_at(s: [i64; 256], s_start: i64, w: [i64; 256], w_start: i64, w_len: i64) -> i64 with Mut, Panic, Div {
    var k: i64 = 0
    while k < w_len {
        if s[s_start + k] != w[w_start + k] {
            return 0
        }
        k = k + 1
    }
    1
}

fn word_break(s: [i64; 256], s_len: i64, dict: WordDict) -> i64 with Mut, Panic, Div {
    // dp[i] = 1 if s[0..i] can be segmented
    var dp = DpBuf { data: [0; 256] }
    dp.data[0] = 1

    var i: i64 = 1
    while i <= s_len {
        var w: i64 = 0
        while w < dict.num_words {
            let wlen = dict.word_lens[w]
            if wlen <= i {
                if dp.data[i - wlen] == 1 {
                    // Check if s[i-wlen .. i] matches word w
                    let w_start = w * 16
                    if match_at(s, i - wlen, dict.words, w_start, wlen) == 1 {
                        dp.data[i] = 1
                    }
                }
            }
            w = w + 1
        }
        i = i + 1
    }

    dp.data[s_len]
}

fn main() -> i64 with IO, Mut, Panic, Div {
    // Test 1: s="abcd" dict=["ab","cd"] -> 1
    // a=1, b=2, c=3, d=4
    var s1: [i64; 256] = [0; 256]
    s1[0] = 1
    s1[1] = 2
    s1[2] = 3
    s1[3] = 4
    var dict1 = WordDict { words: [0; 256], word_lens: [0; 16], num_words: 2 }
    dict1.words[0] = 1
    dict1.words[1] = 2
    dict1.word_lens[0] = 2
    dict1.words[16] = 3
    dict1.words[17] = 4
    dict1.word_lens[1] = 2
    assert(word_break(s1, 4, dict1) == 1)

    // Test 2: s="abcd" dict=["ab","ef"] -> 0 (no match for "cd")
    var s2: [i64; 256] = [0; 256]
    s2[0] = 1
    s2[1] = 2
    s2[2] = 3
    s2[3] = 4
    var dict2 = WordDict { words: [0; 256], word_lens: [0; 16], num_words: 2 }
    dict2.words[0] = 1
    dict2.words[1] = 2
    dict2.word_lens[0] = 2
    dict2.words[16] = 5
    dict2.words[17] = 6
    dict2.word_lens[1] = 2
    assert(word_break(s2, 4, dict2) == 0)

    // Test 3: s="aaa" dict=["a"] -> 1
    var s3: [i64; 256] = [0; 256]
    s3[0] = 1
    s3[1] = 1
    s3[2] = 1
    var dict3 = WordDict { words: [0; 256], word_lens: [0; 16], num_words: 1 }
    dict3.words[0] = 1
    dict3.word_lens[0] = 1
    assert(word_break(s3, 3, dict3) == 1)

    // Test 4: s="abab" dict=["ab"] -> 1
    var s4: [i64; 256] = [0; 256]
    s4[0] = 1
    s4[1] = 2
    s4[2] = 1
    s4[3] = 2
    var dict4 = WordDict { words: [0; 256], word_lens: [0; 16], num_words: 1 }
    dict4.words[0] = 1
    dict4.words[1] = 2
    dict4.word_lens[0] = 2
    assert(word_break(s4, 4, dict4) == 1)

    // Test 5: empty string -> 1 (trivially segmented)
    var s5: [i64; 256] = [0; 256]
    var dict5 = WordDict { words: [0; 256], word_lens: [0; 16], num_words: 1 }
    dict5.words[0] = 1
    dict5.word_lens[0] = 1
    assert(word_break(s5, 0, dict5) == 1)

    // Test 6: s="abc" dict=["a","b","c"] -> 1
    var s6: [i64; 256] = [0; 256]
    s6[0] = 1
    s6[1] = 2
    s6[2] = 3
    var dict6 = WordDict { words: [0; 256], word_lens: [0; 16], num_words: 3 }
    dict6.words[0] = 1
    dict6.word_lens[0] = 1
    dict6.words[16] = 2
    dict6.word_lens[1] = 1
    dict6.words[32] = 3
    dict6.word_lens[2] = 1
    assert(word_break(s6, 3, dict6) == 1)

    println("045_word_break: ALL TESTS PASSED")
    0
}