Skip to content

Commit 385ca72

Browse files
authored
init
1 parent 15389fc commit 385ca72

5 files changed

Lines changed: 821 additions & 1 deletion

File tree

README.md

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,23 @@
1-
# tokenizer
1+
# Tokenizer
22
A Zig library for tokenizing text using PCRE2 regular expressions.
3+
4+
## Requirements
5+
zig v0.13.0
6+
7+
## Install
8+
```bash
9+
git clone https://github.com/jaco-bro/tokenizer
10+
cd tokenizer
11+
zig build exe --release=fast
12+
```
13+
14+
## Usage
15+
- `zig-out/bin/tokenizer_exe [--model MODEL_NAME] COMMAND INPUT`
16+
- `zig build run -- [--model MODEL_NAME] COMMAND INPUT`

`COMMAND` is, for example, `--encode` or `--decode`, as shown in the examples below.
17+
18+
```bash
19+
zig build run -- --encode "hello world"
20+
zig build run -- --decode "{14990, 1879}"
21+
zig build run -- --model "phi-4-4bit" --encode "hello world"
22+
zig build run -- --model "phi-4-4bit" --decode "15339 1917"
23+
```

build.zig

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
const std = @import("std");
2+
3+
/// Configures the build graph for the tokenizer package.
///
/// Registered steps:
/// - `exe`:  install the `tokenizer_exe` command-line executable.
/// - `run`:  run the executable, forwarding any arguments given after `--`.
/// - `test`: build and run the tests found in `src/tokenizer.zig`.
/// - `lib`:  install the `tokenizer` shared library; this is also what the
///           default install step (`zig build`) produces.
///
/// Every artifact is rooted at `src/tokenizer.zig` and links the `pcre2-8`
/// artifact exported by the `pcre2` dependency.
pub fn build(b: *std.Build) !void {
    const target = b.standardTargetOptions(.{});
    const optimize = b.standardOptimizeOption(.{});

    // The dependency is built with the same target/optimize settings as the
    // artifacts that link it.
    const pcre2_dep = b.dependency("pcre2", .{
        .target = target,
        .optimize = optimize,
    });
    const pcre2_lib = pcre2_dep.artifact("pcre2-8");

    // Exe
    const exe = b.addExecutable(.{
        .name = "tokenizer_exe",
        .root_source_file = b.path("src/tokenizer.zig"),
        .target = target,
        // Honor the user's choice (`--release=fast`, `-Doptimize=...`) instead
        // of pinning ReleaseFast, so the flag documented in the README takes
        // effect and debug/safe builds of the executable are possible.
        .optimize = optimize,
    });
    exe.linkLibrary(pcre2_lib);
    const exe_install = b.addInstallArtifact(exe, .{});
    const exe_step = b.step("exe", "Install executable");
    exe_step.dependOn(&exe_install.step);

    // Run exe
    const exe_run = b.addRunArtifact(exe);
    // Forward everything after `--` on the `zig build run` command line.
    if (b.args) |args| exe_run.addArgs(args);
    const run_step = b.step("run", "Run tokenizer app");
    run_step.dependOn(&exe_run.step);

    // Tests
    const tst = b.addTest(.{
        .root_source_file = b.path("src/tokenizer.zig"),
        .target = target,
        .optimize = optimize,
    });
    tst.linkLibrary(pcre2_lib);

    // Run test
    const run_test = b.addRunArtifact(tst);
    const test_step = b.step("test", "Run tests");
    test_step.dependOn(&run_test.step);

    // Lib
    const lib = b.addSharedLibrary(.{
        .name = "tokenizer",
        .root_source_file = b.path("src/tokenizer.zig"),
        .target = target,
        .optimize = optimize,
    });
    lib.linkLibrary(pcre2_lib);
    const lib_install = b.addInstallArtifact(lib, .{});
    const lib_step = b.step("lib", "Install library only");
    lib_step.dependOn(&lib_install.step);
    // A bare `zig build` installs only the shared library.
    b.getInstallStep().dependOn(&lib_install.step);
}

build.zig.zon

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
.{
    .name = "tokenizer",
    .version = "0.0.1",
    // Zig release the README names as the requirement for this package.
    .minimum_zig_version = "0.13.0",
    .dependencies = .{
        // PCRE2 source archive; build.zig links its "pcre2-8" artifact.
        .pcre2 = .{
            .url = "https://github.com/PCRE2Project/pcre2/archive/refs/tags/pcre2-10.45.tar.gz",
            .hash = "1220c927731a4b31f5ebf8b4f33f0efe6fa1561cdcf026f39cbee512e08ad24fb785",
        },
    },
    // Files and directories that make up the package.
    .paths = .{
        "build.zig",
        "build.zig.zon",
        "src",
        "LICENSE",
        "README.md",
    },
}

0 commit comments

Comments
 (0)