-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathocaml-tree-sitter-gen-ocaml
executable file
·300 lines (260 loc) · 7.18 KB
/
ocaml-tree-sitter-gen-ocaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
#! /usr/bin/env bash
#
# Generate OCaml parsers
#
# For lang=ruby, the file hierarchy we create looks like this:
#
# ocaml-src
# ├── bin
# │ ├── dune
# │ └── Main.ml
# ├── lib
# │ ├── bindings.c
# │ ├── Boilerplate.ml
# │ ├── CST.ml
# │ ├── dune
# │ ├── Parse.ml
# │ ├── Parse.mli
# │ ├── parser.c
# │ ├── scanner.cc
# │ └── tree_sitter
# │ └── parser.h
# └── tree-sitter-lang.opam
#
# Fail fast: abort on any command error (-e), on use of an unset variable
# (-u), and when any stage of a pipeline fails (pipefail).
set -eu -o pipefail

# The ocaml-tree-sitter executable and the runtime library are assumed
# to have been installed with 'make install'.
#
# The lookup is wrapped in 'if' so that a missing tool produces an explicit
# message rather than a silent exit triggered by 'set -e'.
if ! ocaml_tree_sitter=$(command -v ocaml-tree-sitter); then
  echo "Error: ocaml-tree-sitter not found in PATH" >&2
  exit 1
fi

# Default values for the command-line options.
default_dst_dir="ocaml-src"
default_src_dir="src"
default_lang="lang"
default_package="tree-sitter-lang"
# Print the help message on stdout.
#
# Reads the default_* variables so the displayed defaults always match the
# values the script starts with, regardless of options already parsed.
usage() {
  cat <<EOF
Usage: $(basename "$0") [OPTIONS]
Call ocaml-tree-sitter to derive an OCaml parsing library and executable
from a tree-sitter grammar in json format, normally generated by
tree-sitter as 'src/grammar.json'.
Options:
--dst DST_DIR
Specify the output directory. Default: $default_dst_dir
--help
Show this help message and exit.
--lang LANG
Name of the programming language. It will be part of the name
of the library PKG.LANG and of the OCaml module.
Case conversion and conversions between dashes and underscores will
take place as needed.
Default: $default_lang
--src SRC_DIR
Location of the 'src' folder with some of its contents generated
by tree-sitter. It must contain 'grammar.json', 'parser.c',
and optionally other C files ('scanner.c' or 'scanner.cc') needed
to build the C parser. Default: $default_src_dir
--package PKG
The Dune package name to which the library will belong.
The library name will be PKG.LANG where LANG is the language name.
Default: $default_package.
--trace
Print debugging info during parsing.
Environment variables used by the generated dune file:
TREESITTER_INCDIR
Where the tree-sitter C headers were installed.
Default: /usr/local/include
TREESITTER_LIBDIR
Where the tree-sitter runtime library was installed.
Default: /usr/local/lib
EOF
}
# Report a fatal error: write "Error: " followed by all arguments (joined
# into one line) to stderr, then terminate the script with exit status 1.
error() {
  printf 'Error: %s\n' "$*" >&2
  exit 1
}
# Refuse to continue if the generator cannot be executed.
if [[ ! -x "$ocaml_tree_sitter" ]]; then
  error "missing executable $ocaml_tree_sitter"
fi

# Every option starts from its default; the command line may override it.
dst_dir="$default_dst_dir"
src_dir="$default_src_dir"
package="$default_package"
lang="$default_lang"

# Extra arguments forwarded to ocaml-tree-sitter; stays empty unless
# --trace is requested.
trace_option=()
# Parse the command line.
#
# Options that take a value consume two arguments: the 'shift' inside the
# branch drops the option name, and the shared 'shift' at the bottom of the
# loop drops the value. Each such branch first checks that a value is
# present so the user gets a clear message instead of an "unbound variable"
# failure from 'set -u'.
while [[ $# -gt 0 ]]; do
  case "$1" in
    --dst)
      [[ $# -ge 2 ]] || error "Missing value for option $1"
      dst_dir="$2"
      shift
      ;;
    --help)
      usage
      exit 0
      ;;
    --lang)
      [[ $# -ge 2 ]] || error "Missing value for option $1"
      lang="$2"
      shift
      ;;
    --package)
      [[ $# -ge 2 ]] || error "Missing value for option $1"
      package="$2"
      shift
      ;;
    --src)
      [[ $# -ge 2 ]] || error "Missing value for option $1"
      src_dir="$2"
      shift
      ;;
    --trace)
      # Assigned as an array, since it is later expanded with [@].
      trace_option=(--trace)
      ;;
    *)
      error "Unsupported argument: $1"
      ;;
  esac
  shift
done
# Derive the two spellings of the language name used in generated files:
# lowercase with dashes and lowercase with underscores.
#
# printf is used instead of echo so that a value that looks like an echo
# option (e.g. "-n") is passed through as data.
lang_dashes=$(printf '%s\n' "$lang" | tr 'A-Z_' 'a-z-')
lang_underscores=$(printf '%s\n' "$lang" | tr 'A-Z-' 'a-z_')
# Validate the inputs before touching the output directory: the output
# directory is wiped below, so a wrong --src or a --dst that points at the
# sources must be rejected first.
#
[[ -n "$dst_dir" ]] || error "the destination directory must not be empty"
[[ -f "$src_dir"/grammar.json ]] || error "missing file $src_dir/grammar.json"
[[ -f "$src_dir"/parser.c ]] || error "missing file $src_dir/parser.c"
[[ -d "$src_dir"/tree_sitter ]] || error "missing folder $src_dir/tree_sitter"
if [[ "$dst_dir" -ef "$src_dir" ]]; then
  error "the destination directory must differ from the source directory"
fi

# Copy what we need to the ocaml-src folder. We need at least to copy
# the C source and headers.
#
rm -rf "$dst_dir"
mkdir -p "$dst_dir"/lib

# Build the lists of C and C++ files to compile, without their extension.
# 'bindings' refers to the bindings.c file written later by this script.
#
c_files="parser bindings"
cxx_files=""
cp "$src_dir"/parser.c "$dst_dir"/lib
if [[ -e "$src_dir"/scanner.c ]]; then
  cp "$src_dir"/scanner.c "$dst_dir"/lib
  c_files="scanner $c_files"
fi
if [[ -e "$src_dir"/scanner.cc ]]; then
  cp "$src_dir"/scanner.cc "$dst_dir"/lib
  cxx_files="scanner"
fi

# Copy C header files. With nullglob, the loop body is skipped when the
# source folder contains no top-level *.h file.
#
shopt -s nullglob
for x in "$src_dir"/*.h; do cp "$x" "$dst_dir"/lib; done
cp -a "$src_dir"/tree_sitter "$dst_dir"/lib/tree_sitter
# Generate the OCaml code needed to parse the examples/*.out json files.
#
# The trailing ${trace_option[@]+...} form expands to nothing when the array
# is empty. A plain "${trace_option[@]}" on an empty array is reported as an
# unbound variable under 'set -u' by bash releases older than 4.4.
#
"$ocaml_tree_sitter" \
  gen \
  "$lang_underscores" \
  "$src_dir"/grammar.json \
  -d "$dst_dir" \
  ${trace_option[@]+"${trace_option[@]}"}
# Write lib/bindings.c, the C stub that exposes the tree-sitter parser to
# OCaml.
#
# The heredoc delimiter is unquoted, so the shell substitutes
# ${lang_underscores} in three places: the header comment, the name of the
# language function declared as implemented by parser.c, and the name of
# the OCaml primitive octs_create_parser_<lang>.
#
# The generated primitive creates a TSParser, stores it in an OCaml custom
# block whose finalizer calls ts_parser_delete, and sets the parser's
# language.
cat > "$dst_dir"/lib/bindings.c <<EOF
/*
Generated by ocaml-tree-sitter for $lang_underscores.
*/
#include <string.h>
#include <tree_sitter/api.h>
#include <caml/alloc.h>
#include <caml/bigarray.h>
#include <caml/callback.h>
#include <caml/custom.h>
#include <caml/memory.h>
#include <caml/mlvalues.h>
#include <caml/threads.h>
// Implemented by parser.c
TSLanguage *tree_sitter_${lang_underscores}();
typedef struct _parser {
TSParser *parser;
} parser_W;
static void finalize_parser(value v) {
parser_W *p;
p = (parser_W *)Data_custom_val(v);
ts_parser_delete(p->parser);
}
static struct custom_operations parser_custom_ops = {
.identifier = "parser handling",
.finalize = finalize_parser,
.compare = custom_compare_default,
.hash = custom_hash_default,
.serialize = custom_serialize_default,
.deserialize = custom_deserialize_default
};
// OCaml function
CAMLprim value octs_create_parser_${lang_underscores}(value unit) {
CAMLparam0();
CAMLlocal1(v);
parser_W parserWrapper;
TSParser *parser = ts_parser_new();
parserWrapper.parser = parser;
v = caml_alloc_custom(&parser_custom_ops, sizeof(parser_W), 0, 1);
memcpy(Data_custom_val(v), &parserWrapper, sizeof(parser_W));
ts_parser_set_language(parser, tree_sitter_${lang_underscores}());
CAMLreturn(v);
};
EOF
# Write lib/dune, the build description of the generated library.
#
# Shell substitutions performed in the heredoc:
#   $package, ${lang_dashes}   -> public library name PKG.LANG
#   ${lang_underscores}        -> internal library name tree_sitter_LANG
#   ${c_files}, ${cxx_files}   -> C and C++ stub names built earlier in this
#                                 script (cxx_files may be empty)
#
# The %{env:...} forms contain no '$', so the shell leaves them untouched
# and they reach the dune file verbatim.
cat > "$dst_dir"/lib/dune <<EOF
; required to install tree_sitter/parser.h
(include_subdirs qualified)
(library
(public_name $package.${lang_dashes})
(name tree_sitter_${lang_underscores})
(libraries atdgen-runtime tree-sitter.run)
; A copy of the C headers for the tree-sitter library is found locally.
; This is because it's important to use the right version of 'parser.h'.
;
(foreign_stubs
(language c)
(names ${c_files})
(flags -std=c99
-fPIC
-I %{env:TREESITTER_INCDIR=/usr/local/include}
-I .)
)
; TREESITTER_LIBDIR is meant to be some nonstandard location containing the
; desired version of the tree-sitter library.
; The -rpath option tells the linker to hardcode this search location
; in the binary.
;
; This works as long as libtree-sitter stays where it is, which is
; fine for test executables. Production executables should instead
; link statically against libree-sitter to avoid problems in locating
; the library at runtime.
;
(c_library_flags
(
-L%{env:TREESITTER_LIBDIR=/usr/local/lib}
-lstdc++
-ltree-sitter
-Wl,-rpath,%{env:TREESITTER_LIBDIR=/usr/local/lib}
)
)
(foreign_stubs
(language cxx)
(names ${cxx_files})
(flags -fPIC
-I %{env:TREESITTER_INCDIR=/usr/local/include}
-I .)
)
)
EOF
# Write bin/dune, which builds the parse-LANG executable from the Main
# module and links it against the generated PKG.LANG library.
#
# The bin folder is created here if needed so that this step does not
# depend on an earlier step having created it.
mkdir -p "$dst_dir"/bin
cat > "$dst_dir"/bin/dune <<EOF
(executable
(package $package)
(public_name parse-${lang_dashes})
(name Main)
(libraries $package.${lang_dashes})
)
EOF
# Emit a .gitignore covering the files dune produces when building the
# generated project. One printf argument per output line.
printf '%s\n' \
  '# Dune build data' \
  '_build' \
  '# Merlin files for Vim and Emacs generated by dune < 2.8' \
  '.merlin' \
  '# Dune-generated files' \
  '*.install' \
  > "$dst_dir"/.gitignore
############################################################################
# Files written from here on exist only so that the generated libraries and
# executables can be built as a standalone dune project, for local testing.
#
# They are not meant to be included in the destination git repo, so that
# the generated code can be plugged into an existing git project without
# modifications.
printf '(lang dune 3.7)\n(name tree-sitter-%s)\n' "$lang" \
  > "$dst_dir"/dune-project
# This is needed for building with dune: the package referenced by the
# generated dune files ($package) must be declared, which an empty
# <package>.opam file at the project root does. The file is named after
# $package so that a custom --package value is honored.
#
touch "$dst_dir/$package.opam"