Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Flatten the scanner files into one file. #6

Closed
wants to merge 5 commits into from
Closed
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
Flatten the scanner
NullVoxPopuli committed Aug 21, 2024
commit 896877bcee6c719a8c95c2145a17b7c25ac456e1
316 changes: 307 additions & 9 deletions src/scanner.c
Original file line number Diff line number Diff line change
@@ -2,18 +2,15 @@
//
//
//
// Scanners are not extensible, so we have to copy them,
//
// To Update:
// - delete everything under the comment block
// - find and replace "tree_sitter_javascript" with "tree_sitter_glimmer_typescript"
// - there should be 5 methods updated
// - %s/tree_sitter_javascript/tree_sitter_glimmer_typescript/g
// Scanners are not extensible, so we have to copy them
//
// rename external_scanner_scan => tree_sitter_glimmer_typescript_external_scanner_scan
//
//
// ----------------------------------------------------------
#include "./tree-sitter-typescript/scanner.h"
#include "tree_sitter/parser.h"

#include <wctype.h>

// Create the external scanner's payload.
//
// Always returns NULL: no per-parser state is allocated here.
// Declared with `(void)` so the definition is a real prototype taking no
// arguments rather than an old-style, unchecked parameter list.
void *tree_sitter_glimmer_typescript_external_scanner_create(void) {
    return NULL;
}

@@ -23,6 +20,307 @@ unsigned tree_sitter_glimmer_typescript_external_scanner_serialize(void *payload

// Restore scanner state from a serialized buffer.
//
// Intentionally a no-op: none of the arguments are read or written.
void tree_sitter_glimmer_typescript_external_scanner_deserialize(void *payload, const char *buffer, unsigned length) {
    (void)payload;
    (void)buffer;
    (void)length;
}

// bool tree_sitter_glimmer_typescript_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) {
// return external_scanner_scan(payload, lexer, valid_symbols);
// }

/***********************************
*
* from tree-sitter-typescript v0.21.0
*
* ***********************************/
// Symbols this scanner can produce or query through `valid_symbols`.
// NOTE(review): the numeric order presumably has to match the `externals`
// list of the grammar -- confirm before reordering or inserting entries.
enum TokenType {
    AUTOMATIC_SEMICOLON = 0,                 // emitted by scan_automatic_semicolon
    TEMPLATE_CHARS,                          // emitted by scan_template_chars
    TERNARY_QMARK,                           // emitted by scan_ternary_qmark
    HTML_COMMENT,                            // emitted by scan_closing_comment
    LOGICAL_OR,                              // only queried: signals an expression context
    ESCAPE_SEQUENCE,                         // only queried when gating HTML comments
    REGEX_PATTERN,                           // only queried when gating HTML comments
    FUNCTION_SIGNATURE_AUTOMATIC_SEMICOLON,  // only queried by the semicolon logic
    ERROR_RECOVERY,                          // not referenced by the scanner functions in this file
};

// Move the lexer one character forward, passing `false` as the second
// argument of the lexer's advance callback (tree-sitter's "skip" flag), i.e.
// the character is consumed as part of the token rather than as whitespace.
static void advance(TSLexer *lexer) {
    lexer->advance(lexer, false);
}

// Move the lexer one character forward, passing `true` as the second
// argument of the lexer's advance callback (tree-sitter's "skip" flag), i.e.
// the character is treated as whitespace preceding the token.
static void skip(TSLexer *lexer) {
    lexer->advance(lexer, true);
}

// Scan a run of literal characters inside a template string.
//
// The token ends (without consuming the terminator) at a backtick, a
// backslash, or a `${` sequence. Returns true only when at least one
// character was consumed before the terminator; returns false at end of
// input.
static bool scan_template_chars(TSLexer *lexer) {
    bool consumed_any = false;

    lexer->result_symbol = TEMPLATE_CHARS;
    while (true) {
        // Everything consumed so far belongs to the token.
        lexer->mark_end(lexer);

        if (lexer->lookahead == '\0') {
            return false;
        }
        if (lexer->lookahead == '`' || lexer->lookahead == '\\') {
            return consumed_any;
        }

        bool was_dollar = lexer->lookahead == '$';
        advance(lexer);
        if (was_dollar && lexer->lookahead == '{') {
            // `${` opens a substitution; the `$` stays outside the token
            // because mark_end was called before it was consumed.
            return consumed_any;
        }

        consumed_any = true;
    }
}

// Skip over any mix of whitespace, `//` line comments and `/* */` block
// comments.
//
// Returns true once the lookahead is something other than whitespace or a
// comment opener. Returns false when a '/' is found that does not start a
// comment (that '/' has already been skipped at that point).
// `*scanned_comment` is set to true only when a line comment was skipped;
// block comments leave it untouched.
static bool scan_whitespace_and_comments(TSLexer *lexer, bool *scanned_comment) {
    while (true) {
        while (iswspace(lexer->lookahead)) {
            skip(lexer);
        }

        if (lexer->lookahead != '/') {
            return true;
        }
        skip(lexer);

        if (lexer->lookahead == '/') {
            // Line comment: drop the second '/' and everything up to (but not
            // including) the newline or end of input.
            do {
                skip(lexer);
            } while (lexer->lookahead != 0 && lexer->lookahead != '\n');
            *scanned_comment = true;
            continue;
        }

        if (lexer->lookahead != '*') {
            return false;
        }

        // Block comment: drop characters until the closing `*/` or end of input.
        skip(lexer);
        bool closed = false;
        while (!closed && lexer->lookahead != 0) {
            bool saw_star = lexer->lookahead == '*';
            skip(lexer);
            if (saw_star && lexer->lookahead == '/') {
                skip(lexer);
                closed = true;
            }
        }
    }
}

// Decide whether an automatic semicolon may be inserted at the current
// position.
//
// The token is zero-width: mark_end is called before anything is skipped.
// Returns true when a semicolon should be inserted, false otherwise.
// `*scanned_comment` is forwarded to scan_whitespace_and_comments.
static bool scan_automatic_semicolon(TSLexer *lexer, const bool *valid_symbols, bool *scanned_comment) {
    lexer->result_symbol = AUTOMATIC_SEMICOLON;
    lexer->mark_end(lexer);

    // Phase 1: walk over same-line whitespace looking for a newline, end of
    // input, or a closing brace.
    while (lexer->lookahead != '\n') {
        if (lexer->lookahead == 0) {
            return true;
        }

        if (lexer->lookahead == '}') {
            // A semicolon before '}' would break object patterns in a typed
            // context, e.g.
            //   type F = ({a}: {a: number}) => number;
            // so when the brace is followed by ':' a semicolon is only
            // allowed if `||` is valid (i.e. we may be inside a ternary).
            skip(lexer);
            while (iswspace(lexer->lookahead)) {
                skip(lexer);
            }
            return lexer->lookahead != ':' || valid_symbols[LOGICAL_OR];
        }

        if (!iswspace(lexer->lookahead)) {
            return false;
        }
        skip(lexer);
    }

    // Step past the newline itself.
    skip(lexer);

    if (!scan_whitespace_and_comments(lexer, scanned_comment)) {
        return false;
    }

    // Phase 2: the first significant character on the following line decides.
    switch (lexer->lookahead) {
        // These can only continue the previous statement.
        case ',':
        case '.':
        case ';':
        case '*':
        case '%':
        case '>':
        case '<':
        case '=':
        case '?':
        case '^':
        case '|':
        case '&':
        case '/':
        case ':':
            return false;

        case '{':
            return !valid_symbols[FUNCTION_SIGNATURE_AUTOMATIC_SEMICOLON];

        // No semicolon before '(' or '[' unless a type is being parsed; the
        // validity of a binary operator token tells the two apart.
        case '(':
        case '[':
            return !valid_symbols[LOGICAL_OR];

        // Semicolon before `++` / `--`, but not before binary `+` / `-`.
        case '+':
            skip(lexer);
            return lexer->lookahead == '+';
        case '-':
            skip(lexer);
            return lexer->lookahead == '-';

        // Semicolon before unary `!`, but not before `!=`.
        case '!':
            skip(lexer);
            return lexer->lookahead != '=';

        // No semicolon before the keywords `in` / `instanceof`, but do insert
        // one before any other identifier starting with 'i'.
        case 'i': {
            skip(lexer);
            if (lexer->lookahead != 'n') {
                return true;
            }
            skip(lexer);

            // "in" followed by a non-letter: the `in` keyword.
            if (!iswalpha(lexer->lookahead)) {
                return false;
            }

            for (const char *rest = "stanceof"; *rest != '\0'; rest++) {
                if (lexer->lookahead != *rest) {
                    return true;
                }
                skip(lexer);
            }

            // "instanceof" followed by a letter is a longer identifier.
            return iswalpha(lexer->lookahead) != 0;
        }

        default:
            break;
    }

    return true;
}

// Recognise the '?' of a ternary expression.
//
// Leading whitespace is skipped. The token covers just the '?'; characters
// examined afterwards are consumed only as lookahead because mark_end is
// called right after the '?'. Returns false for `??`, `?.`, and for a '?'
// followed (after optional whitespace) by ':', ')' or ','.
static bool scan_ternary_qmark(TSLexer *lexer) {
    while (iswspace(lexer->lookahead)) {
        skip(lexer);
    }

    if (lexer->lookahead != '?') {
        return false;
    }
    advance(lexer);

    // `??` and `?.` are different operators, not a ternary.
    if (lexer->lookahead == '?' || lexer->lookahead == '.') {
        return false;
    }

    lexer->mark_end(lexer);
    lexer->result_symbol = TERNARY_QMARK;

    // TypeScript optional parameters contain the `?:` sequence, possibly
    // with whitespace in between, so peek past the whitespace.
    while (iswspace(lexer->lookahead)) {
        advance(lexer);
    }

    switch (lexer->lookahead) {
        case ':':
        case ')':
        case ',':
            return false;

        case '.':
            // `? .5` is a ternary whose consequent is a number literal; any
            // other '.' is rejected.
            advance(lexer);
            return iswdigit(lexer->lookahead) != 0;

        default:
            return true;
    }
}

// Scan an HTML-style comment: either `<!--` or `-->` followed by the rest of
// the line.
//
// Leading whitespace (including U+2028 / U+2029) is skipped. On success the
// token runs from the marker up to, but not including, the next '\n',
// U+2028, U+2029 or end of input, and HTML_COMMENT is reported.
static bool scan_closing_comment(TSLexer *lexer) {
    while (iswspace(lexer->lookahead) || lexer->lookahead == 0x2028 || lexer->lookahead == 0x2029) {
        skip(lexer);
    }

    // Pick the marker to match from its first character.
    const char *marker;
    if (lexer->lookahead == '<') {
        marker = "<!--";
    } else if (lexer->lookahead == '-') {
        marker = "-->";
    } else {
        return false;
    }

    for (; *marker != '\0'; marker++) {
        if (lexer->lookahead != *marker) {
            return false;
        }
        advance(lexer);
    }

    // Consume the remainder of the line.
    while (!(lexer->lookahead == 0 || lexer->lookahead == '\n' || lexer->lookahead == 0x2028 ||
             lexer->lookahead == 0x2029)) {
        advance(lexer);
    }

    lexer->result_symbol = HTML_COMMENT;
    lexer->mark_end(lexer);

    return true;
}

// External scanner entry point: dispatch to the scanner matching the symbols
// the parser currently accepts.
//
// payload       - scanner state; unused.
// lexer         - tree-sitter lexer to read from and report the token on.
// valid_symbols - indexed by enum TokenType; true for each acceptable symbol.
//
// Returns true when a token was recognised (lexer->result_symbol is set by
// the scanner that matched), false otherwise.
//
// The dispatch is implemented directly in this file; it must not forward to
// a separate `external_scanner_scan`, which would bypass all of the logic
// below.
bool tree_sitter_glimmer_typescript_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) {
    (void)payload;

    if (valid_symbols[TEMPLATE_CHARS]) {
        // Template characters and an automatic semicolon being valid at the
        // same time is rejected outright.
        // NOTE(review): presumably this combination only occurs during error
        // recovery, where every symbol is marked valid -- confirm.
        if (valid_symbols[AUTOMATIC_SEMICOLON]) {
            return false;
        }
        return scan_template_chars(lexer);
    }

    if (valid_symbols[AUTOMATIC_SEMICOLON] || valid_symbols[FUNCTION_SIGNATURE_AUTOMATIC_SEMICOLON]) {
        bool scanned_comment = false;
        bool ret = scan_automatic_semicolon(lexer, valid_symbols, &scanned_comment);
        // No semicolon, no line comment skipped, and the lexer is sitting on
        // a '?': give the ternary scanner a chance.
        if (!ret && !scanned_comment && valid_symbols[TERNARY_QMARK] && lexer->lookahead == '?') {
            return scan_ternary_qmark(lexer);
        }
        return ret;
    }

    if (valid_symbols[TERNARY_QMARK]) {
        return scan_ternary_qmark(lexer);
    }

    // HTML comments are only attempted when none of the binary-operator,
    // escape-sequence or regex symbols are acceptable.
    if (valid_symbols[HTML_COMMENT] && !valid_symbols[LOGICAL_OR] && !valid_symbols[ESCAPE_SEQUENCE] &&
        !valid_symbols[REGEX_PATTERN]) {
        return scan_closing_comment(lexer);
    }

    return false;
}
304 changes: 0 additions & 304 deletions src/tree-sitter-typescript/scanner.h

This file was deleted.