Skip to content

Switch to native Julia highlighter #666

Open
@maleadt

Description

@maleadt

We should switch to a native Julia highlighter instead of using Pygments. For example, using Highlights.jl and some example lexers as generated by an LLM:

# Lexer definition for NVIDIA PTX assembly, registered through `@lexer`.
#
# The `let` block builds the shared pattern fragments and word lists and evaluates to the
# definition `Dict` (name, description, aliases, file globs, MIME types, and the `:root`
# token rules). NOTE(review): the rules are presumably tried in the order listed, as in
# Pygments-style regex lexers -- confirm against the lexer framework.
@lexer PTXLexer let
    # Pattern fragments are deliberately plain `String`s, not `Regex` objects: `*` between a
    # `String` and a `Regex` produces a `Regex` in which the string part is quoted literally,
    # which would turn the grouping and alternation syntax below into literal text.
    string_pat = raw"\"[^\"]*?\""
    identifier = "(" * raw"[-a-zA-Z$._][\w\-$.]*|" * string_pat * ")"
    block_label = "(" * identifier * raw"|(\d+))"

    # Instruction mnemonics (matched as whole words).
    instruction_keywords = [
        "abs", "discard", "min", "shf", "vadd",
        "activemask", "div", "mma", "shfl", "vadd2",
        "add", "dp2a", "mov", "shl", "vadd4",
        "addc", "dp4a", "movmatrix", "shr", "vavrg2",
        "alloca", "elect", "mul", "sin", "vavrg4",
        "and", "ex2", "mul24", "slct", "vmad",
        "applypriority", "exit", "multimem", "sqrt", "vmax",
        "atom", "fence", "nanosleep", "st", "vmax2",
        "bar", "fma", "neg", "stackrestore", "vmax4",
        "barrier", "fns", "not", "stacksave", "vmin",
        "bfe", "getctarank", "or", "stmatrix", "vmin2",
        "bfi", "griddepcontrol", "pmevent", "sub", "vmin4",
        "bfind", "isspacep", "popc", "subc", "vote",
        "bmsk", "istypep", "prefetch", "suld", "vset",
        "bra", "ld", "prefetchu", "suq", "vset2",
        "brev", "ldmatrix", "prmt", "sured", "vset4",
        "brkpt", "ldu", "rcp", "sust", "vshl",
        "brx", "lg2", "red", "szext", "vshr",
        "call", "lop3", "redux", "tanh", "vsub",
        "clz", "mad", "rem", "testp", "vsub2",
        "cnot", "mad24", "ret", "tex", "vsub4",
        "copysign", "madc", "rsqrt", "tld4", "wgmma",
        "cos", "mapa", "sad", "trap", "wmma",
        "cp", "match", "selp", "txq", "xor",
        "createpolicy", "max", "set", "vabsdiff", "cvt",
        "mbarrier", "setmaxnreg", "vabsdiff2", "cvta",
        "membar", "setp", "vabsdiff4"
    ]

    # State spaces and related suffixes; every entry carries its leading dot.
    state_spaces = [
        ".reg", ".sreg", ".const", ".global",
        ".local", ".param", ".shared", ".tex",
        ".wide", ".loc"
    ]

    # Assembler directives. Several entries also appear in `state_spaces`; for those the
    # state-space rule is listed first in `:root`.
    directives = [
        ".address_size", ".explicitcluster", ".maxnreg", ".section",
        ".alias", ".extern", ".maxntid", ".shared",
        ".align", ".file", ".minnctapersm", ".sreg",
        ".branchtargets", ".func", ".noreturn", ".target",
        ".callprototype", ".global", ".param", ".tex",
        ".calltargets", ".loc", ".pragma", ".version",
        ".common", ".local", ".reg", ".visible",
        ".const", ".maxclusterrank", ".reqnctapercluster", ".weak",
        ".entry", ".maxnctapersm", ".reqntid"
    ]

    # Fundamental type suffixes.
    types = [
        ".s8", ".s16", ".s32", ".s64",
        ".u8", ".u16", ".u32", ".u64",
        ".f16", ".f16x2", ".f32", ".f64",
        ".b8", ".b16", ".b32", ".b64",
        ".pred"
    ]

    Dict(
        :name => "PTX",
        :description => "A lexer for NVIDIA PTX (Parallel Thread Execution) source code.",
        :aliases => ["ptx"],
        :filenames => ["*.ptx"],
        :mimetypes => ["text/x-ptx"],
        :tokens => Dict(
            :root => [
                (r"\s+", TEXT),
                (r"//.*?\n", COMMENT_SINGLE),
                # Compile the assembled fragment explicitly so the rule receives a `Regex`
                # like every other rule in this list.
                (Regex(block_label * raw"\s*:"), NAME_LABEL),
                (words(instruction_keywords; prefix="\\b", suffix="\\b"), KEYWORD),
                # The dotted word lists use an empty prefix: `\b` in front of a `.` only
                # matches when a word character precedes the dot, so `.reg`, `.version`
                # or `.u32` after whitespace would never be recognised.
                (words(state_spaces; prefix="", suffix="\\b"), KEYWORD_PSEUDO),
                (words(directives; prefix="", suffix="\\b"), KEYWORD_RESERVED),
                (words(types; prefix="", suffix="\\b"), KEYWORD_TYPE),
                (r"%[-a-zA-Z$._][\w\-$.]*", NAME_VARIABLE),
                (r"%\d+", NAME_VARIABLE),
                (r"c?\"[^\"]*?\"", STRING),
                (r"[-a-zA-Z$._][\w\-$.]*", NAME_VARIABLE),
                (r"0[xX][a-fA-F0-9]+", NUMBER_HEX),
                (r"-?\d+(?:[.]\d+)?(?:[eE][-+]?\d+(?:[.]\d+)?)?", NUMBER_FLOAT),
                (r"[=<>{}\[\]()*.,!]|x\b", PUNCTUATION),
                # `-` is placed last in the class so it is a literal minus rather than
                # forming the range `+-/` (which would also match `,` and `.`).
                (r"[*+/-]", OPERATOR),
                (r";", PUNCTUATION),
            ],
        ),
    )
end
# Lexer definition for AMD GPU assembly, registered through `@lexer`.
#
# The definition `Dict` carries the lexer name, description, aliases, file globs and the
# `:root` token rules. NOTE(review): the rules are presumably tried in the order listed, as
# in Pygments-style regex lexers -- confirm against the lexer framework.
@lexer AMDGPULexer Dict(
    :name => "AMDGPU",
    :description => "A lexer for AMD GPU assembly code.",
    :aliases => ["amdgpu"],
    :filenames => ["*.isa"],
    :tokens => Dict(
        :root => [
            # `\s` already covers `\r` and `\n`, so no separate newline rule is needed.
            (r"\s+", WHITESPACE),
            (r"([a-z_0-9])*:([a-z_0-9])*", NAME_ATTRIBUTE),
            (r"[\[\]\(\),:\&]", TEXT),
            # The comment marker alternatives are grouped so that `;`, `#` and `//` all
            # consume the rest of the line; ungrouped, `[;#]` would match one character.
            (r"(?:[;#]|//).*?\n", COMMENT_SINGLE),
            (r"((s_)?(scratch|ds|buffer|flat|image)_[a-z0-9_]+)", KEYWORD_RESERVED),
            (r"(_lo|_hi)", NAME_VARIABLE),
            (r"(vmcnt|lgkmcnt|expcnt)", NAME_ATTRIBUTE),
            (r"(attr[0-9]\.[a-z])", NAME_ATTRIBUTE),
            # Affixes are passed as `String`s, matching the `words` calls in the PTX lexer.
            (words([
                "op", "vaddr", "vdata", "off", "soffset", "srsrc", "format",
                "offset", "offen", "idxen", "glc", "dlc", "slc", "tfe", "lds",
                "lit", "unorm"
            ]; suffix="\\b"), NAME_ATTRIBUTE),
            (r"(label_[a-z0-9]+)", KEYWORD),
            (r"(_L[0-9]*)", NAME_VARIABLE),
            (r"(s|v)_[a-z0-9_]+", KEYWORD),
            (r"(v[0-9.]+|vcc|exec|v)", NAME_VARIABLE),
            (r"s[0-9.]+|s", NAME_VARIABLE),
            # Digits are required on both sides of the decimal point.
            (r"[0-9]+\.[0-9]+", NUMBER_FLOAT),
            # Hex literals accept hexadecimal digits only, in either case.
            (r"(0[xX][a-fA-F0-9]+)|([0-9]+)", NUMBER_INTEGER)
        ]
    )
)

They don't render beautifully in the terminal though, so they probably need some tweaking (or a theme to accompany the text/ansi renderer).

Originally posted by @maleadt in #659 (comment)

Metadata

Metadata

Assignees

No one assigned

    Labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions