Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
109 changes: 109 additions & 0 deletions maintainers/tools/purl_tables/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
#!/usr/bin/env python3

"""Tool for updating the tables for PURL."""

import argparse
import os
import sys

from typing import List
from urllib.parse import quote


def _percent_encode(s: str) -> str:
"""
Percent-encodes a character according to the [PURL specification](https://github.com/package-url/purl-spec/blob/main/PURL-SPECIFICATION.rst#character-encoding).
"""

try:
return quote(s, encoding=None).replace("%3A", ":")
except:
return None


def _run(args) -> int:
    """Writes the byte-encoding table and, for `.bzl` outputs, its test data.

    Args:
        args: Parsed command-line arguments. Only `args.output`, the path of
            the table file to write, is read.
    Returns:
        Zero on success, one when `LC_ALL` is not `en_US.UTF-8` (in which case
        nothing is written).
    """

    lc_all = os.environ.get("LC_ALL")
    if lc_all != "en_US.UTF-8":
        print("Your environment settings will reorder the file badly.")
        print("Please rerun as:")
        print(' LC_ALL="en_US.UTF-8"', " ".join(sys.argv))
        # Nothing was generated, so report failure rather than success.
        return 1

    # Write UTF-8 explicitly instead of relying on the locale's default
    # encoding; the test data below contains non-ASCII text.
    with open(args.output, "w", encoding="utf-8") as f:
        f.write("""# Generated by maintainers/tools/purl_tables/main.py. DO NOT EDIT.

visibility([
"//purl/...",
])

""")
        f.write("encode_byte = {\n")
        # One entry per possible byte value.
        for i in range(256):
            f.write(""" %d: "%s",\n""" % (i, _percent_encode(bytes([i]))))
        f.write("}\n")

    # NOTE: these strings are written unescaped between double quotes, so they
    # must not contain `"` or `\`.
    percent_encoding_tests = [
        "foo",
        "Hello, World!",
        "path: /foo",
        # German
        "München",
        "Köln",
        # Swedish
        "Småland",
        # French
        "française",
        # Spanish
        "¡Hola Mundo!",
        # Arabic
        "مرحبا بالعالم!",
        # Chinese
        "你好世界!",
        # Japanese
        "こんにちは世界!",
        # Emoji
        "🙎",
        "🙊",
        # Emoji with modifiers.
        str(
            b"\xf0\x9f\x99\x8e\xf0\x9f\x8f\xbe\xe2\x80\x8d\xe2\x99\x80\xef\xb8\x8f",
            "utf8",
        ),
    ]

    suffix = ".bzl"
    if args.output.endswith(suffix):
        # Swap only the trailing extension; `str.replace` would also rewrite
        # any earlier ".bzl" in the path (e.g. a directory name).
        test_data = args.output[: -len(suffix)] + "_test.bzl"
        with open(test_data, "w", encoding="utf-8") as f:
            f.write("""# Generated by maintainers/tools/purl_tables/main.py. DO NOT EDIT.

visibility([
"//purl/...",
])
""")
            f.write("test_cases = {\n")
            for s in percent_encoding_tests:
                f.write(" \"%s\": \"%s\",\n" % (s, _percent_encode(s)))
            f.write("}\n")

    return 0


def main(argv: List[str]) -> int:
    """Parses the command line and runs the table generator.

    Args:
        argv: command-line arguments, such as sys.argv (including the program
            name in argv[0]).
    Returns:
        The status code returned by `_run`.
    """

    cli = argparse.ArgumentParser(description="Update tables for PURL.")
    cli.add_argument("--output", required=True, help="The .bzl file to write to.")

    # argv[0] is the program name, which argparse must not see as an argument.
    options = cli.parse_args(argv[1:])
    return _run(options)


# Script entry point: the status returned by main() becomes the exit code.
if __name__ == "__main__":
    raise SystemExit(main(sys.argv))
8 changes: 8 additions & 0 deletions metadata/purl/BUILD
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
load("//purl:percent_encoding_test.bzl", "percent_encoding_test")
load("//purl/private:tables_test.bzl", "test_cases")

exports_files(
[
"purl.bzl",
Expand All @@ -12,3 +15,8 @@ filegroup(
],
visibility = ["//visibility:public"],
)

# Runs the `percent_encoding_test` rule over `test_cases`, the table loaded
# from //purl/private:tables_test.bzl.
percent_encoding_test(
name = "percent_encoding_test",
cases = test_cases,
)
37 changes: 37 additions & 0 deletions metadata/purl/percent_encoding.bzl
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
"""Utils for [purl](https://github.com/package-url/purl-spec)'s `percent encoding`.

Spec: https://github.com/package-url/purl-spec/blob/main/PURL-SPECIFICATION.rst#character-encoding
"""

load("//purl:string.bzl", "string")
load("//purl/private:tables.bzl", "encode_byte")

visibility([
"//purl/...",
])

def _encode_byte(b):
    """Looks up the percent-encoded form of a single byte.

    Args:
        b: The byte to encode; used as the key into the `encode_byte` table.
    Returns:
        The encoded string.
    """

    result = encode_byte.get(b)
    if result:
        return result

    # No usable table entry for this key: abort evaluation with a diagnostic.
    fail("Cannot encode {} (type={})".format(b, type(b)))

def percent_encode(value):
    """Percent-encodes a string byte by byte.

    Args:
        value (string): The string to encode.
    Returns:
        The concatenation of the encoded form of every byte of `value`.
    """

    pieces = []
    for byte in string.to_bytes(value):
        pieces.append(_encode_byte(byte))
    return "".join(pieces)
53 changes: 53 additions & 0 deletions metadata/purl/percent_encoding_test.bzl
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
"""Rule for testing `percent_encode` and `percent_decode`."""

load("//purl:percent_encoding.bzl", "percent_encode")

visibility([
"//purl/...",
])

# Script written as the test executable when the host path separator is ":".
# `.strip()` drops the leading newline so the shebang is the first line.
_bash_executable = """
#!/usr/bin/env bash

echo "All tests passed"
""".strip()

# Script written as the test executable for every other host. It reports the
# same message as the bash variant; `@echo off` keeps the command itself out
# of the output.
_bat_executable = """
@echo off
echo All tests passed
""".strip()

def _percent_encoding_test_impl(ctx):
    """Checks every case in `cases`, then emits a trivial test executable.

    Args:
        ctx: The rule context; `ctx.attr.cases` maps each decoded string to
            its expected encoding.
    Returns:
        A list with one `DefaultInfo` whose executable is the written script.
    """

    # The comparison happens while the rule is evaluated: a mismatch calls
    # `fail`, so the script below is only written when every case matched.
    for decoded, expected in ctx.attr.cases.items():
        got = percent_encode(decoded)
        if got != expected:
            fail("Error encoding {}: expected {}, got {}".format(decoded, expected, got))

    # A ":" path separator selects the bash script, anything else the batch one.
    if ctx.configuration.host_path_separator == ":":
        script = _bash_executable
    else:
        script = _bat_executable

    # Unix does not care about the file extension, so always use `.bat` so it
    # also works on Windows.
    launcher = ctx.actions.declare_file("{}.bat".format(ctx.attr.name))
    ctx.actions.write(
        output = launcher,
        content = script,
        is_executable = True,
    )

    outputs = depset(direct = [launcher])
    return [DefaultInfo(files = outputs, executable = launcher)]

# Test rule: `cases` is a required dict mapping each decoded string to its
# expected encoding.
percent_encoding_test = rule(
    attrs = {"cases": attr.string_dict(mandatory = True)},
    implementation = _percent_encoding_test_impl,
    test = True,
)
Empty file added metadata/purl/private/BUILD
Empty file.
Loading