Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions docs/content/manual/dev/manual.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2198,10 +2198,24 @@ sections:

The input is converted to base64 as specified by RFC 4648.

* `@base64url`:

The input is converted to base64url without padding, as specified
by RFC 4648. This unpadded variant should only be used when the
length of the data is implied by the referring specification.

* `@base64urlp`:

The input is converted to base64url with padding, as specified
by RFC 4648. This padded variant is suitable for general URL-safe
encoding, but be aware that the padding characters (`=`) will be
percent-encoded in URLs.

* `@base64d`:

The inverse of `@base64`: the input is decoded as specified by RFC 4648.
Note\: If the decoded string is not UTF-8, the results are undefined.
This function accepts both the base64 and the base64url alphabets. If
you require strict validation, re-encode the decoded string and check
that it equals the original input.

This syntax can be combined with string interpolation in a
useful way. You can follow a `@foo` token with a string
Expand Down
16 changes: 14 additions & 2 deletions jq.1.prebuilt

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

72 changes: 49 additions & 23 deletions src/builtin.c
Original file line number Diff line number Diff line change
Expand Up @@ -516,18 +516,23 @@ static jv f_utf8bytelength(jq_state *jq, jv input) {
#define CHARS_ALPHANUM "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"

static const unsigned char BASE64_ENCODE_TABLE[64 + 1] = CHARS_ALPHANUM "+/";
static const unsigned char BASE64URL_ENCODE_TABLE[64 + 1] = CHARS_ALPHANUM "-_";
static const unsigned char BASE64_INVALID_ENTRY = 0xFF;
static const unsigned char BASE64_DECODE_TABLE[255] = {
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
62, // +
0xFF, 0xFF, 0xFF,
0xFF,
62, // -
0xFF,
63, // /
52, 53, 54, 55, 56, 57, 58, 59, 60, 61, // 0-9
0xFF, 0xFF, 0xFF,
99, // =
0xFF, 0xFF, 0xFF,
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, // A-Z
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF,
63, // _
0xFF,
26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, // a-z
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
};
Expand Down Expand Up @@ -563,6 +568,41 @@ static jv escape_string(jv input, const char* escapings) {

}

/*
 * Shared encoder behind the base64-family format strings.
 *
 * jq             - interpreter state, forwarded to f_tostring().
 * input          - value to encode; it is first passed through f_tostring()
 *                  and is freed before this function returns.
 * encoding_table - 64-symbol alphabet indexed by 6-bit value.
 * is_pad         - non-zero: pad a short final group with '=' so that every
 *                  group emits four characters; zero: emit only the
 *                  characters that carry data.
 *
 * Returns a new jv string holding the encoded text.
 */
static jv base64_encode_common(jq_state *jq, jv input, const unsigned char * encoding_table, int is_pad) {
  input = f_tostring(jq, input);
  const unsigned char *bytes = (const unsigned char *)jv_string_value(input);
  const int total = jv_string_length_bytes(jv_copy(input));
  jv encoded = jv_string("");

  for (int pos = 0; pos < total; pos += 3) {
    /* Number of input bytes consumed by this group: 3, except possibly last. */
    const int remaining = total - pos;
    const int take = remaining < 3 ? remaining : 3;

    /* Pack up to three bytes into a 24-bit group, zero-filling the tail. */
    uint32_t group = 0;
    for (int k = 0; k < 3; k++) {
      group <<= 8;
      group |= k < take ? (unsigned)bytes[pos + k] : 0;
    }

    /* Split the group into four 6-bit indices, most significant first. */
    char quad[4];
    int shift = 18;
    for (int k = 0; k < 4; k++, shift -= 6) {
      quad[k] = encoding_table[(group >> shift) & 0x3f];
    }

    unsigned emit = sizeof(quad);
    if (take < 3) {
      /* A short group can only be the final one, and only occurs when the
       * input length is not a multiple of 3. One input byte fills quad[0..1];
       * two input bytes fill quad[0..2]. */
      if (!is_pad) {
        emit = take + 1;
      } else {
        quad[3] = '=';
        if (take < 2) quad[2] = '=';
      }
    }
    encoded = jv_string_append_buf(encoded, quad, emit);
  }

  jv_free(input);
  return encoded;
}

static jv f_format(jq_state *jq, jv input, jv fmt) {
if (jv_get_kind(fmt) != JV_KIND_STRING) {
jv_free(input);
Expand Down Expand Up @@ -725,27 +765,13 @@ static jv f_format(jq_state *jq, jv input, jv fmt) {
return line;
} else if (!strcmp(fmt_s, "base64")) {
jv_free(fmt);
input = f_tostring(jq, input);
jv line = jv_string("");
const unsigned char* data = (const unsigned char*)jv_string_value(input);
int len = jv_string_length_bytes(jv_copy(input));
for (int i=0; i<len; i+=3) {
uint32_t code = 0;
int n = len - i >= 3 ? 3 : len-i;
for (int j=0; j<3; j++) {
code <<= 8;
code |= j < n ? (unsigned)data[i+j] : 0;
}
char buf[4];
for (int j=0; j<4; j++) {
buf[j] = BASE64_ENCODE_TABLE[(code >> (18 - j*6)) & 0x3f];
}
if (n < 3) buf[3] = '=';
if (n < 2) buf[2] = '=';
line = jv_string_append_buf(line, buf, sizeof(buf));
}
jv_free(input);
return line;
return base64_encode_common(jq, input, BASE64_ENCODE_TABLE, 1);
} else if (!strcmp(fmt_s, "base64url")) {
jv_free(fmt);
return base64_encode_common(jq, input, BASE64URL_ENCODE_TABLE, 0);
} else if (!strcmp(fmt_s, "base64urlp")) {
jv_free(fmt);
return base64_encode_common(jq, input, BASE64URL_ENCODE_TABLE, 1);
} else if (!strcmp(fmt_s, "base64d")) {
jv_free(fmt);
input = f_tostring(jq, input);
Expand Down
47 changes: 47 additions & 0 deletions tests/base64.test
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,53 @@
"cWl4YmF6Cg"
"qixbaz\n"

# base64url encoding (Section 5 of RFC 4648) without padding
@base64url
"ab>cd?"
"YWI-Y2Q_"

@base64url
"ab>cd?x"
"YWI-Y2Q_eA"

@base64url
"ab>cd?xy"
"YWI-Y2Q_eHk"

@base64url
"ab>cd?xyz"
"YWI-Y2Q_eHl6"

# base64url encoding (Section 5 of RFC 4648) with padding
@base64urlp
"ab>cd?"
"YWI-Y2Q_"

@base64urlp
"ab>cd?x"
"YWI-Y2Q_eA=="

@base64urlp
"ab>cd?xy"
"YWI-Y2Q_eHk="

@base64urlp
"ab>cd?xyz"
"YWI-Y2Q_eHl6"

# base64url decoding (Section 5 of RFC 4648)
@base64d
"YWI-"
"ab>"

@base64d
"Y2Q_"
"cd?"

@base64d
"YWI-Y2Q_"
"ab>cd?"

# invalid base64 characters (whitespace)
. | try @base64d catch .
"Not base64 data"
Expand Down
6 changes: 5 additions & 1 deletion tests/jq.test
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ jq: error: Invalid escape at line 1, column 4 (while parsing '"\v"') at <top-lev
null
"interpolation"

@text,@json,([1,.]|@csv,@tsv),@html,(@uri|.,@urid),@sh,(@base64|.,@base64d)
@text,@json,([1,.]|@csv,@tsv),@html,(@uri|.,@urid),@sh,(@base64|.,@base64d),(@base64url|.,@base64d),(@base64urlp|.,@base64d)
"!()<>&'\"\t"
"!()<>&'\"\t"
"\"!()<>&'\\\"\\t\""
Expand All @@ -81,6 +81,10 @@ null
"'!()<>&'\\''\"\t'"
"ISgpPD4mJyIJ"
"!()<>&'\"\t"
"ISgpPD4mJyIJ"
"!()<>&'\"\t"
"ISgpPD4mJyIJ"
"!()<>&'\"\t"

# regression test for #436
@base64
Expand Down