diff --git a/Makefile b/Makefile index a513f9c..06b9556 100644 --- a/Makefile +++ b/Makefile @@ -2,10 +2,10 @@ CFLAGS = -Wall -Wno-misleading-indentation -Wno-unused-function -Werror -O2 -g - BOLT11_HDRS := src/bolt11/amount.h src/bolt11/bech32.h src/bolt11/bech32_util.h src/bolt11/bolt11.h src/bolt11/debug.h src/bolt11/error.h src/bolt11/hash_u5.h src/bolt11/node_id.h src/bolt11/overflows.h CCAN_SRCS := ccan/ccan/utf8/utf8.c ccan/ccan/tal/tal.c ccan/ccan/tal/str/str.c ccan/ccan/list/list.c ccan/ccan/mem/mem.c ccan/ccan/crypto/sha256/sha256.c ccan/ccan/take/take.c CCAN_HDRS := ccan/ccan/utf8/utf8.h ccan/ccan/container_of/container_of.h ccan/ccan/check_type/check_type.h ccan/ccan/str/str.h ccan/ccan/tal/str/str.h ccan/ccan/tal/tal.h ccan/ccan/list/list.h ccan/ccan/structeq/structeq.h ccan/ccan/typesafe_cb/typesafe_cb.h ccan/ccan/short_types/short_types.h ccan/ccan/mem/mem.h ccan/ccan/likely/likely.h ccan/ccan/alignof/alignof.h ccan/ccan/crypto/sha256/sha256.h ccan/ccan/array_size/array_size.h ccan/ccan/endian/endian.h ccan/ccan/take/take.h ccan/ccan/build_assert/build_assert.h ccan/ccan/cppmagic/cppmagic.h -HEADERS = deps/lmdb/lmdb.h deps/secp256k1/include/secp256k1.h src/nostrdb.h src/cursor.h src/hex.h src/jsmn.h src/config.h src/random.h src/memchr.h src/cpu.h src/nostr_bech32.h src/block.h src/str_block.h $(C_BINDINGS) $(CCAN_HDRS) $(BOLT11_HDRS) +HEADERS = deps/lmdb/lmdb.h deps/secp256k1/include/secp256k1.h src/nostrdb.h src/cursor.h src/hex.h src/jsmn.h src/config.h src/random.h src/memchr.h src/cpu.h src/nostr_bech32.h src/block.h src/str_block.h src/rcur.h $(C_BINDINGS) $(CCAN_HDRS) $(BOLT11_HDRS) FLATCC_SRCS=deps/flatcc/src/runtime/json_parser.c deps/flatcc/src/runtime/verifier.c deps/flatcc/src/runtime/builder.c deps/flatcc/src/runtime/emitter.c deps/flatcc/src/runtime/refmap.c BOLT11_SRCS = src/bolt11/bolt11.c src/bolt11/bech32.c src/bolt11/amount.c src/bolt11/hash_u5.c -SRCS = src/nostrdb.c src/invoice.c src/nostr_bech32.c src/content_parser.c src/block.c $(BOLT11_SRCS) $(FLATCC_SRCS) $(CCAN_SRCS) +SRCS = src/nostrdb.c src/invoice.c src/nostr_bech32.c src/content_parser.c src/block.c src/rcur.c $(BOLT11_SRCS) $(FLATCC_SRCS) $(CCAN_SRCS) LDS = $(OBJS) $(ARS) OBJS = $(SRCS:.c=.o) DEPS = $(OBJS) $(HEADERS) $(ARS) diff --git a/src/block.c b/src/block.c index 65f25e3..df25198 100644 --- a/src/block.c +++ b/src/block.c @@ -2,6 +2,7 @@ #include "nostrdb.h" #include "block.h" +#include "rcur.h" #include int push_str_block(struct cursor *buf, const char *content, struct ndb_str_block *block) { @@ -9,115 +10,91 @@ int push_str_block(struct cursor *buf, const char *content, struct ndb_str_block cursor_push_varint(buf, block->len); } -int pull_str_block(struct cursor *buf, const char *content, struct ndb_str_block *block) { - uint32_t start; - if (!cursor_pull_varint_u32(buf, &start)) - return 0; +bool pull_str_block(struct rcur *rcur, const char *content, struct ndb_str_block *block) { + block->str = content + rcur_pull_varint_u32(rcur); + block->len = rcur_pull_varint_u32(rcur); - block->str = content + start; - - return cursor_pull_varint_u32(buf, &block->len); + return rcur_valid(rcur); } -static int pull_nostr_bech32_type(struct cursor *cur, enum nostr_bech32_type *type) +static bool pull_nostr_bech32_type(struct rcur *rcur, enum nostr_bech32_type *type) { uint64_t inttype; - if (!cursor_pull_varint(cur, &inttype)) - return 0; - if (inttype <= 0 || inttype > NOSTR_BECH32_KNOWN_TYPES) - return 0; + // Returns 0 on failure. + inttype = rcur_pull_varint(rcur); + if (inttype <= 0 || inttype > NOSTR_BECH32_KNOWN_TYPES) { + rcur_fail(rcur); + return false; + } *type = inttype; - return 1; + return true; } -static int pull_bech32_mention(const char *content, struct cursor *cur, struct ndb_mention_bech32_block *block) { +static bool pull_bech32_mention(const char *content, struct rcur *rcur, struct ndb_mention_bech32_block *block) { uint16_t size; - unsigned char *start; - struct cursor bech32; - - if (!pull_str_block(cur, content, &block->str)) - return 0; - - if (!cursor_pull_u16(cur, &size)) - return 0; - - if (!pull_nostr_bech32_type(cur, &block->bech32.type)) - return 0; - - make_cursor(cur->p, cur->p + size, &bech32); + struct rcur bech32; - start = cur->p; + pull_str_block(rcur, content, &block->str); + size = rcur_pull_u16(rcur); + pull_nostr_bech32_type(rcur, &block->bech32.type); - if (!parse_nostr_bech32_buffer(&bech32, block->bech32.type, &block->bech32)) - return 0; + bech32 = rcur_pull_slice(rcur, size); + parse_nostr_bech32_buffer(&bech32, block->bech32.type, &block->bech32); + if (!rcur_valid(&bech32)) + rcur_fail(rcur); - //assert(bech32.p == start + size); - cur->p = start + size; - return 1; + return rcur_valid(rcur); } -static int pull_invoice(const char *content, struct cursor *cur, - struct ndb_invoice_block *block) +static bool pull_invoice(const char *content, struct rcur *rcur, + struct ndb_invoice_block *block) { - if (!pull_str_block(cur, content, &block->invstr)) - return 0; - - return ndb_decode_invoice(cur, &block->invoice); + pull_str_block(rcur, content, &block->invstr); + return ndb_decode_invoice(rcur, &block->invoice); } -static int pull_block_type(struct cursor *cur, enum ndb_block_type *type) +static bool pull_block_type(struct rcur *rcur, enum ndb_block_type *type) { uint32_t itype; - *type = 0; - if (!cursor_pull_varint_u32(cur, &itype)) - return 0; - if (itype <= 0 || itype > NDB_NUM_BLOCK_TYPES) - return 0; + itype = rcur_pull_varint_u32(rcur); + if (itype <= 0 || itype > NDB_NUM_BLOCK_TYPES) { + rcur_fail(rcur); + return false; + } *type = itype; - return 1; + return true; } -static int pull_block(const char *content, struct cursor *cur, struct ndb_block *block) +static bool pull_block(const char *content, struct rcur *rcur, struct ndb_block *block) { - unsigned char *start = cur->p; - - if (!pull_block_type(cur, &block->type)) - return 0; + if (!pull_block_type(rcur, &block->type)) + return false; switch (block->type) { case BLOCK_HASHTAG: case BLOCK_TEXT: case BLOCK_URL: - if (!pull_str_block(cur, content, &block->block.str)) - goto fail; - break; + return pull_str_block(rcur, content, &block->block.str); case BLOCK_MENTION_INDEX: - if (!cursor_pull_varint_u32(cur, &block->block.mention_index)) - goto fail; - break; + block->block.mention_index = rcur_pull_varint_u32(rcur); + return rcur_valid(rcur); case BLOCK_MENTION_BECH32: - if (!pull_bech32_mention(content, cur, &block->block.mention_bech32)) - goto fail; - break; + return pull_bech32_mention(content, rcur, &block->block.mention_bech32); case BLOCK_INVOICE: // we only push invoice strs here - if (!pull_invoice(content, cur, &block->block.invoice)) - goto fail; - break; + return pull_invoice(content, rcur, &block->block.invoice); } - return 1; -fail: - cur->p = start; - return 0; + /* unreachable: pull_block_type can only return known types */ + assert(0); } @@ -129,27 +106,15 @@ enum ndb_block_type ndb_get_block_type(struct ndb_block *block) { void ndb_blocks_iterate_start(const char *content, struct ndb_blocks *blocks, struct ndb_block_iterator *iter) { iter->blocks = blocks; iter->content = content; - iter->p = blocks->blocks; + iter->rcur = rcur_forbuf(blocks->blocks, iter->blocks->blocks_size); } struct ndb_block *ndb_blocks_iterate_next(struct ndb_block_iterator *iter) { - struct cursor cur; - cur.start = iter->blocks->blocks; - cur.p = iter->p; - cur.end = iter->blocks->blocks + iter->blocks->blocks_size; - - while (cur.p < cur.end) { - if (!pull_block(iter->content, &cur, &iter->block)) { - iter->p = cur.p; - return NULL; - } else { - iter->p = cur.p; - return &iter->block; - } - } + if (!pull_block(iter->content, &iter->rcur, &iter->block)) + return NULL; - return NULL; + return &iter->block; } // STR BLOCKS diff --git a/src/block.h b/src/block.h index ed4b358..7097774 100644 --- a/src/block.h +++ b/src/block.h @@ -8,6 +8,7 @@ #include "nostr_bech32.h" #include "nostrdb.h" #include +#include #define NDB_BLOCK_FLAG_OWNED 1 @@ -29,8 +30,9 @@ struct ndb_blocks { #pragma pack(pop) -int push_str_block(struct cursor *buf, const char *content, struct ndb_str_block *block); -int pull_str_block(struct cursor *buf, const char *content, struct ndb_str_block *block); +struct rcur; +int push_str_block(struct cursor *buf, const char *content, struct ndb_str_block *block); +bool pull_str_block(struct rcur *rcur, const char *content, struct ndb_str_block *block); #endif // NDB_BLOCK_H diff --git a/src/content_parser.c b/src/content_parser.c index 753880b..466ea42 100644 --- a/src/content_parser.c +++ b/src/content_parser.c @@ -1,89 +1,101 @@ #include "cursor.h" +#include "rcur.h" #include "nostr_bech32.h" #include "block.h" #include "nostrdb.h" #include "invoice.h" +#include "ccan/mem/mem.h" #include "bolt11/bolt11.h" #include "bolt11/bech32.h" #include #include -#include "cursor.h" - struct ndb_content_parser { int bech32_strs; struct cursor buffer; - struct cursor content; + const char *content_start; + struct rcur content; struct ndb_blocks *blocks; }; -static int parse_digit(struct cursor *cur, int *digit) { - int c; - if ((c = peek_char(cur, 0)) == -1) - return 0; - - c -= '0'; +static struct ndb_str_block str_block_forrcur(struct rcur rcur) +{ + struct ndb_str_block strblock; + size_t len; + + strblock.str = rcur_peek_remainder(&rcur, &len); + strblock.len = len; + return strblock; +} + +/* Only updates rcur if it returns true */ +static bool parse_digit(struct rcur *rcur, int *digit) +{ + const char *c; + + c = rcur_peek(rcur, 1); + if (!c) + return false; - if (c >= 0 && c <= 9) { - *digit = c; - cur->p++; - return 1; + if (*c >= '0' && *c <= '9') { + *digit = *c - '0'; + rcur_skip(rcur, 1); + return true; } - return 0; + return false; } - -static int parse_mention_index(struct cursor *cur, struct ndb_block *block) { +/* Leaves rcur untouched if it returns false. */ +static bool parse_if_mention_index(struct rcur *rcur, struct ndb_block *block) +{ int d1, d2, d3, ind; - unsigned char *start = cur->p; + struct rcur index = *rcur; + + if (!rcur_skip_if_str_anycase(&index, "#[")) + return false; - if (!parse_str(cur, "#[")) - return 0; - - if (!parse_digit(cur, &d1)) { - cur->p = start; - return 0; - } + if (!parse_digit(&index, &d1)) + return false; ind = d1; - if (parse_digit(cur, &d2)) + if (parse_digit(&index, &d2)) { ind = (d1 * 10) + d2; - - if (parse_digit(cur, &d3)) - ind = (d1 * 100) + (d2 * 10) + d3; - - if (!parse_char(cur, ']')) { - cur->p = start; - return 0; + if (parse_digit(&index, &d3)) + ind = (d1 * 100) + (d2 * 10) + d3; } + + if (!rcur_skip_if_str_anycase(rcur, "]")) + return false; block->type = BLOCK_MENTION_INDEX; block->block.mention_index = ind; - - return 1; + + *rcur = index; + return true; } -static int parse_hashtag(struct cursor *cur, struct ndb_block *block) { - int c; - unsigned char *start = cur->p; - - if (!parse_char(cur, '#')) - return 0; - - c = peek_char(cur, 0); - if (c == -1 || is_whitespace(c) || c == '#') { - cur->p = start; - return 0; - } - - consume_until_boundary(cur); - +/* Leaves rcur untouched if it returns false. */ +static bool parse_if_hashtag(struct rcur *rcur, struct ndb_block *block) +{ + const char *c; + struct rcur hashtag = *rcur; + size_t len; + + /* Use hashtag to look ahead */ + if (!rcur_skip_if_str_anycase(&hashtag, "#")) + return false; + + c = rcur_pull_word(&hashtag, &len); + if (!c || is_whitespace(*c) || *c == '#') + return false; + + rcur_skip(rcur, 1); block->type = BLOCK_HASHTAG; - block->block.str.str = (const char*)(start + 1); - block->block.str.len = cur->p - (start + 1); - - return 1; + block->block.str.len = len; + block->block.str.str = rcur_pull(rcur, len); + + return true; } // @@ -110,14 +122,17 @@ static int push_bech32_mention(struct ndb_content_parser *p, struct ndb_str_bloc size_t u5_out_len, u8_out_len; static const int MAX_PREFIX = 8; char prefix[9] = {0}; + struct rcur strcur; start = p->buffer.p; - if (!parse_nostr_bech32_type(bech32->str, &type)) + strcur = rcur_forstr(bech32->str); + type = parse_nostr_bech32_type(&strcur); + if (!rcur_valid(&strcur)) goto fail; // make sure to push the str block! - if (!push_str_block(&p->buffer, (const char*)p->content.start, bech32)) + if (!push_str_block(&p->buffer, p->content_start, bech32)) goto fail; // // save a spot for the raw bech32 buffer size @@ -174,7 +189,7 @@ static int push_invoice_str(struct ndb_content_parser *p, struct ndb_str_block * start = p->buffer.p; // push the text block just incase we don't care for the invoice - if (!push_str_block(&p->buffer, (const char*)p->content.start, str)) + if (!push_str_block(&p->buffer, p->content_start, str)) return 0; // push decoded invoice data for quick access @@ -203,6 +218,15 @@ static int add_text_block(struct ndb_content_parser *p, const char *start, const return push_block(p, &b); } +static bool add_text_block_rcur(struct ndb_content_parser *p, + struct rcur rcur) +{ + const char *start; + size_t len; + + start = rcur_peek_remainder(&rcur, &len); + return add_text_block(p, start, start + len); +} int push_block(struct ndb_content_parser *p, struct ndb_block *block) { @@ -216,7 +240,7 @@ int push_block(struct ndb_content_parser *p, struct ndb_block *block) case BLOCK_HASHTAG: case BLOCK_TEXT: case BLOCK_URL: - if (!push_str_block(&p->buffer, (const char*)p->content.start, + if (!push_str_block(&p->buffer, p->content_start, &block->block.str)) goto fail; break; @@ -262,103 +286,91 @@ int push_block(struct ndb_content_parser *p, struct ndb_block *block) -static inline int next_char_is_whitespace(unsigned char *cur, unsigned char *end) { - unsigned char *next = cur + 1; - - if (next > end) - return 0; - - if (next == end) - return 1; - - return is_whitespace(*next); -} - -static inline int char_disallowed_at_end_url(char c) +static bool char_disallowed_at_end_url(char c) { return c == '.' || c == ','; } -static int is_final_url_char(unsigned char *cur, unsigned char *end) +static bool is_final_url_char(const struct rcur *rcur) { - if (is_whitespace(*cur)) - return 1; + const char *p = rcur_peek(rcur, 1), *p2; + if (!p) + return true; - if (next_char_is_whitespace(cur, end)) { + if (is_whitespace(*p)) + return true; + + p2 = rcur_peek(rcur, 2); + if (!p2 || is_whitespace(p2[1])) { // next char is whitespace so this char could be the final char in the url - return char_disallowed_at_end_url(*cur); + return char_disallowed_at_end_url(*p); } // next char isn't whitespace so it can't be a final char - return 0; + return false; } -static int consume_until_end_url(struct cursor *cur, int or_end) { - unsigned char *start = cur->p; +static bool consume_until_end_url(struct rcur *rcur) +{ + bool consumed = false; - while (cur->p < cur->end) { - if (is_final_url_char(cur->p, cur->end)) - return cur->p != start; + while (rcur_bytes_remaining(*rcur)) { + if (is_final_url_char(rcur)) + return consumed; - cur->p++; + rcur_skip(rcur, 1); + consumed = true; } - return or_end; + return true; } -static int consume_url_fragment(struct cursor *cur) +static bool consume_url_fragment(struct rcur *rcur) { - int c; + const char *c; - if ((c = peek_char(cur, 0)) < 0) - return 1; + c = rcur_peek(rcur, 1); + if (!c) + return true; - if (c != '#' && c != '?') { - return 1; + if (*c != '#' && *c != '?') { + return true; } - cur->p++; + rcur_skip(rcur, 1); - return consume_until_end_url(cur, 1); + return consume_until_end_url(rcur); } -static int consume_url_path(struct cursor *cur) +static void consume_url_path(struct rcur *rcur) { - int c; - - if ((c = peek_char(cur, 0)) < 0) - return 1; - - if (c != '/') { - return 1; - } + const char *c; - while (cur->p < cur->end) { - c = *cur->p; + c = rcur_peek(rcur, 1); + if (!c || *c != '/') + return; - if (c == '?' || c == '#' || is_final_url_char(cur->p, cur->end)) { - return 1; + while ((c = rcur_peek(rcur, 1)) != NULL) { + if (*c == '?' || *c == '#' || is_final_url_char(rcur)) { + return; } - cur->p++; + rcur_skip(rcur, 1); } - - return 1; } -static int consume_url_host(struct cursor *cur) +static bool consume_url_host(struct rcur *rcur) { - char c; + const char *c; int count = 0; - while (cur->p < cur->end) { - c = *cur->p; + while ((c = rcur_peek(rcur, 1)) != NULL) { // TODO: handle IDNs - if ((is_alphanumeric(c) || c == '.' || c == '-') && !is_final_url_char(cur->p, cur->end)) + if ((is_alphanumeric(*c) || *c == '.' || *c == '-') && !is_final_url_char(rcur)) { count++; - cur->p++; + rcur_skip(rcur, 1); continue; } @@ -370,159 +382,117 @@ static int consume_url_host(struct cursor *cur) return count != 0; } -static int parse_url(struct cursor *cur, struct ndb_block *block) { - unsigned char *start = cur->p; - unsigned char *host; - unsigned char tmp[4096]; - int host_len; - struct cursor path_cur, tmp_cur; +/* Leaves rcur untouched if it returns false. */ +static bool parse_if_url(struct rcur *rcur, + bool prev_was_open_bracket, + struct ndb_block *block) +{ + struct rcur url = *rcur, path, host, url_only; enum nostr_bech32_type type; - make_cursor(tmp, tmp + sizeof(tmp), &tmp_cur); - if (!parse_str(cur, "http")) - return 0; - - if (parse_char(cur, 's') || parse_char(cur, 'S')) { - if (!parse_str(cur, "://")) { - cur->p = start; - return 0; - } - } else { - if (!parse_str(cur, "://")) { - cur->p = start; - return 0; - } - } - - // make sure to save the hostname. We will use this to detect damus.io links - host = cur->p; + if (!rcur_skip_if_str_anycase(&url, "http")) + return false; - if (!consume_url_host(cur)) { - cur->p = start; - return 0; - } + rcur_skip_if_str_anycase(&url, "s"); + if (!rcur_skip_if_str_anycase(&url, "://")) + return false; - // get the length of the host string - host_len = (int)(cur->p - host); + // make sure to save the hostname. We will use this to detect damus.io links + host = url; + if (!consume_url_host(&url)) + return false; + host = rcur_between(&host, &url); // save the current parse state so that we can continue from here when // parsing the bech32 in the damus.io link if we have it - copy_cursor(cur, &path_cur); - + path = url; // skip leading / - cursor_skip(&path_cur, 1); + rcur_skip(&path, 1); - if (!consume_url_path(cur)) { - cur->p = start; - return 0; - } + consume_url_path(&url); - if (!consume_url_fragment(cur)) { - cur->p = start; - return 0; - } + if (!consume_url_fragment(&url)) + return false; - // smart parens - if ((start - 1) >= cur->start && - start < cur->end && - *(start - 1) == '(' && - (cur->p - 1) < cur->end && - *(cur->p - 1) == ')') - { - cur->p--; - } + // Now we've parsed, get url in buffer by itself. + url_only = rcur_between(rcur, &url); - // save the bech32 string pos in case we hit a damus.io link - block->block.str.str = (const char *)path_cur.p; + // smart parens: is entire URL surrounded by ()? + // Ideally, we'd pass prev_was_open_bracket to consume_url_fragment + // and it would be smart. + if (prev_was_open_bracket) + rcur_trim_if_char(&url_only, ')'); // if we have a damus link, make it a mention - if (host_len == 8 - && !strncmp((const char *)host, "damus.io", 8) - && parse_nostr_bech32_str(&path_cur, &type)) - { - block->block.str.len = path_cur.p - path_cur.start; - block->type = BLOCK_MENTION_BECH32; - return 1; + if (rcur_skip_if_match(&host, "damus.io", strlen("damus.io"))) { + struct rcur bech32 = path; + if (parse_nostr_bech32_str(&bech32, &type)) { + block->type = BLOCK_MENTION_BECH32; + block->block.str = str_block_forrcur(path); + goto got_url; + } } block->type = BLOCK_URL; - block->block.str.str = (const char *)start; - block->block.str.len = cur->p - start; - - return 1; + block->block.str = str_block_forrcur(url_only); + +got_url: + /* We've processed the url, now increment rcur */ + rcur_skip(rcur, rcur_bytes_remaining(url_only)); + return true; } -static int parse_invoice(struct cursor *cur, struct ndb_block *block) { - unsigned char *start, *end; +/* Leaves rcur untouched if it returns false. */ +static bool parse_if_invoice(struct rcur *rcur, struct ndb_block *block) +{ + struct rcur invoice = *rcur; + size_t len; // optional - parse_str(cur, "lightning:"); - - start = cur->p; - - if (!parse_str(cur, "lnbc")) - return 0; - - if (!consume_until_whitespace(cur, 1)) { - cur->p = start; - return 0; - } - - end = cur->p; + rcur_skip_if_str_anycase(&invoice, "lightning:"); + if (!rcur_skip_if_str_anycase(&invoice, "lnbc")) + return false; + + if (!rcur_pull_word(&invoice, &len) || len == 0) + return false; + block->type = BLOCK_INVOICE; - - block->block.str.str = (const char*)start; - block->block.str.len = end - start; - - cur->p = end; - - return 1; -} + block->block.str = str_block_forrcur(rcur_between(rcur, &invoice)); + *rcur = invoice; + return true; +} -static int parse_mention_bech32(struct cursor *cur, struct ndb_block *block) { - unsigned char *start = cur->p; +/* Leaves rcur untouched if it returns false. */ +static bool parse_if_mention_bech32(struct rcur *rcur, struct ndb_block *block) +{ + struct rcur bech32 = *rcur, after_ignored; enum nostr_bech32_type type; - - parse_char(cur, '@'); - parse_str(cur, "nostr:"); - block->block.str.str = (const char *)cur->p; - - if (!parse_nostr_bech32_str(cur, &type)) { - cur->p = start; - return 0; - } + /* Ignore these */ + rcur_skip_if_str_anycase(&bech32, "@"); + rcur_skip_if_str_anycase(&bech32, "nostr:"); + + after_ignored = bech32; + if (!parse_nostr_bech32_str(&bech32, &type)) + return false; - block->block.str.len = cur->p - (unsigned char*)block->block.str.str; block->type = BLOCK_MENTION_BECH32; + block->block.str = str_block_forrcur(rcur_between(&after_ignored, &bech32)); - return 1; -} - -static int add_text_then_block(struct ndb_content_parser *p, - struct ndb_block *block, - unsigned char **start, - const unsigned char *pre_mention) -{ - if (!add_text_block(p, (const char *)*start, (const char*)pre_mention)) - return 0; - - *start = (unsigned char*)p->content.p; - - return push_block(p, block); + *rcur = bech32; + return true; } int ndb_parse_content(unsigned char *buf, int buf_size, const char *content, int content_len, struct ndb_blocks **blocks_p) { - int cp, c; struct ndb_content_parser parser; struct ndb_block block; - - unsigned char *start, *pre_mention, *blocks_start; + bool prev_was_open_bracket = false; + struct rcur start, pre_mention; make_cursor(buf, buf + buf_size, &parser.buffer); @@ -530,8 +500,8 @@ int ndb_parse_content(unsigned char *buf, int buf_size, *blocks_p = parser.blocks = (struct ndb_blocks *)buf; parser.buffer.p += sizeof(struct ndb_blocks); - make_cursor((unsigned char *)content, - (unsigned char*)content + content_len, &parser.content); + parser.content_start = content; + parser.content = rcur_forbuf(parser.content_start, content_len); parser.blocks->words = 0; parser.blocks->num_blocks = 0; @@ -539,53 +509,75 @@ int ndb_parse_content(unsigned char *buf, int buf_size, parser.blocks->flags = 0; parser.blocks->version = 1; - blocks_start = start = parser.content.p; - while (parser.content.p < parser.content.end) { - cp = peek_char(&parser.content, -1); - c = peek_char(&parser.content, 0); + start = parser.content; + + while (rcur_bytes_remaining(parser.content)) { + const char *c; + + // Skip whitespace. + rcur_pull_whitespace(&parser.content); + + c = rcur_peek(&parser.content, 1); + if (!c) + break; // new word - if (is_whitespace(cp) && !is_whitespace(c)) - parser.blocks->words++; - - pre_mention = parser.content.p; - if (cp == -1 || is_left_boundary(cp) || c == '#') { - if (c == '#' && (parse_mention_index(&parser.content, &block) || parse_hashtag(&parser.content, &block))) { - if (!add_text_then_block(&parser, &block, &start, pre_mention)) - return 0; - continue; - } else if ((c == 'h' || c == 'H') && parse_url(&parser.content, &block)) { - if (!add_text_then_block(&parser, &block, &start, pre_mention)) - return 0; - continue; - } else if ((c == 'l' || c == 'L') && parse_invoice(&parser.content, &block)) { - if (!add_text_then_block(&parser, &block, &start, pre_mention)) - return 0; - continue; - } else if ((c == 'n' || c == '@') && parse_mention_bech32(&parser.content, &block)) { - if (!add_text_then_block(&parser, &block, &start, pre_mention)) - return 0; - continue; - } + parser.blocks->words++; + + pre_mention = parser.content; + + switch (*c) { + case '#': + if (parse_if_mention_index(&parser.content, &block) || parse_if_hashtag(&parser.content, &block)) + goto add_it; + break; + + case 'h': + case 'H': + if (parse_if_url(&parser.content, prev_was_open_bracket, &block)) + goto add_it; + break; + + case 'l': + case 'L': + if (parse_if_invoice(&parser.content, &block)) + goto add_it; + break; + + case 'n': + case '@': + if (parse_if_mention_bech32(&parser.content, &block)) + goto add_it; + break; } - - parser.content.p++; - } - - if (parser.content.p - start > 0) { - if (!add_text_block(&parser, (const char*)start, (const char *)parser.content.p)) + prev_was_open_bracket = (*c == '('); + rcur_skip(&parser.content, 1); + continue; + + add_it: + // Add any text (e.g. whitespace) before this (noop if empty) + if (!add_text_block_rcur(&parser, + rcur_between(&start, &pre_mention))) return 0; + if (!push_block(&parser, &block)) + return 0; + + start = parser.content; } - parser.blocks->blocks_size = parser.buffer.p - blocks_start; + // Add any trailing text (noop if empty) + if (!add_text_block_rcur(&parser, rcur_between(&start, &parser.content))) + return 0; + + parser.blocks->blocks_size = parser.buffer.p - buf; // // pad to 8-byte alignment // if (!cursor_align(&parser.buffer, 8)) return 0; - assert((parser.buffer.p - parser.buffer.start) % 8 == 0); - parser.blocks->total_size = parser.buffer.p - parser.buffer.start; + assert((parser.buffer.p - buf) % 8 == 0); + parser.blocks->total_size = parser.buffer.p - buf; return 1; } diff --git a/src/cursor.h b/src/cursor.h index 3d8fc1e..2eec0ad 100644 --- a/src/cursor.h +++ b/src/cursor.h @@ -94,7 +94,7 @@ static inline void copy_cursor(struct cursor *src, struct cursor *dest) static inline int cursor_skip(struct cursor *cursor, int n) { - if (cursor->p + n >= cursor->end) + if (cursor->p + n > cursor->end) return 0; cursor->p += n; diff --git a/src/invoice.c b/src/invoice.c index a5a6f34..74d427e 100644 --- a/src/invoice.c +++ b/src/invoice.c @@ -1,5 +1,6 @@ #include "cursor.h" +#include "rcur.h" #include "invoice.h" #include "nostrdb.h" #include "bolt11/bolt11.h" @@ -36,34 +37,28 @@ int ndb_encode_invoice(struct cursor *cur, struct bolt11 *invoice) { return 1; } -int ndb_decode_invoice(struct cursor *cur, struct ndb_invoice *invoice) +bool ndb_decode_invoice(struct rcur *rcur, struct ndb_invoice *invoice) { unsigned char desc_type; - if (!cursor_pull_byte(cur, &invoice->version)) - return 0; - - if (!cursor_pull_varint(cur, &invoice->amount)) - return 0; - - if (!cursor_pull_varint(cur, &invoice->timestamp)) - return 0; - - if (!cursor_pull_varint(cur, &invoice->expiry)) - return 0; - if (!cursor_pull_byte(cur, &desc_type)) - return 0; + invoice->version = rcur_pull_byte(rcur); + invoice->amount = rcur_pull_varint(rcur); + invoice->timestamp = rcur_pull_varint(rcur); + invoice->expiry = rcur_pull_varint(rcur); + desc_type = rcur_pull_byte(rcur); if (desc_type == 1) { - if (!cursor_pull_c_str(cur, (const char**)&invoice->description)) - return 0; + invoice->description = rcur_pull_c_string(rcur); + if (!invoice->description) + return false; } else if (desc_type == 2) { - invoice->description_hash = cur->p; - if (!cursor_skip(cur, 32)) - return 0; + invoice->description_hash = rcur_pull(rcur, 32); + if (!invoice->description_hash) + return false; } else { - return 0; + rcur_fail(rcur); + return false; } - return 1; + return true; } diff --git a/src/invoice.h b/src/invoice.h index 3e3e0c5..345bd64 100644 --- a/src/invoice.h +++ b/src/invoice.h @@ -7,9 +7,10 @@ #include "nostrdb.h" struct bolt11; +struct rcur; // ENCODING int ndb_encode_invoice(struct cursor *cur, struct bolt11 *invoice); -int ndb_decode_invoice(struct cursor *cur, struct ndb_invoice *invoice); +bool ndb_decode_invoice(struct rcur *rcur, struct ndb_invoice *invoice); #endif /* NDB_INVOICE_H */ diff --git a/src/nostr_bech32.c b/src/nostr_bech32.c index c1e39b7..a914082 100644 --- a/src/nostr_bech32.c +++ b/src/nostr_bech32.c @@ -7,10 +7,11 @@ #include "nostr_bech32.h" #include -#include "cursor.h" #include "str_block.h" #include "nostrdb.h" #include "bolt11/bech32.h" +#include "ccan/array_size/array_size.h" +#include "rcur.h" #define MAX_TLVS 32 @@ -26,83 +27,71 @@ struct nostr_tlv { const unsigned char *value; }; -static int parse_nostr_tlv(struct cursor *cur, struct nostr_tlv *tlv) { - // get the tlv tag - if (!cursor_pull_byte(cur, &tlv->type)) - return 0; +/* Returns false on error, *or* empty */ +static bool parse_nostr_tlv(struct rcur *rcur, struct nostr_tlv *tlv) +{ + if (!rcur_bytes_remaining(*rcur)) + return false; + tlv->type = rcur_pull_byte(rcur); + tlv->len = rcur_pull_byte(rcur); + tlv->value = rcur_pull(rcur, tlv->len); - if (tlv->type >= TLV_KNOWN_TLVS) - return 0; - - // get the length - if (!cursor_pull_byte(cur, &tlv->len)) - return 0; - - // is the reported length greater then our buffer? if so fail - if (cur->p + tlv->len > cur->end) - return 0; - - tlv->value = cur->p; - cur->p += tlv->len; - - return 1; + return tlv->value != NULL; } -int parse_nostr_bech32_type(const char *prefix, enum nostr_bech32_type *type) { - // Parse type - if (strncmp(prefix, "note", 4) == 0) { - *type = NOSTR_BECH32_NOTE; - return 4; - } else if (strncmp(prefix, "npub", 4) == 0) { - *type = NOSTR_BECH32_NPUB; - return 4; - } else if (strncmp(prefix, "nsec", 4) == 0) { - *type = NOSTR_BECH32_NSEC; - return 4; - } else if (strncmp(prefix, "nprofile", 8) == 0) { - *type = NOSTR_BECH32_NPROFILE; - return 8; - } else if (strncmp(prefix, "nevent", 6) == 0) { - *type = NOSTR_BECH32_NEVENT; - return 6; - } else if (strncmp(prefix, "nrelay", 6) == 0) { - *type = NOSTR_BECH32_NRELAY; - return 6; - } else if (strncmp(prefix, "naddr", 5) == 0) { - *type = NOSTR_BECH32_NADDR; - return 5; +enum nostr_bech32_type parse_nostr_bech32_type(struct rcur *typestr) +{ + struct { + const char *name; + enum nostr_bech32_type type; + } table[] = { + {"note", NOSTR_BECH32_NOTE}, + {"npub", NOSTR_BECH32_NPUB}, + {"nsec", NOSTR_BECH32_NSEC}, + {"nprofile", NOSTR_BECH32_NPROFILE}, + {"nevent", NOSTR_BECH32_NEVENT}, + {"nrelay", NOSTR_BECH32_NRELAY}, + {"naddr", NOSTR_BECH32_NADDR}, + }; + + for (size_t i = 0; i < ARRAY_SIZE(table); i++) { + if (rcur_skip_if_match(typestr, + table[i].name, strlen(table[i].name))) + return table[i].type; } - + + rcur_fail(typestr); return 0; } -static int parse_nostr_bech32_note(struct cursor *cur, struct bech32_note *note) { - return pull_bytes(cur, 32, ¬e->event_id); +static void parse_nostr_bech32_note(struct rcur *rcur, struct bech32_note *note) { + note->event_id = rcur_pull(rcur, 32); } -static int parse_nostr_bech32_npub(struct cursor *cur, struct bech32_npub *npub) { - return pull_bytes(cur, 32, &npub->pubkey); +static void parse_nostr_bech32_npub(struct rcur *rcur, struct bech32_npub *npub) { + npub->pubkey = rcur_pull(rcur, 32); } -static int parse_nostr_bech32_nsec(struct cursor *cur, struct bech32_nsec *nsec) { - return pull_bytes(cur, 32, &nsec->nsec); +static void parse_nostr_bech32_nsec(struct rcur *rcur, struct bech32_nsec *nsec) { + nsec->nsec = rcur_pull(rcur, 32); } -static int add_relay(struct ndb_relays *relays, struct nostr_tlv *tlv) +/* FIXME: Nobody checks this return? */ +static bool add_relay(struct ndb_relays *relays, struct nostr_tlv *tlv) { struct ndb_str_block *str; if (relays->num_relays + 1 > NDB_MAX_RELAYS) - return 0; + return false; str = &relays->relays[relays->num_relays++]; str->str = (const char*)tlv->value; str->len = tlv->len; - return 1; + return true; } -static int parse_nostr_bech32_nevent(struct cursor *cur, struct bech32_nevent *nevent) { +static bool parse_nostr_bech32_nevent(struct rcur *rcur, struct bech32_nevent *nevent) { struct nostr_tlv tlv; int i; @@ -111,18 +100,18 @@ static int parse_nostr_bech32_nevent(struct cursor *cur, struct bech32_nevent *n nevent->relays.num_relays = 0; for (i = 0; i < MAX_TLVS; i++) { - if (!parse_nostr_tlv(cur, &tlv)) + if (!parse_nostr_tlv(rcur, &tlv)) break; switch (tlv.type) { case TLV_SPECIAL: if (tlv.len != 32) - return 0; + return rcur_fail(rcur); nevent->event_id = tlv.value; break; case TLV_AUTHOR: if (tlv.len != 32) - return 0; + return rcur_fail(rcur); nevent->pubkey = tlv.value; break; case TLV_RELAY: @@ -131,10 +120,12 @@ static int parse_nostr_bech32_nevent(struct cursor *cur, struct bech32_nevent *n } } - return nevent->event_id != NULL; + if (nevent->event_id == NULL) + return rcur_fail(rcur); + return true; } -static int parse_nostr_bech32_naddr(struct cursor *cur, struct bech32_naddr *naddr) { +static bool parse_nostr_bech32_naddr(struct rcur *rcur, struct bech32_naddr *naddr) { struct nostr_tlv tlv; int i; @@ -144,7 +135,7 @@ static int parse_nostr_bech32_naddr(struct cursor *cur, struct bech32_naddr *nad naddr->relays.num_relays = 0; for (i = 0; i < MAX_TLVS; i++) { - if (!parse_nostr_tlv(cur, &tlv)) + if (!parse_nostr_tlv(rcur, &tlv)) break; switch (tlv.type) { @@ -153,7 +144,7 @@ static int parse_nostr_bech32_naddr(struct cursor *cur, struct bech32_naddr *nad naddr->identifier.len = tlv.len; break; case TLV_AUTHOR: - if (tlv.len != 32) return 0; + if (tlv.len != 32) return false; naddr->pubkey = tlv.value; break; case TLV_RELAY: @@ -162,10 +153,12 @@ static int parse_nostr_bech32_naddr(struct cursor *cur, struct bech32_naddr *nad } } - return naddr->identifier.str != NULL; + if (naddr->identifier.str == NULL) + return rcur_fail(rcur); + return true; } -static int parse_nostr_bech32_nprofile(struct cursor *cur, struct bech32_nprofile *nprofile) { +static bool parse_nostr_bech32_nprofile(struct rcur *rcur, struct bech32_nprofile *nprofile) { struct nostr_tlv tlv; int i; @@ -173,12 +166,12 @@ static int parse_nostr_bech32_nprofile(struct cursor *cur, struct bech32_nprofil nprofile->relays.num_relays = 0; for (i = 0; i < MAX_TLVS; i++) { - if (!parse_nostr_tlv(cur, &tlv)) + if (!parse_nostr_tlv(rcur, &tlv)) break; switch (tlv.type) { case TLV_SPECIAL: - if (tlv.len != 32) return 0; + if (tlv.len != 32) return rcur_fail(rcur); nprofile->pubkey = tlv.value; break; case TLV_RELAY: @@ -187,10 +180,12 @@ static int parse_nostr_bech32_nprofile(struct cursor *cur, struct bech32_nprofil } } - return nprofile->pubkey != NULL; + if (nprofile->pubkey == NULL) + return rcur_fail(rcur); + return true; } -static int parse_nostr_bech32_nrelay(struct cursor *cur, struct bech32_nrelay *nrelay) { +static bool parse_nostr_bech32_nrelay(struct rcur *rcur, struct bech32_nrelay *nrelay) { struct nostr_tlv tlv; int i; @@ -198,7 +193,7 @@ static int parse_nostr_bech32_nrelay(struct cursor *cur, struct bech32_nrelay *n nrelay->relay.len = 0; for (i = 0; i < MAX_TLVS; i++) { - if (!parse_nostr_tlv(cur, &tlv)) + if (!parse_nostr_tlv(rcur, &tlv)) break; switch (tlv.type) { @@ -209,94 +204,69 @@ static int parse_nostr_bech32_nrelay(struct cursor *cur, struct bech32_nrelay *n } } - return nrelay->relay.str != NULL; + if (nrelay->relay.str == NULL) + return rcur_fail(rcur); + return true; } -int parse_nostr_bech32_buffer(struct cursor *cur, - enum nostr_bech32_type type, - struct nostr_bech32 *obj) +bool parse_nostr_bech32_buffer(struct rcur *rcur, + enum nostr_bech32_type type, + struct nostr_bech32 *obj) { obj->type = type; switch (obj->type) { case NOSTR_BECH32_NOTE: - if (!parse_nostr_bech32_note(cur, &obj->note)) - return 0; + parse_nostr_bech32_note(rcur, &obj->note); break; case NOSTR_BECH32_NPUB: - if (!parse_nostr_bech32_npub(cur, &obj->npub)) - return 0; + parse_nostr_bech32_npub(rcur, &obj->npub); break; case NOSTR_BECH32_NSEC: - if (!parse_nostr_bech32_nsec(cur, &obj->nsec)) - return 0; + parse_nostr_bech32_nsec(rcur, &obj->nsec); break; case NOSTR_BECH32_NEVENT: - if (!parse_nostr_bech32_nevent(cur, &obj->nevent)) - return 0; + parse_nostr_bech32_nevent(rcur, &obj->nevent); break; case NOSTR_BECH32_NADDR: - if (!parse_nostr_bech32_naddr(cur, &obj->naddr)) - return 0; + parse_nostr_bech32_naddr(rcur, &obj->naddr); break; case NOSTR_BECH32_NPROFILE: - if (!parse_nostr_bech32_nprofile(cur, &obj->nprofile)) - return 0; + parse_nostr_bech32_nprofile(rcur, &obj->nprofile); break; case NOSTR_BECH32_NRELAY: - if (!parse_nostr_bech32_nrelay(cur, &obj->nrelay)) - return 0; + parse_nostr_bech32_nrelay(rcur, &obj->nrelay); break; } - - return 1; + return rcur_valid(rcur); } - -int parse_nostr_bech32_str(struct cursor *bech32, enum nostr_bech32_type *type) { - unsigned char *start = bech32->p; - unsigned char *data_start; - int n; - - if (!(n = parse_nostr_bech32_type((const char *)bech32->p, type))) { - bech32->p = start; - return 0; - } - - data_start = start + n; - if (!consume_until_non_alphanumeric(bech32, 1)) { - bech32->p = start; - return 0; - } - +bool parse_nostr_bech32_str(struct rcur *bech32, enum nostr_bech32_type *type) { + *type = parse_nostr_bech32_type(bech32); // must be at least 59 chars for the data part - //ndb_debug("bech32_data_size %ld '%c' '%c' '%.*s'\n", bech32->p - data_start, *(bech32->p-1), *data_start, (int)(bech32->p - data_start), data_start); - if (bech32->p - data_start < 59) { - bech32->p = start; - return 0; - } - - return 1; + return rcur_pull_non_alphanumeric(bech32) >= 59; } -int parse_nostr_bech32(unsigned char *buf, int buflen, - const char *bech32_str, size_t bech32_len, - struct nostr_bech32 *obj) { - unsigned char *start; +bool parse_nostr_bech32(unsigned char *buf, int buflen, + const char *bech32_str, size_t bech32_len, + struct nostr_bech32 *obj) { + const unsigned char *start = (const unsigned char *)bech32_str; size_t parsed_len, u5_out_len, u8_out_len; enum nostr_bech32_type type; static const int MAX_PREFIX = 8; - struct cursor cur, bech32, u8; + struct cursor cur; + struct rcur test, bech32, u8; make_cursor(buf, buf + buflen, &cur); - make_cursor((unsigned char*)bech32_str, (unsigned char*)bech32_str + bech32_len, &bech32); - - start = bech32.p; - if (!parse_nostr_bech32_str(&bech32, &type)) + + // parse_nostr_bech32_str consumes the copy + test = bech32 = rcur_forbuf(start, bech32_len); + + if (!parse_nostr_bech32_str(&test, &type)) return 0; - parsed_len = bech32.p - start; + parsed_len = rcur_bytes_remaining(rcur_between(&bech32, &test)); // some random sanity checking if (parsed_len < 10 || parsed_len > 10000) @@ -314,7 +284,7 @@ int parse_nostr_bech32(unsigned char *buf, int buflen, if (!bech32_convert_bits(cur.p, &u8_out_len, 8, u5, u5_out_len, 5, 0)) return 0; - make_cursor(cur.p, cur.p + u8_out_len, &u8); + u8 = rcur_forbuf(cur.p, u8_out_len); return parse_nostr_bech32_buffer(&u8, type, obj); } diff --git a/src/nostr_bech32.h b/src/nostr_bech32.h index 6b1956b..76c84cb 100644 --- a/src/nostr_bech32.h +++ b/src/nostr_bech32.h @@ -11,17 +11,20 @@ #include #include "str_block.h" #include "nostrdb.h" -#include "cursor.h" -int parse_nostr_bech32_str(struct cursor *bech32, enum nostr_bech32_type *type); -int parse_nostr_bech32_type(const char *prefix, enum nostr_bech32_type *type); +struct rcur; -int parse_nostr_bech32_buffer(struct cursor *cur, enum nostr_bech32_type type, - struct nostr_bech32 *obj); +bool parse_nostr_bech32_str(struct rcur *bech32, enum nostr_bech32_type *type); -int parse_nostr_bech32(unsigned char *buf, int buflen, - const char *bech32_str, size_t bech32_len, - struct nostr_bech32 *obj); +/* Check rcur_valid(typestr) to determine if it failed */ +enum nostr_bech32_type parse_nostr_bech32_type(struct rcur *typestr); + +bool parse_nostr_bech32_buffer(struct rcur *rcur, enum nostr_bech32_type type, + struct nostr_bech32 *obj); + +bool parse_nostr_bech32(unsigned char *buf, int buflen, + const char *bech32_str, size_t bech32_len, + struct nostr_bech32 *obj); /* int parse_nostr_bech32(const char *bech32, size_t input_len, diff --git a/src/nostrdb.c b/src/nostrdb.c index a0d0136..0bba413 100644 --- a/src/nostrdb.c +++ b/src/nostrdb.c @@ -3,6 +3,7 @@ #include "jsmn.h" #include "hex.h" #include "cursor.h" +#include "rcur.h" #include "random.h" #include "ccan/crypto/sha256/sha256.h" #include "bolt11/bolt11.h" @@ -377,37 +378,32 @@ static int ndb_tag_key_compare(const MDB_val *a, const MDB_val *b) static int ndb_text_search_key_compare(const MDB_val *a, const MDB_val *b) { - struct cursor ca, cb; + struct rcur rcura, rcurb; uint64_t sa, sb, nid_a, nid_b; MDB_val a2, b2; - make_cursor(a->mv_data, a->mv_data + a->mv_size, &ca); - make_cursor(b->mv_data, b->mv_data + b->mv_size, &cb); + rcura = rcur_forbuf(a->mv_data, a->mv_size); + rcurb = rcur_forbuf(b->mv_data, b->mv_size); // note_id - if (unlikely(!cursor_pull_varint(&ca, &nid_a) || !cursor_pull_varint(&cb, &nid_b))) - return 0; - - // string size - if (unlikely(!cursor_pull_varint(&ca, &sa) || !cursor_pull_varint(&cb, &sb))) - return 0; + nid_a = rcur_pull_varint(&rcura); + nid_b = rcur_pull_varint(&rcurb); - a2.mv_data = ca.p; - a2.mv_size = sa; + // strings (cast away const) + a2.mv_data = (char *)rcur_pull_prefixed_str(&rcura, &a2.mv_size); + b2.mv_data = (char *)rcur_pull_prefixed_str(&rcurb, &b2.mv_size); - b2.mv_data = cb.p; - b2.mv_size = sb; + // We don't *have* to bail here, but memcmp on NULL is technically + // illegal, so we do. + if (!rcur_valid(&rcura) || !rcur_valid(&rcurb)) + return 0; int cmp = mdb_cmp_memn(&a2, &b2); if (cmp) return cmp; - // skip over string - ca.p += sa; - cb.p += sb; - // timestamp - if (unlikely(!cursor_pull_varint(&ca, &sa) || !cursor_pull_varint(&cb, &sb))) - return 0; + sa = rcur_pull_varint(&rcura); + sb = rcur_pull_varint(&rcurb); if (sa < sb) return -1; else if (sa > sb) return 1; @@ -417,8 +413,8 @@ static int ndb_text_search_key_compare(const MDB_val *a, const MDB_val *b) else if (nid_a > nid_b) return 1; // word index - if (unlikely(!cursor_pull_varint(&ca, &sa) || !cursor_pull_varint(&cb, &sb))) - return 0; + sa = rcur_pull_varint(&rcura); + sb = rcur_pull_varint(&rcurb); if (sa < sb) return -1; else if (sa > sb) return 1; @@ -426,50 +422,29 @@ static int ndb_text_search_key_compare(const MDB_val *a, const MDB_val *b) return 0; } -static inline int ndb_unpack_text_search_key_noteid( - struct cursor *cur, uint64_t *note_id) +static uint64_t ndb_unpack_text_search_key_noteid(struct rcur *rcur) { - if (!cursor_pull_varint(cur, note_id)) - return 0; - - return 1; + return rcur_pull_varint(rcur); } // faster peek of just the string instead of unpacking everything // this is used to quickly discard range query matches if there is no // common prefix -static inline int ndb_unpack_text_search_key_string(struct cursor *cur, - const char **str, - int *str_len) +static const char *ndb_unpack_text_search_key_string(struct rcur *rcur, + size_t *str_len) { - uint64_t len; - - if (!cursor_pull_varint(cur, &len)) - return 0; - - *str_len = len; - - *str = (const char *)cur->p; - - if (!cursor_skip(cur, *str_len)) - return 0; - - return 1; + return rcur_pull_prefixed_str(rcur, str_len); } // should be called after ndb_unpack_text_search_key_string. It continues // the unpacking of a text search key if we've already started it. -static inline int -ndb_unpack_remaining_text_search_key(struct cursor *cur, +static bool +ndb_unpack_remaining_text_search_key(struct rcur *rcur, struct ndb_text_search_key *key) { - if (!cursor_pull_varint(cur, &key->timestamp)) - return 0; - - if (!cursor_pull_varint(cur, &key->word_index)) - return 0; - - return 1; + key->timestamp = rcur_pull_varint(rcur); + key->word_index = rcur_pull_varint(rcur); + return rcur_valid(rcur); } // unpack a fulltext search key @@ -477,19 +452,14 @@ ndb_unpack_remaining_text_search_key(struct cursor *cur, // full version of string + unpack remaining. This is split up because text // searching only requires to pull the string for prefix searching, and the // remaining is optional -static inline int ndb_unpack_text_search_key(unsigned char *p, int len, - struct ndb_text_search_key *key) +static bool ndb_unpack_text_search_key(unsigned char *p, int len, + struct ndb_text_search_key *key) { - struct cursor c; - make_cursor(p, p + len, &c); + struct rcur rcur = rcur_forbuf(p, len); - if (!ndb_unpack_text_search_key_noteid(&c, &key->note_id)) - return 0; - - if (!ndb_unpack_text_search_key_string(&c, &key->str, &key->str_len)) - return 0; - - return ndb_unpack_remaining_text_search_key(&c, key); + key->note_id = ndb_unpack_text_search_key_noteid(&rcur); + key->str = ndb_unpack_text_search_key_string(&rcur, &key->str_len); + return ndb_unpack_remaining_text_search_key(&rcur, key); } // Copies only lowercase characters to the destination string and fills the rest with null bytes. @@ -3205,42 +3175,27 @@ static int ndb_write_word_to_index(struct ndb_txn *txn, const char *word, // break a string into individual words for querying or for building the // fulltext search index. This is callback based so we don't need to // build up an intermediate structure -static int ndb_parse_words(struct cursor *cur, void *ctx, ndb_word_parser_fn fn) +static void ndb_parse_words(struct rcur *rcur, void *ctx, ndb_word_parser_fn fn) { - int word_len, words; + size_t word_len, words; const char *word; words = 0; + /* Skip any leading whitespace and punctuation */ + while (rcur_pull_whitespace(rcur) || rcur_pull_punctuation(rcur)); - while (cur->p < cur->end) { - consume_whitespace_or_punctuation(cur); - if (cur->p >= cur->end) - break; - word = (const char *)cur->p; - - if (!consume_until_boundary(cur)) - break; - - // start of word or end - word_len = cur->p - (unsigned char *)word; - if (word_len == 0 && cur->p >= cur->end) - break; - - if (word_len == 0) { - if (!cursor_skip(cur, 1)) - break; - continue; - } - + while ((word = rcur_pull_word(rcur, &word_len)) != NULL) { //ndb_debug("writing word index '%.*s'\n", word_len, word); if (!fn(ctx, word, word_len, words)) continue; words++; - } - return 1; + /* Skip next whitespace and punctuation */ + while (rcur_pull_whitespace(rcur) + || rcur_pull_punctuation(rcur)); + } } struct ndb_word_writer_ctx @@ -3271,7 +3226,7 @@ static int ndb_write_note_fulltext_index(struct ndb_txn *txn, struct ndb_note *note, uint64_t note_id) { - struct cursor cur; + struct rcur rcur; unsigned char *content; struct ndb_str str; struct ndb_word_writer_ctx ctx; @@ -3283,13 +3238,13 @@ static int ndb_write_note_fulltext_index(struct ndb_txn *txn, content = (unsigned char *)str.str; - make_cursor(content, content + note->content_length, &cur); + rcur = rcur_forbuf(content, note->content_length); ctx.txn = txn; ctx.note = note; ctx.note_id = note_id; - ndb_parse_words(&cur, &ctx, ndb_fulltext_word_writer); + ndb_parse_words(&rcur, &ctx, ndb_fulltext_word_writer); return 1; } @@ -3371,14 +3326,12 @@ static int ndb_text_search_next_word(MDB_cursor *cursor, MDB_cursor_op op, struct ndb_text_search_result *result, MDB_cursor_op order_op) { - struct cursor key_cursor; + struct rcur key_cursor; //struct ndb_text_search_key search_key; MDB_val v; int retries; retries = -1; - make_cursor(k->mv_data, k->mv_data + k->mv_size, &key_cursor); - // When op is MDB_SET_RANGE, this initializes the search. Position // the cursor at the next key greater than or equal to the specified // key. @@ -3406,9 +3359,10 @@ static int ndb_text_search_next_word(MDB_cursor *cursor, MDB_cursor_op op, printf("\n"); */ - make_cursor(k->mv_data, k->mv_data + k->mv_size, &key_cursor); + key_cursor = rcur_forbuf(k->mv_data, k->mv_size); - if (unlikely(!ndb_unpack_text_search_key_noteid(&key_cursor, &result->key.note_id))) { + result->key.note_id = ndb_unpack_text_search_key_noteid(&key_cursor); + if (unlikely(!result->key.note_id)) { fprintf(stderr, "UNUSUAL: failed to unpack text search key note_id\n"); return 0; } @@ -3426,14 +3380,12 @@ static int ndb_text_search_next_word(MDB_cursor *cursor, MDB_cursor_op op, // unpack just the string to check the prefix. We don't // need to unpack the entire key if the prefix doesn't // match - if (!ndb_unpack_text_search_key_string(&key_cursor, - &result->key.str, - &result->key.str_len)) { - // this should never happen - fprintf(stderr, "UNUSUAL: failed to unpack text search key string\n"); + result->key.str = ndb_unpack_text_search_key_string(&key_cursor, + &result->key.str_len); + if (unlikely(!result->key.str)) { + fprintf(stderr, "UNUSUAL: failed to unpack text search key note_id\n"); return 0; } - if (!ndb_prefix_matches(result, search_word)) { /* printf("result prefix '%.*s' didn't match search word '%.*s'\n", @@ -3506,7 +3458,7 @@ int ndb_text_search(struct ndb_txn *txn, const char *query, struct ndb_search_words search_words; //struct ndb_text_search_key search_key; struct ndb_word *search_word; - struct cursor cur; + struct rcur rcur; ndb_text_search_key_order_fn key_order_fn; MDB_dbi text_db; MDB_cursor *cursor; @@ -3532,9 +3484,10 @@ int ndb_text_search(struct ndb_txn *txn, const char *query, // end search config text_db = txn->lmdb->dbs[NDB_DB_NOTE_TEXT]; - make_cursor((unsigned char *)query, (unsigned char *)query + strlen(query), &cur); - ndb_parse_words(&cur, &search_words, ndb_parse_search_words); + rcur = rcur_forstr(query); + + ndb_parse_words(&rcur, &search_words, ndb_parse_search_words); if (search_words.num_words == 0) return 0; diff --git a/src/nostrdb.h b/src/nostrdb.h index 92b9b47..97d7f80 100644 --- a/src/nostrdb.h +++ b/src/nostrdb.h @@ -3,6 +3,7 @@ #include #include "cursor.h" +#include "rcur.h" // maximum number of filters allowed in a filter group #define NDB_PACKED_STR 0x1 @@ -294,7 +295,7 @@ struct ndb_stat { // see `ndb_make_text_search_key` for how the packed version is constructed struct ndb_text_search_key { - int str_len; + size_t str_len; const char *str; uint64_t timestamp; uint64_t note_id; @@ -396,8 +397,8 @@ struct ndb_invoice { uint64_t amount; uint64_t timestamp; uint64_t expiry; - char *description; - unsigned char *description_hash; + const char *description; + const unsigned char *description_hash; }; struct ndb_invoice_block { @@ -419,7 +420,7 @@ struct ndb_block_iterator { const char *content; struct ndb_blocks *blocks; struct ndb_block block; - unsigned char *p; + struct rcur rcur; }; struct ndb_query_result { diff --git a/src/print_util.h b/src/print_util.h index 6c77882..d68a16e 100644 --- a/src/print_util.h +++ b/src/print_util.h @@ -3,7 +3,7 @@ static void ndb_print_text_search_key(struct ndb_text_search_key *key) { - fprintf(stderr,"K<'%.*s' %" PRIu64 " %" PRIu64 " note_id:%" PRIu64 ">", key->str_len, key->str, + fprintf(stderr,"K<'%.*s' %" PRIu64 " %" PRIu64 " note_id:%" PRIu64 ">", (int)key->str_len, key->str, key->word_index, key->timestamp, key->note_id); diff --git a/src/rcur.c b/src/rcur.c new file mode 100644 index 0000000..fab6395 --- /dev/null +++ b/src/rcur.c @@ -0,0 +1,251 @@ +#include "rcur.h" +#include "ccan/utf8/utf8.h" +#include "cursor.h" +#include +#include + +/* Note that only rcur_pull and rcur_peek_remainder actually access + * rcur. The rest are built on top of them! */ + +/* Pull @len bytes from rcur, if space available. + * Return NULL if not valid. + */ +const void *rcur_pull(struct rcur *rcur, size_t len) +{ + const void *p = rcur_peek(rcur, len); + if (!p) { + rcur_fail(rcur); + return NULL; + } + rcur->cur += len; + rcur->len -= len; + return p; +} + +/* Access the remaining bytes. Returns NULL and *len=0 if invalid. */ +const void *rcur_peek_remainder(const struct rcur *rcur, size_t *len) +{ + *len = rcur->len; + return rcur->cur; +} + +bool rcur_copy(struct rcur *rcur, void *dst, size_t len) +{ + const void *src = rcur_pull(rcur, len); + if (!src) + return false; + memcpy(dst, src, len); + return true; +} + +/* Look ahead: returns NULL if @len too long. Does *not* alter + * rcur */ +const void *rcur_peek(const struct rcur *rcur, size_t len) +{ + size_t actual_len; + const void *p = rcur_peek_remainder(rcur, &actual_len); + + if (len > actual_len) + return NULL; + return p; +} + +/* All these are based on rcur_pull. */ +bool rcur_skip(struct rcur *rcur, size_t len) +{ + return rcur_pull(rcur, len) != NULL; +} + +unsigned char rcur_pull_byte(struct rcur *rcur) +{ + unsigned char v; + + if (!rcur_copy(rcur, &v, sizeof(v))) + return 0; + return v; +} + +uint16_t rcur_pull_u16(struct rcur *rcur) +{ + uint16_t v; + + if (!rcur_copy(rcur, &v, sizeof(v))) + return 0; + return v; +} + +uint32_t rcur_pull_u32(struct rcur *rcur) +{ + uint32_t v; + + if (!rcur_copy(rcur, &v, sizeof(v))) + return 0; + return v; +} + +uint64_t rcur_pull_varint(struct rcur *rcur) +{ + uint64_t v = 0; + unsigned char c; + + for (size_t i = 0; i < 10; i++) { // Loop up to 10 bytes for 64-bit + c = rcur_pull_byte(rcur); + v |= ((uint64_t)c & 0x7F) << (i * 7); + + if ((c & 0x80) == 0) + break; + } + return v; +} + +uint32_t rcur_pull_varint_u32(struct rcur *rcur) +{ + uint64_t v = rcur_pull_varint(rcur); + if (v >= UINT32_MAX) { + rcur_fail(rcur); + v = 0; + } + return v; +} + +size_t rcur_pull_whitespace(struct rcur *rcur) +{ + size_t len, wslen; + const unsigned char *c; + + c = rcur_peek_remainder(rcur, &len); + for (wslen = 0; wslen < len; wslen++) { + if (!is_whitespace(c[wslen])) + break; + } + + rcur_skip(rcur, wslen); + return wslen; +} + +// Returns 0 on error. Adds length to *totlen. +static uint32_t rcur_pull_utf8(struct rcur *rcur, size_t *totlen) +{ + struct utf8_state state = UTF8_STATE_INIT; + + /* Since 0 is treated as a bad encoding, this terminated if we run + * out of chars */ + while (!utf8_decode(&state, rcur_pull_byte(rcur))) + (*totlen)++; + + if (errno != 0) { + rcur_fail(rcur); + return 0; + } + return state.c; +} + +/* Remove is_punctuation(), return bytes removed */ +size_t rcur_pull_punctuation(struct rcur *rcur) +{ + size_t totlen = 0; + + while (rcur_bytes_remaining(*rcur)) { + uint32_t c = rcur_pull_utf8(rcur, &totlen); + if (!is_punctuation(c)) + break; + } + + if (!rcur_valid(rcur)) + return 0; + return totlen; +} + +/* Remove !is_alphanumeric(), return bytes removed */ +size_t rcur_pull_non_alphanumeric(struct rcur *rcur) +{ + size_t len, nonalpha_len; + const char *p = rcur_peek_remainder(rcur, &len); + + for (nonalpha_len = 0; nonalpha_len < len; nonalpha_len++) { + if (!is_alphanumeric(p[nonalpha_len])) + break; + } + + rcur_skip(rcur, nonalpha_len); + return nonalpha_len; +} + +const char *rcur_pull_word(struct rcur *rcur, size_t *len) +{ + const char *start = rcur_peek(rcur, 0); + + /* consume_until_boundary */ + *len = 0; + while (rcur_bytes_remaining(*rcur)) { + uint32_t c = rcur_pull_utf8(rcur, len); + if (!c || is_right_boundary(c)) + break; + } + + if (!rcur_valid(rcur) || *len == 0) + return NULL; + + return start; +} + +const char *rcur_pull_c_string(struct rcur *rcur) +{ + size_t len; + const char *p = rcur_peek_remainder(rcur, &len); + + for (size_t i = 0; i < len; i++) { + if (p[i] == '\0') + return rcur_pull(rcur, i+1); + } + rcur_fail(rcur); + return NULL; +} + +bool rcur_skip_if_match(struct rcur *rcur, const void *p, size_t len) +{ + const void *peek = rcur_peek(rcur, len); + + if (!peek) + return false; + + if (memcmp(p, peek, len) != 0) + return false; + rcur_skip(rcur, len); + return true; +} + +bool rcur_skip_if_str_anycase(struct rcur *rcur, const char *str) +{ + size_t len = strlen(str); + const char *peek = rcur_peek(rcur, len); + + if (!peek) + return false; + + for (size_t i = 0; i < len; i++) { + if (tolower(peek[i]) != tolower(str[i])) + return false; + } + rcur_skip(rcur, len); + return true; +} + +const char *rcur_pull_prefixed_str(struct rcur *rcur, size_t *len) +{ + *len = rcur_pull_varint(rcur); + return rcur_pull(rcur, *len); +} + +bool rcur_trim_if_char(struct rcur *rcur, char c) +{ + const char *p; + size_t len; + + p = rcur_peek_remainder(rcur, &len); + if (len > 0 && p[len-1] == c) { + rcur->len--; + return true; + } + return false; +} diff --git a/src/rcur.h b/src/rcur.h new file mode 100644 index 0000000..f999ca5 --- /dev/null +++ b/src/rcur.h @@ -0,0 +1,144 @@ +/* A read-only cursor into a buffer. You can pull as many times as you want, + * and check at the end if it is valid. */ +#ifndef JB55_RCUR_H +#define JB55_RCUR_H + +#include "ccan/compiler/compiler.h" +#include "ccan/likely/likely.h" +#include "cursor.h" +#include +#include +#include +#include +#include + +struct rcur { + const unsigned char *cur; + size_t len; +}; + +/* Is this valid? Pulling too much (or invalid values) sets this true */ +static inline bool rcur_valid(const struct rcur *rcur) +{ + return unlikely(rcur->cur != NULL); +} + +/* How many bytes left to pull? 0 if invalid */ +static inline size_t rcur_bytes_remaining(struct rcur rcur) +{ + return rcur.len; +} + +/* Pull @len bytes from rcur, if space available. + * Return NULL if not valid. + */ +const void *rcur_pull(struct rcur *rcur, size_t len); + +/* Access the remaining bytes. Returns NULL and *len=0 if invalid. */ +const void *rcur_peek_remainder(const struct rcur *rcur, size_t *len); + +/* Copy @len bytes from rcur, if space available. + * Return false if not valid. + */ +bool rcur_copy(struct rcur *rcur, void *dst, size_t len); + +/* Look ahead: returns NULL if @len too long. Does *not* alter + * rcur */ +const void *rcur_peek(const struct rcur *rcur, size_t len); + +/* Mark this rcur invalid: return false for convenience. */ +static inline bool COLD rcur_fail(struct rcur *rcur) +{ + rcur->cur = NULL; + rcur->len = 0; + return false; +} + +/* Create rcur for buffer */ +static inline struct rcur rcur_forbuf(const void *buf, size_t len) +{ + struct rcur rcur; + rcur.cur = buf; + rcur.len = len; + + return rcur; +} + +/* Create rcur for string */ +static inline struct rcur rcur_forstr(const char *str) +{ + struct rcur rcur; + rcur.cur = (const unsigned char *)str; + rcur.len = strlen(str); + + return rcur; +} + +/* Create rcur for next len bytes in rcur */ +static inline struct rcur rcur_pull_slice(struct rcur *rcur, size_t len) +{ + struct rcur slice; + const unsigned char *p; + + p = rcur_pull(rcur, len); + if (likely(p)) + slice = rcur_forbuf(p, len); + else + rcur_fail(&slice); + return slice; +} + +/* Get rcur between these two: newer has been pulled from */ +static inline struct rcur rcur_between(const struct rcur *orig, + const struct rcur *newer) +{ + struct rcur rcur; + + if (rcur_valid(newer)) { + assert(newer->cur >= orig->cur); + assert(newer->cur + newer->len <= orig->cur + orig->len); + rcur = rcur_forbuf(orig->cur, newer->cur - orig->cur); + } else { + rcur_fail(&rcur); + } + return rcur; +} + +/* All these are based on rcur_pull. */ +bool rcur_skip(struct rcur *rcur, size_t len); +unsigned char rcur_pull_byte(struct rcur *rcur); +uint16_t rcur_pull_u16(struct rcur *rcur); +uint32_t rcur_pull_u32(struct rcur *rcur); +uint64_t rcur_pull_varint(struct rcur *rcur); +uint32_t rcur_pull_varint_u32(struct rcur *rcur); + +/* Trim this character from the end of buffer, if present. Returns + * true if trimmed. */ +bool rcur_trim_if_char(struct rcur *rcur, char c); + +/* Remove is_whitespace(), return bytes removed */ +size_t rcur_pull_whitespace(struct rcur *rcur); + +/* Remove is_punctuation(), return bytes removed */ +size_t rcur_pull_punctuation(struct rcur *rcur); + +/* Remove !is_alphanumeric(), return bytes removed */ +size_t rcur_pull_non_alphanumeric(struct rcur *rcur); + +/* Note: returns non-zero-terminated string, and sets len (or NULL) */ +const char *rcur_pull_prefixed_str(struct rcur *rcur, size_t *len); + +/* Returns up to next whitespace / punctuation. + * Returns NULL for invalid / at end. */ +const char *rcur_pull_word(struct rcur *rcur, size_t *len); + +/* Returns to \0 terminator. NULL if invalid / at end */ +const char *rcur_pull_c_string(struct rcur *rcur); + +/* Skip over this if it matches. Return true if skipped */ +bool rcur_skip_if_match(struct rcur *rcur, const void *p, size_t len); + +/* Skpi over if this matches string (case insentive) */ +bool rcur_skip_if_str_anycase(struct rcur *rcur, const char *str); + +#endif /* JB55_RCUR_H */ diff --git a/test.c b/test.c index 467635a..974c88b 100644 --- a/test.c +++ b/test.c @@ -8,6 +8,7 @@ #include "protected_queue.h" #include "memchr.h" #include "print_util.h" +#include "rcur.h" #include "bindings/c/profile_reader.h" #include "bindings/c/profile_verifier.h" #include "bindings/c/meta_reader.h" @@ -169,6 +170,7 @@ static void test_invoice_encoding(const char *bolt11_str) unsigned char buf[4096]; char *fail = NULL; struct cursor cur; + struct rcur rcur; struct ndb_invoice invoice; struct bolt11 *bolt11; @@ -177,8 +179,8 @@ static void test_invoice_encoding(const char *bolt11_str) assert(fail == NULL); assert(ndb_encode_invoice(&cur, bolt11)); - cur.p = cur.start; - assert(ndb_decode_invoice(&cur, &invoice)); + rcur = rcur_forbuf(cur.start, cur.end - cur.start); + assert(ndb_decode_invoice(&rcur, &invoice)); assert(bolt11->msat->millisatoshis == invoice.amount); assert(bolt11->timestamp == invoice.timestamp);