diff --git a/db/types.c b/db/types.c index d3aa44668d..0dcaf85923 100644 --- a/db/types.c +++ b/db/types.c @@ -3695,6 +3695,21 @@ TYPES_INLINE int CLIENT_BLOB_to_CLIENT_PSTR2( return -1; } +static int utf8_validate_permitting_trailing_zeros(const char *u, int max) +{ + int valid_len; + + if (utf8_validate(u, max, &valid_len) != 0) + return -1; + + /* utf8_validate() stops at the 1st NUL character. We want to permit trailing zeros */ + for (; valid_len < max - 1; ++valid_len) { + if (u[valid_len] != '\0') + return -1; + } + return 0; +} + /** * Finds out where the input vutf8 string is stored and then determines where it * should be copied and copies it. Doesn't deal with NULLs. @@ -3717,7 +3732,6 @@ static TYPES_INLINE int vutf8_convert(int len, const void *in, int in_len, blob_buffer_t *inblob, blob_buffer_t *outblob, int *outdtsz) { - int valid_len; if (out_len > 0) memset(out, 0, out_len); @@ -3742,10 +3756,8 @@ static TYPES_INLINE int vutf8_convert(int len, const void *in, int in_len, /* validate input blob */ assert(inblob->length == len); - if (utf8_validate(inblob->data, inblob->length, &valid_len) || - valid_len != len - 1) { + if (utf8_validate_permitting_trailing_zeros(inblob->data, inblob->length)) return -1; - } memcpy(outblob, inblob, sizeof(blob_buffer_t)); bzero(inblob, sizeof(blob_buffer_t)); @@ -3767,8 +3779,7 @@ static TYPES_INLINE int vutf8_convert(int len, const void *in, int in_len, /* if the string isn't empty, validate the string and make sure its * length matches len (minus 1 for the NUL byte) */ - if (len > 0 && - (utf8_validate(in, len, &valid_len) || valid_len != len - 1)) + if (len > 0 && utf8_validate_permitting_trailing_zeros(in, len)) return -1; memcpy(out, in, len); @@ -3785,7 +3796,6 @@ static TYPES_INLINE int vutf8_convert(int len, const void *in, int in_len, * fit in the out buffer, then the string needs to be copied from the in * buffer to a new out blob */ else if (len <= in_len) { - int valid_len; if (outblob) { if (len > 
gbl_blob_sz_thresh_bytes) @@ -3800,8 +3810,7 @@ static TYPES_INLINE int vutf8_convert(int len, const void *in, int in_len, /* if the string isn't empty, validate the string and make sure its * length matches len (minus 1 for the NUL byte) */ - if (len > 0 && - (utf8_validate(in, len, &valid_len) || valid_len != len - 1)) + if (len > 0 && utf8_validate_permitting_trailing_zeros(in, len)) return -1; memcpy(outblob->data, in, len); @@ -3821,8 +3830,6 @@ static TYPES_INLINE int vutf8_convert(int len, const void *in, int in_len, * blob to the out buffer */ else /* len <= out_len */ { - int valid_len; - /* Do not attempt to convert a blob placeholder (i.e., length == -2) */ if (inblob && inblob->length != OSQL_BLOB_FILLER_LENGTH) { if (!inblob->exists || !inblob->data) { @@ -3832,8 +3839,7 @@ static TYPES_INLINE int vutf8_convert(int len, const void *in, int in_len, /* if the string isn't empty, validate the string and make sure its * length matches len (minus 1 for the NUL byte) */ - if (len > 0 && (utf8_validate(inblob->data, len, &valid_len) || - valid_len != len - 1)) + if (len > 0 && utf8_validate_permitting_trailing_zeros(inblob->data, len)) return -1; memcpy(out, inblob->data, len); diff --git a/docs/images/alter-table-ddl.gif b/docs/images/alter-table-ddl.gif index 60c5e77a02..66e3957c72 100644 Binary files a/docs/images/alter-table-ddl.gif and b/docs/images/alter-table-ddl.gif differ diff --git a/docs/images/column-constraint.gif b/docs/images/column-constraint.gif index 3b890f0b52..41de3ee9ef 100644 Binary files a/docs/images/column-constraint.gif and b/docs/images/column-constraint.gif differ diff --git a/docs/src/sqlitegen/bubble-generator-data.tcl b/docs/src/sqlitegen/bubble-generator-data.tcl index 3356b0ce32..ae85137586 100644 --- a/docs/src/sqlitegen/bubble-generator-data.tcl +++ b/docs/src/sqlitegen/bubble-generator-data.tcl @@ -724,6 +724,12 @@ stack } } {line OPTION DBPAD = signed-number } + {line ENCODING + {or + {line /string-literal} + {line NONE} + 
} + } } table-constraint { @@ -828,6 +834,12 @@ stack } NOT NULL } + {line ENCODING + {or + {line /string-literal} + {line NONE} + } + } } } {line OPTIONS ( table-options ) } diff --git a/schemachange/sc_records.c b/schemachange/sc_records.c index 2b1d9ff683..b13b9748df 100644 --- a/schemachange/sc_records.c +++ b/schemachange/sc_records.c @@ -545,12 +545,12 @@ static int prepare_and_verify_newdb_record(struct convert_record_data *data, if (rc < 0) { logmsg(LOGMSG_DEBUG, "%s:%d internal error during CHECK constraint\n", __func__, __LINE__); - return ERR_CONSTR; + return ERR_CHECK_CONSTRAINT; } else if (rc > 0) { logmsg(LOGMSG_DEBUG, "%s:%d CHECK constraint failed for '%s'\n", __func__, __LINE__, data->iq.usedb->check_constraints[rc - 1].consname); - return ERR_CONSTR; + return ERR_CHECK_CONSTRAINT; } rc = verify_record_constraint(&data->iq, data->to, data->trans, p_buf_data, @@ -1123,6 +1123,9 @@ static int convert_record(struct convert_record_data *data) } else if (rc == ERR_VERIFY_PI) { sc_client_error(data->s, "Error verifying partial indexes! rrn %d genid 0x%llx", rrn, genid); return -2; + } else if (rc == ERR_CHECK_CONSTRAINT) { + sc_client_error(data->s, "Record violates check constraints rrn %d genid 0x%llx", rrn, genid); + return -2; } else if (rc != 0) { sc_client_error(data->s, "Error adding record rcode %d opfailcode %d ixfailnum %d rrn %d genid 0x%llx, stripe %d", rc, diff --git a/sqlite/src/comdb2build.c b/sqlite/src/comdb2build.c index 93498012a4..2eb3f2f298 100644 --- a/sqlite/src/comdb2build.c +++ b/sqlite/src/comdb2build.c @@ -6406,7 +6406,7 @@ void comdb2DeferForeignKey(Parse *pParse, int isDeferred) return; } -static void drop_constraint(Parse *pParse, Token *pName, int type) +static void drop_constraint(Parse *pParse, Token *pName, int type, int hush) { if (comdb2IsPrepareOnly(pParse)) return; @@ -6433,7 +6433,7 @@ static void drop_constraint(Parse *pParse, Token *pName, int type) if (cons) { /* Mark it as dropped. 
*/ cons->flags |= CONS_DELETED; - } else { + } else if (!hush) { pParse->rc = SQLITE_ERROR; sqlite3ErrorMsg(pParse, "Constraint '%s' not found.", name); goto cleanup; @@ -6454,7 +6454,7 @@ void comdb2DropForeignKey(Parse *pParse, /* Parser context */ Token *pName /* Foreign key name */ ) { - drop_constraint(pParse, pName, CONS_FKEY); + drop_constraint(pParse, pName, CONS_FKEY, 0); return; } @@ -6462,7 +6462,7 @@ void comdb2DropConstraint(Parse *pParse, /* Parser context */ Token *pName /* Foreign key name */ ) { - drop_constraint(pParse, pName, CONS_ALL); + drop_constraint(pParse, pName, CONS_ALL, 0); return; } @@ -7690,3 +7690,65 @@ void create_default_consumer_sp(Parse *p, char *spname) comdb2prepareNoRows(v, p, 0, sc, &comdb2SqlSchemaChange, (vdbeFuncArgFree)&free_schema_change_type); } + +void comdb2ChangeCharacterSet(Parse *pParse, Token *t, int alter) +{ + struct comdb2_ddl_context *ctx; + struct comdb2_column *column; + sqlite3 *db = pParse->db; + + char *charset = NULL; + char expr[MAXCOLNAME + sizeof("utf8_validate()=0")]; + char constraint_name[MAXCOLNAME + sizeof("$" GEN_CONS_PREFIX "_CHAR_ENC_")]; + int nw; + + Token colToken; + Token funcToken; + ExprList *arg; + Expr *func; + Expr *zero; + Expr *equality; + + if (t != NULL) { + charset = sqlite3NameFromToken(db, t); + if (charset == NULL) + return; + + /* so far only utf8 is supported */ + if (strcasecmp(charset, "utf8") != 0 && strcasecmp(charset, "utf-8") != 0) { + setError(pParse, SQLITE_MISUSE, "unknown charset"); + goto out; + } + } + + ctx = pParse->comdb2_ddl_ctx; + if (alter) + column = ctx->alter_column; + else + column = (struct comdb2_column *)LISTC_BOT(&ctx->schema->column_list); + + if (column->type != SQL_TYPE_CSTRING && column->type != SQL_TYPE_VARCHAR && column->type != SQL_TYPE_CHAR) { + setError(pParse, SQLITE_MISUSE, "invalid column type to use character encoding"); + goto out; + } + + snprintf(constraint_name, sizeof(constraint_name), "$" GEN_CONS_PREFIX "_CHAR_ENC_%s", 
column->name); + sqlite3TokenInit(&pParse->constraintName, constraint_name); + + if (t == NULL) { + drop_constraint(pParse, &pParse->constraintName, CONS_CHECK, 1); + } else { + sqlite3TokenInit(&colToken, column->name); + sqlite3TokenInit(&funcToken, "utf8_validate"); + + arg = sqlite3ExprListAppend(pParse, NULL, sqlite3ExprAlloc(db, TK_ID, &colToken, 0)); + func = sqlite3ExprFunction(pParse, arg, &funcToken, 0); + zero = sqlite3ExprAlloc(db, TK_INTEGER, &sqlite3IntTokens[0], 0); + + equality = sqlite3PExpr(pParse, TK_EQ, func, zero); + nw = snprintf(expr, sizeof(expr), "utf8_validate(%s)=0", column->name); + comdb2AddCheckConstraint(pParse, equality, expr, expr + nw + 1); + } +out: + sqlite3DbFree(db, charset); +} diff --git a/sqlite/src/comdb2build.h b/sqlite/src/comdb2build.h index 3464fe6e8a..79876f7296 100644 --- a/sqlite/src/comdb2build.h +++ b/sqlite/src/comdb2build.h @@ -80,6 +80,7 @@ void comdb2AddIndex(Parse *, Token *, ExprList *, int, Expr *, const char *, const char *, int, u8, int, ExprList *); void comdb2AddDbpad(Parse *, int); void comdb2AddCheckConstraint(Parse *, Expr *, const char *, const char *); +void comdb2ChangeCharacterSet(Parse *pParse, Token *, int); void comdb2CreateIndex(Parse *, Token *, Token *, SrcList *, ExprList *, int, Token *, Expr *, const char *, const char *, int, int, u8, int, ExprList *, int); diff --git a/sqlite/src/func.c b/sqlite/src/func.c index bec8095f54..1ccfc03d88 100644 --- a/sqlite/src/func.c +++ b/sqlite/src/func.c @@ -1418,6 +1418,36 @@ static void uncompressGzipFunc( return; } +/* Return 0 if payload is utf8. 
Return (-N - 1), where N is the index + * of the first malformed character. An argument that is neither BLOB nor TEXT yields -1. */ +int utf8_validate(const char *str, int len, int *valid_len); +static void comdb2Utf8ValidateFunc( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + int valid_len = 0, rc, len; /* 0 so the default: branch returns -1 rather than reading an indeterminate value */ + const char *z; + assert(argc == 1); + UNUSED_PARAMETER(argc); + + switch( sqlite3_value_type(argv[0]) ){ + case SQLITE_BLOB: + len = sqlite3_value_bytes(argv[0]); + z = sqlite3_value_blob(argv[0]); + rc = utf8_validate(z, len, &valid_len); + break; + case SQLITE_TEXT: + len = sqlite3_value_bytes(argv[0]) + 1; /* +1 for \0 */ + z = (const char *)sqlite3_value_text(argv[0]); + rc = utf8_validate(z, len, &valid_len); + break; + default: + rc = -1; + break; + } + sqlite3_result_int(context, rc == 0 ? rc : (-valid_len - 1)); +} #endif /* defined(SQLITE_BUILDING_FOR_COMDB2) */ /* @@ -3093,6 +3123,7 @@ void sqlite3RegisterBuiltinFunctions(void){ FUNCTION(comdb2_starttime, 0, 0, 0, comdb2StartTimeFunc), FUNCTION(comdb2_user, 0, 0, 0, comdb2UserFunc), FUNCTION(comdb2_last_cost, 0, 0, 0, comdb2LastCostFunc), + FUNCTION(utf8_validate, 1, 0, 0, comdb2Utf8ValidateFunc), FUNCTION(checksum_md5, 1, 0, 0, md5Func), FUNCTION(compress, 1, 0, 0, compressFunc), FUNCTION(uncompress, 1, 0, 0, uncompressFunc), diff --git a/sqlite/src/parse.y b/sqlite/src/parse.y index ef711ba7b9..c1cddd3a58 100644 --- a/sqlite/src/parse.y +++ b/sqlite/src/parse.y @@ -542,6 +542,8 @@ ccons ::= PRIMARY KEY sortorder(Z) onconf(R) autoinc(I). {sqlite3AddPrimaryKey(pParse,0,R,I,Z);} %endif !SQLITE_BUILDING_FOR_COMDB2 %ifdef SQLITE_BUILDING_FOR_COMDB2 +ccons ::= ENCODING STRING(H). {comdb2ChangeCharacterSet(pParse,&H,0);} +ccons ::= ENCODING NONE. {comdb2ChangeCharacterSet(pParse,NULL,0);} ccons ::= UNIQUE onconf(R). { comdb2AddIndex(pParse, 0, 0, R, 0, 0, 0, SQLITE_SO_ASC, SQLITE_IDXTYPE_UNIQUE, 0, 0); @@ -2037,6 +2039,12 @@ alter_table_alter_column_cmd ::= SET NOT NULL. { alter_table_alter_column_cmd ::= DROP NOT NULL. 
{ comdb2AlterColumnDropNotNull(pParse); } +alter_table_alter_column_cmd ::= ENCODING STRING(H). { + comdb2ChangeCharacterSet(pParse,&H,1); +} +alter_table_alter_column_cmd ::= ENCODING NONE. { + comdb2ChangeCharacterSet(pParse,NULL,1); +} alter_table_alter_column ::= alter_table_alter_column_start alter_table_alter_column_cmd. { comdb2AlterColumnEnd(pParse); diff --git a/sqlite/tool/mkkeywordhash.c b/sqlite/tool/mkkeywordhash.c index 7b48aa16d0..25057fb7ae 100644 --- a/sqlite/tool/mkkeywordhash.c +++ b/sqlite/tool/mkkeywordhash.c @@ -211,6 +211,7 @@ static Keyword aKeywordTable[] = { { "DISTINCT", "TK_DISTINCT", ALWAYS }, { "DO", "TK_DO", UPSERT }, { "DROP", "TK_DROP", ALWAYS }, + { "ENCODING", "TK_ENCODING", ALWAYS }, { "END", "TK_END", ALWAYS }, { "EACH", "TK_EACH", TRIGGER }, { "ELSE", "TK_ELSE", ALWAYS }, diff --git a/tests/auth.test/t09.expected b/tests/auth.test/t09.expected index 2071231c5c..289219abd4 100644 --- a/tests/auth.test/t09.expected +++ b/tests/auth.test/t09.expected @@ -65,6 +65,7 @@ (candidate='EACH') (candidate='ELSE') (candidate='ENABLE') +(candidate='ENCODING') (candidate='END') (candidate='ESCAPE') (candidate='EXCEPT') @@ -377,6 +378,7 @@ (candidate='unlikely()') (candidate='upper()') (candidate='usleep()') +(candidate='utf8_validate()') (candidate='zeroblob()') (username='user1') (username='user2') diff --git a/tests/comdb2sys.test/comdb2sys.expected b/tests/comdb2sys.test/comdb2sys.expected index 5588a2d0da..bcf0eb8824 100644 --- a/tests/comdb2sys.test/comdb2sys.expected +++ b/tests/comdb2sys.test/comdb2sys.expected @@ -81,9 +81,9 @@ (tablename='t3', bytes=73728) (tablename='t4', bytes=73728) [select * from comdb2_tablesizes order by tablename] rc 0 -(KEYWORDS_COUNT=223) +(KEYWORDS_COUNT=224) [SELECT COUNT(*) AS KEYWORDS_COUNT FROM comdb2_keywords] rc 0 -(RESERVED_KW=66) +(RESERVED_KW=67) [SELECT COUNT(*) AS RESERVED_KW FROM comdb2_keywords WHERE reserved = 'Y'] rc 0 (NONRESERVED_KW=157) [SELECT COUNT(*) AS NONRESERVED_KW FROM 
comdb2_keywords WHERE reserved = 'N'] rc 0 @@ -104,6 +104,7 @@ (name='DISTINCT', reserved='Y') (name='DROP', reserved='Y') (name='ELSE', reserved='Y') +(name='ENCODING', reserved='Y') (name='ESCAPE', reserved='Y') (name='EXCEPT', reserved='Y') (name='EXISTS', reserved='Y') diff --git a/tests/ddl_no_csc2.test/t09_check.expected b/tests/ddl_no_csc2.test/t09_check.expected index 9528074044..3d3e55d3d8 100644 --- a/tests/ddl_no_csc2.test/t09_check.expected +++ b/tests/ddl_no_csc2.test/t09_check.expected @@ -11,7 +11,7 @@ ') (rows inserted=1) (rows inserted=1) -[ALTER TABLE t1 ADD CONSTRAINT valid_colors CHECK (color IN ('red', 'green', 'blue'))] failed with rc 240 Record violates foreign constraints rrn xx genid xx +[ALTER TABLE t1 ADD CONSTRAINT valid_colors CHECK (color IN ('red', 'green', 'blue'))] failed with rc 240 Record violates check constraints rrn xx genid xx (csc2='schema { cstring color[11] null = yes diff --git a/tests/ddl_no_csc2.test/t15_encoding.expected b/tests/ddl_no_csc2.test/t15_encoding.expected new file mode 100644 index 0000000000..d5e0e74b69 --- /dev/null +++ b/tests/ddl_no_csc2.test/t15_encoding.expected @@ -0,0 +1,17 @@ +[CREATE TABLE t15(a INTEGER ENCODING 'ascii')] failed with rc -3 unknown charset +[CREATE TABLE t15(a INTEGER ENCODING 'utf8')] failed with rc -3 invalid column type to use character encoding +[CREATE TABLE t15(a TEXT ENCODING 'utf8')] failed with rc -3 invalid column type to use character encoding +(csc2='schema + { + cstring a[11] null = yes + } +constraints + { + check "$CONSTRAINT_CHAR_ENC_a" = {where utf8_validate(a)=0} + } +') +[INSERT INTO t15 VALUES (CAST(x'616263FF616263' AS TEXT))] failed with rc 403 CHECK constraint violation CHECK constraint failed for '$CONSTRAINT_CHAR_ENC_a' unable to add record rc = 320 +(COUNT(*)=0) +(rows inserted=1) +(COUNT(*)=1) +[ALTER TABLE t15 ALTER COLUMN a ENCODING 'utf8'] failed with rc 240 Record violates check constraints rrn xx genid xx diff --git 
a/tests/ddl_no_csc2.test/t15_encoding.sql b/tests/ddl_no_csc2.test/t15_encoding.sql new file mode 100644 index 0000000000..9831733322 --- /dev/null +++ b/tests/ddl_no_csc2.test/t15_encoding.sql @@ -0,0 +1,13 @@ +DROP TABLE IF EXISTS t15 +CREATE TABLE t15(a INTEGER ENCODING 'ascii')$$ +CREATE TABLE t15(a INTEGER ENCODING 'utf8')$$ +CREATE TABLE t15(a TEXT ENCODING 'utf8')$$ +CREATE TABLE t15(a CHAR(10) ENCODING 'utf8')$$ +SELECT csc2 FROM sqlite_master WHERE name='t15' +INSERT INTO t15 VALUES (CAST(x'616263FF616263' AS TEXT)) +SELECT COUNT(*) FROM t15 +ALTER TABLE t15 ALTER COLUMN a ENCODING NONE$$ +INSERT INTO t15 VALUES (CAST(x'616263FF616263' AS TEXT)) +SELECT COUNT(*) FROM t15 +ALTER TABLE t15 ALTER COLUMN a ENCODING 'utf8'$$ +DROP TABLE t15 diff --git a/tests/func.test/t02_utf8_validate.expected b/tests/func.test/t02_utf8_validate.expected new file mode 100644 index 0000000000..cd466b7462 --- /dev/null +++ b/tests/func.test/t02_utf8_validate.expected @@ -0,0 +1,2 @@ +(utf8_validate('abc')=0) +(utf8_validate(x'616263FF')=-4) diff --git a/tests/func.test/t02_utf8_validate.sql b/tests/func.test/t02_utf8_validate.sql new file mode 100644 index 0000000000..d2753955f6 --- /dev/null +++ b/tests/func.test/t02_utf8_validate.sql @@ -0,0 +1,2 @@ +SELECT utf8_validate('abc') +SELECT utf8_validate(x'616263FF')