diff --git a/bdb/bdb_api.h b/bdb/bdb_api.h index ea4b7e76e9..a2cbf1f46c 100644 --- a/bdb/bdb_api.h +++ b/bdb/bdb_api.h @@ -1729,6 +1729,11 @@ int bdb_llmeta_get_schema_versions(tran_type *t, schema_version_row **data, int int bdb_del_schema_change_history(tran_type *t, const char *tablename, uint64_t seed); +/* llmeta entry for comdb2_oplog seqno */ +int bdb_get_seqno(tran_type *t, int64_t *seqno); +int bdb_set_seqno(tran_type *t, int64_t seqno); +int bdb_del_seqno(tran_type *t); + typedef struct { uint64_t genid; unsigned int file; diff --git a/bdb/llmeta.c b/bdb/llmeta.c index cb39d58809..cc72cdba58 100644 --- a/bdb/llmeta.c +++ b/bdb/llmeta.c @@ -179,6 +179,7 @@ typedef enum { LLMETA_SCHEMACHANGE_STATUS_V2 = 56, LLMETA_SCHEMACHANGE_LIST = 57, /* list of all sc-s in a uuid txh */ LLMETA_SCHEMACHANGE_STATUS_PROTOBUF = 58, /* Indicate protobuf sc */ + LLMETA_MAX_SEQNO = 59, } llmetakey_t; struct llmeta_file_type_key { @@ -7354,6 +7355,9 @@ int bdb_llmeta_print_record(bdb_state_type *bdb_state, void *key, int keylen, if (osql_scl_print((uint8_t*)p_buf_key, p_buf_end_key, (uint8_t*)p_buf_data, p_buf_end_data)) logmsg(LOGMSG_USER, " failed to deserialize object\n"); } break; + case LLMETA_MAX_SEQNO: + logmsg(LOGMSG_USER, "LLMETA_MAX_SEQNO: %ld\n", flibc_ntohll(*(int64_t *)p_buf_data)); + break; default: logmsg(LOGMSG_USER, "Todo (type=%d)\n", type); break; @@ -11372,3 +11376,48 @@ void *buf_get_schemachange(struct schema_change_type *s, void *p_buf, void *p_bu } return NULL; } + +typedef union llmeta_seqno_key { + int file_type; + uint8_t buf[LLMETA_IXLEN]; +} llmeta_seqno_key; + +int bdb_get_seqno(tran_type *t, int64_t *seqno) +{ + llmeta_seqno_key k = {0}; + k.file_type = htonl(LLMETA_MAX_SEQNO); + int64_t **v; + int n; + int rc; + int bdberr; + + rc = kv_get(t, &k, sizeof(llmeta_seqno_key), (void ***)&v, &n, &bdberr); + if (rc == 0) + *seqno = n ? (flibc_ntohll(*v[0]) + 1) : 1; + else + logmsg(LOGMSG_WARN, "%s: kv_get rc %d bdberr %d\n", __func__, rc, bdberr); + return rc; +} + +int bdb_set_seqno(tran_type *t, int64_t seqno) +{ + int rc, bdberr; + llmeta_seqno_key k = {0}; + k.file_type = htonl(LLMETA_MAX_SEQNO); + seqno = flibc_htonll(seqno); + rc = kv_put(t, &k, &seqno, sizeof(int64_t), &bdberr); + if (rc != 0) + logmsg(LOGMSG_WARN, "%s: kv_put rc %d bdberr %d\n", __func__, rc, bdberr); + return rc; +} + +int bdb_del_seqno(tran_type *t) +{ + int rc, bdberr; + llmeta_seqno_key k = {0}; + k.file_type = htonl(LLMETA_MAX_SEQNO); + rc = kv_del(t, &k, &bdberr); + if (rc != 0) + logmsg(LOGMSG_WARN, "%s: kv_del rc %d bdberr %d\n", __func__, rc, bdberr); + return rc; +} diff --git a/db/comdb2.h b/db/comdb2.h index 6c25f76f9f..725725395a 100644 --- a/db/comdb2.h +++ b/db/comdb2.h @@ -2979,7 +2979,7 @@ long long get_unique_longlong(struct dbenv *env); void block_new_requests(struct dbenv *dbenv); void allow_new_requests(struct dbenv *dbenv); -int get_next_seqno(void *tran, long long *seqno); +int get_next_seqno(void *tran, void *sc_tran, long long *seqno); int add_oplog_entry(struct ireq *iq, void *trans, int type, void *logrec, int logsz); int local_replicant_write_clear(struct ireq *in_iq, void *in_trans, diff --git a/db/db_tunables.c b/db/db_tunables.c index fb44df35c8..97d90a30b0 100644 --- a/db/db_tunables.c +++ b/db/db_tunables.c @@ -570,6 +570,7 @@ extern int gbl_sc_status_max_rows; extern int gbl_rep_process_pstack_time; extern int gbl_sql_recover_time; extern int gbl_legacy_requests_verbose; +extern int gbl_comdb2_oplog_preserve_seqno; extern void set_snapshot_impl(snap_impl_enum impl); extern const char *snap_impl_str(snap_impl_enum impl); diff --git a/db/db_tunables.h b/db/db_tunables.h index b5c5e4b6be..6e8db56786 100644 --- a/db/db_tunables.h +++ b/db/db_tunables.h @@ -2514,4 +2514,5 @@ REGISTER_TUNABLE("genshard_verbose", TUNABLE_BOOLEAN, &gbl_gen_shard_verbose, 0, NULL, NULL, NULL, NULL); REGISTER_TUNABLE("legacy_verbose", "Log all legacy (opcode+old sql) requests (default: off)", TUNABLE_BOOLEAN, &gbl_legacy_requests_verbose, 0, NULL, NULL, NULL, NULL); +REGISTER_TUNABLE("comdb2_oplog_preserve_seqno", "Preserve max value of the seqno in llmeta", TUNABLE_BOOLEAN, &gbl_comdb2_oplog_preserve_seqno, INTERNAL, NULL, NULL, NULL, NULL); #endif /* _DB_TUNABLES_H */ diff --git a/db/localrep.c b/db/localrep.c index 0a6c5ad790..96841baee0 100644 --- a/db/localrep.c +++ b/db/localrep.c @@ -828,7 +828,7 @@ int local_replicant_write_clear(struct ireq *in_iq, void *in_trans, goto done; } - rc = get_next_seqno(trans, &seqno); + rc = get_next_seqno(trans, NULL, &seqno); if (rc) { if (rc != RC_INTERNAL_RETRY) { printf("get_next_seqno unexpected rc %d\n", rc); diff --git a/db/osqlcomm.c b/db/osqlcomm.c index d0a2baa52f..3c9c2439b4 100644 --- a/db/osqlcomm.c +++ b/db/osqlcomm.c @@ -6894,6 +6894,8 @@ int osql_set_usedb(struct ireq *iq, const char *tablename, int tableversion, int return 0; } +int gbl_comdb2_oplog_preserve_seqno = 1; + /** * Handle the finalize part of a chain of schema changes * @@ -6925,6 +6927,15 @@ int osql_finalize_scs(struct ireq *iq, tran_type *trans) iq->usedb = iq->sc->db; assert(iq->sc->nothrevent); + if (gbl_comdb2_oplog_preserve_seqno && + IS_FASTINIT(iq->sc) && + gbl_replicate_local && + strcasecmp(iq->usedb->tablename, "comdb2_oplog") == 0) { + long long seqno; + if (get_next_seqno(trans, iq->sc_tran, &seqno) == 0) + bdb_set_seqno(iq->sc_tran, seqno); + } + rc = finalize_schema_change(iq, iq->sc_tran); iq->usedb = NULL; if (rc != SC_OK) { diff --git a/db/process_message.c b/db/process_message.c index 1c3521bda2..a78786881b 100644 --- a/db/process_message.c +++ b/db/process_message.c @@ -5478,6 +5478,8 @@ int process_command(struct dbenv *dbenv, char *line, int lline, int st) } } else if (tokcmp(tok, ltok, "do_not_use_modsnap_for_snapshot") == 0) { gbl_use_modsnap_for_snapshot = 0; + } else if (tokcmp(tok, ltok, "del_llmeta_comdb2_seqno") == 0) { + bdb_del_seqno(NULL); } else { // see if any plugins know how to handle this struct message_handler *h; diff --git a/db/sqlanalyze.c b/db/sqlanalyze.c index f9663b677e..d3cb8a86be 100644 --- a/db/sqlanalyze.c +++ b/db/sqlanalyze.c @@ -568,7 +568,7 @@ static int local_replicate_write_analyze(char *table) useqno = bdb_get_timestamp(thedb->bdb_env); memcpy(&seqno, &useqno, sizeof(seqno)); } else - rc = get_next_seqno(trans, &seqno); + rc = get_next_seqno(trans, NULL, &seqno); if (rc) { if (rc != RC_INTERNAL_RETRY) { logmsg(LOGMSG_ERROR, "get_next_seqno unexpected rc %d\n", rc); diff --git a/db/toblock.c b/db/toblock.c index a82b92969b..f0d2f3d6d6 100644 --- a/db/toblock.c +++ b/db/toblock.c @@ -2725,7 +2725,7 @@ static int localrep_seqno(tran_type *trans, block_state_t *p_blkstate) memcpy(&p_blkstate->seqno, &useqno, sizeof(unsigned long long)); } else { long long seqno; - rc = get_next_seqno(trans, &seqno); + rc = get_next_seqno(trans, NULL, &seqno); if (rc == 0) p_blkstate->seqno = seqno; } @@ -4842,7 +4842,7 @@ static int toblock_main_int(struct javasp_trans_state *javasp_trans_handle, stru if (gbl_replicate_local && get_dbtable_by_name("comdb2_oplog") && !gbl_replicate_local_concurrent) { long long seqno; - rc = get_next_seqno(trans, &seqno); + rc = get_next_seqno(trans, iq->sc_tran, &seqno); if (rc) { if (rc != RC_INTERNAL_RETRY) logmsg(LOGMSG_ERROR, @@ -5017,7 +5017,7 @@ static int toblock_main_int(struct javasp_trans_state *javasp_trans_handle, stru long long seqno; struct ireq aiq; - rc = get_next_seqno(trans, &seqno); + rc = get_next_seqno(trans, NULL, &seqno); if (rc) GOTOBACKOUT; @@ -6490,7 +6490,7 @@ static int keyless_range_delete_post_delete(void *record, size_t record_len, return 0; } -int get_next_seqno(void *tran, long long *seqno) +int get_next_seqno(void *tran, void *sc_tran, long long *seqno) { /* key is a long long + int + descriptor bytes for each */ char fndkey[14]; @@ -6498,6 +6498,7 @@ int get_next_seqno(void *tran, long long *seqno) int fndlen; int outnull, outsz; struct ireq iq; + int64_t stored_seqno = 0; init_fake_ireq(thedb, &iq); @@ -6517,9 +6518,10 @@ int get_next_seqno(void *tran, long long *seqno) &fndlen); if (rc) return rc; - if (fndlen == 0) - *seqno = 1; - else { + if (fndlen == 0) { + bdb_get_seqno(sc_tran ? sc_tran : tran, &stored_seqno); + *seqno = stored_seqno; + } else { long long next_seqno = 0; /* convert to client format value so we can use it. Call the Routine @@ -6540,4 +6542,3 @@ int get_next_seqno(void *tran, long long *seqno) } return 0; } - diff --git a/tests/comdb2_oplog_preserve_seqno.test/Makefile b/tests/comdb2_oplog_preserve_seqno.test/Makefile new file mode 100644 index 0000000000..b6cafefe6f --- /dev/null +++ b/tests/comdb2_oplog_preserve_seqno.test/Makefile @@ -0,0 +1,5 @@ +ifeq ($(TESTSROOTDIR),) + include ../testcase.mk +else + include $(TESTSROOTDIR)/testcase.mk +endif diff --git a/tests/comdb2_oplog_preserve_seqno.test/comdb2_oplog.csc2 b/tests/comdb2_oplog_preserve_seqno.test/comdb2_oplog.csc2 new file mode 100644 index 0000000000..545242ad56 --- /dev/null +++ b/tests/comdb2_oplog_preserve_seqno.test/comdb2_oplog.csc2 @@ -0,0 +1,23 @@ +tag ondisk { +longlong seqno +int blkpos +int optype +blob ops null=yes +} + +tag "log" { +longlong seqno +int optype +int blkpos +blob ops +} + +tag "justseq" { +longlong seqno +int blkpos +int optype +} + +keys { +"seqno" = seqno + blkpos +} diff --git a/tests/comdb2_oplog_preserve_seqno.test/expected b/tests/comdb2_oplog_preserve_seqno.test/expected new file mode 100644 index 0000000000..38db395170 --- /dev/null +++ b/tests/comdb2_oplog_preserve_seqno.test/expected @@ -0,0 +1,19 @@ +(rows inserted=1) +(rows deleted=1) +(seqno=1, blkpos=0, optype=1) +(seqno=1, blkpos=1, optype=3) +(seqno=2, blkpos=0, optype=2) +(seqno=2, blkpos=1, optype=3) +(rows inserted=1) +(rows deleted=1) +(seqno=4, blkpos=0, optype=1) +(seqno=4, blkpos=1, optype=3) +(seqno=5, blkpos=0, optype=2) +(seqno=5, blkpos=1, optype=3) +LLMETA_MAX_SEQNO: 3 +(rows inserted=1) +(rows deleted=1) +(seqno=1, blkpos=0, optype=1) +(seqno=1, blkpos=1, optype=3) +(seqno=2, blkpos=0, optype=2) +(seqno=2, blkpos=1, optype=3) diff --git a/tests/comdb2_oplog_preserve_seqno.test/lrl.options b/tests/comdb2_oplog_preserve_seqno.test/lrl.options new file mode 100644 index 0000000000..a353ed1431 --- /dev/null +++ b/tests/comdb2_oplog_preserve_seqno.test/lrl.options @@ -0,0 +1,3 @@ +replicate_local +comdb2_oplog_preserve_seqno 1 +table comdb2_oplog comdb2_oplog.csc2 diff --git a/tests/comdb2_oplog_preserve_seqno.test/runit b/tests/comdb2_oplog_preserve_seqno.test/runit new file mode 100755 index 0000000000..cfe552d7a3 --- /dev/null +++ b/tests/comdb2_oplog_preserve_seqno.test/runit @@ -0,0 +1,33 @@ +#!/usr/bin/env bash +bash -n "$0" | exit 1 + +# Verifies that the value of seqno is preserved across fastinits on comdb2_oplog + +dbnm=$1 +master=`cdb2sql --tabs ${CDB2_OPTIONS} $dbnm default "SELECT host FROM comdb2_cluster WHERE is_master='Y'"` +cdb2sql $dbnm --host $master "EXEC PROCEDURE sys.cmd.send('comdb2_oplog_preserve_seqno 1')" >/dev/null + +cdb2sql -s ${CDB2_OPTIONS} $dbnm default - >actual <>actual + +cdb2sql $dbnm --host $master "EXEC PROCEDURE sys.cmd.send('del_llmeta_comdb2_seqno')" >/dev/null +cdb2sql $dbnm --host $master "EXEC PROCEDURE sys.cmd.send('comdb2_oplog_preserve_seqno 0')" >/dev/null +cdb2sql $dbnm --host $master "EXEC PROCEDURE sys.cmd.send('del_llmeta_comdb2_seqno')" >/dev/null +cdb2sql -s ${CDB2_OPTIONS} $dbnm default - >>actual <