Skip to content

Allow nested rtxn from wtxn #3

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 16 commits into
base: mdb.master
Choose a base branch
from
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 45 additions & 12 deletions libraries/liblmdb/mdb.c
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ static NtCloseFunc *NtClose;
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <stdatomic.h>

#ifdef _MSC_VER
#include <io.h>
Expand Down Expand Up @@ -167,7 +168,11 @@ typedef SSIZE_T ssize_t;
# if !(defined(MDB_USE_POSIX_MUTEX) || defined(MDB_USE_POSIX_SEM))
# define MDB_USE_SYSV_SEM 1
# endif
# if defined(__APPLE__)
# define MDB_FDATASYNC(fd) fcntl(fd, F_FULLFSYNC)
# else
# define MDB_FDATASYNC fsync
# endif
#elif defined(__ANDROID__)
# define MDB_FDATASYNC fsync
#endif
Expand Down Expand Up @@ -1298,6 +1303,8 @@ struct MDB_txn {
MDB_txn *mt_parent; /**< parent of a nested txn */
/** Nested txn under this txn, set together with flag #MDB_TXN_HAS_CHILD */
MDB_txn *mt_child;
/** Number of nested read-only (RDONLY) child txns currently open under this txn */
atomic_uint mt_rdonly_child_count;
pgno_t mt_next_pgno; /**< next unallocated page */
#ifdef MDB_VL32
pgno_t mt_last_pgno; /**< last written page */
Expand Down Expand Up @@ -1379,11 +1386,11 @@ struct MDB_txn {
#define MDB_TXN_RDONLY MDB_RDONLY /**< read-only transaction */
/* internal txn flags */
#define MDB_TXN_WRITEMAP MDB_WRITEMAP /**< copy of #MDB_env flag in writers */
#define MDB_TXN_FINISHED 0x01 /**< txn is finished or never began */
#define MDB_TXN_ERROR 0x02 /**< txn is unusable after an error */
#define MDB_TXN_DIRTY 0x04 /**< must write, even if dirty list is empty */
#define MDB_TXN_SPILLS 0x08 /**< txn or a parent has spilled pages */
#define MDB_TXN_HAS_CHILD 0x10 /**< txn has an #MDB_txn.%mt_child */
#define MDB_TXN_FINISHED 0x01 /**< txn is finished or never began */
#define MDB_TXN_ERROR 0x02 /**< txn is unusable after an error */
#define MDB_TXN_DIRTY 0x04 /**< must write, even if dirty list is empty */
#define MDB_TXN_SPILLS 0x08 /**< txn or a parent has spilled pages */
#define MDB_TXN_HAS_CHILD 0x10 /**< txn has an #MDB_txn.%mt_child */
/** most operations on the txn are currently illegal */
#define MDB_TXN_BLOCKED (MDB_TXN_FINISHED|MDB_TXN_ERROR|MDB_TXN_HAS_CHILD)
/** @} */
Expand Down Expand Up @@ -2883,7 +2890,7 @@ mdb_env_sync0(MDB_env *env, int force, pgno_t numpgs)
? MS_ASYNC : MS_SYNC;
if (MDB_MSYNC(env->me_map, env->me_psize * numpgs, flags))
rc = ErrCode();
#ifdef _WIN32
#if defined(_WIN32) || defined(__APPLE__)
else if (flags == MS_SYNC && MDB_FDATASYNC(env->me_fd))
rc = ErrCode();
#endif
Expand Down Expand Up @@ -3140,6 +3147,7 @@ mdb_txn_renew0(MDB_txn *txn)
mdb_debug = MDB_DBG_INFO;
#endif
txn->mt_child = NULL;
txn->mt_rdonly_child_count = 0;
txn->mt_loose_pgs = NULL;
txn->mt_loose_count = 0;
txn->mt_dirty_room = MDB_IDL_UM_MAX;
Expand Down Expand Up @@ -3208,6 +3216,7 @@ mdb_txn_begin(MDB_env *env, MDB_txn *parent, unsigned int flags, MDB_txn **ret)
MDB_txn *txn;
MDB_ntxn *ntxn;
int rc, size, tsize;
int is_nested_rdonly = 0;

flags &= MDB_TXN_BEGIN_FLAGS;
flags |= env->me_flags & MDB_WRITEMAP;
Expand All @@ -3216,8 +3225,13 @@ mdb_txn_begin(MDB_env *env, MDB_txn *parent, unsigned int flags, MDB_txn **ret)
return EACCES;

if (parent) {
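// Strip MDB_RDONLY from flags so the nested-txn flag check below does not
// reject the request, but remember that a read-only child was asked for.
// NOTE(review): `flags | MDB_RDONLY` is non-zero whenever MDB_RDONLY is;
// `flags & MDB_RDONLY` looks like what was intended -- confirm.
// TODO: integrate nested read-only txns properly instead of masking the flag.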
is_nested_rdonly = flags | MDB_RDONLY;
flags &= ~MDB_RDONLY;
/* Nested transactions: Max 1 child, write txns only, no writemap */
flags |= parent->mt_flags;
// TODO disallow when mt_rdonly_child_count > 0
if (flags & (MDB_RDONLY|MDB_WRITEMAP|MDB_TXN_BLOCKED)) {
return (parent->mt_flags & MDB_TXN_RDONLY) ? EINVAL : MDB_BAD_TXN;
}
Expand Down Expand Up @@ -3272,8 +3286,14 @@ mdb_txn_begin(MDB_env *env, MDB_txn *parent, unsigned int flags, MDB_txn **ret)
txn->mt_u.dirty_list[0].mid = 0;
txn->mt_spill_pgs = NULL;
txn->mt_next_pgno = parent->mt_next_pgno;
parent->mt_flags |= MDB_TXN_HAS_CHILD;
parent->mt_child = txn;
if (is_nested_rdonly != 0) {
parent->mt_child = NULL;
atomic_fetch_add(&parent->mt_rdonly_child_count, 1);
} else {
parent->mt_flags |= MDB_TXN_HAS_CHILD;
parent->mt_child = txn;
parent->mt_rdonly_child_count = 0;
}
txn->mt_parent = parent;
txn->mt_numdbs = parent->mt_numdbs;
#ifdef MDB_VL32
Expand Down Expand Up @@ -3374,6 +3394,7 @@ static void
mdb_txn_end(MDB_txn *txn, unsigned mode)
{
MDB_env *env = txn->mt_env;
int iamtheone = 0;
#if MDB_DEBUG
static const char *const names[] = MDB_END_NAMES;
#endif
Expand Down Expand Up @@ -3425,15 +3446,21 @@ mdb_txn_end(MDB_txn *txn, unsigned mode)
if (env->me_txns)
UNLOCK_MUTEX(env->me_wmutex);
} else {
txn->mt_parent->mt_child = NULL;
txn->mt_parent->mt_flags &= ~MDB_TXN_HAS_CHILD;
env->me_pgstate = ((MDB_ntxn *)txn)->mnt_pgstate;
if (F_ISSET(txn->mt_parent->mt_flags, MDB_TXN_HAS_CHILD) ||
(iamtheone = atomic_fetch_sub(&txn->mt_parent->mt_rdonly_child_count, 1) == 1))
{
txn->mt_parent->mt_child = NULL;
txn->mt_parent->mt_flags &= ~MDB_TXN_HAS_CHILD;
env->me_pgstate = ((MDB_ntxn *)txn)->mnt_pgstate;
}
mdb_midl_free(txn->mt_free_pgs);
free(txn->mt_u.dirty_list);
}
mdb_midl_free(txn->mt_spill_pgs);

mdb_midl_free(pghead);
if (!txn->mt_parent || (txn->mt_parent && iamtheone)) {
mdb_midl_free(pghead);
}
}
#ifdef MDB_VL32
if (!txn->mt_parent) {
Expand Down Expand Up @@ -3483,6 +3510,9 @@ _mdb_txn_abort(MDB_txn *txn)
if (txn == NULL)
return;

// All nested read-only child txns must be ended before their parent is aborted
mdb_tassert(txn, atomic_load(&txn->mt_rdonly_child_count) == 0);

if (txn->mt_child)
_mdb_txn_abort(txn->mt_child);

Expand Down Expand Up @@ -5079,6 +5109,9 @@ mdb_env_open2(MDB_env *env, int prev)
#endif
env->me_maxpg = env->me_mapsize / env->me_psize;

if (prev && env->me_txns)
env->me_txns->mti_txnid = meta.mm_txnid;

#if MDB_DEBUG
{
MDB_meta *meta = mdb_env_pick_meta(env);
Expand Down