Skip to content

Kill leftovers of pre-MVCC read conflicts #320

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 31, 2020
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 2 additions & 16 deletions src/ZODB/Connection.py
Original file line number Diff line number Diff line change
@@ -148,10 +148,8 @@ def before_instance(before):
self._pre_cache = {}

# List of all objects (not oids) registered as modified by the
# persistence machinery, or by add(), or whose access caused a
# ReadConflictError (just to be able to clean them up from the
# cache on abort with the other modified objects). All objects
# of this list are either in _cache or in _added.
# persistence machinery.
# All objects of this list are either in _cache or in _added.
self._registered_objects = [] # [object]

# ids and serials of objects for which readCurrent was called
@@ -182,15 +180,6 @@ def before_instance(before):
# in the cache on abort and in other connections on finish.
self._modified = [] # [oid]

# We intend to prevent committing a transaction in which
# ReadConflictError occurs. _conflicts is the set of oids that
# experienced ReadConflictError. Any time we raise ReadConflictError,
# the oid should be added to this set, and we should be sure that the
# object is registered. Because it's registered, Connection.commit()
# will raise ReadConflictError again (because the oid is in
# _conflicts).
self._conflicts = {}

# To support importFile(), implemented in the ExportImport base
# class, we need to run _importDuringCommit() from our commit()
# method. If _import is not None, it is a two-tuple of arguments
@@ -460,7 +449,6 @@ def _abort(self):

def _tpc_cleanup(self):
"""Performs cleanup operations to support tpc_finish and tpc_abort."""
self._conflicts.clear()
self._needs_to_join = True
self._registered_objects = []
self._creating.clear()
@@ -528,8 +516,6 @@ def _commit(self, transaction):
for obj in self._registered_objects:
oid = obj._p_oid
assert oid
if oid in self._conflicts:
raise ReadConflictError(object=obj)

if obj._p_jar is not self:
raise InvalidObjectReference(obj, obj._p_jar)
13 changes: 10 additions & 3 deletions src/ZODB/POSException.py
Original file line number Diff line number Diff line change
@@ -144,10 +144,17 @@ def get_serials(self):
return self.serials

class ReadConflictError(ConflictError):
"""Conflict detected when object was loaded.
"""Conflict detected when object was requested to stay unchanged.

An attempt was made to read an object that has changed in another
transaction (eg. another thread or process).
An object was requested to stay not modified via
checkCurrentSerialInTransaction, and at commit time was found to be
changed by another transaction (eg. another thread or process).

Note: for backward compatibility ReadConflictError is also raised on
plain object access if

- object is found to be removed, and
- there is possibility that database pack was running simultaneously.
"""
def __init__(self, message=None, object=None, serials=None, **kw):
if message is None:
6 changes: 0 additions & 6 deletions src/ZODB/interfaces.py
Original file line number Diff line number Diff line change
@@ -78,12 +78,6 @@ class IConnection(Interface):
Two options affect consistency. By default, the mvcc and synch
options are enabled by default.

If you pass mvcc=False to db.open(), the Connection will never read
non-current revisions of an object. Instead it will raise a
ReadConflictError to indicate that the current revision is
unavailable because it was written after the current transaction
began.

The logic for handling modifications assumes that the thread that
opened a Connection (called db.open()) is the thread that will use
the Connection. If this is not true, you should pass synch=False
29 changes: 27 additions & 2 deletions src/ZODB/mvccadapter.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# -*- coding: utf-8 -*-
"""Adapt IStorage objects to IMVCCStorage

This is a largely internal implementation of ZODB, especially DB and
@@ -9,7 +10,7 @@
import zope.interface

from . import interfaces, serialize, POSException
from .utils import p64, u64, Lock
from .utils import p64, u64, Lock, oid_repr, tid_repr

class Base(object):

@@ -152,7 +153,31 @@ def load(self, oid):
assert self._start is not None
r = self._storage.loadBefore(oid, self._start)
if r is None:
raise POSException.ReadConflictError(repr(oid))
# object was deleted or not-yet-created.
# raise ReadConflictError - not - POSKeyError due to backward
# compatibility: a pack(t+δ) could be running simultaneously to our
# transaction that observes database as of t state. Such pack,
# because it packs the storage from a "future-to-us" point of view,
# can remove object revisions that we can try to load, for example:
#
# txn1 <-- t
# obj.revA
#
# txn2 <-- t+δ
# obj.revB
#
# for such case we want user transaction to be restarted - not
# failed - by raising ConflictError subclass.
#
# XXX we don't detect for pack to be actually running - just assume
# the worst. It would be good if storage could provide information
# whether pack is/was actually running and its details, take that
# into account, and raise ReadConflictError only in the presence of
# database being simultaneously updated from back of its log.
raise POSException.ReadConflictError(
"load %s @%s: object deleted, likely by simultaneous pack" %
(oid_repr(oid), tid_repr(p64(u64(self._start) - 1))))

return r[:2]

def prefetch(self, oids):
151 changes: 0 additions & 151 deletions src/ZODB/tests/testZODB.py
Original file line number Diff line number Diff line change
@@ -13,7 +13,6 @@
##############################################################################
from persistent import Persistent
from persistent.mapping import PersistentMapping
from ZODB.POSException import ReadConflictError
from ZODB.POSException import TransactionFailedError

import doctest
@@ -445,156 +444,6 @@ def checkMultipleUndoInOneTransaction(self):
transaction.abort()
conn.close()

class ReadConflictTests(ZODB.tests.util.TestCase):

def setUp(self):
ZODB.tests.utils.TestCase.setUp(self)
self._storage = ZODB.MappingStorage.MappingStorage()

def readConflict(self, shouldFail=True):
# Two transactions run concurrently. Each reads some object,
# then one commits and the other tries to read an object
# modified by the first. This read should fail with a conflict
# error because the object state read is not necessarily
# consistent with the objects read earlier in the transaction.

tm1 = transaction.TransactionManager()
conn = self._db.open(transaction_manager=tm1)
r1 = conn.root()
r1["p"] = self.obj
self.obj.child1 = P()
tm1.get().commit()

# start a new transaction with a new connection
tm2 = transaction.TransactionManager()
cn2 = self._db.open(transaction_manager=tm2)
# start a new transaction with the other connection
r2 = cn2.root()

self.assertEqual(r1._p_serial, r2._p_serial)

self.obj.child2 = P()
tm1.get().commit()

# resume the transaction using cn2
obj = r2["p"]
# An attempt to access obj should fail, because r2 was read
# earlier in the transaction and obj was modified by the othe
# transaction.
if shouldFail:
self.assertRaises(ReadConflictError, lambda: obj.child1)
# And since ReadConflictError was raised, attempting to commit
# the transaction should re-raise it. checkNotIndependent()
# failed this part of the test for a long time.
self.assertRaises(ReadConflictError, tm2.get().commit)

# And since that commit failed, trying to commit again should
# fail again.
self.assertRaises(TransactionFailedError, tm2.get().commit)
# And again.
self.assertRaises(TransactionFailedError, tm2.get().commit)
# Etc.
self.assertRaises(TransactionFailedError, tm2.get().commit)

else:
# make sure that accessing the object succeeds
obj.child1
tm2.get().abort()


def checkReadConflict(self):
self.obj = P()
self.readConflict()

def checkReadConflictIgnored(self):
# Test that an application that catches a read conflict and
# continues can not commit the transaction later.
root = self._db.open().root()
root["real_data"] = real_data = PersistentMapping()
root["index"] = index = PersistentMapping()

real_data["a"] = PersistentMapping({"indexed_value": 0})
real_data["b"] = PersistentMapping({"indexed_value": 1})
index[1] = PersistentMapping({"b": 1})
index[0] = PersistentMapping({"a": 1})
transaction.commit()

# load some objects from one connection
tm = transaction.TransactionManager()
cn2 = self._db.open(transaction_manager=tm)
r2 = cn2.root()
real_data2 = r2["real_data"]
index2 = r2["index"]

real_data["b"]["indexed_value"] = 0
del index[1]["b"]
index[0]["b"] = 1
transaction.commit()

del real_data2["a"]
try:
del index2[0]["a"]
except ReadConflictError:
# This is the crux of the text. Ignore the error.
pass
else:
self.fail("No conflict occurred")

# real_data2 still ready to commit
self.assertTrue(real_data2._p_changed)

# index2 values not ready to commit
self.assertTrue(not index2._p_changed)
self.assertTrue(not index2[0]._p_changed)
self.assertTrue(not index2[1]._p_changed)

self.assertRaises(ReadConflictError, tm.get().commit)
self.assertRaises(TransactionFailedError, tm.get().commit)
tm.get().abort()

def checkReadConflictErrorClearedDuringAbort(self):
# When a transaction is aborted, the "memory" of which
# objects were the cause of a ReadConflictError during
# that transaction should be cleared.
root = self._db.open().root()
data = PersistentMapping({'d': 1})
root["data"] = data
transaction.commit()

# Provoke a ReadConflictError.
tm2 = transaction.TransactionManager()
cn2 = self._db.open(transaction_manager=tm2)
r2 = cn2.root()
data2 = r2["data"]

data['d'] = 2
transaction.commit()

try:
data2['d'] = 3
except ReadConflictError:
pass
else:
self.fail("No conflict occurred")

# Explicitly abort cn2's transaction.
tm2.get().abort()

# cn2 should retain no memory of the read conflict after an abort(),
# but 3.2.3 had a bug wherein it did.
data_conflicts = data._p_jar._conflicts
data2_conflicts = data2._p_jar._conflicts
self.assertFalse(data_conflicts)
self.assertFalse(data2_conflicts) # this used to fail

# And because of that, we still couldn't commit a change to data2['d']
# in the new transaction.
cn2.sync() # process the invalidation for data2['d']
data2['d'] = 3
tm2.get().commit() # 3.2.3 used to raise ReadConflictError

cn2.close()

class PoisonedError(Exception):
pass

5 changes: 2 additions & 3 deletions src/ZODB/tests/testmvcc.py
Original file line number Diff line number Diff line change
@@ -25,9 +25,8 @@
always see a consistent view of the database while it is executing.
If transaction A is running, has already read an object O1, and a
different transaction B modifies object O2, then transaction A can no
longer read the current revision of O2. It must either read the
version of O2 that is consistent with O1 or raise a ReadConflictError.
When MVCC is in use, A will do the former.
longer read the current revision of O2. It must read the
version of O2 that is consistent with O1.

This note includes doctests that explain how MVCC is implemented (and
test that the implementation is correct). The tests use a
3 changes: 3 additions & 0 deletions src/ZODB/transact.py
Original file line number Diff line number Diff line change
@@ -43,6 +43,9 @@ def g(*args, **kwargs):
try:
r = f(*args, **kwargs)
except ReadConflictError as msg:
# the only way ReadConflictError can happen here is due to
# simultaneous pack removing objects revision that f could try
# to load.
transaction.abort()
if not n:
raise