Skip to content

Commit 372de38

Browse files
committed
minor changes to the codebase from uqfoundation#475
1 parent 25a7e45 commit 372de38

File tree

5 files changed

+128
-114
lines changed

5 files changed

+128
-114
lines changed

dill/__init__.py

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,10 @@
1111
from .__info__ import __version__, __author__, __doc__, __license__
1212
except: # pragma: no cover
1313
import os
14-
import sys
14+
import sys
1515
parent = os.path.dirname(os.path.abspath(os.path.dirname(__file__)))
1616
sys.path.append(parent)
17-
# get distribution meta info
17+
# get distribution meta info
1818
from version import (__version__, __author__,
1919
get_license_text, get_readme_as_rst)
2020
__license__ = get_license_text(os.path.join(parent, 'LICENSE'))
@@ -24,9 +24,9 @@
2424

2525

2626
from ._dill import (
27-
Pickler, Unpickler,
28-
check, copy, dump, dumps, load, loads, pickle, pickles, register,
29-
DEFAULT_PROTOCOL, HIGHEST_PROTOCOL, CONTENTS_FMODE, FILE_FMODE, HANDLE_FMODE,
27+
dump, dumps, load, loads, copy,
28+
Pickler, Unpickler, register, pickle, pickles, check,
29+
DEFAULT_PROTOCOL, HIGHEST_PROTOCOL, HANDLE_FMODE, CONTENTS_FMODE, FILE_FMODE,
3030
PickleError, PickleWarning, PicklingError, PicklingWarning, UnpicklingError,
3131
UnpicklingWarning,
3232
)
@@ -42,8 +42,6 @@
4242
# make sure "trace" is turned off
4343
logger.trace(False)
4444

45-
from importlib import reload
46-
4745
objects = {}
4846
# local import of dill._objects
4947
#from . import _objects
@@ -68,6 +66,7 @@ def load_types(pickleable=True, unpickleable=True):
6866
Returns:
6967
None
7068
"""
69+
from importlib import reload
7170
# local import of dill.objects
7271
from . import _objects
7372
if pickleable:

dill/_dill.py

Lines changed: 77 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -8,17 +8,27 @@
88
"""
99
dill: a utility for serialization of python objects
1010
11+
The main API of the package consists of the functions :func:`dump` and
12+
:func:`dumps` for serialization ("pickling"), and :func:`load`
13+
and :func:`loads` for deserialization ("unpickling"). The
14+
functions :func:`~dill.session.dump_module` and
15+
:func:`~dill.session.load_module` can be used to save and restore
16+
the interpreter session.
17+
1118
Based on code written by Oren Tirosh and Armin Ronacher.
1219
Extended to a (near) full set of the builtin types (in types module),
1320
and coded to the pickle interface, by <mmckerns@caltech.edu>.
1421
Initial port to python3 by Jonathan Dobson, continued by mmckerns.
1522
Test against "all" python types (Std. Lib. CH 1-15 @ 2.7) by mmckerns.
1623
Test against CH16+ Std. Lib. ... TBD.
1724
"""
25+
26+
from __future__ import annotations
27+
1828
__all__ = [
19-
'Pickler','Unpickler',
20-
'check','copy','dump','dumps','load','loads','pickle','pickles','register',
21-
'DEFAULT_PROTOCOL','HIGHEST_PROTOCOL','CONTENTS_FMODE','FILE_FMODE','HANDLE_FMODE',
29+
'dump','dumps','load','loads','copy',
30+
'Pickler','Unpickler','register','pickle','pickles','check',
31+
'DEFAULT_PROTOCOL','HIGHEST_PROTOCOL','HANDLE_FMODE','CONTENTS_FMODE','FILE_FMODE',
2232
'PickleError','PickleWarning','PicklingError','PicklingWarning','UnpicklingError',
2333
'UnpicklingWarning',
2434
]
@@ -39,6 +49,7 @@
3949
#XXX: get types from .objtypes ?
4050
import builtins as __builtin__
4151
from pickle import _Pickler as StockPickler, Unpickler as StockUnpickler
52+
from pickle import GLOBAL, POP
4253
from _thread import LockType
4354
from _thread import RLock as RLockType
4455
#from io import IOBase
@@ -58,6 +69,7 @@
5869
import marshal
5970
import gc
6071
# import zlib
72+
import dataclasses
6173
from weakref import ReferenceType, ProxyType, CallableProxyType
6274
from collections import OrderedDict
6375
from functools import partial
@@ -158,22 +170,19 @@ def get_file_type(*args, **kwargs):
158170
from socket import socket as SocketType
159171
#FIXME: additionally calls ForkingPickler.register several times
160172
from multiprocessing.reduction import _reduce_socket as reduce_socket
161-
try:
173+
try: #pragma: no cover
162174
IS_IPYTHON = __IPYTHON__ # is True
163-
ExitType = None # IPython.core.autocall.ExitAutocall
164-
singletontypes = ['exit', 'quit', 'get_ipython']
175+
ExitType = None # IPython.core.autocall.ExitAutocall
176+
IPYTHON_SINGLETONS = ('exit', 'quit', 'get_ipython')
165177
except NameError:
166178
IS_IPYTHON = False
167179
try: ExitType = type(exit) # apparently 'exit' can be removed
168180
except NameError: ExitType = None
169-
singletontypes = []
181+
IPYTHON_SINGLETONS = ()
170182

171183
import inspect
172-
import dataclasses
173184
import typing
174185

175-
from pickle import GLOBAL
176-
177186

178187
### Shims for different versions of Python and dill
179188
class Sentinel(object):
@@ -212,6 +221,9 @@ def __reduce_ex__(self, protocol):
212221
#: Pickles the entire file (handle and contents), preserving mode and position.
213222
FILE_FMODE = 2
214223

224+
# Exceptions commonly raised by unpickleable objects in the Standard Library.
225+
UNPICKLEABLE_ERRORS = (PicklingError, TypeError, ValueError, NotImplementedError)
226+
215227
### Shorthands (modified from python2.5/lib/pickle.py)
216228
def copy(obj, *args, **kwds):
217229
"""
@@ -320,9 +332,20 @@ class UnpicklingWarning(PickleWarning, UnpicklingError):
320332
### Extend the Picklers
321333
class Pickler(StockPickler):
322334
"""python's Pickler extended to interpreter sessions"""
323-
dispatch = MetaCatchingDict(StockPickler.dispatch.copy())
324-
_session = False
335+
dispatch: typing.Dict[type, typing.Callable[[Pickler, typing.Any], None]] \
336+
= MetaCatchingDict(StockPickler.dispatch.copy())
337+
"""The dispatch table, a dictionary of serializing functions used
338+
by Pickler to save objects of specific types. Use :func:`pickle`
339+
or :func:`register` to associate types to custom functions.
340+
341+
:meta hide-value:
342+
"""
325343
from .settings import settings
344+
# Flags set by dump_module() in dill.session:
345+
_refimported = False
346+
_refonfail = False
347+
_session = False
348+
_first_pass = False
326349

327350
def __init__(self, file, *args, **kwds):
328351
settings = Pickler.settings
@@ -341,11 +364,12 @@ def __init__(self, file, *args, **kwds):
341364
self._file = file
342365

343366
def save(self, obj, save_persistent_id=True):
344-
# register if the object is a numpy ufunc
345-
# thanks to Paul Kienzle for pointing out ufuncs didn't pickle
367+
# numpy hack
346368
obj_type = type(obj)
347369
if NumpyArrayType and not (obj_type is type or obj_type in Pickler.dispatch):
348-
if NumpyUfuncType and numpyufunc(obj_type):
370+
# register if the object is a numpy ufunc
371+
# thanks to Paul Kienzle for pointing out ufuncs didn't pickle
372+
if numpyufunc(obj_type):
349373
@register(obj_type)
350374
def save_numpy_ufunc(pickler, obj):
351375
logger.trace(pickler, "Nu: %s", obj)
@@ -359,7 +383,7 @@ def save_numpy_ufunc(pickler, obj):
359383
# def uload(name): return getattr(numpy, name)
360384
# copy_reg.pickle(NumpyUfuncType, udump, uload)
361385
# register if the object is a numpy dtype
362-
if NumpyDType and numpydtype(obj_type):
386+
if numpydtype(obj_type):
363387
@register(obj_type)
364388
def save_numpy_dtype(pickler, obj):
365389
logger.trace(pickler, "Dt: %s", obj)
@@ -372,7 +396,7 @@ def save_numpy_dtype(pickler, obj):
372396
# def udump(f): return uload, (f.type,)
373397
# copy_reg.pickle(NumpyDTypeType, udump, uload)
374398
# register if the object is a subclassed numpy array instance
375-
if NumpyArrayType and ndarraysubclassinstance(obj_type):
399+
if ndarraysubclassinstance(obj_type):
376400
@register(obj_type)
377401
def save_numpy_array(pickler, obj):
378402
logger.trace(pickler, "Nu: (%s, %s)", obj.shape, obj.dtype)
@@ -381,8 +405,9 @@ def save_numpy_array(pickler, obj):
381405
pickler.save_reduce(_create_array, (f,args,state,npdict), obj=obj)
382406
logger.trace(pickler, "# Nu")
383407
return
384-
# end hack
385-
if GENERATOR_FAIL and type(obj) == GeneratorType:
408+
# end numpy hack
409+
410+
if GENERATOR_FAIL and obj_type is GeneratorType:
386411
msg = "Can't pickle %s: attribute lookup builtins.generator failed" % GeneratorType
387412
raise PicklingError(msg)
388413
StockPickler.save(self, obj, save_persistent_id)
@@ -392,7 +417,6 @@ def save_numpy_array(pickler, obj):
392417
def dump(self, obj): #NOTE: if settings change, need to update attributes
393418
logger.trace_setup(self)
394419
StockPickler.dump(self, obj)
395-
396420
dump.__doc__ = StockPickler.dump.__doc__
397421

398422
class Unpickler(StockUnpickler):
@@ -436,12 +460,12 @@ def dispatch_table():
436460
pickle_dispatch_copy = StockPickler.dispatch.copy()
437461

438462
def pickle(t, func):
439-
"""expose dispatch table for user-created extensions"""
463+
"""expose :attr:`~Pickler.dispatch` table for user-created extensions"""
440464
Pickler.dispatch[t] = func
441465
return
442466

443467
def register(t):
444-
"""register type to Pickler's dispatch table """
468+
"""decorator to register types to Pickler's :attr:`~Pickler.dispatch` table"""
445469
def proxy(func):
446470
Pickler.dispatch[t] = func
447471
return func
@@ -460,7 +484,7 @@ def use_diff(on=True):
460484
Reduces size of pickles by only including object which have changed.
461485
462486
Decreases pickle size but increases CPU time needed.
463-
Also helps avoid some unpicklable objects.
487+
Also helps avoid some unpickleable objects.
464488
MUST be called at start of script, otherwise changes will not be recorded.
465489
"""
466490
global _use_diff, diff
@@ -1088,7 +1112,7 @@ def _save_with_postproc(pickler, reduction, is_pickler_dill=None, obj=Getattr.NO
10881112
else:
10891113
pickler.save_reduce(*reduction)
10901114
# pop None created by calling preprocessing step off stack
1091-
pickler.write(bytes('0', 'UTF-8'))
1115+
pickler.write(POP)
10921116

10931117
#@register(CodeType)
10941118
#def save_code(pickler, obj):
@@ -1158,7 +1182,7 @@ def save_code(pickler, obj):
11581182
return
11591183

11601184
def _repr_dict(obj):
1161-
"""make a short string representation of a dictionary"""
1185+
"""Make a short string representation of a dictionary."""
11621186
return "<%s object at %#012x>" % (type(obj).__name__, id(obj))
11631187

11641188
@register(dict)
@@ -1470,7 +1494,7 @@ def save_cell(pickler, obj):
14701494
# The result of this function call will be None
14711495
pickler.save_reduce(_shims._delattr, (obj, 'cell_contents'))
14721496
# pop None created by calling _delattr off stack
1473-
pickler.write(bytes('0', 'UTF-8'))
1497+
pickler.write(POP)
14741498
logger.trace(pickler, "# Ce3")
14751499
return
14761500
if is_dill(pickler, child=True):
@@ -1606,7 +1630,7 @@ def save_module(pickler, obj):
16061630
pass
16071631
else:
16081632
logger.trace(pickler, "M2: %s with diff", obj)
1609-
logger.trace(pickler, "Diff: %s", changed.keys())
1633+
logger.info("Diff: %s", changed.keys())
16101634
pickler.save_reduce(_import_module, (obj.__name__,), obj=obj,
16111635
state=changed)
16121636
logger.trace(pickler, "# M2")
@@ -1617,15 +1641,20 @@ def save_module(pickler, obj):
16171641
logger.trace(pickler, "# M1")
16181642
else:
16191643
builtin_mod = _is_builtin_module(obj)
1620-
if obj.__name__ not in ("builtins", "dill", "dill._dill") and not builtin_mod or \
1621-
is_dill(pickler, child=True) and obj is pickler._main:
1644+
is_session_main = is_dill(pickler, child=True) and obj is pickler._main
1645+
if (obj.__name__ not in ("builtins", "dill", "dill._dill") and not builtin_mod
1646+
or is_session_main):
16221647
logger.trace(pickler, "M1: %s", obj)
1623-
_main_dict = obj.__dict__.copy() #XXX: better no copy? option to copy?
1624-
[_main_dict.pop(item, None) for item in singletontypes
1625-
+ ["__builtins__", "__loader__"]]
1648+
# Hack for handling module-type objects in load_module().
16261649
mod_name = obj.__name__ if _is_imported_module(obj) else '__runtime__.%s' % obj.__name__
1627-
pickler.save_reduce(_import_module, (mod_name,), obj=obj,
1628-
state=_main_dict)
1650+
# Second references are saved as __builtin__.__main__ in save_module_dict().
1651+
main_dict = obj.__dict__.copy()
1652+
for item in ('__builtins__', '__loader__'):
1653+
main_dict.pop(item, None)
1654+
for item in IPYTHON_SINGLETONS: #pragma: no cover
1655+
if getattr(main_dict.get(item), '__module__', '').startswith('IPython'):
1656+
del main_dict[item]
1657+
pickler.save_reduce(_import_module, (mod_name,), obj=obj, state=main_dict)
16291658
logger.trace(pickler, "# M1")
16301659
elif obj.__name__ == "dill._dill":
16311660
logger.trace(pickler, "M2: %s", obj)
@@ -1635,7 +1664,6 @@ def save_module(pickler, obj):
16351664
logger.trace(pickler, "M2: %s", obj)
16361665
pickler.save_reduce(_import_module, (obj.__name__,), obj=obj)
16371666
logger.trace(pickler, "# M2")
1638-
return
16391667
return
16401668

16411669
@register(TypeType)
@@ -1661,7 +1689,7 @@ def save_type(pickler, obj, postproc_list=None):
16611689
elif obj is type(None):
16621690
logger.trace(pickler, "T7: %s", obj)
16631691
#XXX: pickler.save_reduce(type, (None,), obj=obj)
1664-
pickler.write(bytes('c__builtin__\nNoneType\n', 'UTF-8'))
1692+
pickler.write(GLOBAL + b'__builtin__\nNoneType\n')
16651693
logger.trace(pickler, "# T7")
16661694
elif obj is NotImplementedType:
16671695
logger.trace(pickler, "T7: %s", obj)
@@ -1702,9 +1730,18 @@ def save_type(pickler, obj, postproc_list=None):
17021730
else:
17031731
logger.trace(pickler, "T4: %s", obj)
17041732
if incorrectly_named:
1705-
warnings.warn('Cannot locate reference to %r.' % (obj,), PicklingWarning)
1733+
warnings.warn(
1734+
"Cannot locate reference to %r." % (obj,),
1735+
PicklingWarning,
1736+
stacklevel=3,
1737+
)
17061738
if obj_recursive:
1707-
warnings.warn('Cannot pickle %r: %s.%s has recursive self-references that trigger a RecursionError.' % (obj, obj.__module__, obj_name), PicklingWarning)
1739+
warnings.warn(
1740+
"Cannot pickle %r: %s.%s has recursive self-references that "
1741+
"trigger a RecursionError." % (obj, obj.__module__, obj_name),
1742+
PicklingWarning,
1743+
stacklevel=3,
1744+
)
17081745
#print (obj.__dict__)
17091746
#print ("%s\n%s" % (type(obj), obj.__name__))
17101747
#print ("%s\n%s" % (obj.__bases__, obj.__dict__))
@@ -1840,7 +1877,7 @@ def save_function(pickler, obj):
18401877
# Change the value of the cell
18411878
pickler.save_reduce(*possible_postproc)
18421879
# pop None created by calling preprocessing step off stack
1843-
pickler.write(bytes('0', 'UTF-8'))
1880+
pickler.write(POP)
18441881

18451882
logger.trace(pickler, "# F1")
18461883
else:
@@ -1949,7 +1986,7 @@ def pickles(obj,exact=False,safe=False,**kwds):
19491986
"""
19501987
if safe: exceptions = (Exception,) # RuntimeError, ValueError
19511988
else:
1952-
exceptions = (TypeError, AssertionError, NotImplementedError, PicklingError, UnpicklingError)
1989+
exceptions = UNPICKLEABLE_ERRORS + (AssertionError, UnpicklingError)
19531990
try:
19541991
pik = copy(obj, **kwds)
19551992
#FIXME: should check types match first, then check content if "exact"

dill/session.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -192,13 +192,29 @@ def dump_module(
192192
>>> [foo.sin(x) for x in foo.values]
193193
[0.8414709848078965, 0.9092974268256817, 0.1411200080598672]
194194
195+
- Use `refimported` to save imported objects by reference:
196+
197+
>>> import dill
198+
>>> from html.entities import html5
199+
>>> type(html5), len(html5)
200+
(dict, 2231)
201+
>>> import io
202+
>>> buf = io.BytesIO()
203+
>>> dill.dump_module(buf) # saves __main__, with html5 saved by value
204+
>>> len(buf.getvalue()) # pickle size in bytes
205+
71665
206+
>>> buf = io.BytesIO()
207+
>>> dill.dump_module(buf, refimported=True) # html5 saved by reference
208+
>>> len(buf.getvalue())
209+
438
210+
195211
*Changed in version 0.3.6:* Function ``dump_session()`` was renamed to
196212
``dump_module()``. Parameters ``main`` and ``byref`` were renamed to
197213
``module`` and ``refimported``, respectively.
198214
199215
Note:
200216
Currently, ``dill.settings['byref']`` and ``dill.settings['recurse']``
201-
don't apply to this function.`
217+
don't apply to this function.
202218
"""
203219
for old_par, par in [('main', 'module'), ('byref', 'refimported')]:
204220
if old_par in kwds:

0 commit comments

Comments
 (0)