Skip to content

Commit 7caf255

Browse files
authored
bug: omit session handler from serialization to avoid mp issues (#2366)
### Description
The session handler variable can be anything, because it's specific to the SDK being used for the connector. Depending on what it is, this can break serialization. To avoid this altogether, the session handler itself is not serialized. Instead, it needs to be recreated whenever an object is serialized and then deserialized.
1 parent 0ca154a commit 7caf255

File tree

4 files changed

+6
-10
lines changed

4 files changed

+6
-10
lines changed

Diff for: CHANGELOG.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
## 0.11.9-dev3
1+
## 0.11.9-dev4
22

33
### Enhancements
44

Diff for: unstructured/__version__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "0.11.9-dev3" # pragma: no cover
1+
__version__ = "0.11.9-dev4" # pragma: no cover

Diff for: unstructured/ingest/interfaces.py

+2
Original file line numberDiff line numberDiff line change
@@ -299,6 +299,8 @@ def add_props(self, as_dict: dict, props: t.List[str]):
299299

300300
def to_dict(self, **kwargs) -> t.Dict[str, Json]:
301301
as_dict = _asdict(self, **kwargs)
302+
if "_session_handle" in as_dict:
303+
as_dict.pop("_session_handle", None)
302304
self.add_props(as_dict=as_dict, props=self.properties_to_serialize)
303305
if getattr(self, "_source_metadata") is not None:
304306
self.add_props(as_dict=as_dict, props=self.metadata_properties)

Diff for: unstructured/ingest/pipeline/source.py

+2-8
Original file line numberDiff line numberDiff line change
@@ -28,14 +28,8 @@ def get_single(self, doc: BaseSingleIngestDoc, ingest_doc_dict: dict) -> str:
2828
# Still need to fetch metadata if file exists locally
2929
doc.update_source_metadata()
3030
else:
31-
# TODO: update all to use doc.to_json(redact_sensitive=True) once session handler
32-
# can be serialized
33-
try:
34-
serialized_doc = doc.to_json(redact_sensitive=True)
35-
logger.debug(f"Fetching {serialized_doc} - PID: {os.getpid()}")
36-
except Exception as e:
37-
logger.warning("failed to print full doc: ", e)
38-
logger.debug(f"Fetching {doc.__class__.__name__} - PID: {os.getpid()}")
31+
serialized_doc = doc.to_json(redact_sensitive=True)
32+
logger.debug(f"Fetching {serialized_doc} - PID: {os.getpid()}")
3933
if self.retry_strategy:
4034
self.retry_strategy(doc.get_file)
4135
else:

0 commit comments

Comments
 (0)