Skip to content

Commit 27eeb44

Browse files
committed
Minor fixes
1 parent 906627b commit 27eeb44

2 files changed

Lines changed: 13 additions & 1 deletion

File tree

marker/converters/pdf.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
import os
22

3+
from marker.schema.document import Document
4+
35
os.environ["TOKENIZERS_PARALLELISM"] = "false" # disables a tokenizers warning
46

57
from collections import defaultdict
@@ -171,7 +173,7 @@ def filepath_to_str(self, file_input: Union[str, io.BytesIO]):
171173
if temp_file is not None and os.path.exists(temp_file.name):
172174
os.unlink(temp_file.name)
173175

174-
def build_document(self, filepath: str):
176+
def build_document(self, filepath: str) -> Document:
175177
provider_cls = provider_from_filepath(filepath)
176178
layout_builder = self.resolve_dependencies(self.layout_builder_class)
177179
line_builder = self.resolve_dependencies(LineBuilder)

marker/schema/blocks/base.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,16 @@ def from_block(cls, block: Block) -> Block:
115115
block_attrs = block.model_dump(exclude=["id", "block_id", "block_type"])
116116
return cls(**block_attrs)
117117

118+
def set_internal_metadata(self, key, data) -> None:
    """Store ``data`` under ``key`` in this block's internal metadata.

    The ``_metadata`` mapping is created lazily: while it is still ``None``
    it is replaced by an empty dict before the entry is written. An existing
    entry for ``key`` is overwritten.
    """
    if self._metadata is None:
        self._metadata = {}
    self._metadata[key] = data
122+
123+
def get_internal_metadata(self, key):
    """Return the internal metadata value stored under ``key``.

    Returns ``None`` when no metadata mapping exists yet (``_metadata`` is
    ``None``) or when ``key`` is not present in it.
    """
    if self._metadata is None:
        return None
    return self._metadata.get(key)
127+
118128
def get_image(
119129
self,
120130
document: Document,

0 commit comments

Comments
 (0)