Skip to content
Open
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 7 additions & 9 deletions label_studio/data_export/mixins.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import hashlib
import io
import json
import logging
import pathlib
Expand Down Expand Up @@ -239,13 +238,17 @@ def update_export_serializer_option(self, base_export_serializer_option, annotat

@staticmethod
def eval_md5(file):
    """Return the hexadecimal MD5 digest of the whole content of ``file``.

    The stream is rewound to offset 0, hashed in fixed-size chunks so the
    content is never held in memory all at once, and then moved back to the
    offset it had when this function was called.

    Args:
        file: A seekable file-like object that supports ``tell()``,
            ``seek(offset)`` and ``read(size)`` and yields bytes-like chunks.

    Returns:
        The MD5 digest of the full content as a hexadecimal string.

    Raises:
        Whatever ``file.tell()``, ``file.seek()`` or ``file.read()`` raise;
        the original position is still restored when seeking or reading
        fails part-way through.
    """
    md5_object = hashlib.md5()  # nosec
    block_size = 128 * md5_object.block_size

    # Remember where the caller left the stream so hashing is transparent.
    current_position = file.tell()
    try:
        file.seek(0)
        chunk = file.read(block_size)
        while chunk:
            md5_object.update(chunk)
            chunk = file.read(block_size)
    finally:
        # Restore the offset on failure as well, so a read error does not
        # leave the caller's stream at an arbitrary position.
        file.seek(current_position)

    return md5_object.hexdigest()

def save_file(self, file, md5):
Expand Down Expand Up @@ -338,8 +341,8 @@ def convert_file(self, to_format, download_resources=False, hostname=None):
input_name = pathlib.Path(self.file.name).name
input_file_path = pathlib.Path(tmp_dir) / input_name

with open(input_file_path, 'wb') as file_:
file_.write(self.file.open().read())
with open(input_file_path, 'wb') as dst, self.file.open('rb') as src:
shutil.copyfileobj(src, dst, length=1024 * 1024)

converter.convert(input_file_path, out_dir, to_format, is_dir=False)

Expand All @@ -356,12 +359,7 @@ def convert_file(self, to_format, download_resources=False, hostname=None):
output_file = pathlib.Path(tmp_dir) / (str(out_dir.stem) + '.zip')
filename = pathlib.Path(input_name).stem + '.zip'

# TODO(jo): can we avoid the `f.read()` here?
with open(output_file, mode='rb') as f:
return File(
io.BytesIO(f.read()),
name=filename,
)
return File(open(output_file, 'rb'), name=filename)


def export_background(
Expand Down
Loading