Skip to content

Commit 7b8269b

Browse files
committed
refactor fill_from_dict, add unzip file function, use logger.info instead
1 parent 5058b03 commit 7b8269b

File tree

2 files changed

+302
-177
lines changed

2 files changed

+302
-177
lines changed

synapseclient/models/wiki.py

Lines changed: 102 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import asyncio
44
import gzip
55
import os
6+
import pprint
67
from dataclasses import dataclass, field
78
from typing import Any, Dict, List, Literal, Optional, Union
89

@@ -355,7 +356,7 @@ class WikiPage(WikiPageSynchronousProtocol):
355356
"""The markdown content of this page."""
356357

357358
attachments: List[Dict[str, Any]] = field(default_factory=list)
358-
"""A list of file attachments associated with this page."""
359+
"""A list of file paths associated with this page."""
359360

360361
owner_id: Optional[str] = None
361362
"""The Synapse ID of the owning object (e.g., entity, evaluation, etc.)."""
@@ -398,14 +399,13 @@ def fill_from_dict(
398399
self.etag = synapse_wiki.get("etag", None)
399400
self.title = synapse_wiki.get("title", None)
400401
self.parent_id = synapse_wiki.get("parentWikiId", None)
401-
self.markdown = synapse_wiki.get("markdown", None)
402-
self.attachments = synapse_wiki.get("attachments", [])
403-
self.owner_id = synapse_wiki.get("ownerId", None)
402+
self.markdown = self.markdown
403+
self.attachments = self.attachments
404404
self.created_on = synapse_wiki.get("createdOn", None)
405405
self.created_by = synapse_wiki.get("createdBy", None)
406406
self.modified_on = synapse_wiki.get("modifiedOn", None)
407407
self.modified_by = synapse_wiki.get("modifiedBy", None)
408-
self.wiki_version = synapse_wiki.get("wikiVersion", None)
408+
self.wiki_version = synapse_wiki.get("wikiVersion", self.wiki_version)
409409
self.markdown_file_handle_id = synapse_wiki.get("markdownFileHandleId", None)
410410
self.attachment_file_handle_ids = synapse_wiki.get(
411411
"attachmentFileHandleIds", []
@@ -423,7 +423,6 @@ def to_synapse_request(
423423
"parentWikiId": self.parent_id,
424424
"markdown": self.markdown,
425425
"attachments": self.attachments,
426-
"ownerId": self.owner_id,
427426
"createdOn": self.created_on,
428427
"createdBy": self.created_by,
429428
"modifiedOn": self.modified_on,
@@ -456,7 +455,6 @@ def _to_gzip_file(
456455
cache_dir = os.path.join(synapse_client.cache.cache_root_dir, "wiki_content")
457456
if not os.path.exists(cache_dir):
458457
os.makedirs(cache_dir)
459-
460458
# Check if markdown looks like a file path and exists
461459
if os.path.isfile(wiki_content):
462460
# If it's already a gzipped file, use the file path directly
@@ -480,6 +478,46 @@ def _to_gzip_file(
480478

481479
return file_path
482480

481+
def _unzip_gzipped_file(self, file_path: str) -> str:
    """Unzip the gzipped file and return the file path to the unzipped file.

    The decompressed payload is written next to the input file, byte for
    byte. The output name is the input name with a single trailing ``.gz``
    removed; when the input name has no ``.gz`` suffix the name is left
    unchanged, so the file is decompressed in place.

    If the file name ends with ``.md.gz`` and the content is valid UTF-8,
    the content is also printed with ``pprint.pp``.

    Arguments:
        file_path: The path to the gzipped file.

    Returns:
        The file path to the unzipped file.
    """
    # Read everything up front so the input handle is closed before the
    # output is opened (the two paths can be the same file, see above).
    with gzip.open(file_path, "rb") as f_in:
        unzipped_content_bytes = f_in.read()

    # Only markdown files are previewed; decode solely for that purpose.
    if file_path.endswith(".md.gz"):
        try:
            pprint.pp(unzipped_content_bytes.decode("utf-8"))
        except UnicodeDecodeError:
            # Not UTF-8 text: nothing sensible to print, still write it out.
            pass

    directory, base_name = os.path.split(file_path)
    # Strip the suffix only; str.replace would also remove a ".gz" that
    # happens to appear in the middle of the name.
    if base_name.endswith(".gz"):
        base_name = base_name[: -len(".gz")]
    unzipped_file_path = os.path.join(directory, base_name)

    # Always write bytes: a text-mode handle would translate newlines on
    # some platforms and the result would no longer match the payload.
    with open(unzipped_file_path, "wb") as f_out:
        f_out.write(unzipped_content_bytes)

    return unzipped_file_path
520+
483521
@staticmethod
484522
def _get_file_size(filehandle_dict: dict, file_name: str) -> str:
485523
"""Get the file name from the response headers.
@@ -498,6 +536,20 @@ def _get_file_size(filehandle_dict: dict, file_name: str) -> str:
498536
f"File {file_name} not found in filehandle_dict. Available files: {available_files}"
499537
)
500538

539+
@staticmethod
540+
def _reformat_attachment_file_name(attachment_file_name: str) -> str:
    """Reformat the attachment file name to be a valid attachment path.

    Every ``.`` is replaced with ``%2E`` and every ``_`` with ``%5F``; all
    other characters are returned unchanged.

    Arguments:
        attachment_file_name: The name of the attachment file.

    Returns:
        The reformatted attachment file name.
    """
    # Single pass over the string. Equivalent to replacing "." and then "_"
    # because neither replacement text contains either character.
    escapes = str.maketrans({".": "%2E", "_": "%5F"})
    return attachment_file_name.translate(escapes)
552+
501553
@otel_trace_method(
502554
method_to_trace_name=lambda self, **kwargs: f"Get the markdown file handle: {self.owner_id}"
503555
)
@@ -516,16 +568,19 @@ async def _get_markdown_file_handle(self, synapse_client: Synapse) -> "WikiPage"
516568
)
517569
try:
518570
# Upload the gzipped file to get a file handle
519-
file_handle = await upload_file_handle(
520-
syn=synapse_client,
521-
parent_entity_id=self.owner_id,
522-
path=file_path,
523-
)
524-
synapse_client.logger.debug(
525-
f"Uploaded file handle {file_handle.get('id')} for wiki page markdown."
526-
)
527-
# Set the markdown file handle ID from the upload response
528-
self.markdown_file_handle_id = file_handle.get("id")
571+
async with synapse_client._get_parallel_file_transfer_semaphore(
572+
asyncio_event_loop=asyncio.get_running_loop()
573+
):
574+
file_handle = await upload_file_handle(
575+
syn=synapse_client,
576+
parent_entity_id=self.owner_id,
577+
path=file_path,
578+
)
579+
synapse_client.logger.info(
580+
f"Uploaded file handle {file_handle.get('id')} for wiki page markdown."
581+
)
582+
# Set the markdown file handle ID from the upload response
583+
self.markdown_file_handle_id = file_handle.get("id")
529584
finally:
530585
# delete the temp directory saving the gzipped file
531586
if os.path.exists(file_path):
@@ -553,15 +608,18 @@ async def task_of_uploading_attachment(attachment: str) -> tuple[str, str]:
553608
wiki_content=attachment, synapse_client=synapse_client
554609
)
555610
try:
556-
file_handle = await upload_file_handle(
557-
syn=synapse_client,
558-
parent_entity_id=self.owner_id,
559-
path=file_path,
560-
)
561-
synapse_client.logger.debug(
562-
f"Uploaded file handle {file_handle.get('id')} for wiki page attachment."
563-
)
564-
return file_handle.get("id")
611+
async with synapse_client._get_parallel_file_transfer_semaphore(
612+
asyncio_event_loop=asyncio.get_running_loop()
613+
):
614+
file_handle = await upload_file_handle(
615+
syn=synapse_client,
616+
parent_entity_id=self.owner_id,
617+
path=file_path,
618+
)
619+
synapse_client.logger.info(
620+
f"Uploaded file handle {file_handle.get('id')} for wiki page attachment."
621+
)
622+
return file_handle.get("id")
565623
finally:
566624
if os.path.exists(file_path):
567625
os.remove(file_path)
@@ -910,19 +968,20 @@ async def get_attachment_async(
910968
file_size = int(WikiPage._get_file_size(filehandle_dict, file_name))
911969
# use single thread download if file size < 8 MiB
912970
if file_size < SINGLE_THREAD_DOWNLOAD_SIZE_LIMIT:
913-
download_from_url(
971+
downloaded_file_path = download_from_url(
914972
url=presigned_url_info.url,
915973
destination=download_location,
916974
url_is_presigned=True,
917975
)
918976
else:
919977
# download the file
920-
download_from_url_multi_threaded(
978+
downloaded_file_path = download_from_url_multi_threaded(
921979
presigned_url=presigned_url_info, destination=download_location
922980
)
923-
client.logger.debug(
924-
f"Downloaded file {presigned_url_info.file_name} to {download_location}"
981+
client.logger.info(
982+
f"Downloaded file {presigned_url_info.file_name} to {downloaded_file_path}."
925983
)
984+
return downloaded_file_path
926985
else:
927986
return attachment_url
928987

@@ -987,38 +1046,37 @@ async def get_attachment_preview_async(
9871046
file_size = int(WikiPage._get_file_size(filehandle_dict, file_name))
9881047
# use single thread download if file size < 8 MiB
9891048
if file_size < SINGLE_THREAD_DOWNLOAD_SIZE_LIMIT:
990-
download_from_url(
1049+
downloaded_file_path = download_from_url(
9911050
url=presigned_url_info.url,
9921051
destination=download_location,
9931052
url_is_presigned=True,
9941053
)
9951054
else:
9961055
# download the file
997-
download_from_url_multi_threaded(
1056+
downloaded_file_path = download_from_url_multi_threaded(
9981057
presigned_url=presigned_url_info, destination=download_location
9991058
)
1000-
client.logger.debug(
1001-
f"Downloaded the preview file {presigned_url_info.file_name} to {download_location}"
1059+
client.logger.info(
1060+
f"Downloaded the preview file {presigned_url_info.file_name} to {downloaded_file_path}."
10021061
)
1062+
return downloaded_file_path
10031063
else:
10041064
return attachment_preview_url
10051065

10061066
@otel_trace_method(
10071067
method_to_trace_name=lambda self, **kwargs: f"Get_Markdown_URL: Owner ID {self.owner_id}, Wiki ID {self.id}, Wiki Version {self.wiki_version}"
10081068
)
1009-
async def get_markdown_async(
1069+
async def get_markdown_file_async(
10101070
self,
10111071
*,
1012-
download_file_name: Optional[str] = None,
10131072
download_file: bool = True,
10141073
download_location: Optional[str] = None,
10151074
synapse_client: Optional["Synapse"] = None,
10161075
) -> Union[str, None]:
10171076
"""
1018-
Get the markdown URL of this wiki page.
1077+
Get the markdown file of this wiki page: download and unzip it, or return its URL when download_file is False.
10191078
10201079
Arguments:
1021-
download_file_name: The name of the file to download. Required if download_file is True.
10221080
download_file: Whether associated files should be downloaded. Default is True.
10231081
download_location: The directory to download the file to. Required if download_file is True.
10241082
synapse_client: Optionally provide a Synapse client.
@@ -1043,22 +1101,18 @@ async def get_markdown_async(
10431101
if download_file:
10441102
if not download_location:
10451103
raise ValueError("Must provide download_location to download a file.")
1046-
if not download_file_name:
1047-
raise ValueError("Must provide download_file_name to download a file.")
10481104

10491105
# construct PresignedUrlInfo for downloading
1050-
presigned_url_info = PresignedUrlInfo(
1106+
downloaded_file_path = download_from_url(
10511107
url=markdown_url,
1052-
file_name=download_file_name,
1053-
expiration_utc=_pre_signed_url_expiration_time(markdown_url),
1054-
)
1055-
download_from_url(
1056-
url=presigned_url_info.url,
10571108
destination=download_location,
10581109
url_is_presigned=True,
10591110
)
1060-
client.logger.debug(
1061-
f"Downloaded file {presigned_url_info.file_name} to {download_location}"
1111+
# unzip the file if it is a gzipped file
1112+
unzipped_file_path = self._unzip_gzipped_file(downloaded_file_path)
1113+
client.logger.info(
1114+
f"Downloaded and unzipped the markdown file for wiki page {self.id} to {unzipped_file_path}."
10621115
)
1116+
return unzipped_file_path
10631117
else:
10641118
return markdown_url

0 commit comments

Comments
 (0)