33import asyncio
44import gzip
55import os
6+ import pprint
67from dataclasses import dataclass , field
78from typing import Any , Dict , List , Literal , Optional , Union
89
@@ -355,7 +356,7 @@ class WikiPage(WikiPageSynchronousProtocol):
355356 """The markdown content of this page."""
356357
357358 attachments : List [Dict [str , Any ]] = field (default_factory = list )
358- """A list of file attachments associated with this page."""
359+ """A list of file paths associated with this page."""
359360
360361 owner_id : Optional [str ] = None
361362 """The Synapse ID of the owning object (e.g., entity, evaluation, etc.)."""
@@ -398,14 +399,13 @@ def fill_from_dict(
398399 self .etag = synapse_wiki .get ("etag" , None )
399400 self .title = synapse_wiki .get ("title" , None )
400401 self .parent_id = synapse_wiki .get ("parentWikiId" , None )
401- self .markdown = synapse_wiki .get ("markdown" , None )
402- self .attachments = synapse_wiki .get ("attachments" , [])
403- self .owner_id = synapse_wiki .get ("ownerId" , None )
402+ self .markdown = self .markdown
403+ self .attachments = self .attachments
404404 self .created_on = synapse_wiki .get ("createdOn" , None )
405405 self .created_by = synapse_wiki .get ("createdBy" , None )
406406 self .modified_on = synapse_wiki .get ("modifiedOn" , None )
407407 self .modified_by = synapse_wiki .get ("modifiedBy" , None )
408- self .wiki_version = synapse_wiki .get ("wikiVersion" , None )
408+ self .wiki_version = synapse_wiki .get ("wikiVersion" , self . wiki_version )
409409 self .markdown_file_handle_id = synapse_wiki .get ("markdownFileHandleId" , None )
410410 self .attachment_file_handle_ids = synapse_wiki .get (
411411 "attachmentFileHandleIds" , []
@@ -423,7 +423,6 @@ def to_synapse_request(
423423 "parentWikiId" : self .parent_id ,
424424 "markdown" : self .markdown ,
425425 "attachments" : self .attachments ,
426- "ownerId" : self .owner_id ,
427426 "createdOn" : self .created_on ,
428427 "createdBy" : self .created_by ,
429428 "modifiedOn" : self .modified_on ,
@@ -456,7 +455,6 @@ def _to_gzip_file(
456455 cache_dir = os .path .join (synapse_client .cache .cache_root_dir , "wiki_content" )
457456 if not os .path .exists (cache_dir ):
458457 os .makedirs (cache_dir )
459-
460458 # Check if markdown looks like a file path and exists
461459 if os .path .isfile (wiki_content ):
462460 # If it's already a gzipped file, use the file path directly
@@ -480,6 +478,46 @@ def _to_gzip_file(
480478
481479 return file_path
482480
def _unzip_gzipped_file(self, file_path: str) -> str:
    """Unzip a gzipped file next to the original and return the new path.

    The decompressed payload is written byte-for-byte, so both text and
    binary content round-trip unchanged (no newline translation).

    If ``file_path`` ends with ``.md.gz`` and the payload is valid UTF-8,
    the markdown text is also pretty-printed to stdout.

    Only a trailing ``.gz`` suffix is stripped to build the output name;
    a ``.gz`` appearing elsewhere in the name is preserved. If the name does
    not end with ``.gz`` the output path equals ``file_path`` and the input
    file is overwritten with its decompressed content.

    Arguments:
        file_path: The path to the gzipped file.

    Returns:
        The file path to the unzipped file.
    """
    # Read in binary mode so the same code path handles text and binary data.
    with gzip.open(file_path, "rb") as f_in:
        unzipped_content_bytes = f_in.read()

    if file_path.endswith(".md.gz"):
        try:
            pprint.pp(unzipped_content_bytes.decode("utf-8"))
        except UnicodeDecodeError:
            # Payload is not UTF-8 text: skip printing, it is still written
            # to disk below.
            pass

    # Strip only the trailing ".gz"; str.replace would also remove any
    # ".gz" embedded earlier in the file name.
    directory, file_name = os.path.split(file_path)
    if file_name.endswith(".gz"):
        file_name = file_name[: -len(".gz")]
    unzipped_file_path = os.path.join(directory, file_name)

    # Always write raw bytes: re-encoding through text mode could translate
    # newlines and alter the content on some platforms.
    with open(unzipped_file_path, "wb") as f_out:
        f_out.write(unzipped_content_bytes)

    return unzipped_file_path
520+
483521 @staticmethod
484522 def _get_file_size (filehandle_dict : dict , file_name : str ) -> str :
485523 """Get the file name from the response headers.
@@ -498,6 +536,20 @@ def _get_file_size(filehandle_dict: dict, file_name: str) -> str:
498536 f"File { file_name } not found in filehandle_dict. Available files: { available_files } "
499537 )
500538
539+ @staticmethod
def _reformat_attachment_file_name(attachment_file_name: str) -> str:
    """Percent-encode the dots and underscores in an attachment file name.

    Arguments:
        attachment_file_name: The name of the attachment file.

    Returns:
        The file name with every "." replaced by "%2E" and every "_"
        replaced by "%5F"; all other characters are left unchanged.
    """
    escapes = str.maketrans({".": "%2E", "_": "%5F"})
    return attachment_file_name.translate(escapes)
552+
501553 @otel_trace_method (
502554 method_to_trace_name = lambda self , ** kwargs : f"Get the markdown file handle: { self .owner_id } "
503555 )
@@ -516,16 +568,19 @@ async def _get_markdown_file_handle(self, synapse_client: Synapse) -> "WikiPage"
516568 )
517569 try :
518570 # Upload the gzipped file to get a file handle
519- file_handle = await upload_file_handle (
520- syn = synapse_client ,
521- parent_entity_id = self .owner_id ,
522- path = file_path ,
523- )
524- synapse_client .logger .debug (
525- f"Uploaded file handle { file_handle .get ('id' )} for wiki page markdown."
526- )
527- # Set the markdown file handle ID from the upload response
528- self .markdown_file_handle_id = file_handle .get ("id" )
571+ async with synapse_client ._get_parallel_file_transfer_semaphore (
572+ asyncio_event_loop = asyncio .get_running_loop ()
573+ ):
574+ file_handle = await upload_file_handle (
575+ syn = synapse_client ,
576+ parent_entity_id = self .owner_id ,
577+ path = file_path ,
578+ )
579+ synapse_client .logger .info (
580+ f"Uploaded file handle { file_handle .get ('id' )} for wiki page markdown."
581+ )
582+ # Set the markdown file handle ID from the upload response
583+ self .markdown_file_handle_id = file_handle .get ("id" )
529584 finally :
530585 # delete the temp directory saving the gzipped file
531586 if os .path .exists (file_path ):
@@ -553,15 +608,18 @@ async def task_of_uploading_attachment(attachment: str) -> tuple[str, str]:
553608 wiki_content = attachment , synapse_client = synapse_client
554609 )
555610 try :
556- file_handle = await upload_file_handle (
557- syn = synapse_client ,
558- parent_entity_id = self .owner_id ,
559- path = file_path ,
560- )
561- synapse_client .logger .debug (
562- f"Uploaded file handle { file_handle .get ('id' )} for wiki page attachment."
563- )
564- return file_handle .get ("id" )
611+ async with synapse_client ._get_parallel_file_transfer_semaphore (
612+ asyncio_event_loop = asyncio .get_running_loop ()
613+ ):
614+ file_handle = await upload_file_handle (
615+ syn = synapse_client ,
616+ parent_entity_id = self .owner_id ,
617+ path = file_path ,
618+ )
619+ synapse_client .logger .info (
620+ f"Uploaded file handle { file_handle .get ('id' )} for wiki page attachment."
621+ )
622+ return file_handle .get ("id" )
565623 finally :
566624 if os .path .exists (file_path ):
567625 os .remove (file_path )
@@ -910,19 +968,20 @@ async def get_attachment_async(
910968 file_size = int (WikiPage ._get_file_size (filehandle_dict , file_name ))
911969 # use single thread download if file size < 8 MiB
912970 if file_size < SINGLE_THREAD_DOWNLOAD_SIZE_LIMIT :
913- download_from_url (
971+ downloaded_file_path = download_from_url (
914972 url = presigned_url_info .url ,
915973 destination = download_location ,
916974 url_is_presigned = True ,
917975 )
918976 else :
919977 # download the file
920- download_from_url_multi_threaded (
978+ downloaded_file_path = download_from_url_multi_threaded (
921979 presigned_url = presigned_url_info , destination = download_location
922980 )
923- client .logger .debug (
924- f"Downloaded file { presigned_url_info .file_name } to { download_location } "
981+ client .logger .info (
982+ f"Downloaded file { presigned_url_info .file_name } to { downloaded_file_path } . "
925983 )
984+ return downloaded_file_path
926985 else :
927986 return attachment_url
928987
@@ -987,38 +1046,37 @@ async def get_attachment_preview_async(
9871046 file_size = int (WikiPage ._get_file_size (filehandle_dict , file_name ))
9881047 # use single thread download if file size < 8 MiB
9891048 if file_size < SINGLE_THREAD_DOWNLOAD_SIZE_LIMIT :
990- download_from_url (
1049+ downloaded_file_path = download_from_url (
9911050 url = presigned_url_info .url ,
9921051 destination = download_location ,
9931052 url_is_presigned = True ,
9941053 )
9951054 else :
9961055 # download the file
997- download_from_url_multi_threaded (
1056+ downloaded_file_path = download_from_url_multi_threaded (
9981057 presigned_url = presigned_url_info , destination = download_location
9991058 )
1000- client .logger .debug (
1001- f"Downloaded the preview file { presigned_url_info .file_name } to { download_location } "
1059+ client .logger .info (
1060+ f"Downloaded the preview file { presigned_url_info .file_name } to { downloaded_file_path } . "
10021061 )
1062+ return downloaded_file_path
10031063 else :
10041064 return attachment_preview_url
10051065
10061066 @otel_trace_method (
10071067 method_to_trace_name = lambda self , ** kwargs : f"Get_Markdown_URL: Owner ID { self .owner_id } , Wiki ID { self .id } , Wiki Version { self .wiki_version } "
10081068 )
1009- async def get_markdown_async (
1069+ async def get_markdown_file_async (
10101070 self ,
10111071 * ,
1012- download_file_name : Optional [str ] = None ,
10131072 download_file : bool = True ,
10141073 download_location : Optional [str ] = None ,
10151074 synapse_client : Optional ["Synapse" ] = None ,
10161075 ) -> Union [str , None ]:
10171076 """
1018- Get the markdown URL of this wiki page.
1077+ Get the markdown of this wiki page: download and unzip the markdown file, or return its URL when download_file is False.
10191078
10201079 Arguments:
1021- download_file_name: The name of the file to download. Required if download_file is True.
10221080 download_file: Whether associated files should be downloaded. Default is True.
10231081 download_location: The directory to download the file to. Required if download_file is True.
10241082 synapse_client: Optionally provide a Synapse client.
@@ -1043,22 +1101,18 @@ async def get_markdown_async(
10431101 if download_file :
10441102 if not download_location :
10451103 raise ValueError ("Must provide download_location to download a file." )
1046- if not download_file_name :
1047- raise ValueError ("Must provide download_file_name to download a file." )
10481104
10491105 # download the gzipped markdown file from the presigned URL
1050- presigned_url_info = PresignedUrlInfo (
1106+ downloaded_file_path = download_from_url (
10511107 url = markdown_url ,
1052- file_name = download_file_name ,
1053- expiration_utc = _pre_signed_url_expiration_time (markdown_url ),
1054- )
1055- download_from_url (
1056- url = presigned_url_info .url ,
10571108 destination = download_location ,
10581109 url_is_presigned = True ,
10591110 )
1060- client .logger .debug (
1061- f"Downloaded file { presigned_url_info .file_name } to { download_location } "
1111+ # unzip the file if it is a gzipped file
1112+ unzipped_file_path = self ._unzip_gzipped_file (downloaded_file_path )
1113+ client .logger .info (
1114+ f"Downloaded and unzipped the markdown file for wiki page { self .id } to { unzipped_file_path } ."
10621115 )
1116+ return unzipped_file_path
10631117 else :
10641118 return markdown_url
0 commit comments