-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfile_utils.py
94 lines (75 loc) · 3.74 KB
/
file_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import os
import re
from config import TRANSCRIPT_FILES_DIR, SUMMARY_FILES_DIR, TEMPLATES_DIR, TRANSCRIPT_TEMPLATE_FILE, SUMMARY_TEMPLATE_FILE
class MarkdownWriter:
    """Fill markdown templates and write transcript/summary files to disk.

    Templates are read from a templates directory and filled with
    ``str.format``. Output directories and template file names come from the
    project's ``config`` module.
    """

    # Class-level fallback so instances created without running __init__
    # (e.g. subclasses with their own constructor) still resolve templates
    # through TEMPLATES_DIR from config.
    _templates_dir = None

    def __init__(self, templates_dir=None):
        """Create a writer.

        Args:
            templates_dir: Directory holding the template files. When omitted
                (or None), ``TEMPLATES_DIR`` from config is used, so
                ``MarkdownWriter()`` keeps its previous behavior.
        """
        self._templates_dir = templates_dir

    def write_content_to_file(self, content, destination_file_path):
        """Write ``content`` to ``destination_file_path`` as UTF-8.

        Missing parent directories are created and an existing file is
        overwritten. A confirmation line is printed afterwards.

        Args:
            content: Text to write.
            destination_file_path: Target path; may be a bare file name.
        """
        directory = os.path.dirname(destination_file_path)
        # dirname() returns "" for a bare file name and os.makedirs("")
        # raises FileNotFoundError, so only create a directory if one is named.
        if directory:
            os.makedirs(directory, exist_ok=True)
        with open(destination_file_path, 'w', encoding='utf-8') as dest_file:
            dest_file.write(content)
        print(f"Content written to {destination_file_path}")

    def write_to_md(self, directory, filename, content):
        """Write ``content`` to ``directory/filename`` as UTF-8 and return the path.

        Args:
            directory: Output directory; created if missing. An empty string
                means the current working directory.
            filename: Name of the file inside ``directory``.
            content: Text to write.

        Returns:
            The joined path of the written file.
        """
        # Same guard as write_content_to_file: os.makedirs("") would raise.
        if directory:
            os.makedirs(directory, exist_ok=True)
        file_path = os.path.join(directory, filename)
        with open(file_path, 'w', encoding='utf-8') as md_file:
            md_file.write(content)
        return file_path

    def fill_template(self, template_name, **kwargs):
        """Load ``template_name`` from the templates directory and format it.

        Args:
            template_name: File name of the template inside the templates
                directory.
            **kwargs: Values for the ``str.format`` placeholders in the
                template.

        Returns:
            The template text with placeholders replaced.

        Raises:
            KeyError: If the template uses a named placeholder that is not
                supplied in ``kwargs`` (raised by ``str.format``).
        """
        # Resolve the config default lazily so an explicit directory never
        # requires TEMPLATES_DIR to be looked up.
        templates_dir = (
            TEMPLATES_DIR if self._templates_dir is None else self._templates_dir
        )
        template_path = os.path.join(templates_dir, template_name)
        template_content = self.read_template(template_path)
        return template_content.format(**kwargs)

    def read_template(self, template_path):
        """Return the full UTF-8 text of the file at ``template_path``."""
        with open(template_path, 'r', encoding='utf-8') as template_file:
            return template_file.read()

    def sanitize_filename(self, title):
        """Return ``title`` made safe for use in a file name.

        Removes the characters ``< > : " / \\ | ? *``, collapses each run of
        whitespace into a single space, and strips leading/trailing
        whitespace.
        """
        sanitized_title = re.sub(r'[<>:"/\\|?*]', '', title)
        sanitized_title = re.sub(r'\s+', ' ', sanitized_title)
        return sanitized_title.strip()

    @staticmethod
    def _video_url(video_id):
        """Return the YouTube watch URL for ``video_id``."""
        return f"https://www.youtube.com/watch?v={video_id}"

    def format_save_transcript(self, video_id, transcript, title, include_timestamps=False):
        """Format a transcript and save it under ``TRANSCRIPT_FILES_DIR``.

        Args:
            video_id: Video identifier used to build the watch URL.
            transcript: Transcript text.
            title: Video title; used (sanitized) for the file name.
            include_timestamps: When False, newlines in the transcript are
                replaced with spaces before it is written.
        """
        filename, filled_template = self.format_transcript(
            video_id, transcript, title, include_timestamps
        )
        file_path = self.write_to_md(TRANSCRIPT_FILES_DIR, filename, filled_template)
        print(f"Transcript saved to {file_path}.")

    def format_transcript(self, video_id, transcript, title, include_timestamps):
        """Build the file name and markdown body for a transcript.

        Args:
            video_id: Video identifier used to build the watch URL.
            transcript: Transcript text.
            title: Video title; sanitized before use.
            include_timestamps: When False, newlines in the transcript are
                replaced with spaces.

        Returns:
            A ``(filename, filled_template)`` tuple, where ``filename`` is
            ``Tr-<sanitized title>.md``.
        """
        sanitized_title = self.sanitize_filename(title)
        filename = f"Tr-{sanitized_title}.md"
        video_url = self._video_url(video_id)

        if not include_timestamps:
            transcript = transcript.replace('\n', ' ')

        # The transcript template receives the sanitized title.
        filled_template = self.fill_template(
            TRANSCRIPT_TEMPLATE_FILE,
            title=sanitized_title,
            video_url=video_url,
            transcript=transcript,
        )
        return filename, filled_template

    def format_save_summary(self, video_id, title):
        """Format a video-link summary and save it under ``SUMMARY_FILES_DIR``.

        Args:
            video_id: Video identifier used to build the watch URL.
            title: Video title; used (sanitized) for the file name.
        """
        filename, filled_template = self.format_summary(video_id, title)
        file_path = self.write_to_md(SUMMARY_FILES_DIR, filename, filled_template)
        print(f"Video link summary saved to {file_path}.")

    def format_summary(self, video_id, title):
        """Build the file name and markdown body for a video-link summary.

        Args:
            video_id: Video identifier used to build the watch URL.
            title: Video title.

        Returns:
            A ``(filename, filled_template)`` tuple, where ``filename`` is
            ``VD-SM-<sanitized title>.md``.
        """
        sanitized_title = self.sanitize_filename(title)
        filename = f"VD-SM-{sanitized_title}.md"
        video_url = self._video_url(video_id)

        # NOTE(review): unlike format_transcript, the template gets the
        # original (unsanitized) title; only the file name is sanitized.
        # Kept as-is -- confirm whether this difference is intentional.
        filled_template = self.fill_template(
            SUMMARY_TEMPLATE_FILE,
            title=title,
            video_url=video_url,
        )
        return filename, filled_template
if __name__ == "__main__":
    # Demo run. MarkdownWriter locates templates through TEMPLATES_DIR from
    # config on its own, so it is constructed without arguments (passing the
    # directory positionally fails on a class without a matching __init__).
    markdown_writer = MarkdownWriter()

    # Example of saving a transcript
    video_id = "abcd1234"
    transcript = "This is an example transcript for the video."
    title = "Example Video Title"
    markdown_writer.format_save_transcript(video_id, transcript, title, include_timestamps=True)

    # Example of saving a video link summary
    title = "Another Example Video"
    markdown_writer.format_save_summary(video_id, title)