Skip to content

Commit ed0ba0f

Browse files
committed
fix(docs): move the bigquery script to automation
1 parent 3dc24df commit ed0ba0f

File tree

6 files changed

+130
-60
lines changed

6 files changed

+130
-60
lines changed
Lines changed: 16 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,31 +1,31 @@
11
import os
22
import re
3-
import pandas as pd
43
from pathlib import Path
5-
from typing import List, Tuple
64

7-
def parse_markdown(content: str) -> List[Tuple[str, str]]:
8-
5+
import pandas as pd
6+
7+
8+
def parse_markdown(content: str) -> list[tuple[str, str]]:
    """Split a Markdown document into its front matter and its body.

    Front matter is recognised only when the document starts with a ``---``
    line and a later line begins with ``---``; the text between the two is
    the metadata.

    Args:
        content: Full text of a Markdown file.

    Returns:
        A one-element list holding ``(metadata, body)``. When front matter is
        found, both strings are stripped of surrounding whitespace. When it is
        not, ``metadata`` is ``""`` and ``body`` is ``content`` unchanged.
    """
    # No re.MULTILINE, so "^" anchors at the very start of the document only.
    front_matter = re.match(r"^---\n(.*?)\n---", content, re.DOTALL)
    if front_matter is None:
        return [("", content)]

    metadata = front_matter.group(1).strip()
    # Slice from the end of the matched closing delimiter rather than
    # splitting on "---": a metadata value that itself contains "---"
    # (e.g. "title: a --- b") would otherwise be cut in the wrong place.
    body = content[front_matter.end():].strip()
    return [(metadata, body)]
21+
22+
2223
def process_markdown_files(directory: str, output_csv: str) -> None:
23-
2424
directory_path = Path(directory)
2525
if not directory_path.exists():
2626
print(f"Directory not found: {directory}")
2727
return
28-
28+
2929
file_content = []
3030
for root, _, files in os.walk(directory):
3131
for file in files:
@@ -35,23 +35,24 @@ def process_markdown_files(directory: str, output_csv: str) -> None:
3535

3636
print(f"Processing file: {file_path}")
3737
try:
38-
with open(file_path, mode="r", encoding="utf-8") as md_file:
38+
with Path.open(file_path, encoding="utf-8") as md_file:
3939
content = md_file.read()
4040
parsed_content = parse_markdown(content)
4141

4242
for metadata_, content in parsed_content:
4343
file_content.append([file_path.name, metadata_, content])
4444

45-
except Exception as e:
45+
except OSError as e:
4646
print(f"Error processing file {file_path}: {e}")
47-
pd_data = pd.DataFrame(file_content,columns=["Filename","Metadata","Contents"])
47+
pd_data = pd.DataFrame(file_content, columns=["Filename", "Metadata", "Contents"])
4848
pd_data.to_csv(output_csv, index=False)
4949

50+
5051
if __name__ == "__main__":
    # Source directory holding the .md/.mdx files, and the CSV file to write.
    directory = r"../docs"
    output_csv = "docs.csv"

    # Convert the directory's files into the CSV, then report where it went.
    process_markdown_files(directory, output_csv)
    print(f"CSV file created at: {output_csv}")

automations/pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ requires-python = ">=3.12"
77
dependencies = [
88
"pycoingecko>=3.1.0",
99
"web3>=7.2.0",
10+
"pandas>=2.2.3",
1011
]
1112

1213
[tool.uv]

0 commit comments

Comments
 (0)