Skip to content

Commit 627dbb1

Browse files
authored
Merge pull request #351 from legendu-net/dev
Merge dev into main
2 parents f43f565 + ff51fec commit 627dbb1

16 files changed

+2266
-1801
lines changed

aiutil/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,4 @@
33
from . import git
44
from . import poetry
55

6-
__version__ = "0.74.0"
6+
__version__ = "0.75.0"

aiutil/filesystem.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -393,7 +393,7 @@ def append_lines(
393393
def replace_patterns(
394394
path: Path,
395395
patterns: Union[str, Iterable[str]],
396-
repls: Union[Iterable[str], Callable],
396+
repls: Union[str, Iterable[str]],
397397
regex: bool = True,
398398
) -> None:
399399
"""Update a text file using regular expression substitution.
@@ -410,9 +410,8 @@ def replace_patterns(
410410
text = path.read_text(encoding="utf-8")
411411
if isinstance(patterns, str):
412412
patterns = [patterns]
413-
if callable(repls):
414-
func = repls
415-
repls = [func(pattern) for pattern in patterns]
413+
if isinstance(repls, str):
414+
repls = [repls]
416415
if regex:
417416
for pattern, repl in zip(patterns, repls):
418417
text = re.sub(pattern, repl, text)

aiutil/notebook/search.py

Lines changed: 247 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,247 @@
1+
#!/usr/bin/env python3
2+
from typing import TypeAlias, Sequence
3+
import json
4+
from pathlib import Path
5+
from collections import Counter
6+
from argparse import ArgumentParser, Namespace
7+
from loguru import logger
8+
9+
# A search criterion accepted by the matching helpers in this module:
# a single string, a list of strings, or a dict holding "include" and
# "exclude" lists. `_reg_criterion` converts the first two forms into the dict form.
Criterion: TypeAlias = str | list[str] | dict[str, list[str]]
10+
11+
12+
def _reg_criterion(criterion: str | list[str] | dict[str, list[str]]):
13+
if isinstance(criterion, str):
14+
if criterion == "":
15+
criterion = []
16+
else:
17+
criterion = [criterion]
18+
if isinstance(criterion, list):
19+
criterion = {"include": criterion, "exclude": []}
20+
return criterion
21+
22+
23+
class Cell:
    """A single cell of a notebook.

    :param cell: The raw cell dict taken from the notebook JSON.
        The keys "source" and "cell_type" are read from it.
    """

    def __init__(self, cell: dict):
        self._cell = cell
        # Cache the un-indented source; keyword matching reuses it.
        self._source = self.source(0)

    def _lines(self) -> list[str]:
        """Return the source of the cell as a list of lines (line endings kept)."""
        src = self._cell["source"]
        if isinstance(src, str):
            # The source may be stored as one multi-line string instead of a
            # list of lines; iterating it directly would yield characters.
            return src.splitlines(keepends=True)
        return list(src)

    def source(self, indent: int = 0) -> str:
        """Return the source code of the cell.

        :param indent: The number of spaces to put in front of each line.
        :return: The (indented) source of the cell as a single string.
        """
        prefix = " " * indent
        return "".join(prefix + line for line in self._lines())

    def match_keyword(self, keyword: "Criterion") -> bool:
        """Check whether the source of the cell satisfies a keyword criterion.

        :param keyword: The criterion; normalized with `_reg_criterion`.
        :return: True if every "include" keyword occurs in the source
            and no "exclude" keyword does.
        """
        kwd = _reg_criterion(keyword)
        if not all(k in self._source for k in kwd["include"]):
            return False
        return not any(k in self._source for k in kwd["exclude"])

    def match_type(self, type_: str) -> bool:
        """Check whether the cell has the given type.

        :param type_: The cell type to match; an empty string matches any type.
        :return: True if the cell type matches.
        """
        if type_ == "":
            return True
        return self._cell["cell_type"] == type_
40+
41+
42+
class Notebook():
43+
def __init__(self, path: str | Path):
44+
self.path = Path(path) if isinstance(path, str) else path
45+
self._notebook = self._read_notebook()
46+
self.lang = self._get_lang().lower()
47+
self._cells = [Cell(cell) for cell in self._notebook["cells"]]
48+
49+
def _get_lang(self) -> str:
50+
if self._notebook["nbformat"] <= 4:
51+
return self._notebook["metadata"]["language_info"]["name"]
52+
return self._notebook["metadata"]["kernelspec"]["language"]
53+
54+
def _read_notebook(self) -> dict:
55+
with self.path.open() as fin:
56+
return json.load(fin)
57+
58+
def match_language(self, language: Criterion) -> bool:
59+
lang = _reg_criterion(language)
60+
return all(self.lang == l.lower() for l in lang["include"]
61+
) and not any(self.lang == l.lower() for l in lang["exclude"])
62+
63+
def cells(self, keyword: Criterion, type_: str = "") -> list[Cell]:
64+
return [
65+
cell for cell in self._cells
66+
if cell.match_type(type_) and cell.match_keyword(keyword)
67+
]
68+
69+
def __repr__(self) -> str:
70+
return f"Notebook({self.path})"
71+
72+
73+
def print_nb_cells(
    nb_cells: "tuple[tuple[Notebook, list[Cell]], ...]",
    num_notebooks: int,
    num_cells: int,
) -> None:
    """Print matched notebooks together with their matched cells.

    :param nb_cells: Pairs of a notebook and its list of matched cells.
    :param num_notebooks: The maximum number of notebooks to display.
        A negative value is treated as 0.
    :param num_cells: The maximum number of cells to display per notebook.
        A negative value is treated as 0.
    """
    # Clamp the limits: a negative slice bound would silently drop items
    # from the end instead of displaying nothing.
    num_notebooks = max(num_notebooks, 0)
    num_cells = max(num_cells, 0)
    n = len(nb_cells)
    print(f"Matched {n} notebooks")
    print(
        f"Display {min(n, num_notebooks)} notebooks each with up to {num_cells} cells\n"
    )
    for nb, cells in nb_cells[:num_notebooks]:
        print(f"{nb.path}: {nb.lang}")
        for idx, cell in enumerate(cells[:num_cells]):
            print(
                f" ------------------------------------ Cell {idx} ------------------------------------"
            )
            print(cell.source(4))
        print(
            "========================================================================================\n\n"
        )
91+
92+
93+
def search_notebooks(
    notebooks: list[Notebook],
    keyword: Criterion = "",
    type_: str = "",
    language: Criterion = ""
):
    """Search notebooks for cells matching the given criteria.

    :param notebooks: The notebooks to search.
    :param keyword: The keyword criterion that cells must satisfy.
    :param type_: The cell type that cells must have.
    :param language: The language criterion that notebooks must satisfy.
    :return: A tuple of (notebook, matched cells) pairs; notebooks without
        any matched cell are left out.
    """
    matches = []
    for nb in notebooks:
        if not nb.match_language(language):
            continue
        matched_cells = nb.cells(keyword, type_)
        if matched_cells:
            matches.append((nb, matched_cells))
    return tuple(matches)
101+
102+
103+
def list_languages(notebooks: list[Notebook]) -> list[tuple[str, int]]:
    """Count how many notebooks use each language.

    :param notebooks: The notebooks to inspect.
    :return: A list of (language, count) pairs, most frequent first;
        languages with equal counts keep first-seen order.
    """
    frequencies = Counter(nb.lang for nb in notebooks)
    return frequencies.most_common()
108+
109+
110+
def find_notebooks(paths: Sequence[str]) -> list[Notebook]:
    """Collect notebooks from files and (recursively) from directories.

    :param paths: Paths to notebook files or to directories to search
        for files ending in ".ipynb".
    :return: The notebooks found, without duplicates, sorted by path.
    """
    found: set[Path] = set()
    for raw in paths:
        path = Path(raw)
        if path.is_file():
            if path.suffix == ".ipynb":
                found.add(path)
            else:
                logger.warning(f"The file {path} is not a notebook!")
        elif path.is_dir():
            found.update(path.glob("**/*.ipynb"))
        else:
            # Report the path instead of dropping it silently.
            logger.warning(f"The path {path} is neither a file nor a directory!")
    # Sort so the result does not depend on set iteration order,
    # which can vary between runs.
    return [Notebook(path) for path in sorted(found)]
123+
124+
125+
def _list_langs_args(args):
    """Handle the "list" sub command: print each language with its frequency.

    :param args: The parsed command-line arguments; `args.paths` is read.
    """
    frequencies = list_languages(find_notebooks(args.paths))
    for entry in frequencies:
        lang, freq = entry
        print(f"{lang}: {freq}")
    # Finish with an empty line.
    print()
131+
132+
133+
def _search_notebooks_args(args):
    """Handle the "search" sub command: search notebooks and print matches.

    :param args: The parsed command-line arguments. The attributes paths,
        lang_include, lang_exclude, kwd_include, kwd_exclude, num_notebooks
        and num_cells are read.
    """
    candidates = find_notebooks(args.paths)
    language = dict(include=args.lang_include, exclude=args.lang_exclude)
    keyword = dict(include=args.kwd_include, exclude=args.kwd_exclude)
    matched = search_notebooks(candidates, keyword=keyword, language=language)
    print_nb_cells(matched, args.num_notebooks, args.num_cells)
145+
146+
147+
def parse_args(args=None, namespace=None) -> Namespace:
    """Parse command-line arguments.

    :param args: The arguments to parse; passed through to
        `ArgumentParser.parse_args`.
    :param namespace: An object to take the parsed attributes; passed
        through to `ArgumentParser.parse_args`.
    :return: The parsed arguments. The attribute `func` holds the handler
        of the chosen sub command.
    """
    parser = ArgumentParser(description="Search for notebooks.")
    # A sub command is required: without one the namespace has no `func`
    # attribute and the caller would fail with an AttributeError.
    subparsers = parser.add_subparsers(
        dest="sub_cmd", required=True, help="Sub commands."
    )
    _subparse_search(subparsers)
    _subparse_list(subparsers)
    return parser.parse_args(args=args, namespace=namespace)
155+
156+
157+
def _subparse_list(subparsers):
    """Register the "list" sub command (aliases "l" and "ls").

    :param subparsers: The sub-parsers object to add the sub command to.
    """
    parser = subparsers.add_parser(
        "list", aliases=["l", "ls"], help="List languages used by notebooks."
    )
    paths_options = {
        "dest": "paths",
        "nargs": "+",
        "required": True,
        "help": "Paths to notebooks or directories containing notebooks.",
    }
    parser.add_argument("-p", "--paths", **paths_options)
    parser.set_defaults(func=_list_langs_args)
172+
173+
174+
def _subparse_search(subparsers):
    """Register the "search" sub command (alias "s") and its options.

    :param subparsers: The sub-parsers object to add the sub command to.
    """
    parser = subparsers.add_parser(
        "search", aliases=["s"], help="Search for notebooks."
    )
    parser.add_argument(
        "-p",
        "--paths",
        dest="paths",
        nargs="+",
        required=True,
        help="Paths to notebooks or directories containing notebooks.",
    )
    # Options taking zero or more values; each defaults to an empty tuple.
    multi_valued = (
        ("-l", "--lang-include", "lang_include", "The language of notebooks."),
        (
            "-L", "--lang-exclude", "lang_exclude",
            "Languages that notebooks shouldn't include.",
        ),
        (
            "-k", "--kwd-include", "kwd_include",
            "Keywords to search for in cells of notebooks.",
        ),
        (
            "-K", "--kwd-exclude", "kwd_exclude",
            "Keywords that cells of notebooks shouldn't include.",
        ),
    )
    for short, long_, dest, text in multi_valued:
        parser.add_argument(short, long_, dest=dest, nargs="*", default=(), help=text)
    # Integer display limits; each defaults to 10.
    limits = (
        (
            "-n", "--num-notebooks", "num_notebooks",
            "Number of matched notebooks to display.",
        ),
        (
            "-c", "--num-cells", "num_cells",
            "Number of matched cells in each notebook to display.",
        ),
    )
    for short, long_, dest, text in limits:
        parser.add_argument(short, long_, dest=dest, type=int, default=10, help=text)
    parser.set_defaults(func=_search_notebooks_args)
237+
238+
239+
def main() -> None:
    """Parse the command line and run the handler of the chosen sub command.
    """
    namespace = parse_args()
    handler = namespace.func
    handler(namespace)


# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()
File renamed without changes.

0 commit comments

Comments
 (0)