Skip to content

Commit 29bd503

Browse files
authored
Merge pull request #313 from dclong/dev
Merge dev into main
2 parents 459e3d2 + fe117d2 commit 29bd503

File tree

4 files changed

+18
-11
lines changed

4 files changed

+18
-11
lines changed

dsutil/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,4 @@
33
from . import git
44
from . import poetry
55

6-
__version__ = "0.67.0"
6+
__version__ = "0.68.0"

dsutil/hadoop/pyspark_submit.py

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
import yaml
1919
from loguru import logger
2020
import notifiers
21+
import dsutil.filesystem as fs
2122

2223

2324
class SparkSubmit:
@@ -194,11 +195,16 @@ def _notify_log(self, app_id, subject):
194195
logger.info("Waiting for 300 seconds for the log to be available...")
195196
time.sleep(300)
196197
sp.run(f"logf fetch {app_id}", shell=True, check=True)
198+
lines = fs.filter(
199+
path=Path(app_id + "_s"),
200+
pattern=r"^-+\s+Deduped Error Lines\s+-+$",
201+
num_lines=999
202+
)
197203
notifiers.get_notifier("email").notify(
198204
from_=self.email["from"],
199205
to=self.email["to"],
200206
subject="Re: " + subject,
201-
message=Path(app_id + "_s").read_text(),
207+
message="".join(lines[0]),
202208
host=self.email["host"],
203209
username="",
204210
password="",
@@ -320,10 +326,10 @@ def _submit_local(args, config: dict[str, Any]) -> bool:
320326
python = _python(config)
321327
lines.append(f"--conf spark.pyspark.driver.python={python}")
322328
lines.append(f"--conf spark.pyspark.python={python}")
323-
lines.extend(args.cmd)
329+
lines.extend(args.pyfile)
324330
for idx in range(2, len(lines)):
325331
lines[idx] = " " * 4 + lines[idx]
326-
return SparkSubmit().submit(" \\\n".join(lines) + "\n", args.cmd[:1])
332+
return SparkSubmit().submit(" \\\n".join(lines) + "\n", args.pyfile[:1])
327333

328334

329335
def _submit_cluster(args, config: dict[str, Any]) -> bool:
@@ -348,11 +354,11 @@ def _submit_cluster(args, config: dict[str, Any]) -> bool:
348354
lines = [config["spark-submit"]] + [
349355
f"--{opt} {config[opt]}" for opt in opts if opt in config and config[opt]
350356
] + [f"--conf {k}={v}" for k, v in config["conf"].items()]
351-
lines.extend(args.cmd)
357+
lines.extend(args.pyfile)
352358
for idx in range(1, len(lines)):
353359
lines[idx] = " " * 4 + lines[idx]
354360
return SparkSubmit(email=config["email"]
355-
).submit(" \\\n".join(lines) + "\n", args.cmd[:1])
361+
).submit(" \\\n".join(lines) + "\n", args.pyfile[:1])
356362

357363

358364
def submit(args: Namespace) -> None:
@@ -431,8 +437,9 @@ def parse_args(args=None, namespace=None) -> Namespace:
431437
help="Specify a path for generating a configration example."
432438
)
433439
mutex_group.add_argument(
434-
"--cmd",
435-
dest="cmd",
440+
"--py",
441+
"--pyfile",
442+
dest="pyfile",
436443
nargs="+",
437444
help="The command (of PySpark script) to submit to Spark to run."
438445
)

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "dsutil"
3-
version = "0.67.0"
3+
version = "0.68.0"
44
description = "A utils Python package for data scientists."
55
authors = ["Benjamin Du <[email protected]>"]
66

readme.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ Currently, Python 3.7 and 3.8 are supported.
2525
2626
You can download a copy of the latest release and install it using pip.
2727
```bash
28-
pip3 install --user -U https://github.com/dclong/dsutil/releases/download/v0.67.0/dsutil-0.67.0-py3-none-any.whl
28+
pip3 install --user -U https://github.com/dclong/dsutil/releases/download/v0.68.0/dsutil-0.68.0-py3-none-any.whl
2929
```
3030
Or you can use the following command to install the latest main branch
3131
if you have pip 20.0+.
@@ -35,7 +35,7 @@ pip3 install --user -U git+https://github.com/dclong/dsutil@main
3535
Use one of the following commands if you want to install additional components of dsutil.
3636
Available additional components are `cv`, `docker`, `pdf`, `jupyter`, `admin` and `all`.
3737
```bash
38-
pip3 install "dsutil[cv] @ https://github.com/dclong/dsutil/releases/download/v0.67.0/dsutil-0.67.0-py3-none-any.whl"
38+
pip3 install "dsutil[cv] @ https://github.com/dclong/dsutil/releases/download/v0.68.0/dsutil-0.68.0-py3-none-any.whl"
3939
# or
4040
pip3 install --user -U "dsutil[all] @ git+https://github.com/dclong/dsutil@main"
4141
```

0 commit comments

Comments
 (0)