diff --git a/CHANGELOG.md b/CHANGELOG.md index cbfef3bcc7..07de38036c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -44,6 +44,7 @@ - fix: ensure path object converted to string before stripping quotes ([#2878](https://github.com/nf-core/tools/pull/2878)) - Fix incorrect assertions for called_with on mocks ([#2891](https://github.com/nf-core/tools/pull/2891)) - Make cli-provided module/subworkflow names case insensitive ([#2869](https://github.com/nf-core/tools/pull/2869)) +- Add plugin entrypoints for toolong ([#2895](https://github.com/nf-core/tools/pull/2895)) - Update gitpod/workspace-base Docker digest to 168d78b ([#2899](https://github.com/nf-core/tools/pull/2899)) - Update pre-commit hook astral-sh/ruff-pre-commit to v0.3.4 ([#2894](https://github.com/nf-core/tools/pull/2894)) - Update GitHub Actions ([#2902](https://github.com/nf-core/tools/pull/2902)) diff --git a/nf_core/__main__.py b/nf_core/__main__.py index 807bc776bb..42398bfcb1 100644 --- a/nf_core/__main__.py +++ b/nf_core/__main__.py @@ -3,6 +3,7 @@ import logging import os +import subprocess import sys from pathlib import Path @@ -36,6 +37,7 @@ "commands": [ "list", "launch", + "log", "create-params-file", "download", "licences", @@ -311,6 +313,34 @@ def launch( sys.exit(1) +# nf-core log +@nf_core_cli.command("log") +@click.argument("filenames", required=False, nargs=-1, metavar="") +def view_logs(filenames): + """ + Render .nextflow.log files nicely. + + Uses [link=https://github.com/textualize/toolong/]Toolong[/] with the included nf-core extension. + Shows files globbed with `.nextflow.log*` by default, or supplied filenames. + """ + filenames = list(filenames) + if len(filenames) == 0: + p = Path(".") + p.glob(".nextflow.log*") + filenames = [str(f) for f in p.glob(".nextflow.log*")] + + if len(filenames) == 0: + log.error("No .nextflow.log files found.") + sys.exit(1) + + filenames.sort() + log.info(f"Launching log viewer for: [green]{", ".join(filenames)}") + log.info( + "This uses the [link=https://github.com/textualize/toolong/]Toolong[/] log viewer - you can view any file with it using the `[magenta]tl[/]` command!" + ) + subprocess.run(["tl", *filenames]) + + # nf-core create-params-file @nf_core_cli.command() @click.argument("pipeline", required=False, metavar="") diff --git a/nf_core/toolong_formatter.py b/nf_core/toolong_formatter.py new file mode 100644 index 0000000000..cca2fce27a --- /dev/null +++ b/nf_core/toolong_formatter.py @@ -0,0 +1,326 @@ +from __future__ import annotations + +import re +from datetime import datetime +from typing import Optional + +from rich.text import Text +from toolong.format_parser import FormatParser +from toolong.highlighter import LogHighlighter +from typing_extensions import TypeAlias + +ParseResult: TypeAlias = "tuple[Optional[datetime], str, Text]" + +MOVE_LOG_LEVEL_COL = True +LOG_LEVELS = { + "DEBUG": ["dim white on black", "dim"], + "INFO": ["bold black on green", ""], + "WARN": ["bold black on yellow", "yellow"], + "ERROR": ["bold black on red", "red"], +} + + +class LogFormat: + def parse(self, line: str) -> ParseResult | None: + raise NotImplementedError() + + +class NextflowLogFormat(LogFormat): + """ + Formatter for regular Nextflow log files. + + Examples: + + Mar-24 00:11:47.302 [main] DEBUG nextflow.util.CustomThreadPool - Creating default thread pool > poolSize: 11; maxThreads: 1000 + Mar-24 00:12:04.942 [Task monitor] INFO nextflow.Session - Execution cancelled -- Finishing pending tasks before exit + """ + + REGEX = re.compile( + r"(?P\w+-\d{2} \d{2}:\d{2}:\d{2}\.\d{3}) (?P\[.*\]?) (?P\w+)\s+(?P[\w\.]+) - (?P.*?)$" + ) + + highlighter = LogHighlighter() + + def parse(self, line: str) -> ParseResult | None: + match = self.REGEX.fullmatch(line) + if match is None: + return None + + text = Text.from_ansi(line) + groups = match.groupdict() + if date := groups.get("date", None): + timestamp = datetime.strptime(groups["date"], "%b-%d %H:%M:%S.%f") + text.highlight_words([date], "not bold magenta") + if thread := groups.get("thread", None): + text.highlight_words([thread], "blue") + if log_level := groups.get("log_level", None): + text.highlight_words([f" {log_level} "], LOG_LEVELS[log_level][0]) + if logger_name := groups.get("logger_name", None): + text.highlight_words([logger_name], "cyan") + if process_name := groups.get("process_name", None): + text.highlight_words([process_name], "bold cyan") + if message := groups.get("message", None): + text.highlight_words([message], "dim" if log_level == "DEBUG" else "") + + return timestamp, line, text + + +class NextflowLogAbortedProcessNames(LogFormat): + """ + Formatter for process names when a pipeline is aborted. + + Example: + + The following lines: + [process] NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN + [process] NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_SORT + + In blocks that look like this: + + Mar-12 23:56:10.538 [SIGINT handler] DEBUG nextflow.Session - Session aborted -- Cause: SIGINT + Mar-12 23:56:10.572 [SIGINT handler] DEBUG nextflow.Session - The following nodes are still active: + [process] NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN + status=ACTIVE + port 0: (queue) OPEN ; channel: - + port 1: (value) bound ; channel: - + port 2: (value) bound ; channel: - + port 3: (value) bound ; channel: star_ignore_sjdbgtf + port 4: (value) bound ; channel: seq_platform + port 5: (value) bound ; channel: seq_center + port 6: (cntrl) - ; channel: $ + + [process] NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_SORT + status=ACTIVE + port 0: (queue) OPEN ; channel: - + port 1: (cntrl) - ; channel: $ + """ + + REGEX = re.compile(r"^(?P\[process\]) (?P.*?)(?P[^:]+?)?$") + highlighter = LogHighlighter() + + def parse(self, line: str) -> ParseResult | None: + match = self.REGEX.fullmatch(line) + if match is None: + return None + + text = Text.from_ansi(line) + text.stylize_before("dim") + groups = match.groupdict() + if process := groups.get("process", None): + text.highlight_words([process], "blue not dim") + if process_name := groups.get("process_name", None): + text.highlight_words([process_name], "bold cyan not dim") + + return None, line, text + + +class NextflowLogAbortedProcessPorts(LogFormat): + """ + Formatter for process names when a pipeline is aborted. + + Example: + + The following lines: + port 0: (queue) OPEN ; channel: - + port 1: (value) bound ; channel: - + port 2: (value) bound ; channel: - + port 3: (value) bound ; channel: star_ignore_sjdbgtf + + In blocks that look like this: + + Mar-12 23:56:10.538 [SIGINT handler] DEBUG nextflow.Session - Session aborted -- Cause: SIGINT + Mar-12 23:56:10.572 [SIGINT handler] DEBUG nextflow.Session - The following nodes are still active: + [process] NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN + status=ACTIVE + port 0: (queue) OPEN ; channel: - + port 1: (value) bound ; channel: - + port 2: (value) bound ; channel: - + port 3: (value) bound ; channel: star_ignore_sjdbgtf + port 4: (value) bound ; channel: seq_platform + port 5: (value) bound ; channel: seq_center + port 6: (cntrl) - ; channel: $ + + [process] NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_SORT + status=ACTIVE + port 0: (queue) OPEN ; channel: - + port 1: (cntrl) - ; channel: $ + """ + + REGEX = re.compile( + r" (?Pport \d+): (?P\((value|queue|cntrl)\)) (?P\S+)\s+; channel: (?P.*?)$" + ) + CHANNEL_TYPES = { + "(value)": "green", + "(cntrl)": "yellow", + "(queue)": "magenta", + } + highlighter = LogHighlighter() + + def parse(self, line: str) -> ParseResult | None: + match = self.REGEX.fullmatch(line) + if match is None: + return None + + text = Text.from_ansi(line) + groups = match.groupdict() + if port := groups.get("port", None): + text.highlight_words([port], "blue") + if channel_type := groups.get("channel_type", None): + text.highlight_words([channel_type], self.CHANNEL_TYPES[channel_type]) + if channel_state := groups.get("channel_state", None): + text.highlight_words([channel_state], "cyan" if channel_state == "OPEN" else "yellow") + text.highlight_words(["; channel:"], "dim") + if channel_name := groups.get("channel_name", None): + text.highlight_words([channel_name], "cyan") + + return None, line, text + + +class NextflowLogAbortedProcessStatus(LogFormat): + """ + Formatter for process names when a pipeline is aborted. + + Example: + + The following lines: + status=ACTIVE + + In blocks that look like this: + + Mar-12 23:56:10.538 [SIGINT handler] DEBUG nextflow.Session - Session aborted -- Cause: SIGINT + Mar-12 23:56:10.572 [SIGINT handler] DEBUG nextflow.Session - The following nodes are still active: + [process] NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN + status=ACTIVE + port 0: (queue) OPEN ; channel: - + port 1: (value) bound ; channel: - + port 2: (value) bound ; channel: - + port 3: (value) bound ; channel: star_ignore_sjdbgtf + port 4: (value) bound ; channel: seq_platform + port 5: (value) bound ; channel: seq_center + port 6: (cntrl) - ; channel: $ + + [process] NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_SORT + status=ACTIVE + port 0: (queue) OPEN ; channel: - + port 1: (cntrl) - ; channel: $ + """ + + REGEX = re.compile(r"^ status=(?P.*?)?$") + highlighter = LogHighlighter() + + def parse(self, line: str) -> ParseResult | None: + match = self.REGEX.fullmatch(line) + if match is None: + return None + + text = Text.from_ansi(line) + text.stylize_before("dim") + groups = match.groupdict() + text.highlight_words(["status="], "dim") + if status := groups.get("status", None): + text.highlight_words([status], "cyan not dim") + + return None, line, text + + +class NextflowLogParsedScripts(LogFormat): + """ + Formatter for parsed scriptp names. + + For example: + + Mar-24 00:12:03.547 [main] DEBUG nextflow.script.ScriptRunner - Parsed script files: + Script_e2630658c898fe40: /Users/ewels/GitHub/nf-core/rnaseq/./workflows/rnaseq/../../modules/local/deseq2_qc/main.nf + Script_56c7c9e8363ee20a: /Users/ewels/GitHub/nf-core/rnaseq/./workflows/rnaseq/../../subworkflows/local/quantify_pseudo_alignment/../../../modules/nf-core/custom/tx2gene/main.nf + """ + + REGEX = re.compile(r"^ (?PScript_\w+:) (?P.*?)$") + highlighter = LogHighlighter() + + def parse(self, line: str) -> ParseResult | None: + match = self.REGEX.fullmatch(line) + if match is None: + return None + + text = Text.from_ansi(line) + groups = match.groupdict() + if script_id := groups.get("script_id", None): + text.highlight_words([script_id], "blue") + if script_path := groups.get("script_path", None): + text.highlight_words([script_path], "magenta") + + return None, line, text + + +def nextflow_format_parser(logfile_obj): + is_nextflow = logfile_obj.name.startswith(".nextflow.log") + + class NextflowFormatParser(FormatParser): + """Parses a log line.""" + + def __init__(self) -> None: + super().__init__() + if is_nextflow: + self._formats = [ + NextflowLogFormat(), + NextflowLogAbortedProcessNames(), + NextflowLogAbortedProcessPorts(), + NextflowLogAbortedProcessStatus(), + NextflowLogParsedScripts(), + ] + self._log_status = "" + + def parse(self, line: str) -> ParseResult: + """Use the toolong parser with custom formatters.""" + + # Return if not a netflow log file + if not is_nextflow: + return super().parse(line) + + # Copied from toolong source, but without the default log parser + if len(line) > 10_000: + line = line[:10_000] + parse_result = None + if line.strip(): + for index, format in enumerate(self._formats): + parse_result = format.parse(line) + if parse_result is not None: + if index: + self._formats = [*self._formats[index:], *self._formats[:index]] + timestamp, line, text = parse_result + break + + if parse_result is None: + timestamp = None + line = line + text = Text(line) + + # Custom formatting with log levels + for logtype in LOG_LEVELS.keys(): + if logtype in line: + # Set log status for next lines, if multi-line + self._log_status = logtype + # Set the base stlying for this line + text.stylize_before(LOG_LEVELS[logtype][1]) + # Move the "INFO" log level to the start of the line + if MOVE_LOG_LEVEL_COL: + line = "{} {}".format( + logtype, + line.replace(f" {logtype} ", ""), + ) + logtype_str = f"[{LOG_LEVELS[logtype][0]}] {logtype: <5} [/] " + text = Text.from_markup( + logtype_str + text.markup.replace(f" {logtype} ", "[reset] [/]"), + ) + # Return - on to next line + return timestamp, line, text + + # Multi-line log message - add colour character at start of line + for logtype in LOG_LEVELS.keys(): + if self._log_status == logtype: + text = Text.from_markup(f"[{LOG_LEVELS[logtype][0]}] [/] " + text.markup) + text.stylize_before(LOG_LEVELS[logtype][1]) + + return timestamp, line, text + + return NextflowFormatParser() diff --git a/requirements.txt b/requirements.txt index 6b5b3ab57d..081847d655 100644 --- a/requirements.txt +++ b/requirements.txt @@ -20,3 +20,6 @@ rich-click>=1.6.1 rich>=13.3.1 tabulate trogon +# Remove dev brach dep when merged: https://github.com/Textualize/toolong/pull/47 +# toolong +toolong @ git+https://github.com/ewels/toolong.git@add-entry-points#egg=toolong diff --git a/setup.py b/setup.py index fc7b69ac1e..d9fde6d9fd 100644 --- a/setup.py +++ b/setup.py @@ -34,6 +34,9 @@ entry_points={ "console_scripts": ["nf-core=nf_core.__main__:run_nf_core"], "refgenie.hooks.post_update": ["nf-core-refgenie=nf_core.refgenie:update_config"], + "toolong.application.format_parsers": [ + "nextflow_format_parser = nf_core.toolong_formatter:nextflow_format_parser", + ], }, python_requires=">=3.8, <4", install_requires=required,