From e0043e0c79b6d211537e910a18771d5286499196 Mon Sep 17 00:00:00 2001 From: Ellen <38250543+ellen364@users.noreply.github.com> Date: Sat, 28 Oct 2023 03:32:20 +0100 Subject: [PATCH] [osh] Implement shopt -s nocasematch (#1748) All tests in spec/nocasematch-match.test.sh now pass (3 failures previously allowed). [translation] Use #included macros directly. We should move away from this style with mycpp/yaks "modules" --------- Co-authored-by: Andy C --- cpp/libc.cc | 10 +++++----- cpp/libc.h | 4 ++-- cpp/preamble.h | 2 ++ frontend/option_def.py | 2 +- osh/cmd_eval.py | 5 ++++- osh/sh_expr_eval.py | 11 ++++++++--- pyext/libc.c | 20 +++++++++++++++----- pyext/libc.pyi | 7 +++++-- spec/nocasematch-match.test.sh | 2 +- 9 files changed, 43 insertions(+), 20 deletions(-) diff --git a/cpp/libc.cc b/cpp/libc.cc index ebf1308a1c..25bf662e6b 100644 --- a/cpp/libc.cc +++ b/cpp/libc.cc @@ -36,16 +36,16 @@ BigStr* realpath(BigStr* path) { return result; } -int fnmatch(BigStr* pat, BigStr* str) { +int fnmatch(BigStr* pat, BigStr* str, int flags) { // TODO: We should detect this at ./configure time, and then maybe flag these // at parse time, not runtime #ifdef FNM_EXTMATCH - int flags = FNM_EXTMATCH; + int flags_todo = FNM_EXTMATCH; #else - int flags = 0; + int flags_todo = 0; #endif - int result = ::fnmatch(pat->data_, str->data_, flags); + int result = ::fnmatch(pat->data_, str->data_, flags_todo); switch (result) { case 0: return 1; @@ -106,7 +106,7 @@ List* glob(BigStr* pat) { // Raises RuntimeError if the pattern is invalid. TODO: Use a different // exception? 
-List* regex_match(BigStr* pattern, BigStr* str) { +List* regex_match(BigStr* pattern, BigStr* str, int flags) { List* results = NewList(); regex_t pat; diff --git a/cpp/libc.h b/cpp/libc.h index fc98261068..e6e699a85c 100644 --- a/cpp/libc.h +++ b/cpp/libc.h @@ -20,14 +20,14 @@ BigStr* realpath(BigStr* path); BigStr* gethostname(); -int fnmatch(BigStr* pat, BigStr* str); +int fnmatch(BigStr* pat, BigStr* str, int flags = 0); List* glob(BigStr* pat); Tuple2* regex_first_group_match(BigStr* pattern, BigStr* str, int pos); -List* regex_match(BigStr* pattern, BigStr* str); +List* regex_match(BigStr* pattern, BigStr* str, int flags = 0); int wcswidth(BigStr* str); int get_terminal_width(); diff --git a/cpp/preamble.h b/cpp/preamble.h index 6803484b11..429a409d06 100644 --- a/cpp/preamble.h +++ b/cpp/preamble.h @@ -2,6 +2,8 @@ #include #include // e.g. F_DUPFD used directly +#include // FNM_CASEFOLD in osh/sh_expr_eval.py +#include // REG_ICASE in osh/sh_expr_eval.py #include // e.g. WIFSIGNALED() called directly #include "_gen/core/optview.h" diff --git a/frontend/option_def.py b/frontend/option_def.py index 6b9918fc6f..ba338200f6 100644 --- a/frontend/option_def.py +++ b/frontend/option_def.py @@ -232,7 +232,6 @@ def DoneWithImplementedOptions(self): 'mailwarn', 'no_empty_cmd_completion', 'nocaseglob', - 'nocasematch', 'progcomp_alias', 'promptvars', 'restricted_shell', @@ -278,6 +277,7 @@ def _Init(opt_def): # shopt options that aren't in any groups. opt_def.Add('failglob') opt_def.Add('extglob') + opt_def.Add('nocasematch') # Compatibility opt_def.Add( diff --git a/osh/cmd_eval.py b/osh/cmd_eval.py index 2684a3e0e6..e6a2b27c07 100644 --- a/osh/cmd_eval.py +++ b/osh/cmd_eval.py @@ -89,6 +89,8 @@ import posix_ as posix import libc # for fnmatch +# Import this name directly because the C++ translation uses macros literally. 
+from libc import FNM_CASEFOLD from typing import List, Dict, Tuple, Optional, Any, cast, TYPE_CHECKING @@ -1436,6 +1438,7 @@ def _DoCase(self, node): # type: (command.Case) -> int to_match = self._EvalCaseArg(node.to_match, node.case_kw) + fnmatch_flags = FNM_CASEFOLD if self.exec_opts.nocasematch() else 0 self._MaybeRunDebugTrap() status = 0 # If there are no arms, it should be zero? @@ -1454,7 +1457,7 @@ def _DoCase(self, node): word_val = self.word_ev.EvalWordToString( pat_word, word_eval.QUOTE_FNMATCH) - if libc.fnmatch(word_val.s, to_match_str.s): + if libc.fnmatch(word_val.s, to_match_str.s, fnmatch_flags): status = self._ExecuteList(case_arm.action) matched = True # TODO: Parse ;;& and for fallthrough and such? break diff --git a/osh/sh_expr_eval.py b/osh/sh_expr_eval.py index b692b7f5d5..3b8e14cc7e 100644 --- a/osh/sh_expr_eval.py +++ b/osh/sh_expr_eval.py @@ -57,6 +57,8 @@ from osh import word_eval import libc # for fnmatch +# Import these names directly because the C++ translation uses macros literally. 
+from libc import FNM_CASEFOLD, REG_ICASE from typing import Tuple, Optional, cast, TYPE_CHECKING if TYPE_CHECKING: @@ -1044,14 +1046,15 @@ def EvalB(self, node): raise AssertionError(op_id) # should never happen if arg_type == bool_arg_type_e.Str: + fnmatch_flags = FNM_CASEFOLD if self.exec_opts.nocasematch() else 0 if op_id in (Id.BoolBinary_GlobEqual, Id.BoolBinary_GlobDEqual): #log('Matching %s against pattern %s', s1, s2) - return libc.fnmatch(s2, s1) + return libc.fnmatch(s2, s1, fnmatch_flags) if op_id == Id.BoolBinary_GlobNEqual: - return not libc.fnmatch(s2, s1) + return not libc.fnmatch(s2, s1, fnmatch_flags) if op_id in (Id.BoolBinary_Equal, Id.BoolBinary_DEqual): return s1 == s2 @@ -1062,8 +1065,10 @@ def EvalB(self, node): if op_id == Id.BoolBinary_EqualTilde: # TODO: This should go to --debug-file #log('Matching %r against regex %r', s1, s2) + regex_flags = REG_ICASE if self.exec_opts.nocasematch() else 0 + try: - matches = libc.regex_match(s2, s1) + matches = libc.regex_match(s2, s1, regex_flags) except RuntimeError as e: # Status 2 indicates a regex parse error. This is fatal in OSH but # not in bash, which treats [[ like a command with an exit code. diff --git a/pyext/libc.c b/pyext/libc.c index 7b43667b06..17d4764d17 100644 --- a/pyext/libc.c +++ b/pyext/libc.c @@ -55,12 +55,12 @@ static PyObject * func_fnmatch(PyObject *self, PyObject *args) { const char *pattern; const char *str; + int flags = 0; - if (!PyArg_ParseTuple(args, "ss", &pattern, &str)) { + if (!PyArg_ParseTuple(args, "ss|i", &pattern, &str, &flags)) { return NULL; } - int flags = 0; // NOTE: Testing for __GLIBC__ is the version detection anti-pattern. We // should really use feature detection in our configure script. 
But I plan // to get rid of the dependency on FNM_EXTMATCH because it doesn't work on @@ -185,12 +185,15 @@ static PyObject * func_regex_match(PyObject *self, PyObject *args) { const char* pattern; const char* str; - if (!PyArg_ParseTuple(args, "ss", &pattern, &str)) { + int flags = 0; + + if (!PyArg_ParseTuple(args, "ss|i", &pattern, &str, &flags)) { return NULL; } + flags |= REG_EXTENDED; regex_t pat; - int status = regcomp(&pat, pattern, REG_EXTENDED); + int status = regcomp(&pat, pattern, flags); if (status != 0) { char error_string[80]; regerror(status, &pat, error_string, 80); @@ -408,7 +411,14 @@ static PyMethodDef methods[] = { #endif void initlibc(void) { - Py_InitModule("libc", methods); + PyObject *module; + + module = Py_InitModule("libc", methods); + if (module != NULL) { + PyModule_AddIntConstant(module, "FNM_CASEFOLD", FNM_CASEFOLD); + PyModule_AddIntConstant(module, "REG_ICASE", REG_ICASE); + } + errno_error = PyErr_NewException("libc.error", PyExc_IOError, NULL); } diff --git a/pyext/libc.pyi b/pyext/libc.pyi index c01b96d630..8ea696d692 100644 --- a/pyext/libc.pyi +++ b/pyext/libc.pyi @@ -1,10 +1,13 @@ from typing import List, Optional, Tuple +FNM_CASEFOLD: int +REG_ICASE: int + def gethostname() -> str: ... def glob(pat: str) -> List[str]: ... -def fnmatch(pat: str, s: str) -> bool: ... +def fnmatch(pat: str, s: str, flags: int = 0) -> bool: ... def regex_first_group_match(regex: str, s: str, pos: int) -> Optional[Tuple[int, int]]: ... -def regex_match(regex: str, s: str) -> Optional[List[str]]: ... +def regex_match(regex: str, s: str, flags: int = 0) -> Optional[List[str]]: ... def wcswidth(s: str) -> int: ... def get_terminal_width() -> int: ... def print_time(real: float, user: float, sys: float) -> None: ... 
diff --git a/spec/nocasematch-match.test.sh b/spec/nocasematch-match.test.sh index f6ec57bf0f..cd6872b23a 100644 --- a/spec/nocasematch-match.test.sh +++ b/spec/nocasematch-match.test.sh @@ -1,5 +1,5 @@ ## compare_shells: bash -## oils_failures_allowed: 3 +## oils_failures_allowed: 0 # Tests nocasematch matching