Skip to content

v3.30 vsnp3_group_on_defining_snps.py bug report + fix #18

@duceppemo

Description

@duceppemo

Hi Tod,

I'm getting the following error running vsnp3_step2.py in v.3.30:

Sorting defining SNPs into groups...
Traceback (most recent call last):
  File "/home/bioinfo/miniconda3/envs/vsnp3.30/bin/vsnp3_step2.py", line 692, in <module>
    group = Group(cwd=global_working_dir, metadata=args.metadata, defining_snps=args.defining_snps, excel_remove=args.remove_by_name, gbk_list=args.gbk, dataframes=vcf_to_df.datafr
ames, all_vcf=args.all_vcf, find_new_filters=args.find_new_filters, no_filters=args.no_filters, qual_threshold=int(args.qual_threshold), n_threshold=int(args.n_threshold), mq_thres
hold=int(args.mq_threshold), abs_pos=args.abs_pos, group=args.group, show_groups=args.show_groups, hash_groups=args.hash_groups, html_tree=args.html_tree, dp=args.dp, debug=args.de
bug)
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
^^^^
  File "/home/bioinfo/miniconda3/envs/vsnp3.30/bin/vsnp3_group_on_defining_snps.py", line 164, in __init__
    metadata_df['metadata'] = metadata_df['metadata'].replace({'*':'_'}, regex=True)
                              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/bioinfo/miniconda3/envs/vsnp3.30/lib/python3.12/site-packages/pandas/core/generic.py", line 7819, in replace
    return self.replace(
           ^^^^^^^^^^^^^
  File "/home/bioinfo/miniconda3/envs/vsnp3.30/lib/python3.12/site-packages/pandas/core/generic.py", line 7867, in replace
    new_data = self._mgr.replace_list(
               ^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/bioinfo/miniconda3/envs/vsnp3.30/lib/python3.12/site-packages/pandas/core/internals/base.py", line 253, in replace_list
    bm = self.apply_with_block(
         ^^^^^^^^^^^^^^^^^^^^^^
  File "/home/bioinfo/miniconda3/envs/vsnp3.30/lib/python3.12/site-packages/pandas/core/internals/managers.py", line 354, in apply
    applied = getattr(b, f)(**kwargs)
              ^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/bioinfo/miniconda3/envs/vsnp3.30/lib/python3.12/site-packages/pandas/core/internals/blocks.py", line 874, in replace_list
    for i, ((src, dest), mask) in enumerate(zip(pairs, masks)):
  File "/home/bioinfo/miniconda3/envs/vsnp3.30/lib/python3.12/site-packages/pandas/core/internals/blocks.py", line 852, in <genexpr>
    compare_or_regex_search(
  File "/home/bioinfo/miniconda3/envs/vsnp3.30/lib/python3.12/site-packages/pandas/core/array_algos/replace.py", line 84, in compare_or_regex_search
    if not regex or not should_use_regex(regex, b):
                        ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/bioinfo/miniconda3/envs/vsnp3.30/lib/python3.12/site-packages/pandas/core/array_algos/replace.py", line 38, in should_use_regex
    regex = regex and is_re_compilable(to_replace)
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/bioinfo/miniconda3/envs/vsnp3.30/lib/python3.12/site-packages/pandas/core/dtypes/inference.py", line 188, in is_re_compilable
    re.compile(obj)
  File "/home/bioinfo/miniconda3/envs/vsnp3.30/lib/python3.12/re/__init__.py", line 228, in compile
    return _compile(pattern, flags)
           ^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/bioinfo/miniconda3/envs/vsnp3.30/lib/python3.12/re/__init__.py", line 307, in _compile
    p = _compiler.compile(pattern, flags)
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/bioinfo/miniconda3/envs/vsnp3.30/lib/python3.12/re/_compiler.py", line 743, in compile
    p = _parser.parse(p, flags)
        ^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/bioinfo/miniconda3/envs/vsnp3.30/lib/python3.12/re/_parser.py", line 972, in parse
    p = _parse_sub(source, state, flags & SRE_FLAG_VERBOSE, 0)
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/bioinfo/miniconda3/envs/vsnp3.30/lib/python3.12/re/_parser.py", line 453, in _parse_sub
    itemsappend(_parse(source, state, verbose, nested + 1,
                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/bioinfo/miniconda3/envs/vsnp3.30/lib/python3.12/re/_parser.py", line 680, in _parse
    raise source.error("nothing to repeat",
re.error: nothing to repeat at position 0

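Looking at the traceback, I found that your "replace" code was problematic: the pattern '*' is passed with "regex=True", and an unescaped '*' is not a valid regular expression (hence "nothing to repeat at position 0"). Removing the "regex=True" made the error go away: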

# Lines 161-179

# From
            #fix metadata tags
            metadata_df['metadata'] = metadata_df['metadata'].replace({'/':'_'}, regex=True)
            metadata_df['metadata'] = metadata_df['metadata'].replace({'\.':'_'}, regex=True)
            metadata_df['metadata'] = metadata_df['metadata'].replace({'\*':'_'}, regex=True)
            metadata_df['metadata'] = metadata_df['metadata'].replace({'\?':'_'}, regex=True)
            metadata_df['metadata'] = metadata_df['metadata'].replace({'\(':'_'}, regex=True)
            metadata_df['metadata'] = metadata_df['metadata'].replace({'\)':'_'}, regex=True)
            metadata_df['metadata'] = metadata_df['metadata'].replace({'\[':'_'}, regex=True)
            metadata_df['metadata'] = metadata_df['metadata'].replace({'\]':'_'}, regex=True)
            metadata_df['metadata'] = metadata_df['metadata'].replace({' ':'_'}, regex=True)
            metadata_df['metadata'] = metadata_df['metadata'].replace({'{':'_'}, regex=True)
            metadata_df['metadata'] = metadata_df['metadata'].replace({'}':'_'}, regex=True)
            metadata_df['metadata'] = metadata_df['metadata'].replace({'-_':'_'}, regex=True)
            metadata_df['metadata'] = metadata_df['metadata'].replace({'_-':'_'}, regex=True)
            metadata_df['metadata'] = metadata_df['metadata'].replace({'--':'_'}, regex=True)
            metadata_df['metadata'] = metadata_df['metadata'].replace({'_$':''}, regex=True)
            metadata_df['metadata'] = metadata_df['metadata'].replace({'-$':''}, regex=True)
            metadata_df['metadata'] = metadata_df['metadata'].replace({"\'": ""}, regex=True)
            metadata_df['metadata'] = metadata_df['metadata'].replace({',':''}, regex=True)

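# To (caveat: without regex=True, pandas replace() only swaps cells whose entire value equals the key, so these calls no longer substitute characters inside a tag, and '_$' / '-$' become literal strings; raw-string escaped patterns such as r'\*' with regex=True, or .str.replace(..., regex=False), would keep the substring behaviour)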
            #fix metadata tags
            metadata_df['metadata'] = metadata_df['metadata'].replace({'/':'_'})
            metadata_df['metadata'] = metadata_df['metadata'].replace({'.':'_'})
            metadata_df['metadata'] = metadata_df['metadata'].replace({'*':'_'})
            metadata_df['metadata'] = metadata_df['metadata'].replace({'?':'_'})
            metadata_df['metadata'] = metadata_df['metadata'].replace({'(':'_'})
            metadata_df['metadata'] = metadata_df['metadata'].replace({')':'_'})
            metadata_df['metadata'] = metadata_df['metadata'].replace({'[':'_'})
            metadata_df['metadata'] = metadata_df['metadata'].replace({']':'_'})
            metadata_df['metadata'] = metadata_df['metadata'].replace({' ':'_'})
            metadata_df['metadata'] = metadata_df['metadata'].replace({'{':'_'})
            metadata_df['metadata'] = metadata_df['metadata'].replace({'}':'_'})
            metadata_df['metadata'] = metadata_df['metadata'].replace({'-_':'_'})
            metadata_df['metadata'] = metadata_df['metadata'].replace({'_-':'_'})
            metadata_df['metadata'] = metadata_df['metadata'].replace({'--':'_'})
            metadata_df['metadata'] = metadata_df['metadata'].replace({'_$':''})
            metadata_df['metadata'] = metadata_df['metadata'].replace({'-$':''})
            metadata_df['metadata'] = metadata_df['metadata'].replace({"'": ""})
            metadata_df['metadata'] = metadata_df['metadata'].replace({',':''})

Cheers,
Marco

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions