Merge pull request #86 from miRTop/patch_0.4.27

lpantano · web-flow · commit 7ccffa861a9e · 2024-09-13T16:32:48.000-04:00
release 0.4.27
diff --git a/.github/workflows/ci-cd.yml b/.github/workflows/ci-cd.yml
@@ -0,0 +1,94 @@
+name: Publish Python 🐍 distribution 📦 to PyPI and TestPyPI
+
+on: push
+
+jobs:
+  build:
+    name: Build distribution 📦
+    runs-on: ubuntu-latest
+
+    steps:
+    - uses: actions/checkout@v4
+    - name: Set up Python
+      uses: actions/setup-python@v5
+      with:
+        python-version: "3.x"
+    - name: Install pypa/build
+      run: >-
+        python3 -m
+        pip install
+        build
+        --user
+    - name: Build a binary wheel and a source tarball
+      run: python3 -m build
+    - name: Store the distribution packages
+      uses: actions/upload-artifact@v4
+      with:
+        name: python-package-distributions
+        path: dist/
+
+  publish-to-pypi:
+    name: >-
+      Publish Python 🐍 distribution 📦 to PyPI
+    if: startsWith(github.ref, 'refs/tags/')  # only publish to PyPI on tag pushes
+    needs:
+    - build
+    runs-on: ubuntu-latest
+    environment:
+      name: pypi
+      url: https://pypi.org/p/mirtop  # PyPI project page for the mirtop package
+    permissions:
+      id-token: write  # IMPORTANT: mandatory for trusted publishing
+
+    steps:
+    - name: Download all the dists
+      uses: actions/download-artifact@v4
+      with:
+        name: python-package-distributions
+        path: dist/
+    - name: Publish distribution 📦 to PyPI
+      uses: pypa/gh-action-pypi-publish@release/v1
+
+  github-release:
+    name: >-
+      Sign the Python 🐍 distribution 📦 with Sigstore
+      and upload them to GitHub Release
+    needs:
+    - publish-to-pypi
+    runs-on: ubuntu-latest
+
+    permissions:
+      contents: write  # IMPORTANT: mandatory for making GitHub Releases
+      id-token: write  # IMPORTANT: mandatory for sigstore
+
+    steps:
+    - name: Download all the dists
+      uses: actions/download-artifact@v4
+      with:
+        name: python-package-distributions
+        path: dist/
+    - name: Sign the dists with Sigstore
+      uses: sigstore/gh-action-sigstore-python@v2.1.1
+      with:
+        inputs: >-
+          ./dist/*.tar.gz
+          ./dist/*.whl
+    - name: Create GitHub Release
+      env:
+        GITHUB_TOKEN: ${{ github.token }}
+      run: >-
+        gh release create
+        '${{ github.ref_name }}'
+        --repo '${{ github.repository }}'
+        --notes ""
+    - name: Upload artifact signatures to GitHub Release
+      env:
+        GITHUB_TOKEN: ${{ github.token }}
+      # Upload to GitHub Release using the `gh` CLI.
+      # `dist/` contains the built packages, and the
+      # sigstore-produced signatures and certificates.
+      run: >-
+        gh release upload
+        '${{ github.ref_name }}' dist/**
+        --repo '${{ github.repository }}'
+
diff --git a/HISTORY.md b/HISTORY.md
@@ -1,3 +1,9 @@
+0.4.27
+
+* fix random order in Variant field [#83](https://github.com/miRTop/mirtop/issues/83)
+* fix possible duplication of lines [#80](https://github.com/miRTop/mirtop/issues/80)
+* accept prefix for gff output [#84](https://github.com/miRTop/mirtop/issues/84)
+
 0.4.26
 
 *  Support spaces and special characters in bam files
diff --git a/mirtop/gff/__init__.py b/mirtop/gff/__init__.py
@@ -68,7 +68,7 @@ def reader(args):
     if args.low_memory:
         return None
     merged = merge.merge(out_dts, samples)
-    fn_merged_out = op.join(args.out, "mirtop.%s" % args.out_format)
+    fn_merged_out = op.join(args.out, "%s.%s" % (args.prefix, args.out_format))
     _write(merged, header.create(samples, database, header.make_tools([args.format])), fn_merged_out, args)
 
 
diff --git a/mirtop/gff/convert.py b/mirtop/gff/convert.py
@@ -3,6 +3,7 @@
 from __future__ import print_function
 
 import os.path as op
+import pandas as pd
 
 from mirtop.mirna import fasta, mapper
 from mirtop.mirna.realign import read_id
@@ -25,69 +26,69 @@ def convert_gff_counts(args):
             UID miRNA Variant Sample1 Sample2 ... Sample N
     """
     sep = "\t"
-    variant_header = sep.join(['iso_5p', 'iso_3p',
-                               'iso_add3p', 'iso_snp'])
+    variant_header = ['iso_5p', 'iso_3p',
+                      'iso_add3p', 'iso_snp']
     if args.add_extra:
         precursors = fasta.read_precursor(args.hairpin, args.sps)
         matures = mapper.read_gtf_to_precursor(args.gtf)
-        variant_header = sep.join([variant_header,
-                                   'iso_5p_nt', 'iso_3p_nt',
-                                   'iso_add3p_nt', 'iso_snp_nt'])
+        variant_header = variant_header + ['iso_5p_nt', 'iso_3p_nt', 'iso_add3p_nt', 'iso_snp_nt']
 
     logger.info("INFO Reading GFF file %s", args.gff)
     logger.info("INFO Writing TSV file to directory %s", args.out)
 
     gff_file = open(args.gff, 'r')
     out_file = op.join(args.out, "%s.tsv" % op.splitext(op.basename(args.gff))[0])
+    all_lines = []
     missing_parent = 0
     missing_mirna = 0
     unvalid_uid = 0
-    with open(out_file, 'w') as outh:
-
-        for samples_line in gff_file:
-            if samples_line.startswith("## COLDATA:"):
-                samples = sep.join(samples_line.strip().split("COLDATA:")[1].strip().split(","))
-                header = sep.join(['UID', 'Read', 'miRNA', 'Variant',
-                                   variant_header, samples])
-                print(header, file=outh)
-                break
-
-        for mirna_line in gff_file:
-            gff = feature(mirna_line)
-            attr = gff.attributes
-            UID = attr["UID"]
-            Read = attr["Read"]
-            mirna = attr["Name"]
-            parent = attr["Parent"]
-            variant = attr["Variant"]
-            try:
-                read_id(UID)
-            except KeyError:
-                unvalid_uid += 1
+    #with open(out_file, 'w') as outh:
+    
+    for samples_line in gff_file:
+        if samples_line.startswith("## COLDATA:"):
+            samples = [sep.join(samples_line.strip().split("COLDATA:")[1].strip().split(","))]
+            #header = sep.join(['UID', 'Read', 'miRNA', 'Variant',
+            #                   variant_header, samples])
+            #print(header, file=outh)
+            break
+    
+    for mirna_line in gff_file:
+        gff = feature(mirna_line)
+        attr = gff.attributes
+        UID = attr["UID"]
+        Read = attr["Read"]
+        mirna = attr["Name"]
+        parent = attr["Parent"]
+        variant = attr["Variant"]
+        try:
+            read_id(UID)
+        except KeyError:
+            unvalid_uid += 1
+            continue
+    
+        expression = [sep.join(attr["Expression"].strip().split(","))]
+        cols_variants = _expand(variant)
+        logger.debug("COUNTS::Read:%s" % Read)
+        logger.debug("COUNTS::EXTRA:%s" % variant)
+        if args.add_extra:
+            if parent not in precursors:
+                missing_parent += 1
                 continue
-
-            expression = sep.join(attr["Expression"].strip().split(","))
-            cols_variants = sep.join(_expand(variant))
-            logger.debug("COUNTS::Read:%s" % Read)
-            logger.debug("COUNTS::EXTRA:%s" % variant)
-            if args.add_extra:
-                if parent not in precursors:
-                    missing_parent += 1
-                    continue
-                if mirna not in matures[parent]:
-                    missing_mirna += 1
-                    continue
-                extra = variant_with_nt(mirna_line, precursors, matures)
-                if extra == "Invalid":
-                    continue
-                logger.debug("COUNTS::EXTRA:%s" % extra)
-                cols_variants = sep.join([cols_variants] + _expand(extra, True))
-            summary = sep.join([UID, Read,  mirna, variant,
-                                cols_variants, expression])
-            logger.debug(summary)
-            print(summary, file=outh)
-
-    gff_file.close()
+            if mirna not in matures[parent]:
+                missing_mirna += 1
+                continue
+            extra = variant_with_nt(mirna_line, precursors, matures)
+            if extra == "Invalid":
+                continue
+            logger.debug("COUNTS::EXTRA:%s" % extra)
+            cols_variants = cols_variants + _expand(extra, True)
+        summary = [UID, Read,  mirna, variant] + cols_variants + expression
+        logger.debug(summary)
+        all_lines.append(summary)
+    # Collect all rows in a DataFrame so duplicated lines can be dropped before writing the TSV.
+    df = pd.DataFrame(all_lines, columns = ['UID', 'Read', 'miRNA', 'Variant'] + variant_header + samples)
+    df = df.drop_duplicates()
+    df.to_csv(out_file, sep="\t", index=False)
     logger.info("Missing Parents in hairpin file: %s" % missing_parent)
     logger.info("Missing MiRNAs in GFF file: %s" % missing_mirna)
     logger.info("Non valid UID: %s" % unvalid_uid)
diff --git a/mirtop/gff/stats.py b/mirtop/gff/stats.py
@@ -107,13 +107,13 @@ def _add_missing(df):
     # ref_miRNA_mean
     category = "ref_miRNA_mean"
     if sum(df['category']==category) == 0:
-        df2 = {'category': category, 'sample': df['sample'].iat[0], 'counts': 0}
-        df = df.append(df2, ignore_index = True)
+        df2 = pd.DataFrame({'category': category, 'sample': df['sample'].iat[0], 'counts': 0}, index=[0])
+        df = pd.concat([df, df2], ignore_index = True)
     
     category = "isomiR_sum"
     if sum(df['category']==category) == 0:
-        df2 = {'category': category, 'sample': df['sample'].iat[0], 'counts': 0}
-        df = df.append(df2, ignore_index = True)
+        df2 =  pd.DataFrame({'category': category, 'sample': df['sample'].iat[0], 'counts': 0}, index=[0])
+        df = pd.concat([df, df2], ignore_index = True)
     
     return df
 
diff --git a/mirtop/libs/parse.py b/mirtop/libs/parse.py
@@ -82,6 +82,8 @@ def _add_subparser_gff(subparsers):
     parser.add_argument("files", nargs="*", help="Bam files.")
     parser.add_argument("-o", "--out", dest="out", required=1,
                         help="dir of output files")
+    parser.add_argument("--prefix", dest="prefix", required=0,
+                        default="mirtop", help="prefix for output file")
     parser.add_argument("--sps",
                         help="species")
     parser.add_argument("--keep-name", action="store_true",
diff --git a/setup.py b/setup.py
@@ -3,8 +3,7 @@
 import os
 from setuptools import setup, find_packages
 
-version = '0.4.26'
-
+version = '0.4.27'
 url = 'http://github.com/mirtop/mirtop'
 
 
diff --git a/test/test_automated_analysis.py b/test/test_automated_analysis.py
diff --git a/test/test_functions.py b/test/test_functions.py