|
345 | 345 | "metadata": {}, |
346 | 346 | "outputs": [], |
347 | 347 | "source": [ |
348 | | - "data = data[data['cleaned_preds'].str.len() >= MIN_LENGTH]" |
| 348 | + "data = data[data['cleaned_preds'].str.len() >= MIN_LENGTH]\n", |
| 349 | + "\n", |
| 350 | + "# exclude psms greather than 20\n", |
| 351 | + "MAX_LENGHT = 20\n", |
| 352 | + "data = data[data['cleaned_preds'].str.len() <= MAX_LENGHT]" |
349 | 353 | ] |
350 | 354 | }, |
351 | 355 | { |
|
445 | 449 | ")" |
446 | 450 | ] |
447 | 451 | }, |
| 452 | + { |
| 453 | + "cell_type": "code", |
| 454 | + "execution_count": null, |
| 455 | + "id": "2b24383b", |
| 456 | + "metadata": {}, |
| 457 | + "outputs": [], |
| 458 | + "source": [ |
| 459 | + "import importlib\n", |
| 460 | + "importlib.reload(assembly)" |
| 461 | + ] |
| 462 | + }, |
448 | 463 | { |
449 | 464 | "cell_type": "code", |
450 | 465 | "execution_count": null, |
|
616 | 631 | " plt.close()" |
617 | 632 | ] |
618 | 633 | }, |
| 634 | + { |
| 635 | + "cell_type": "code", |
| 636 | + "execution_count": null, |
| 637 | + "id": "76f57434", |
| 638 | + "metadata": {}, |
| 639 | + "outputs": [], |
| 640 | + "source": [ |
| 641 | + "mapped_scaffolds" |
| 642 | + ] |
| 643 | + }, |
619 | 644 | { |
620 | 645 | "cell_type": "code", |
621 | 646 | "execution_count": null, |
|
640 | 665 | "outputs": [], |
641 | 666 | "source": [ |
642 | 667 | "def plot_coverage_boxplot_seaborn_layered(\n", |
643 | | - " file_path: str, \n", |
644 | | - " output_image: str = f'{FIGURES_DIR}/fig3a_coverage_boxplot_layered.svg'\n", |
| 668 | + " file_path, \n", |
| 669 | + " output_image=f'{FIGURES_DIR}/fig4a_coverage_boxplot_layered.svg',\n", |
| 670 | + " ratio=1\n", |
645 | 671 | "):\n", |
| 672 | + " visualization.set_publication_style()\n", |
646 | 673 | "\n", |
647 | 674 | " try:\n", |
648 | 675 | " df = pd.read_csv(file_path)\n", |
649 | 676 | " except FileNotFoundError:\n", |
650 | | - " print(f\"File not found: {file_path}. Generating dummy data.\")\n", |
651 | 677 | " data = {\n", |
652 | 678 | " 'assembly_method': ['greedy (Contigs)']*15 + ['greedy (Scaffolds)']*15,\n", |
653 | 679 | " 'coverage': np.concatenate([\n", |
|
663 | 689 | " \n", |
664 | 690 | " colors = {\"Contigs\": \"#a6cee3\", \"Scaffolds\": \"#1f78b4\"}\n", |
665 | 691 | "\n", |
666 | | - " fig_w, fig_h = (3.5, 4) \n", |
| 692 | + " fig_w, fig_h = visualization.get_figsize(width_ratio=ratio)\n", |
| 693 | + " fig, ax = plt.subplots(figsize=(fig_w, fig_h))\n", |
667 | 694 | "\n", |
668 | | - " plt.figure(figsize=(fig_w, fig_h))\n", |
669 | | - "\n", |
670 | | - " ax = sns.boxplot(\n", |
| 695 | + " sns.boxplot(\n", |
671 | 696 | " data=df, \n", |
672 | 697 | " x='Type', \n", |
673 | 698 | " y='coverage', \n", |
674 | 699 | " palette=colors,\n", |
675 | | - " width=0.5,\n", |
| 700 | + " width=0.4,\n", |
676 | 701 | " showfliers=False, \n", |
677 | | - " linewidth=1,\n", |
| 702 | + " linewidth=1.2,\n", |
| 703 | + " ax=ax,\n", |
| 704 | + " boxprops=dict(alpha=0.8),\n", |
| 705 | + " whiskerprops=dict(color='#333333'),\n", |
| 706 | + " capprops=dict(color='#333333'),\n", |
| 707 | + " medianprops=dict(color='#333333', linewidth=1.5)\n", |
678 | 708 | " )\n", |
679 | 709 | " \n", |
680 | 710 | " sns.stripplot(\n", |
681 | 711 | " data=df, \n", |
682 | 712 | " x='Type', \n", |
683 | 713 | " y='coverage', \n", |
684 | 714 | " palette=colors, \n", |
685 | | - " size=6,\n", |
686 | | - " jitter=0.15,\n", |
687 | | - " edgecolor=\"#333333\",\n", |
688 | | - " linewidth=0.8, \n", |
689 | | - " alpha=0.9, \n", |
690 | | - " ax=ax \n", |
| 715 | + " size=5,\n", |
| 716 | + " jitter=0.2,\n", |
| 717 | + " linewidth=0.8,\n", |
| 718 | + " alpha=0.6, \n", |
| 719 | + " ax=ax,\n", |
| 720 | + " edgecolor=\"#333333\" \n", |
691 | 721 | " )\n", |
692 | | - " ax.set_ylabel('Coverage', fontsize=10)\n", |
693 | | - " ax.set_xlabel('', fontsize=10)\n", |
694 | | - " \n", |
695 | | - " ax.tick_params(axis='both', which='major', labelsize=9)\n", |
| 722 | + "\n", |
| 723 | + " ax.set_ylabel('Protein coverage', fontweight='normal')\n", |
| 724 | + " ax.set_xlabel('', fontweight='normal')\n", |
696 | 725 | " \n", |
697 | | - " sns.despine()\n", |
| 726 | + " if df['coverage'].max() <= 1.0:\n", |
| 727 | + " ax.set_ylim(0.4, 1.05)\n", |
| 728 | + " else:\n", |
| 729 | + " ax.set_ylim(0, 105)\n", |
| 730 | + "\n", |
| 731 | + " sns.despine(ax=ax, offset=0, trim=False)\n", |
698 | 732 | "\n", |
699 | | - " os.makedirs(os.path.dirname(output_image) if os.path.dirname(output_image) else '.', exist_ok=True)\n", |
700 | | - " plt.savefig(output_image, dpi=300, bbox_inches='tight')\n", |
| 733 | + " if output_image:\n", |
| 734 | + " Path(os.path.dirname(output_image)).mkdir(parents=True, exist_ok=True)\n", |
| 735 | + " plt.savefig(output_image, format='svg', bbox_inches='tight')\n", |
| 736 | + " \n", |
701 | 737 | " plt.show()" |
702 | 738 | ] |
703 | 739 | }, |
|
718 | 754 | "metadata": {}, |
719 | 755 | "outputs": [], |
720 | 756 | "source": [ |
721 | | - "import matplotlib.pyplot as plt\n", |
722 | | - "import seaborn as sns\n", |
723 | | - "import numpy as np\n", |
724 | | - "import os\n", |
725 | | - "\n", |
726 | | - "# Assicurati di avere set_publication_style definita/importata\n", |
727 | | - "\n", |
728 | 757 | "def plot_psm_depth_standardized(\n", |
729 | 758 | " reference_seq: str, \n", |
730 | 759 | " peptides: list, \n", |
731 | 760 | " cdrs: dict, \n", |
732 | | - " output_file: str = 'fig_4C_matplotlib.svg'\n", |
| 761 | + " output_file: str = 'fig_4C_matplotlib.svg',\n", |
| 762 | + " ratio=2\n", |
733 | 763 | "):\n", |
734 | | - " \"\"\"\n", |
735 | | - " Standardized PSM Depth Plot.\n", |
736 | | - " Target Size: 2/3 of A4 Page Width (~4.75 inches).\n", |
737 | | - " \"\"\"\n", |
| 764 | + " visualization.set_publication_style()\n", |
738 | 765 | " \n", |
739 | | - " # --- Helper: Robust Search (L/I Tolerant) ---\n", |
740 | 766 | " def chars_equal(a, b):\n", |
741 | 767 | " return (a in ['L', 'I'] and b in ['L', 'I']) or a == b\n", |
742 | 768 | "\n", |
|
747 | 773 | " starts.append(pos)\n", |
748 | 774 | " return starts\n", |
749 | 775 | "\n", |
750 | | - " # 2. Calcolo Depth\n", |
751 | 776 | " depth = np.zeros(len(reference_seq), dtype=int)\n", |
752 | 777 | " for pep in peptides:\n", |
753 | 778 | " found_indices = find_all_occurrences(reference_seq, pep)\n", |
754 | 779 | " for start_idx in found_indices:\n", |
755 | 780 | " end_idx = start_idx + len(pep)\n", |
756 | 781 | " depth[start_idx:end_idx] += 1\n", |
757 | 782 | " \n", |
758 | | - " # 3. Dimensioni Fisiche (2/3 Page Width)\n", |
759 | | - " FULL_A4_WIDTH = 7.1\n", |
760 | | - " FIG_W = FULL_A4_WIDTH * (2/3) # ~4.73 pollici\n", |
761 | | - " FIG_H = 3.0 # Altezza standard\n", |
| 783 | + " fig_w, fig_h = visualization.get_figsize(width_ratio=ratio)\n", |
762 | 784 | " \n", |
763 | | - " # layout='constrained' è cruciale per gestire i margini in spazi più stretti\n", |
764 | | - " fig = plt.figure(figsize=(FIG_W, FIG_H), layout='constrained')\n", |
765 | | - " ax = fig.gca()\n", |
| 785 | + " fig, ax = plt.subplots(figsize=(fig_w, fig_h))\n", |
766 | 786 | " \n", |
767 | | - " # 4. Plotting\n", |
768 | 787 | " x = np.arange(len(reference_seq))\n", |
769 | 788 | " \n", |
770 | | - " ax.plot(x, depth, color='steelblue', linewidth=1.5, label='PSM Depth')\n", |
771 | | - " ax.fill_between(x, depth, color='steelblue', alpha=0.2)\n", |
| 789 | + " BLUE_COLOR = '#4A90E2'\n", |
| 790 | + " ax.plot(x, depth, color=BLUE_COLOR, linewidth=1.2, label='PSM Depth')\n", |
| 791 | + " ax.fill_between(x, depth, color=BLUE_COLOR, alpha=0.15)\n", |
772 | 792 | " \n", |
773 | | - " # 5. CDR Highlights\n", |
774 | 793 | " highlight_colors = {\n", |
775 | | - " \"CDR1\": \"orange\", \n", |
776 | | - " \"CDR2\": \"lightgreen\", \n", |
777 | | - " \"CDR3\": \"deepskyblue\"\n", |
| 794 | + " \"CDR1\": \"#FFB347\",\n", |
| 795 | + " \"CDR2\": \"#77DD77\",\n", |
| 796 | + " \"CDR3\": \"#89CFF0\"\n", |
778 | 797 | " }\n", |
779 | 798 | " \n", |
780 | 799 | " max_y = depth.max() * 1.1 if depth.max() > 0 else 1.0\n", |
781 | 800 | " \n", |
782 | 801 | " for label, (start, end) in cdrs.items():\n", |
783 | | - " # start-1 per correzione 0-based\n", |
784 | | - " ax.axvspan(start-1, end, color=highlight_colors.get(label, 'gray'), alpha=0.3, zorder=0)\n", |
| 802 | + " ax.axvspan(start-1, end, color=highlight_colors.get(label, 'gray'), alpha=0.2, zorder=0)\n", |
785 | 803 | " \n", |
786 | | - " # Etichetta un po' più piccola (fontsize=8) per stare nel grafico più stretto\n", |
787 | | - " ax.text((start-1 + end)/2, max_y, label, \n", |
788 | | - " ha='center', va='top', fontsize=8, fontweight='bold', color='black')\n", |
789 | | - "\n", |
790 | | - " # 6. Formatting\n", |
791 | | - " ax.set_title('PSM depth across protein sequence', fontsize=10)\n", |
792 | | - " ax.set_xlabel('Amino acid position', fontsize=9)\n", |
793 | | - " ax.set_ylabel('Depth (PSMs)', fontsize=9)\n", |
| 804 | + " ax.text((start-1 + end)/2, max_y * 0.95, label, \n", |
| 805 | + " ha='center', va='top', fontweight='normal', color='black')\n", |
| 806 | + "\n", |
| 807 | + " ax.set_title('PSM depth across protein sequence', fontweight='normal', pad=15)\n", |
| 808 | + " ax.set_xlabel('Amino acid position', fontweight='normal')\n", |
| 809 | + " ax.set_ylabel('Depth (PSMs)', fontweight='normal')\n", |
794 | 810 | " \n", |
795 | | - " ax.spines['top'].set_visible(False)\n", |
796 | | - " ax.spines['right'].set_visible(False)\n", |
| 811 | + " sns.despine(ax=ax, offset=0, trim=False)\n", |
797 | 812 | " \n", |
798 | 813 | " ax.set_xlim(0, len(reference_seq))\n", |
799 | | - " ax.set_ylim(0, max_y * 1.05)\n", |
| 814 | + " ax.set_ylim(0, max_y)\n", |
800 | 815 | " \n", |
801 | | - " # 7. Save\n", |
802 | | - " os.makedirs(os.path.dirname(output_file) if os.path.dirname(output_file) else '.', exist_ok=True)\n", |
| 816 | + " if output_file:\n", |
| 817 | + " Path(os.path.dirname(output_file)).mkdir(parents=True, exist_ok=True)\n", |
| 818 | + " plt.savefig(output_file, format='svg', bbox_inches='tight')\n", |
803 | 819 | " \n", |
804 | | - " # Manteniamo bbox_inches=None per rispettare esattamente i 4.75 pollici\n", |
805 | | - " plt.savefig(output_file, dpi=300, bbox_inches=None)\n", |
806 | 820 | " plt.show()" |
807 | 821 | ] |
808 | 822 | }, |
| 823 | + { |
| 824 | + "cell_type": "code", |
| 825 | + "execution_count": null, |
| 826 | + "id": "b56fafa2", |
| 827 | + "metadata": {}, |
| 828 | + "outputs": [], |
| 829 | + "source": [ |
| 830 | + "plot_psm_depth_standardized(\n", |
| 831 | + " reference_seq=protein_norm, \n", |
| 832 | + " peptides=data_abundance['cleaned_preds'].tolist(), \n", |
| 833 | + " cdrs={\n", |
| 834 | + " \"CDR1\": (26, 34),\n", |
| 835 | + " \"CDR2\": (50, 66),\n", |
| 836 | + " \"CDR3\": (99, 112)\n", |
| 837 | + " }, \n", |
| 838 | + " output_file=f'{FIGURES_DIR}/fig4b_{RUN_NAME}_psm_depth.svg'\n", |
| 839 | + ")" |
| 840 | + ] |
| 841 | + }, |
809 | 842 | { |
810 | 843 | "cell_type": "code", |
811 | 844 | "execution_count": null, |
|
0 commit comments