Skip to content

Commit 28d3446

Browse files
notebook for nanobodies panel updated
1 parent 545877f commit 28d3446

File tree

1 file changed

+101
-68
lines changed

1 file changed

+101
-68
lines changed

docs/source/tutorials/case_studies/_panel_nanobodies.ipynb

Lines changed: 101 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -345,7 +345,11 @@
345345
"metadata": {},
346346
"outputs": [],
347347
"source": [
348-
"data = data[data['cleaned_preds'].str.len() >= MIN_LENGTH]"
348+
"data = data[data['cleaned_preds'].str.len() >= MIN_LENGTH]\n",
349+
"\n",
350+
"# exclude PSMs with length greater than 20\n",
351+
"MAX_LENGHT = 20\n",
352+
"data = data[data['cleaned_preds'].str.len() <= MAX_LENGHT]"
349353
]
350354
},
351355
{
@@ -445,6 +449,17 @@
445449
")"
446450
]
447451
},
452+
{
453+
"cell_type": "code",
454+
"execution_count": null,
455+
"id": "2b24383b",
456+
"metadata": {},
457+
"outputs": [],
458+
"source": [
459+
"import importlib\n",
460+
"importlib.reload(assembly)"
461+
]
462+
},
448463
{
449464
"cell_type": "code",
450465
"execution_count": null,
@@ -616,6 +631,16 @@
616631
" plt.close()"
617632
]
618633
},
634+
{
635+
"cell_type": "code",
636+
"execution_count": null,
637+
"id": "76f57434",
638+
"metadata": {},
639+
"outputs": [],
640+
"source": [
641+
"mapped_scaffolds"
642+
]
643+
},
619644
{
620645
"cell_type": "code",
621646
"execution_count": null,
@@ -640,14 +665,15 @@
640665
"outputs": [],
641666
"source": [
642667
"def plot_coverage_boxplot_seaborn_layered(\n",
643-
" file_path: str, \n",
644-
" output_image: str = f'{FIGURES_DIR}/fig3a_coverage_boxplot_layered.svg'\n",
668+
" file_path, \n",
669+
" output_image=f'{FIGURES_DIR}/fig4a_coverage_boxplot_layered.svg',\n",
670+
" ratio=1\n",
645671
"):\n",
672+
" visualization.set_publication_style()\n",
646673
"\n",
647674
" try:\n",
648675
" df = pd.read_csv(file_path)\n",
649676
" except FileNotFoundError:\n",
650-
" print(f\"File not found: {file_path}. Generating dummy data.\")\n",
651677
" data = {\n",
652678
" 'assembly_method': ['greedy (Contigs)']*15 + ['greedy (Scaffolds)']*15,\n",
653679
" 'coverage': np.concatenate([\n",
@@ -663,41 +689,51 @@
663689
" \n",
664690
" colors = {\"Contigs\": \"#a6cee3\", \"Scaffolds\": \"#1f78b4\"}\n",
665691
"\n",
666-
" fig_w, fig_h = (3.5, 4) \n",
692+
" fig_w, fig_h = visualization.get_figsize(width_ratio=ratio)\n",
693+
" fig, ax = plt.subplots(figsize=(fig_w, fig_h))\n",
667694
"\n",
668-
" plt.figure(figsize=(fig_w, fig_h))\n",
669-
"\n",
670-
" ax = sns.boxplot(\n",
695+
" sns.boxplot(\n",
671696
" data=df, \n",
672697
" x='Type', \n",
673698
" y='coverage', \n",
674699
" palette=colors,\n",
675-
" width=0.5,\n",
700+
" width=0.4,\n",
676701
" showfliers=False, \n",
677-
" linewidth=1,\n",
702+
" linewidth=1.2,\n",
703+
" ax=ax,\n",
704+
" boxprops=dict(alpha=0.8),\n",
705+
" whiskerprops=dict(color='#333333'),\n",
706+
" capprops=dict(color='#333333'),\n",
707+
" medianprops=dict(color='#333333', linewidth=1.5)\n",
678708
" )\n",
679709
" \n",
680710
" sns.stripplot(\n",
681711
" data=df, \n",
682712
" x='Type', \n",
683713
" y='coverage', \n",
684714
" palette=colors, \n",
685-
" size=6,\n",
686-
" jitter=0.15,\n",
687-
" edgecolor=\"#333333\",\n",
688-
" linewidth=0.8, \n",
689-
" alpha=0.9, \n",
690-
" ax=ax \n",
715+
" size=5,\n",
716+
" jitter=0.2,\n",
717+
" linewidth=0.8,\n",
718+
" alpha=0.6, \n",
719+
" ax=ax,\n",
720+
" edgecolor=\"#333333\" \n",
691721
" )\n",
692-
" ax.set_ylabel('Coverage', fontsize=10)\n",
693-
" ax.set_xlabel('', fontsize=10)\n",
694-
" \n",
695-
" ax.tick_params(axis='both', which='major', labelsize=9)\n",
722+
"\n",
723+
" ax.set_ylabel('Protein coverage', fontweight='normal')\n",
724+
" ax.set_xlabel('', fontweight='normal')\n",
696725
" \n",
697-
" sns.despine()\n",
726+
" if df['coverage'].max() <= 1.0:\n",
727+
" ax.set_ylim(0.4, 1.05)\n",
728+
" else:\n",
729+
" ax.set_ylim(0, 105)\n",
730+
"\n",
731+
" sns.despine(ax=ax, offset=0, trim=False)\n",
698732
"\n",
699-
" os.makedirs(os.path.dirname(output_image) if os.path.dirname(output_image) else '.', exist_ok=True)\n",
700-
" plt.savefig(output_image, dpi=300, bbox_inches='tight')\n",
733+
" if output_image:\n",
734+
" Path(os.path.dirname(output_image)).mkdir(parents=True, exist_ok=True)\n",
735+
" plt.savefig(output_image, format='svg', bbox_inches='tight')\n",
736+
" \n",
701737
" plt.show()"
702738
]
703739
},
@@ -718,25 +754,15 @@
718754
"metadata": {},
719755
"outputs": [],
720756
"source": [
721-
"import matplotlib.pyplot as plt\n",
722-
"import seaborn as sns\n",
723-
"import numpy as np\n",
724-
"import os\n",
725-
"\n",
726-
"# Assicurati di avere set_publication_style definita/importata\n",
727-
"\n",
728757
"def plot_psm_depth_standardized(\n",
729758
" reference_seq: str, \n",
730759
" peptides: list, \n",
731760
" cdrs: dict, \n",
732-
" output_file: str = 'fig_4C_matplotlib.svg'\n",
761+
" output_file: str = 'fig_4C_matplotlib.svg',\n",
762+
" ratio=2\n",
733763
"):\n",
734-
" \"\"\"\n",
735-
" Standardized PSM Depth Plot.\n",
736-
" Target Size: 2/3 of A4 Page Width (~4.75 inches).\n",
737-
" \"\"\"\n",
764+
" visualization.set_publication_style()\n",
738765
" \n",
739-
" # --- Helper: Robust Search (L/I Tolerant) ---\n",
740766
" def chars_equal(a, b):\n",
741767
" return (a in ['L', 'I'] and b in ['L', 'I']) or a == b\n",
742768
"\n",
@@ -747,65 +773,72 @@
747773
" starts.append(pos)\n",
748774
" return starts\n",
749775
"\n",
750-
" # 2. Calcolo Depth\n",
751776
" depth = np.zeros(len(reference_seq), dtype=int)\n",
752777
" for pep in peptides:\n",
753778
" found_indices = find_all_occurrences(reference_seq, pep)\n",
754779
" for start_idx in found_indices:\n",
755780
" end_idx = start_idx + len(pep)\n",
756781
" depth[start_idx:end_idx] += 1\n",
757782
" \n",
758-
" # 3. Dimensioni Fisiche (2/3 Page Width)\n",
759-
" FULL_A4_WIDTH = 7.1\n",
760-
" FIG_W = FULL_A4_WIDTH * (2/3) # ~4.73 pollici\n",
761-
" FIG_H = 3.0 # Altezza standard\n",
783+
" fig_w, fig_h = visualization.get_figsize(width_ratio=ratio)\n",
762784
" \n",
763-
" # layout='constrained' è cruciale per gestire i margini in spazi più stretti\n",
764-
" fig = plt.figure(figsize=(FIG_W, FIG_H), layout='constrained')\n",
765-
" ax = fig.gca()\n",
785+
" fig, ax = plt.subplots(figsize=(fig_w, fig_h))\n",
766786
" \n",
767-
" # 4. Plotting\n",
768787
" x = np.arange(len(reference_seq))\n",
769788
" \n",
770-
" ax.plot(x, depth, color='steelblue', linewidth=1.5, label='PSM Depth')\n",
771-
" ax.fill_between(x, depth, color='steelblue', alpha=0.2)\n",
789+
" BLUE_COLOR = '#4A90E2'\n",
790+
" ax.plot(x, depth, color=BLUE_COLOR, linewidth=1.2, label='PSM Depth')\n",
791+
" ax.fill_between(x, depth, color=BLUE_COLOR, alpha=0.15)\n",
772792
" \n",
773-
" # 5. CDR Highlights\n",
774793
" highlight_colors = {\n",
775-
" \"CDR1\": \"orange\", \n",
776-
" \"CDR2\": \"lightgreen\", \n",
777-
" \"CDR3\": \"deepskyblue\"\n",
794+
" \"CDR1\": \"#FFB347\",\n",
795+
" \"CDR2\": \"#77DD77\",\n",
796+
" \"CDR3\": \"#89CFF0\"\n",
778797
" }\n",
779798
" \n",
780799
" max_y = depth.max() * 1.1 if depth.max() > 0 else 1.0\n",
781800
" \n",
782801
" for label, (start, end) in cdrs.items():\n",
783-
" # start-1 per correzione 0-based\n",
784-
" ax.axvspan(start-1, end, color=highlight_colors.get(label, 'gray'), alpha=0.3, zorder=0)\n",
802+
" ax.axvspan(start-1, end, color=highlight_colors.get(label, 'gray'), alpha=0.2, zorder=0)\n",
785803
" \n",
786-
" # Etichetta un po' più piccola (fontsize=8) per stare nel grafico più stretto\n",
787-
" ax.text((start-1 + end)/2, max_y, label, \n",
788-
" ha='center', va='top', fontsize=8, fontweight='bold', color='black')\n",
789-
"\n",
790-
" # 6. Formatting\n",
791-
" ax.set_title('PSM depth across protein sequence', fontsize=10)\n",
792-
" ax.set_xlabel('Amino acid position', fontsize=9)\n",
793-
" ax.set_ylabel('Depth (PSMs)', fontsize=9)\n",
804+
" ax.text((start-1 + end)/2, max_y * 0.95, label, \n",
805+
" ha='center', va='top', fontweight='normal', color='black')\n",
806+
"\n",
807+
" ax.set_title('PSM depth across protein sequence', fontweight='normal', pad=15)\n",
808+
" ax.set_xlabel('Amino acid position', fontweight='normal')\n",
809+
" ax.set_ylabel('Depth (PSMs)', fontweight='normal')\n",
794810
" \n",
795-
" ax.spines['top'].set_visible(False)\n",
796-
" ax.spines['right'].set_visible(False)\n",
811+
" sns.despine(ax=ax, offset=0, trim=False)\n",
797812
" \n",
798813
" ax.set_xlim(0, len(reference_seq))\n",
799-
" ax.set_ylim(0, max_y * 1.05)\n",
814+
" ax.set_ylim(0, max_y)\n",
800815
" \n",
801-
" # 7. Save\n",
802-
" os.makedirs(os.path.dirname(output_file) if os.path.dirname(output_file) else '.', exist_ok=True)\n",
816+
" if output_file:\n",
817+
" Path(os.path.dirname(output_file)).mkdir(parents=True, exist_ok=True)\n",
818+
" plt.savefig(output_file, format='svg', bbox_inches='tight')\n",
803819
" \n",
804-
" # Manteniamo bbox_inches=None per rispettare esattamente i 4.75 pollici\n",
805-
" plt.savefig(output_file, dpi=300, bbox_inches=None)\n",
806820
" plt.show()"
807821
]
808822
},
823+
{
824+
"cell_type": "code",
825+
"execution_count": null,
826+
"id": "b56fafa2",
827+
"metadata": {},
828+
"outputs": [],
829+
"source": [
830+
"plot_psm_depth_standardized(\n",
831+
" reference_seq=protein_norm, \n",
832+
" peptides=data_abundance['cleaned_preds'].tolist(), \n",
833+
" cdrs={\n",
834+
" \"CDR1\": (26, 34),\n",
835+
" \"CDR2\": (50, 66),\n",
836+
" \"CDR3\": (99, 112)\n",
837+
" }, \n",
838+
" output_file=f'{FIGURES_DIR}/fig4b_{RUN_NAME}_psm_depth.svg'\n",
839+
")"
840+
]
841+
},
809842
{
810843
"cell_type": "code",
811844
"execution_count": null,

0 commit comments

Comments
 (0)