|
23 | 23 | "name": "stderr", |
24 | 24 | "output_type": "stream", |
25 | 25 | "text": [ |
26 | | - "/Users/b260-admin/miniforge3/envs/liana311/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", |
| 26 | + "/Users/b260-admin/miniforge3/envs/liana313/lib/python3.13/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", |
27 | 27 | "Downloading data from `https://omnipathdb.org/queries/enzsub?format=json`\n", |
28 | 28 | "Downloading data from `https://omnipathdb.org/queries/interactions?format=json`\n", |
29 | 29 | "Downloading data from `https://omnipathdb.org/queries/complexes?format=json`\n", |
|
192 | 192 | "execution_count": 4, |
193 | 193 | "metadata": {}, |
194 | 194 | "outputs": [ |
195 | | - { |
196 | | - "name": "stderr", |
197 | | - "output_type": "stream", |
198 | | - "text": [ |
199 | | - "Downloading data from `https://omnipathdb.org/interactions?datasets=kinaseextra%2Cligrecextra%2Comnipath%2Cpathwayextra&fields=curation_effort%2Creferences%2Csources%2Ctype&format=tsv&license=commercial`\n", |
200 | | - "10.5MB [00:00, 37.9MB/s]\n", |
201 | | - "Downloading data from `https://omnipathdb.org/intercell?causality=trans&databases=CellChatDB&format=tsv&scope=generic`\n", |
202 | | - "124kB [00:00, 91.7MB/s]\n", |
203 | | - "Downloading data from `https://omnipathdb.org/intercell?causality=rec&databases=CellChatDB&format=tsv&scope=generic`\n", |
204 | | - "84.4kB [00:00, 108MB/s]\n" |
205 | | - ] |
206 | | - }, |
207 | 195 | { |
208 | 196 | "data": { |
209 | 197 | "text/html": [ |
|
463 | 451 | { |
464 | 452 | "cell_type": "markdown", |
465 | 453 | "metadata": {}, |
466 | | - "source": "## Homology Mapping\n\nSimilarly, LIANA+ provides on demand homology mapping beyond mouse symbols. It utilises the [HCOP database](https://www.genenames.org/help/hcop/) to obtain homologous genes across species. Files are downloaded from the HGNC Google Cloud Storage bucket.\n\nThe homology mapping is accessible through the `resource` module:" |
| 454 | + "source": [ |
| 455 | + "## Homology Mapping\n", |
| 456 | + "\n", |
| 457 | + "Similarly, LIANA+ provides on demand homology mapping beyond mouse symbols. It utilises the [HCOP database](https://www.genenames.org/help/hcop/) to obtain homologous genes across species. Files are downloaded from the HGNC Google Cloud Storage bucket.\n", |
| 458 | + "\n", |
| 459 | + "The homology mapping is accessible through the `resource` module:" |
| 460 | + ] |
467 | 461 | }, |
468 | 462 | { |
469 | 463 | "cell_type": "code", |
470 | | - "execution_count": null, |
| 464 | + "execution_count": 5, |
471 | 465 | "metadata": {}, |
472 | | - "outputs": [], |
473 | | - "source": "# let's say we are interested in zebrafish homologs of human genes\nmap_df = li.rs.get_hcop_orthologs(target_organism='zebrafish',\n columns=['human_symbol', 'zebrafish_symbol'],\n # NOTE: HCOP integrates multiple resource, so we can filter out mappings in at least 3 of them for confidence\n min_evidence=3\n )\n# rename the columns to source and target, respectively for the original organism and the target organism\nmap_df = map_df.rename(columns={'human_symbol':'source', 'zebrafish_symbol':'target'})\nmap_df.tail()" |
| 466 | + "outputs": [ |
| 467 | + { |
| 468 | + "name": "stderr", |
| 469 | + "output_type": "stream", |
| 470 | + "text": [ |
| 471 | + "/Users/b260-admin/Repos/liana-py/src/liana/resource/_orthology.py:217: DtypeWarning: Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.\n" |
| 472 | + ] |
| 473 | + }, |
| 474 | + { |
| 475 | + "data": { |
| 476 | + "text/html": [ |
| 477 | + "<div>\n", |
| 478 | + "<style scoped>\n", |
| 479 | + " .dataframe tbody tr th:only-of-type {\n", |
| 480 | + " vertical-align: middle;\n", |
| 481 | + " }\n", |
| 482 | + "\n", |
| 483 | + " .dataframe tbody tr th {\n", |
| 484 | + " vertical-align: top;\n", |
| 485 | + " }\n", |
| 486 | + "\n", |
| 487 | + " .dataframe thead th {\n", |
| 488 | + " text-align: right;\n", |
| 489 | + " }\n", |
| 490 | + "</style>\n", |
| 491 | + "<table border=\"1\" class=\"dataframe\">\n", |
| 492 | + " <thead>\n", |
| 493 | + " <tr style=\"text-align: right;\">\n", |
| 494 | + " <th></th>\n", |
| 495 | + " <th>source</th>\n", |
| 496 | + " <th>target</th>\n", |
| 497 | + " </tr>\n", |
| 498 | + " </thead>\n", |
| 499 | + " <tbody>\n", |
| 500 | + " <tr>\n", |
| 501 | + " <th>132672</th>\n", |
| 502 | + " <td>ZYG11B</td>\n", |
| 503 | + " <td>zyg11</td>\n", |
| 504 | + " </tr>\n", |
| 505 | + " <tr>\n", |
| 506 | + " <th>132673</th>\n", |
| 507 | + " <td>ZYG11B</td>\n", |
| 508 | + " <td>zyg11l</td>\n", |
| 509 | + " </tr>\n", |
| 510 | + " <tr>\n", |
| 511 | + " <th>132674</th>\n", |
| 512 | + " <td>ZYX</td>\n", |
| 513 | + " <td>zyx</td>\n", |
| 514 | + " </tr>\n", |
| 515 | + " <tr>\n", |
| 516 | + " <th>132676</th>\n", |
| 517 | + " <td>ZZEF1</td>\n", |
| 518 | + " <td>zzef1</td>\n", |
| 519 | + " </tr>\n", |
| 520 | + " <tr>\n", |
| 521 | + " <th>132677</th>\n", |
| 522 | + " <td>ZZZ3</td>\n", |
| 523 | + " <td>zzz3</td>\n", |
| 524 | + " </tr>\n", |
| 525 | + " </tbody>\n", |
| 526 | + "</table>\n", |
| 527 | + "</div>" |
| 528 | + ], |
| 529 | + "text/plain": [ |
| 530 | + " source target\n", |
| 531 | + "132672 ZYG11B zyg11\n", |
| 532 | + "132673 ZYG11B zyg11l\n", |
| 533 | + "132674 ZYX zyx\n", |
| 534 | + "132676 ZZEF1 zzef1\n", |
| 535 | + "132677 ZZZ3 zzz3" |
| 536 | + ] |
| 537 | + }, |
| 538 | + "execution_count": 5, |
| 539 | + "metadata": {}, |
| 540 | + "output_type": "execute_result" |
| 541 | + } |
| 542 | + ], |
| 543 | + "source": [ |
| 544 | + "# let's say we are interested in zebrafish homologs of human genes\n", |
| 545 | + "map_df = li.rs.get_hcop_orthologs(target_organism='zebrafish',\n", |
| 546 | + " columns=['human_symbol', 'zebrafish_symbol'],\n", |
| 547 | + " # NOTE: HCOP integrates multiple resource, so we can filter out mappings in at least 3 of them for confidence\n", |
| 548 | + " min_evidence=3\n", |
| 549 | + " )\n", |
| 550 | + "# rename the columns to source and target, respectively for the original organism and the target organism\n", |
| 551 | + "map_df = map_df.rename(columns={'human_symbol':'source', 'zebrafish_symbol':'target'})\n", |
| 552 | + "map_df.tail()" |
| 553 | + ] |
474 | 554 | }, |
475 | 555 | { |
476 | 556 | "cell_type": "markdown", |
|
504 | 584 | }, |
505 | 585 | { |
506 | 586 | "cell_type": "code", |
507 | | - "execution_count": null, |
| 587 | + "execution_count": 7, |
508 | 588 | "metadata": {}, |
509 | | - "outputs": [], |
510 | | - "source": "map_df = li.rs.get_hcop_orthologs(target_organism='mouse',\n columns=['human_symbol', 'mouse_symbol'],\n # NOTE: HCOP integrates multiple resource, so we can filter out mappings in at least 3 of them for confidence\n min_evidence=3\n )\n# rename the columns to source and target, respectively for the original organism and the target organism\nmap_df = map_df.rename(columns={'human_symbol':'source', 'mouse_symbol':'target'})\n\n# We will then translate\nmouse = li.rs.translate_resource(resource,\n map_df=map_df,\n columns=['ligand', 'receptor'],\n replace=True,\n # Here, we will be harsher and only keep mappings that don't map to more than 1 mouse gene\n one_to_many=1\n )\nmouse" |
| 589 | + "outputs": [ |
| 590 | + { |
| 591 | + "name": "stderr", |
| 592 | + "output_type": "stream", |
| 593 | + "text": [ |
| 594 | + "/Users/b260-admin/Repos/liana-py/src/liana/resource/_orthology.py:217: DtypeWarning: Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.\n" |
| 595 | + ] |
| 596 | + }, |
| 597 | + { |
| 598 | + "data": { |
| 599 | + "text/html": [ |
| 600 | + "<div>\n", |
| 601 | + "<style scoped>\n", |
| 602 | + " .dataframe tbody tr th:only-of-type {\n", |
| 603 | + " vertical-align: middle;\n", |
| 604 | + " }\n", |
| 605 | + "\n", |
| 606 | + " .dataframe tbody tr th {\n", |
| 607 | + " vertical-align: top;\n", |
| 608 | + " }\n", |
| 609 | + "\n", |
| 610 | + " .dataframe thead th {\n", |
| 611 | + " text-align: right;\n", |
| 612 | + " }\n", |
| 613 | + "</style>\n", |
| 614 | + "<table border=\"1\" class=\"dataframe\">\n", |
| 615 | + " <thead>\n", |
| 616 | + " <tr style=\"text-align: right;\">\n", |
| 617 | + " <th></th>\n", |
| 618 | + " <th>ligand</th>\n", |
| 619 | + " <th>receptor</th>\n", |
| 620 | + " </tr>\n", |
| 621 | + " </thead>\n", |
| 622 | + " <tbody>\n", |
| 623 | + " <tr>\n", |
| 624 | + " <th>0</th>\n", |
| 625 | + " <td>Lgals9</td>\n", |
| 626 | + " <td>Ptprc</td>\n", |
| 627 | + " </tr>\n", |
| 628 | + " <tr>\n", |
| 629 | + " <th>1</th>\n", |
| 630 | + " <td>Lgals9</td>\n", |
| 631 | + " <td>Met</td>\n", |
| 632 | + " </tr>\n", |
| 633 | + " <tr>\n", |
| 634 | + " <th>2</th>\n", |
| 635 | + " <td>Lgals9</td>\n", |
| 636 | + " <td>Cd44</td>\n", |
| 637 | + " </tr>\n", |
| 638 | + " <tr>\n", |
| 639 | + " <th>3</th>\n", |
| 640 | + " <td>Lgals9</td>\n", |
| 641 | + " <td>Lrp1</td>\n", |
| 642 | + " </tr>\n", |
| 643 | + " <tr>\n", |
| 644 | + " <th>4</th>\n", |
| 645 | + " <td>Lgals9</td>\n", |
| 646 | + " <td>Cd47</td>\n", |
| 647 | + " </tr>\n", |
| 648 | + " <tr>\n", |
| 649 | + " <th>...</th>\n", |
| 650 | + " <td>...</td>\n", |
| 651 | + " <td>...</td>\n", |
| 652 | + " </tr>\n", |
| 653 | + " <tr>\n", |
| 654 | + " <th>4619</th>\n", |
| 655 | + " <td>Bmp2</td>\n", |
| 656 | + " <td>Actr2</td>\n", |
| 657 | + " </tr>\n", |
| 658 | + " <tr>\n", |
| 659 | + " <th>4620</th>\n", |
| 660 | + " <td>Bmp15</td>\n", |
| 661 | + " <td>Actr2</td>\n", |
| 662 | + " </tr>\n", |
| 663 | + " <tr>\n", |
| 664 | + " <th>4621</th>\n", |
| 665 | + " <td>Csf1</td>\n", |
| 666 | + " <td>Csf3r</td>\n", |
| 667 | + " </tr>\n", |
| 668 | + " <tr>\n", |
| 669 | + " <th>4622</th>\n", |
| 670 | + " <td>Il36g</td>\n", |
| 671 | + " <td>Ifnar1</td>\n", |
| 672 | + " </tr>\n", |
| 673 | + " <tr>\n", |
| 674 | + " <th>4623</th>\n", |
| 675 | + " <td>Il36g</td>\n", |
| 676 | + " <td>Ifnar2</td>\n", |
| 677 | + " </tr>\n", |
| 678 | + " </tbody>\n", |
| 679 | + "</table>\n", |
| 680 | + "<p>4055 rows × 2 columns</p>\n", |
| 681 | + "</div>" |
| 682 | + ], |
| 683 | + "text/plain": [ |
| 684 | + " ligand receptor\n", |
| 685 | + "0 Lgals9 Ptprc\n", |
| 686 | + "1 Lgals9 Met\n", |
| 687 | + "2 Lgals9 Cd44\n", |
| 688 | + "3 Lgals9 Lrp1\n", |
| 689 | + "4 Lgals9 Cd47\n", |
| 690 | + "... ... ...\n", |
| 691 | + "4619 Bmp2 Actr2\n", |
| 692 | + "4620 Bmp15 Actr2\n", |
| 693 | + "4621 Csf1 Csf3r\n", |
| 694 | + "4622 Il36g Ifnar1\n", |
| 695 | + "4623 Il36g Ifnar2\n", |
| 696 | + "\n", |
| 697 | + "[4055 rows x 2 columns]" |
| 698 | + ] |
| 699 | + }, |
| 700 | + "execution_count": 7, |
| 701 | + "metadata": {}, |
| 702 | + "output_type": "execute_result" |
| 703 | + } |
| 704 | + ], |
| 705 | + "source": [ |
| 706 | + "map_df = li.rs.get_hcop_orthologs(target_organism='mouse',\n", |
| 707 | + " columns=['human_symbol', 'mouse_symbol'],\n", |
| 708 | + " # NOTE: HCOP integrates multiple resource, so we can filter out mappings in at least 3 of them for confidence\n", |
| 709 | + " min_evidence=3\n", |
| 710 | + " )\n", |
| 711 | + "# rename the columns to source and target, respectively for the original organism and the target organism\n", |
| 712 | + "map_df = map_df.rename(columns={'human_symbol':'source', 'mouse_symbol':'target'})\n", |
| 713 | + "\n", |
| 714 | + "# We will then translate\n", |
| 715 | + "mouse = li.rs.translate_resource(resource,\n", |
| 716 | + " map_df=map_df,\n", |
| 717 | + " columns=['ligand', 'receptor'],\n", |
| 718 | + " replace=True,\n", |
| 719 | + " # Here, we will be harsher and only keep mappings that don't map to more than 1 mouse gene\n", |
| 720 | + " one_to_many=1\n", |
| 721 | + " )\n", |
| 722 | + "mouse" |
| 723 | + ] |
511 | 724 | }, |
512 | 725 | { |
513 | 726 | "cell_type": "markdown", |
|
761 | 974 | "name": "stderr", |
762 | 975 | "output_type": "stream", |
763 | 976 | "text": [ |
764 | | - "Downloading annotations for all proteins from the following resources: `['DisGeNet']`\n", |
765 | | - "Downloading data from `https://omnipathdb.org/annotations?format=tsv&resources=DisGeNet`\n", |
766 | | - "38.3MB [00:00, 67.5MB/s]\n" |
| 977 | + "Downloading annotations for all proteins from the following resources: `['DisGeNet']`\n" |
767 | 978 | ] |
768 | 979 | } |
769 | 980 | ], |
|
782 | 993 | "name": "stderr", |
783 | 994 | "output_type": "stream", |
784 | 995 | "text": [ |
785 | | - "/var/folders/gk/kmrvz5m90sb9wftqk2n94p0h0000gq/T/ipykernel_79557/3054253547.py:2: FutureWarning: The default value of observed=False is deprecated and will change to observed=True in a future version of pandas. Specify observed=False to silence this warning and retain the current behavior\n" |
| 996 | + "/var/folders/gk/kmrvz5m90sb9wftqk2n94p0h0000gq/T/ipykernel_26684/3054253547.py:2: FutureWarning: The default value of observed=False is deprecated and will change to observed=True in a future version of pandas. Specify observed=False to silence this warning and retain the current behavior\n" |
786 | 997 | ] |
787 | 998 | }, |
788 | 999 | { |
|
992 | 1203 | "execution_count": 14, |
993 | 1204 | "metadata": {}, |
994 | 1205 | "outputs": [ |
995 | | - { |
996 | | - "name": "stderr", |
997 | | - "output_type": "stream", |
998 | | - "text": [ |
999 | | - "Downloading data from `https://omnipathdb.org/interactions?datasets=omnipath&fields=curation_effort%2Creferences%2Csources&format=tsv&genesymbols=1`\n", |
1000 | | - "10.0MB [00:00, 32.3MB/s]\n" |
1001 | | - ] |
1002 | | - }, |
1003 | 1206 | { |
1004 | 1207 | "data": { |
1005 | 1208 | "text/html": [ |
|
1594 | 1797 | ], |
1595 | 1798 | "metadata": { |
1596 | 1799 | "kernelspec": { |
1597 | | - "display_name": "spiana", |
| 1800 | + "display_name": "liana313", |
1598 | 1801 | "language": "python", |
1599 | 1802 | "name": "python3" |
1600 | 1803 | }, |
|
1608 | 1811 | "name": "python", |
1609 | 1812 | "nbconvert_exporter": "python", |
1610 | 1813 | "pygments_lexer": "ipython3", |
1611 | | - "version": "3.10.10" |
| 1814 | + "version": "3.13.7" |
1612 | 1815 | } |
1613 | 1816 | }, |
1614 | 1817 | "nbformat": 4, |
|
0 commit comments