2222from .io_utils import (
2323 CONFIDENCE_PRECISION ,
2424 get_confidence_str ,
25- get_name_by_curie ,
25+ get_name_by_reference ,
2626 safe_open ,
2727 safe_open_writer ,
2828)
@@ -595,23 +595,13 @@ def get_sssom_df(
595595 """
596596 fallback_mapping_set_id = _get_fallback_mapping_set_id ()
597597 rows = [
598- _get_sssom_row (mapping , evidence , fallback_mapping_set_id )
598+ _get_sssom_row (mapping , evidence , fallback_mapping_set_id , add_labels = add_labels )
599599 for mapping in tqdm (
600600 mappings , desc = "Preparing SSSOM" , leave = False , unit = "mapping" , unit_scale = True
601601 )
602602 for evidence in mapping .evidence
603603 ]
604604 df = pd .DataFrame (rows , columns = SSSOM_DEFAULT_COLUMNS )
605- if add_labels :
606- with logging_redirect_tqdm ():
607- for label_column , id_column in [
608- ("subject_label" , "subject_id" ),
609- ("object_label" , "object_id" ),
610- ]:
611- df [label_column ] = [
612- name or get_name_by_curie (curie )
613- for curie , name in df [[id_column , label_column ]].values
614- ]
615605
616606 if prune :
617607 # remove empty columns
@@ -626,7 +616,9 @@ def _format_confidence(confidence: float) -> str:
626616 return str (round (confidence , CONFIDENCE_PRECISION ))
627617
628618
629- def _get_sssom_row (mapping : Mapping , e : Evidence , fallback_mapping_set_id : str ) -> SSSOMRow :
619+ def _get_sssom_row (
620+ mapping : Mapping , e : Evidence , fallback_mapping_set_id : str , * , add_labels : bool = False
621+ ) -> SSSOMRow :
630622 if isinstance (e , SimpleEvidence ):
631623 if e .mapping_set .purl :
632624 mapping_set_id = e .mapping_set .purl
@@ -648,12 +640,20 @@ def _get_sssom_row(mapping: Mapping, e: Evidence, fallback_mapping_set_id: str)
648640 else :
649641 raise TypeError
650642
643+ if add_labels :
644+ with logging_redirect_tqdm ():
645+ subject_label = mapping .subject .name or get_name_by_reference (mapping .subject ) or ""
646+ object_label = mapping .object .name or get_name_by_reference (mapping .object ) or ""
647+ else :
648+ subject_label = mapping .subject .name or ""
649+ object_label = mapping .object .name or ""
650+
651651 return SSSOMRow (
652652 subject_id = mapping .subject .curie ,
653- subject_label = mapping . subject . name or "" ,
653+ subject_label = subject_label ,
654654 predicate_id = mapping .predicate .curie ,
655655 object_id = mapping .object .curie ,
656- object_label = mapping . object . name or "" ,
656+ object_label = object_label ,
657657 mapping_justification = e .justification .curie ,
658658 mapping_set_id = mapping_set_id ,
659659 mapping_set_title = mapping_set_title ,
@@ -700,13 +700,13 @@ def write_sssom(
700700 stream : bool = False ,
701701) -> None | Generator [Mapping ]:
702702 """Export mappings as an SSSOM file (could be lossy)."""
703- if not add_labels and not prune :
703+ if not prune :
704704 if stream :
705- return _write_sssom_stream (mappings , file , stream = stream )
705+ return _write_sssom_stream (mappings , file , stream = stream , add_labels = add_labels )
706706 else :
707- return _write_sssom_stream (mappings , file , stream = stream )
707+ return _write_sssom_stream (mappings , file , stream = stream , add_labels = add_labels )
708708 elif stream :
709- raise ValueError
709+ raise ValueError ( "can not prune and stream at the same time" )
710710 else :
711711 df = get_sssom_df (mappings , add_labels = add_labels )
712712 df .to_csv (file , sep = "\t " , index = False )
@@ -716,41 +716,57 @@ def write_sssom(
# docstr-coverage:excused `overload`
@overload
def _write_sssom_stream(
    mappings: Iterable[Mapping],
    file: str | Path | TextIO,
    *,
    stream: Literal[False] = False,
    add_labels: bool = ...,
) -> None: ...


# docstr-coverage:excused `overload`
@overload
def _write_sssom_stream(
    mappings: Iterable[Mapping],
    file: str | Path | TextIO,
    *,
    stream: Literal[True] = True,
    add_labels: bool = ...,
) -> Generator[Mapping]: ...


def _write_sssom_stream(
    mappings: Iterable[Mapping],
    file: str | Path | TextIO,
    *,
    stream: bool = False,
    add_labels: bool = False,
) -> Generator[Mapping] | None:
    """Write mappings to an SSSOM file row by row, optionally as a generator.

    :param mappings: The mappings to export; wrapped in a progress bar.
    :param file: Where to write, handed to :func:`_stream_write_sssom`.
    :param stream: If true, return the lazy generator from
        :func:`_stream_write_sssom` so the caller drives the writing.
        If false, exhaust that generator here and return ``None``.
    :param add_labels: Forwarded to :func:`_stream_write_sssom`.
    :returns: A generator of mappings when ``stream`` is true, else ``None``.
    """
    fallback_mapping_set_id = _get_fallback_mapping_set_id()
    progress = tqdm(mappings, desc="Writing SSSOM", leave=False, unit="mapping", unit_scale=True)
    written = _stream_write_sssom(
        file, progress, fallback_mapping_set_id, add_labels=add_labels
    )
    if stream:
        # nothing has been written yet; iterating the generator does the work
        return written

    # eager mode: drain the generator so every row gets written now
    for _ in written:
        pass
    return None
744753
745754
def _stream_write_sssom(
    path: str | Path | TextIO,
    mappings: Iterable[Mapping],
    fallback_mapping_set_id: str,
    add_labels: bool = False,
) -> Generator[Mapping]:
    """Write SSSOM rows for the mappings, yielding each mapping after its rows are written.

    :param path: Destination passed to :func:`safe_open_writer`.
    :param mappings: The mappings to export.
    :param fallback_mapping_set_id: Forwarded to :func:`_get_sssom_row`.
    :param add_labels: Forwarded to :func:`_get_sssom_row`.
    :yields: Each mapping, once one row per evidence has been written for it.
    """
    with safe_open_writer(path) as sssom_writer:
        # header row goes first
        sssom_writer.writerow(SSSOM_DEFAULT_COLUMNS)
        for mapping in mappings:
            # one output row per piece of evidence attached to the mapping
            rows = (
                _get_sssom_row(mapping, evidence, fallback_mapping_set_id, add_labels=add_labels)
                for evidence in mapping.evidence
            )
            for row in rows:
                sssom_writer.writerow(row)
            yield mapping
755771
756772
0 commit comments