@@ -385,46 +385,39 @@ class MetricFileHeader:
385
385
fieldnames : list [str ]
386
386
387
387
388
- def get_header (
389
- reader : io .ReadableFileHandle ,
390
- file_format : MetricFileFormat ,
391
- ) -> Optional [MetricFileHeader ]:
392
- """
393
- Read the header from an open file.
394
-
395
- The first row after any commented or empty lines will be used as the fieldnames.
388
def asdict(metric: Metric) -> dict[str, Any]:
    """
    Convert a Metric instance to a dictionary.

    Args:
        metric: An instance of a dataclass- or attrs-decorated Metric.

    Returns:
        The result of `dataclasses.asdict()` for dataclass metrics, or of `attr.asdict()` for
        attrs metrics.

    Raises:
        TypeError: If the class of `metric` is not a Metric.
    """
    assert_is_metric(type(metric))

    if dataclasses.is_dataclass(metric):
        return dataclasses.asdict(metric)
    elif attr.has(type(metric)):
        # `attr.has()` is documented as taking a class, so hand it the instance's type.
        return attr.asdict(metric)
    else:
        # Raised explicitly instead of `assert False`: assert statements are removed when Python
        # runs with `-O`, which would let this branch fall through and silently return None.
        raise AssertionError("Unreachable")
398
398
399
- NB: This function returns `Optional` instead of raising an error because the name of the
400
- source file is not in scope, making it difficult to provide a helpful error message. It is
401
- the responsibility of the caller to raise an error if the file is empty.
402
399
403
- See original proof-of-concept here: https://github.com/fulcrumgenomics/fgpyo/pull/103
400
def get_fieldnames(metric_class: type[Metric]) -> list[str]:
    """
    Get the fieldnames of the specified metric class.

    Args:
        metric_class: A Metric class.

    Returns:
        A list of fieldnames.

    Raises:
        TypeError: If the given class is not a Metric.
    """
    assert_is_metric(metric_class)

    if dataclasses.is_dataclass(metric_class):
        return [f.name for f in dataclasses.fields(metric_class)]
    elif attr.has(metric_class):
        return [f.name for f in attr.fields(metric_class)]
    else:
        # Raised explicitly instead of `assert False`: assert statements are removed when Python
        # runs with `-O`, which would let this branch fall through and silently return None.
        raise AssertionError("Unreachable")
428
421
429
422
430
423
class MetricWriter :
@@ -546,53 +539,96 @@ def writeall(self, metrics: Iterable[Metric]) -> None:
546
539
self .write (metric )
547
540
548
541
549
- def assert_is_metric (cls : type [Metric ]) -> None :
542
def _get_header(
    reader: io.ReadableFileHandle,
    file_format: MetricFileFormat,
) -> Optional[MetricFileHeader]:
    """
    Read the header from an open file.

    Leading lines that start with the format's comment prefix, or that are blank, are collected
    (stripped) as the preface. The first line that is neither is stripped, split on the format's
    delimiter, and used as the fieldnames.

    NB: This function returns `Optional` instead of raising an error because the name of the
    source file is not in scope, making it difficult to provide a helpful error message. It is
    the responsibility of the caller to raise an error if the file is empty.

    See original proof-of-concept here: https://github.com/fulcrumgenomics/fgpyo/pull/103

    Args:
        reader: An open, readable file handle.
        file_format: A dataclass containing (at minimum) the file's delimiter and the string
            prefixing any comment lines.

    Returns:
        A `MetricFileHeader` containing the field names and any preceding lines.
        None if the file was empty or contained only comments or empty lines.
    """
    preface: list[str] = []
    header_line: Optional[str] = None

    for raw_line in reader:
        stripped = raw_line.strip()
        if raw_line.startswith(file_format.comment) or stripped == "":
            preface.append(stripped)
            continue
        # First line that is neither a comment nor blank: this is the header row.
        header_line = stripped
        break

    if header_line is None:
        # The reader was exhausted without finding a header row.
        return None

    return MetricFileHeader(
        preface=preface,
        fieldnames=header_line.split(file_format.delimiter),
    )
582
+
583
+
584
def _validate_output_fieldnames(
    metric_class: type[MetricType],
    include_fields: list[str] | None = None,
    exclude_fields: list[str] | None = None,
) -> list[str]:
    """
    Subset and/or re-order the metric class's fieldnames using the include/exclude lists.

    * Only one of `include_fields` and `exclude_fields` may be specified.
    * If `include_fields` is specified, its names are checked against `metric_class` and the list
      is returned as given, so fields come back in the order they appear in the list.
    * If `exclude_fields` is specified, its names are checked against `metric_class` and the
      class's fieldnames are returned with those names removed. (The check is technically
      unnecessary, but is a safeguard against passing an incorrect list.)
    * If neither `include_fields` nor `exclude_fields` is specified, the fieldnames of
      `metric_class` are returned.

    Args:
        metric_class: The Metric class whose fieldnames are being selected.
        include_fields: The fieldnames to return, in the desired order.
        exclude_fields: The fieldnames to drop.

    Returns:
        The list of output fieldnames.

    Raises:
        ValueError: If both `include_fields` and `exclude_fields` are specified.
    """
    if include_fields is not None and exclude_fields is not None:
        raise ValueError(
            "Only one of `include_fields` and `exclude_fields` may be specified, not both."
        )

    if exclude_fields is not None:
        assert_fieldnames_are_metric_attributes(exclude_fields, metric_class)
        return [name for name in get_fieldnames(metric_class) if name not in exclude_fields]

    if include_fields is not None:
        assert_fieldnames_are_metric_attributes(include_fields, metric_class)
        return include_fields

    return get_fieldnames(metric_class)
574
618
575
- def get_fieldnames (metric_class : type [Metric ]) -> list [str ]:
619
+
620
def assert_is_metric(cls: type[Metric]) -> None:
    """
    Check that the given class is a Metric, raising if it is not.

    Args:
        cls: A class object.

    Raises:
        TypeError: If `is_metric()` reports that the given class is not a Metric.
    """
    if is_metric(cls):
        return

    raise TypeError(f"Not a dataclass or attr decorated Metric: {cls}")
596
632
597
633
598
634
def assert_file_header_matches_metric (
@@ -604,7 +640,7 @@ def assert_file_header_matches_metric(
604
640
Check that the specified file has a header and its fields match those of the provided Metric.
605
641
"""
606
642
with path .open ("r" ) as fin :
607
- header : MetricFileHeader = get_header (fin , file_format = file_format )
643
+ header : MetricFileHeader = _get_header (fin , file_format = file_format )
608
644
609
645
if header is None :
610
646
raise ValueError (f"Could not find a header in the provided file: { path } " )
@@ -637,39 +673,3 @@ def assert_fieldnames_are_metric_attributes(
637
673
+ f"{ metric_class .__name__ } : "
638
674
+ ", " .join (invalid_fieldnames )
639
675
)
640
-
641
-
642
- def _validate_output_fieldnames (
643
- metric_class : type [MetricType ],
644
- include_fields : list [str ] | None = None ,
645
- exclude_fields : list [str ] | None = None ,
646
- ) -> list [str ]:
647
- """
648
- Subset and/or re-order the dataclass's fieldnames based on the specified include/exclude lists.
649
-
650
- * Only one of `include_fields` and `exclude_fields` may be specified.
651
- * All fieldnames specified in `include_fields` must be fields on `dataclass_type`. If this
652
- argument is specified, fields will be returned in the order they appear in the list.
653
- * All fieldnames specified in `exclude_fields` must be fields on `dataclass_type`. (This is
654
- technically unnecessary, but is a safeguard against passing an incorrect list.)
655
- * If neither `include_fields` or `exclude_fields` are specified, return the `dataclass_type`'s
656
- fieldnames.
657
-
658
- Raises:
659
- ValueError: If both `include_fields` and `exclude_fields` are specified.
660
- """
661
-
662
- if include_fields is not None and exclude_fields is not None :
663
- raise ValueError (
664
- "Only one of `include_fields` and `exclude_fields` may be specified, not both."
665
- )
666
- elif exclude_fields is not None :
667
- assert_fieldnames_are_metric_attributes (exclude_fields , metric_class )
668
- output_fieldnames = [f for f in get_fieldnames (metric_class ) if f not in exclude_fields ]
669
- elif include_fields is not None :
670
- assert_fieldnames_are_metric_attributes (include_fields , metric_class )
671
- output_fieldnames = include_fields
672
- else :
673
- output_fieldnames = get_fieldnames (metric_class )
674
-
675
- return output_fieldnames
0 commit comments