Clean up unused commented-out code

jstjohn · jstjohn · commit 4709e95556e1 · 2026-03-11T22:46:46.000Z
Signed-off-by: John St. John <jstjohn@nvidia.com>
diff --git a/bionemo-recipes/recipes/evo2_megatron/src/bionemo/evo2/models/evo2_provider.py b/bionemo-recipes/recipes/evo2_megatron/src/bionemo/evo2/models/evo2_provider.py
@@ -764,334 +764,6 @@ class HyenaNV1b2ModelProvider(HyenaNV1bModelProvider):
     # glu_linear_offset: float = 1.0
 
 
-# FIXME use the following as a starting point for the new megatron bridge style model importer/exporter.
-# @io.model_importer(HyenaModel, "pytorch")
-# class PyTorchHyenaImporter(io.ModelConnector["HyenaModel", HyenaModel]):
-#     """Importer class for converting PyTorch Hyena models to NeMo format."""
-
-#     def __new__(cls, path: str, model_config=None):
-#         """Creates a new importer instance.
-
-#         Args:
-#             path: Path to the PyTorch model
-#             model_config: Optional model configuration
-
-#         Returns:
-#             PyTorchHyenaImporter instance
-#         """
-#         instance = super().__new__(cls, path)
-#         instance.model_config = model_config
-#         return instance
-
-#     def init(self) -> HyenaModel:
-#         """Initializes a new HyenaModel instance.
-
-#         Returns:
-#             HyenaModel: Initialized model
-#         """
-#         return HyenaModel(self.config, tokenizer=self.tokenizer)
-
-#     def get_source_model(self):
-#         """Returns the source model."""
-#         return torch.load(str(self), map_location="cpu")
-
-#     def apply(self, output_path: Path, checkpoint_format: str = "torch_dist") -> Path:
-#         """Applies the model conversion from PyTorch to NeMo format.
-
-#         Args:
-#             output_path: Path to save the converted model
-#             checkpoint_format: Format for saving checkpoints
-
-#         Returns:
-#             Path: Path to the saved NeMo model
-#         """
-#         source = self.get_source_model()
-
-#         if "model" in source:
-#             source = source["model"]
-
-#         class ModelState:
-#             """Wrapper around the source model state dictionary that also handles some weight transformations."""
-
-#             def __init__(self, state_dict, num_layers, fp32_suffixes):
-#                 """Wrapper around the source model state dictionary that also handles some weight transformations.
-
-#                 Args:
-#                     state_dict: original state dictionary from the source model
-#                     num_layers: number of layers in the source model
-#                     fp32_suffixes: suffixes of the weights that should be converted to float32
-#                 """
-#                 self.num_layers = num_layers
-#                 state_dict = self.transform_source_dict(state_dict)
-#                 self._state_dict = state_dict
-#                 self.fp32_suffixes = fp32_suffixes
-
-#             def state_dict(self):
-#                 """Return the state dictionary."""
-#                 return self._state_dict
-
-#             def to(self, dtype):
-#                 """Convert the state dictionary to the target dtype."""
-#                 for k, v in self._state_dict.items():
-#                     if "_extra" not in k:
-#                         if v.dtype != dtype:
-#                             logging.warning(f"Converting {k} from {v.dtype} (source model) to {dtype} (target model)")
-#                         k_suffix = k.split(".")[-1]
-#                         if k_suffix in self.fp32_suffixes:
-#                             _dtype = torch.float32
-#                         else:
-#                             _dtype = dtype
-#                         self._state_dict[k] = v.to(_dtype)
-
-#             def adjust_medium_filter(self, updated_data):
-#                 """Adjust the medium filter."""
-#                 from nemo.collections.llm.gpt.model.megatron.hyena.hyena_config import HyenaConfig
-
-#                 for k, v in updated_data.items():
-#                     if "filter.h" in k or "filter.decay" in k:
-#                         updated_data[k] = v[:, : HyenaConfig().hyena_medium_conv_len]
-#                 return updated_data
-
-#             def transform_source_dict(self, source):
-#                 """Transform the source state dictionary.
-
-#                 This function works by applying some challenging layer name re-mappings and
-#                 removing extra keys, as well as truncating a filter that didn't need to extend to the full
-#                 sequence length dim.
-#                 """
-#                 import re
-
-#                 layer_map = {i + 2: i for i in range(self.num_layers)}
-#                 layer_map[self.num_layers + 3] = self.num_layers
-#                 updated_data = {}
-
-#                 for key in list(source["module"].keys()):
-#                     if "_extra" in key:
-#                         source["module"].pop(key)
-#                     else:
-#                         match = re.search(r"sequential\.(\d+)", key)
-#                         if match:
-#                             original_layer_num = int(match.group(1))
-#                             if original_layer_num in layer_map:
-#                                 # Create the updated key by replacing the layer number
-#                                 new_key = re.sub(rf"\b{original_layer_num}\b", str(layer_map[original_layer_num]), key)
-#                                 updated_data[new_key] = source["module"][key]
-#                             else:
-#                                 # Keep the key unchanged if no mapping exists
-#                                 updated_data[key] = source["module"][key]
-#                         else:
-#                             updated_data[key] = source["module"][key]
-#                 updated_data = self.adjust_medium_filter(updated_data)
-#                 return updated_data
-
-#         target = self.init()
-#         trainer = self.nemo_setup(target, ckpt_async_save=False, save_ckpt_format=checkpoint_format)
-#         target.to(self.config.params_dtype)
-#         fp32_suffixes = {n.split(".")[-1] for n, p in target.named_parameters() if p.dtype == torch.float32}
-#         source = ModelState(source, self.config.num_layers, fp32_suffixes)
-#         source.to(self.config.params_dtype)
-#         self.convert_state(source, target)
-#         self.nemo_save(output_path, trainer)
-
-#         logging.info(f"Converted Hyena model to Nemo, model saved to {output_path}")
-
-#         teardown(trainer, target)
-#         del trainer, target
-
-#         return output_path
-
-#     def convert_state(self, source, target):
-#         """Converts the state dictionary from source format to target format.
-
-#         Args:
-#             source: Source model state
-#             target: Target model
-
-#         Returns:
-#             Result of applying state transforms
-#         """
-#         mapping = {}
-#         mapping["sequential.0.word_embeddings.weight"] = "embedding.word_embeddings.weight"
-#         mapping[f"sequential.{len(self.config.hybrid_override_pattern)}.norm.weight"] = "decoder.final_norm.weight"
-#         te_enabled = self.config.use_te
-#         for i, symbol in enumerate(self.config.hybrid_override_pattern):
-#             if te_enabled:
-#                 mapping[f"sequential.{i}.pre_mlp_layernorm.weight"] = (
-#                     f"decoder.layers.{i}.mlp.linear_fc1.layer_norm_weight"
-#                 )
-#             else:
-#                 mapping[f"sequential.{i}.pre_mlp_layernorm.weight"] = f"decoder.layers.{i}.pre_mlp_layernorm.weight"
-#             mapping[f"sequential.{i}.mlp.w3.weight"] = f"decoder.layers.{i}.mlp.linear_fc2.weight"
-
-#             if symbol != "*":
-#                 if te_enabled:
-#                     mapping[f"sequential.{i}.input_layernorm.weight"] = (
-#                         f"decoder.layers.{i}.mixer.dense_projection.layer_norm_weight"
-#                     )
-#                 else:
-#                     mapping[f"sequential.{i}.input_layernorm.weight"] = f"decoder.layers.{i}.norm.weight"
-
-#                 mapping[f"sequential.{i}.mixer.dense_projection.weight"] = (
-#                     f"decoder.layers.{i}.mixer.dense_projection.weight"
-#                 )
-#                 mapping[f"sequential.{i}.mixer.hyena_proj_conv.short_conv_weight"] = (
-#                     f"decoder.layers.{i}.mixer.hyena_proj_conv.short_conv_weight"
-#                 )
-#                 mapping[f"sequential.{i}.mixer.dense.weight"] = f"decoder.layers.{i}.mixer.dense.weight"
-#                 mapping[f"sequential.{i}.mixer.dense.bias"] = f"decoder.layers.{i}.mixer.dense.bias"
-
-#                 if symbol == "S":
-#                     mapping[f"sequential.{i}.mixer.mixer.short_conv.short_conv_weight"] = (
-#                         f"decoder.layers.{i}.mixer.mixer.short_conv.short_conv_weight"
-#                     )
-
-#                 elif symbol == "D":
-#                     mapping[f"sequential.{i}.mixer.mixer.conv_bias"] = f"decoder.layers.{i}.mixer.mixer.conv_bias"
-#                     mapping[f"sequential.{i}.mixer.mixer.filter.h"] = f"decoder.layers.{i}.mixer.mixer.filter.h"
-#                     mapping[f"sequential.{i}.mixer.mixer.filter.decay"] = (
-#                         f"decoder.layers.{i}.mixer.mixer.filter.decay"
-#                     )
-
-#                 elif symbol == "H":
-#                     mapping[f"sequential.{i}.mixer.mixer.conv_bias"] = f"decoder.layers.{i}.mixer.mixer.conv_bias"
-#                     mapping[f"sequential.{i}.mixer.mixer.filter.gamma"] = (
-#                         f"decoder.layers.{i}.mixer.mixer.filter.gamma"
-#                     )
-#                     mapping[f"sequential.{i}.mixer.mixer.filter.R"] = f"decoder.layers.{i}.mixer.mixer.filter.R"
-#                     mapping[f"sequential.{i}.mixer.mixer.filter.p"] = f"decoder.layers.{i}.mixer.mixer.filter.p"
-
-#             elif symbol == "*":
-#                 if te_enabled:
-#                     mapping[f"sequential.{i}.input_layernorm.weight"] = (
-#                         f"decoder.layers.{i}.self_attention.linear_qkv.layer_norm_weight"
-#                     )
-#                 else:
-#                     mapping[f"sequential.{i}.input_layernorm.weight"] = f"decoder.layers.{i}.input_layernorm.weight"
-
-#                 mapping[f"sequential.{i}.mixer.dense_projection.weight"] = (
-#                     f"decoder.layers.{i}.self_attention.linear_qkv.weight"
-#                 )
-#                 mapping[f"sequential.{i}.mixer.dense.weight"] = f"decoder.layers.{i}.self_attention.linear_proj.weight"
-#                 mapping[f"sequential.{i}.mixer.dense.bias"] = f"decoder.layers.{i}.self_attention.linear_proj.bias"
-#             else:
-#                 raise ValueError(f"Unknown symbol: {symbol}")
-
-#         return io.apply_transforms(
-#             source,
-#             target,
-#             mapping=mapping,
-#             transforms=[
-#                 # Transforms that are more complicated than a simple mapping of an old key name to a new one:
-#                 io.state_transform(
-#                     source_key=("sequential.*.mlp.w1.weight", "sequential.*.mlp.w2.weight"),
-#                     target_key="decoder.layers.*.mlp.linear_fc1.weight",
-#                     fn=TransformFns.merge_fc1,
-#                 )
-#             ],
-#         )
-
-#     @property
-#     def tokenizer(self):
-#         """Gets the tokenizer for the model.
-
-#         Returns:
-#             Tokenizer instance
-#         """
-#         from nemo.collections.nlp.modules.common.tokenizer_utils import get_nmt_tokenizer
-
-#         tokenizer = get_nmt_tokenizer(
-#             library=self.model_config.tokenizer_library,
-#         )
-
-#         return tokenizer
-
-#     @property
-#     def config(self) -> HyenaConfig:
-#         """Gets the model configuration.
-
-#         Returns:
-#             HyenaConfig: Model configuration
-#         """
-#         return self.model_config
-
-
-# @io.model_importer(HyenaModel, "hf")
-# class HuggingFaceSavannaHyenaImporter(PyTorchHyenaImporter):
-#     """Importer class for converting HuggingFace Savanna Hyena models to NeMo format.
-
-#     See: https://huggingface.co/arcinstitute/savanna_evo2_7b for an example of a savanna model that this can
-#     import and convert to NeMo format. Any of the Arc models that start with "savanna_" should work.
-#     """
-
-#     def get_source_model(self):
-#         """Returns the source model."""
-#         import huggingface_hub.errors
-#         from huggingface_hub import hf_hub_download
-
-#         if os.path.exists(str(self)):
-#             logging.info(f"Loading model from local path {self!s}")
-#             return torch.load(str(self), map_location="cpu", weights_only=False)
-#         else:
-#             if ":" in str(self):
-#                 repo_id, revision = str(self).split(":")
-#             else:
-#                 repo_id = str(self)
-#                 revision = None
-#             # See HF download logic here:
-#             #   https://github.com/ArcInstitute/evo2/blob/96ac9d9cd/evo2/models.py#L191-L231
-#             modelname = repo_id.split("/")[-1]
-#             download_dir = str(NEMO_MODELS_CACHE / repo_id)
-#             weights_filename = f"{modelname}.pt"
-#             try:
-#                 weights_path = hf_hub_download(
-#                     repo_id=repo_id, local_dir=download_dir, revision=revision, filename=weights_filename
-#                 )
-#             except Exception:
-#                 # Try downloading multi-part
-#                 # If file is split, download and join parts
-#                 logging.warning(f"Single path download failed, try loading checkpoint shards for {modelname}")
-#                 # If file is split, get the first part's directory to use the same cache location
-#                 weights_path = os.path.join(download_dir, weights_filename)
-#                 if os.path.exists(weights_path):
-#                     logging.info(f"Found {weights_path}")
-#                 else:
-#                     # Download and join parts
-#                     parts = []
-#                     part_num = 0
-#                     while True:
-#                         try:
-#                             part_path = hf_hub_download(
-#                                 repo_id=repo_id,
-#                                 local_dir=download_dir,
-#                                 revision=revision,
-#                                 filename=f"{weights_filename}.part{part_num}",
-#                             )
-#                             parts.append(part_path)
-#                             part_num += 1
-#                         except huggingface_hub.errors.EntryNotFoundError:
-#                             break
-
-#                     # Join in the same directory
-#                     with open(weights_path, "wb") as outfile:
-#                         for part in parts:
-#                             with open(part, "rb") as infile:
-#                                 while True:
-#                                     chunk = infile.read(8192 * 1024)
-#                                     if not chunk:
-#                                         break
-#                                     outfile.write(chunk)
-
-#                     # Cleaning up the parts
-#                     for part in parts:
-#                         try:
-#                             os.remove(part)
-#                         except OSError as e:
-#                             print(f"Error removing {part}: {e}")
-#                         print("Cleaned up shards, final checkpoint saved to", weights_path)
-
-#         return torch.load(weights_path, map_location="cpu", weights_only=False)
-
-
 HYENA_MODEL_OPTIONS: dict[str, Type[HyenaModelProvider]] = {
     # ARC public checkpoint names (evo2_ prefix matches HuggingFace repo names)
     "evo2_1b_base": Hyena1bModelProvider,