CUNY-CL
diff --git a/‎pyproject.toml‎
Lines changed: 1 addition & 1 deletion b/‎pyproject.toml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎yoyodyne/data/batches.py‎
Lines changed: 1 addition & 1 deletion b/‎yoyodyne/data/batches.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎yoyodyne/data/datamodules.py‎
Lines changed: 4 additions & 0 deletions b/‎yoyodyne/data/datamodules.py‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎yoyodyne/models/base.py‎
Lines changed: 14 additions & 18 deletions b/‎yoyodyne/models/base.py‎
Lines changed: 14 additions & 18 deletions
diff --git a/‎yoyodyne/models/beam_search.py‎
Lines changed: 164 additions & 0 deletions b/‎yoyodyne/models/beam_search.py‎
Lines changed: 164 additions & 0 deletions
@@ -11,7 +11,7 @@ exclude = ["examples*"]
 
 [project]
 name = "yoyodyne"
-version = "0.2.20"
+version = "0.3.0"
 description = "Small-vocabulary neural sequence-to-sequence models"
 readme = "README.md"
 requires-python = ">= 3.9"
 
@@ -84,7 +84,7 @@ def lengths(self) -> torch.Tensor:
         Returns:
             torch.Tensor.
         """
-        return (self.mask == 0).sum(dim=1).cpu()
+        return (~self.mask).sum(dim=1).cpu()
 
 
 class PaddedBatch(nn.Module):
 
@@ -180,6 +180,10 @@ def has_features(self) -> bool:
     def has_target(self) -> bool:
         return self.parser.has_target
 
+    @property
+    def has_separate_features(self) -> bool:
+        """Returns the collator's `separate_features` setting."""
+        return self.collator.separate_features
+
     # Required API.
 
     def train_dataloader(self) -> data.DataLoader:
 
@@ -55,9 +55,9 @@ class BaseModel(abc.ABC, lightning.LightningModule):
     embedding_size: int
     encoder_layers: int
     decoder_layers: int
-    features_encoder_cls: Optional[modules.base.BaseModule]
+    features_encoder_cls: Optional[modules.BaseModule]
     hidden_size: int
-    source_encoder_cls: modules.base.BaseModule
+    source_encoder_cls: modules.BaseModule
     # Other stuff.
     eval_metrics: Set[evaluators.Evaluator]
     loss_func: Callable[[torch.Tensor, torch.Tensor], torch.Tensor]
@@ -249,6 +249,12 @@ def has_features_encoder(self):
     def num_parameters(self) -> int:
         return sum(part.numel() for part in self.parameters())
 
+    def start_symbol(self, batch_size: int) -> torch.Tensor:
+        """Generates a tensor of start symbols for the batch.
+
+        Args:
+            batch_size (int).
+
+        Returns:
+            torch.Tensor: a batch_size x 1 tensor filled with START_IDX,
+                created on the model's device.
+        """
+        return torch.tensor([special.START_IDX], device=self.device).repeat(
+            batch_size, 1
+        )
+
     def training_step(
         self,
         batch: data.PaddedBatch,
@@ -280,14 +286,16 @@ def training_step(
         )
         return loss
 
-    def validation_epoch_end(self, validation_step_outputs: Dict) -> Dict:
+    def validation_epoch_end(
+        self, validation_step_outputs: Dict
+    ) -> Dict[str, float]:
         """Computes average loss and average accuracy.
 
         Args:
             validation_step_outputs (Dict).
 
         Returns:
-            Dict: averaged metrics over all validation steps.
+            Dict[str, float]: averaged metrics over all validation steps.
         """
         avg_val_loss = torch.tensor(
             [v["val_loss"] for v in validation_step_outputs]
@@ -311,7 +319,7 @@ def validation_step(
         self,
         batch: data.PaddedBatch,
         batch_idx: int,
-    ) -> Dict:
+    ) -> Dict[str, float]:
         """Runs one validation step.
 
         This is called by the PL Trainer.
@@ -366,19 +374,7 @@ def predict_step(
         if self.beam_width > 1:
             return self(batch)
         else:
-            return self._get_predicted(self(batch))
-
-    def _get_predicted(self, predictions: torch.Tensor) -> torch.Tensor:
-        """Picks the best index from the vocabulary.
-
-        Args:
-            predictions (torch.Tensor): B x seq_len x target_vocab_size.
-
-        Returns:
-            torch.Tensor: indices of the argmax at each timestep.
-        """
-        assert len(predictions.size()) == 3
-        return torch.argmax(predictions, dim=2)
+            return torch.argmax(self(batch), dim=2)
 
 
 def add_argparse_args(parser: argparse.ArgumentParser) -> None:
 
@@ -0,0 +1,164 @@
+"""Beam search classes.
+
+A Cell is a (possibly partial) hypothesis containing the decoder output,
+the symbol sequence, and the hypothesis's log-likelihood. Cells can
+generate their candidate extensions (in the form of new Cells) when
+provided with additional decoder output; they also know when they have reached
+a final state (i.e., when END has been generated).
+
+A Beam holds a collection of Cells and an in-progress heap.
+
+Current limitations:
+
+* Beam search uses Python's heap implementation; this is reasonably performant
+  in CPython (it uses a C extension module where available) but there may be a
+  better pure PyTorch solution.
+* Beam search assumes a batch size of 1; it is not clear how to extend it to
+  larger batches.
+* We hard-code the use of log-likelihoods; the addition of two log
+  probabilities is equivalent to multiplying real numbers.
+* Beam search is designed to support RNN and attentive RNN models and interface
+  issues might arise with other architectures.
+* Not much attention has been paid to keeping data on device.
+
+See rnn.py for sample usage.
+"""
+
+from __future__ import annotations
+
+import dataclasses
+import heapq
+
+from typing import Iterator, List
+
+import torch
+from torch import nn
+
+from . import modules
+from .. import special
+
+
+@dataclasses.dataclass(order=True)
+class Cell:
+    """Represents a (potentially partial) hypothesis in the beam search.
+
+    Only the log-likelihood field is used for comparison.
+
+    A cell is "final" once it has decoded the END symbol.
+
+    Args:
+        state (modules.RNNState).
+        symbols (List[int], optional): defaults to a new list holding only
+            START_IDX.
+        score (float, optional): defaults to 0.0.
+    """
+
+    # Excluded from comparison (compare=False); only `score` orders cells.
+    state: modules.RNNState = dataclasses.field(compare=False)
+    symbols: List[int] = dataclasses.field(
+        compare=False, default_factory=lambda: [special.START_IDX]
+    )
+    score: float = dataclasses.field(compare=True, default=0.0)
+
+    def extensions(
+        self, state: modules.RNNState, scores: torch.Tensor
+    ) -> Iterator[Cell]:
+        """Generates extension cells.
+
+        Args:
+            state (modules.RNNState): state stored on every extension cell.
+            scores (torch.Tensor): one score per candidate symbol; the
+                position of each score is used as the symbol index, and its
+                value is added to this cell's score.
+
+        Yields:
+            Cell: all single-symbol extensions of the current cell.
+        """
+        for symbol, score in enumerate(scores):
+            # Builds a new symbol list, so this cell's own list is unchanged.
+            yield Cell(
+                state, self.symbols + [symbol], self.score + score.item()
+            )
+
+    @property
+    def symbol(self) -> int:
+        """Returns the last symbol in the sequence."""
+        return self.symbols[-1]
+
+    @property
+    def final(self) -> bool:
+        """Whether the last symbol in the sequence is END."""
+        return self.symbols[-1] == special.END_IDX
+
+
+class Beam:
+    """The beam.
+
+    This stores the current set of beam cells and an in-progress heap of
+    the next set separately.
+
+    A beam is "final" once every cell has decoded the END symbol.
+
+    Args:
+        beam_width (int).
+        state (modules.RNNState).
+    """
+
+    beam_width: int
+    # Current cells.
+    cells: List[Cell]
+    # Heap of the next set of cells.
+    heap: List[Cell]
+
+    def __init__(self, beam_width, state: modules.RNNState):
+        self.beam_width = beam_width
+        # The beam starts with a single cell built from the given state,
+        # using the Cell defaults for its symbols and score.
+        self.cells = [Cell(state)]
+        self.heap = []
+
+    def __len__(self) -> int:
+        """Returns the number of current cells (not the heap size)."""
+        return len(self.cells)
+
+    def push(self, cell: Cell) -> None:
+        """Inserts the cell into the heap, maintaining the specified beam size.
+
+        Args:
+            cell (Cell).
+        """
+        if len(self.heap) < self.beam_width:
+            heapq.heappush(self.heap, cell)
+        else:
+            # heapq is a min-heap and cells compare by score, so this pushes
+            # the new cell and then discards whichever cell scores lowest
+            # (possibly the new one), keeping the heap at its current size.
+            heapq.heappushpop(self.heap, cell)
+
+    def update(self) -> None:
+        """Replaces the current cells and clears the heap."""
+        # Cells compare by score, so this orders them from highest to lowest.
+        self.cells = sorted(self.heap, reverse=True)
+        self.heap.clear()
+
+    @property
+    def final(self) -> bool:
+        """Whether every current cell is final."""
+        return all(cell.final for cell in self.cells)
+
+    def predictions(self, device: torch.device) -> torch.Tensor:
+        """Converts the best sequences into a padded tensor of predictions.
+
+        This implementation assumes batch size is 1.
+
+        Args:
+            device (torch.device): the device to move the data to.
+
+        Returns:
+            torch.Tensor: a B x beam_width x seq_length tensor of predictions.
+        """
+        # Shorter sequences are right-padded with PAD_IDX; unsqueeze(0) adds
+        # the leading batch dimension of size 1.
+        return nn.utils.rnn.pad_sequence(
+            [torch.tensor(cell.symbols, device=device) for cell in self.cells],
+            batch_first=True,
+            padding_value=special.PAD_IDX,
+        ).unsqueeze(0)
+
+    def scores(self, device: torch.device) -> torch.Tensor:
+        """Converts the sequence scores into tensors.
+
+        This implementation assumes batch size is 1.
+
+        Args:
+            device (torch.device): the device to move the data to.
+
+        Returns:
+            torch.Tensor: a B x beam_width tensor of log-likelihoods.
+        """
+        # unsqueeze(0) adds the leading batch dimension of size 1.
+        return torch.tensor(
+            [cell.score for cell in self.cells], device=device
+        ).unsqueeze(0)