Skip to content

Commit 6938e81

Browse files
committed
implement CountTableRows/Columns fully
rowspan and colspan/columnspan are fully handled as dictated by MathML descriptions. Empty rows and cells are properly accounted for.
1 parent 42eb0e8 commit 6938e81

File tree

2 files changed

+135
-55
lines changed

2 files changed

+135
-55
lines changed

Rules/Intent/general.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -804,7 +804,7 @@
804804
-
805805
name: mtable-array-property
806806
tag: mtable
807-
match: "count(*) > 0 and ((@frame='solid' or @frame='dashed') or child::*[@rowspan]) or child::*/child::*[@rowspan or @colspan]"
807+
match: "count(*) > 0 and ((@frame='solid' or @frame='dashed') or child::*[@rowspan]) or child::*/child::*[@rowspan or @colspan or @columnspan]"
808808
replace:
809809
- with:
810810
variables:

src/xpath_functions.rs

Lines changed: 134 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ use sxd_xpath::{Value, Context, context, function::*, nodeset::*};
2222
use crate::definitions::{Definitions, SPEECH_DEFINITIONS, BRAILLE_DEFINITIONS};
2323
use regex::Regex;
2424
use crate::pretty_print::mml_to_string;
25-
use std::cell::{Ref, RefCell};
25+
use std::{cell::{Ref, RefCell}, collections::HashMap};
2626
use log::{debug, error, warn};
2727
use std::sync::LazyLock;
2828
use std::thread::LocalKey;
@@ -333,7 +333,7 @@ static ALL_MATHML_ELEMENTS: phf::Set<&str> = phf_set!{
333333
};
334334

335335
static MATHML_LEAF_NODES: phf::Set<&str> = phf_set! {
336-
"mi", "mo", "mn", "mtext", "ms", "mspace", "mglyph",
336+
"mi", "mo", "mn", "mtext", "ms", "mspace", "mglyph",
337337
"none", "annotation", "ci", "cn", "csymbol", // content could be inside an annotation-xml (faster to allow here than to check lots of places)
338338
};
339339

@@ -1413,69 +1413,150 @@ impl Function for ReplaceAll {
14131413
}
14141414
}
14151415

1416-
struct CountTableDims;
1416+
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
1417+
enum CTDRowType {
1418+
Normal,
1419+
Labeled,
1420+
Implicit,
1421+
}
1422+
1423+
/// A single-use structure for computing the proper dimensions of an
1424+
/// `mtable`.
1425+
struct CountTableDims {
1426+
num_rows: usize,
1427+
num_cols: usize,
1428+
/// map from number of remaining in extra row-span to number of
1429+
/// columns with that value.
1430+
extended_cells: HashMap<usize, usize>,
1431+
/// rowspan=0 cells extend for the rest of the table, however
1432+
/// long that may be as determined by all other finite cells.
1433+
permanent_cols: usize,
1434+
}
1435+
14171436
impl CountTableDims {
1437+
1438+
fn new() -> CountTableDims {
1439+
Self { num_rows: 0, num_cols: 0, extended_cells: HashMap::new(), permanent_cols: 0 }
1440+
}
1441+
1442+
/// Returns the number of columns the cell contributes to the
1443+
/// current row. Also updates `extended_cells` as appropriate.
1444+
fn process_cell_in_row<'d>(&mut self, mtd: Element<'d>, is_first: bool, row_type: CTDRowType) -> usize {
1445+
// Rows can only contain `mtd`s. If this is not an `mtd`, we will just skip it.
1446+
if name(mtd) != "mtd" {
1447+
return 0;
1448+
}
1449+
1450+
// Add the contributing columns, taking colspan into account. Don't contribute if
1451+
// this is the first element of a labeled row.
1452+
let colspan = mtd.attribute_value("colspan")
1453+
.or_else(|| mtd.attribute_value("columnspan"))
1454+
.map_or(1, |e| e.parse::<usize>().unwrap_or(1));
1455+
if row_type == CTDRowType::Labeled && is_first {
1456+
// This is a label for the row and does not contibute to
1457+
// the size of the table. NOTE: Can this label have a
1458+
// non-trivial rowspan? If so, can it otherwise extend the
1459+
// size of the table?
1460+
return 0;
1461+
}
1462+
1463+
let rowspan = mtd.attribute_value("rowspan").map_or(1, |e| {
1464+
e.parse::<usize>().unwrap_or(1)
1465+
});
1466+
1467+
if rowspan > 1 {
1468+
*self.extended_cells.entry(rowspan).or_default() += colspan;
1469+
} else if rowspan == 0 {
1470+
self.permanent_cols += colspan;
1471+
}
1472+
1473+
colspan
1474+
}
1475+
1476+
/// Update the number of rows, and update the extended cells.
1477+
/// Returns the total number of columns accross all extended
1478+
/// cells.
1479+
fn next_row(&mut self) -> usize {
1480+
self.num_rows += 1;
1481+
let mut ext_cols = 0;
1482+
self.extended_cells = self.extended_cells.iter().filter(|&(k, _)| *k > 1).map(|(k, v)| {
1483+
ext_cols += *v;
1484+
(k-1, *v)
1485+
}).collect();
1486+
ext_cols
1487+
}
1488+
14181489
/// For an `mtable` element, count the number of rows and columns in the table.
14191490
///
14201491
/// This function is relatively permissive. Non-`mtr` rows are
14211492
/// ignored. The number of columns is determined only from the first
14221493
/// row, if it exists. Within that row, non-`mtd` elements are ignored.
1423-
fn count_table_dims<'d>(e: Element<'_>) -> Result<(Value<'d>, Value<'d>), Error> {
1424-
let mut num_cols = 0;
1425-
let mut num_rows = 0;
1494+
fn count_table_dims<'d>(mut self, e: Element<'_>) -> Result<(Value<'d>, Value<'d>), Error> {
14261495
for child in e.children() {
14271496
let ChildOfElement::Element(row) = child else {
14281497
continue
14291498
};
14301499

1431-
// each child of mtable should be an mtr. Ignore non-mtr rows.
1500+
// Each child of mtable should be an mtr or mlabeledtr. According to the spec, though,
1501+
// bare `mtd`s should also be treated as having an implicit wrapping `<mtr>`.
1502+
// Other elements should be ignored.
14321503
let row_name = name(row);
14331504

1434-
let labeled_row = if row_name == "mlabeledtr" {
1435-
true
1505+
let row_type = if row_name == "mlabeledtr" {
1506+
CTDRowType::Labeled
14361507
} else if row_name == "mtr" {
1437-
false
1508+
CTDRowType::Normal
1509+
} else if row_name == "mtd" {
1510+
CTDRowType::Implicit
14381511
} else {
14391512
continue;
14401513
};
1441-
num_rows += 1;
1442-
1443-
// count columns based on the number of rows.
1444-
if num_rows == 1 {
1445-
// count the number of columns, including column spans, in the first row.
1446-
let mut first_elem = true;
1447-
for row_child in row.children() {
1448-
let ChildOfElement::Element(mtd) = row_child else {
1449-
continue;
1450-
};
1451-
if name(mtd) != "mtd" {
1452-
continue;
1453-
}
1454-
// Add the contributing columns, taking colspan into account. Don't contribute if
1455-
// this is the first element of a labeled row.
1456-
let colspan = mtd.attribute_value("colspan").map_or(1, |e| e.parse::<usize>().unwrap_or(0));
1457-
if !(labeled_row && first_elem) {
1458-
num_cols += colspan;
1514+
1515+
let ext_cols = self.next_row();
1516+
1517+
let mut num_cols_in_row = 0;
1518+
match row_type {
1519+
CTDRowType::Normal | CTDRowType::Labeled => {
1520+
let mut first_elem = true;
1521+
for row_child in row.children() {
1522+
let ChildOfElement::Element(mtd) = row_child else {
1523+
continue;
1524+
};
1525+
1526+
num_cols_in_row += self.process_cell_in_row(mtd, first_elem, row_type);
1527+
first_elem= false;
14591528
}
1460-
first_elem = false;
1529+
}
1530+
CTDRowType::Implicit => {
1531+
num_cols_in_row += self.process_cell_in_row(row, true, row_type)
14611532
}
14621533
}
1534+
// update the number of columns based on this row.
1535+
self.num_cols = self.num_cols.max(num_cols_in_row + ext_cols + self.permanent_cols);
14631536
}
14641537

1465-
Ok((Value::Number(num_rows as f64), Value::Number(num_cols as f64)))
1538+
// At this point, the number of columns is correct. If we have
1539+
// any leftover rows from rowspan extended cells, we need to
1540+
// account for them here.
1541+
//
1542+
// NOTE: It does not appear that renderers respect these extra
1543+
// columns, so we will not use them.
1544+
let _extra_rows = self.extended_cells.keys().max().map(|k| k-1).unwrap_or(0);
1545+
1546+
Ok((Value::Number(self.num_rows as f64), Value::Number(self.num_cols as f64)))
14661547
}
14671548

1468-
fn evaluate<'d>(fn_name: &str,
1549+
fn evaluate<'d>(self, fn_name: &str,
14691550
args: Vec<Value<'d>>) -> Result<(Value<'d>, Value<'d>), Error> {
14701551
let mut args = Args(args);
14711552
args.exactly(1)?;
14721553
let element = args.pop_nodeset()?;
14731554
let node = validate_one_node(element, fn_name)?;
14741555
if let Node::Element(e) = node {
1475-
return Self::count_table_dims(e);
1556+
return self.count_table_dims(e);
14761557
}
14771558

1478-
Err( Error::Other("couldn't count table rows".to_string()) )
1559+
Err( Error::Other("Could not count dimensions of non-Element.".to_string()) )
14791560
}
14801561
}
14811562

@@ -1484,7 +1565,7 @@ impl Function for CountTableRows {
14841565
fn evaluate<'c, 'd>(&self,
14851566
_context: &context::Evaluation<'c, 'd>,
14861567
args: Vec<Value<'d>>) -> Result<Value<'d>, Error> {
1487-
CountTableDims::evaluate("CountTableRows", args).map(|a| a.0)
1568+
CountTableDims::new().evaluate("CountTableRows", args).map(|a| a.0)
14881569
}
14891570
}
14901571

@@ -1493,7 +1574,7 @@ impl Function for CountTableColumns {
14931574
fn evaluate<'c, 'd>(&self,
14941575
_context: &context::Evaluation<'c, 'd>,
14951576
args: Vec<Value<'d>>) -> Result<Value<'d>, Error> {
1496-
CountTableDims::evaluate("CountTableColumns", args).map(|a| a.1)
1577+
CountTableDims::new().evaluate("CountTableColumns", args).map(|a| a.1)
14971578
}
14981579
}
14991580

@@ -1693,26 +1774,25 @@ mod tests {
16931774

16941775
}
16951776

1777+
fn check_table_dims(mathml: &str, dims: (usize, usize)) {
1778+
let package = parser::parse(mathml).expect("failed to parse XML");
1779+
let math_elem = get_element(&package);
1780+
let child = as_element(math_elem.children()[0]);
1781+
assert!(CountTableDims::new().count_table_dims(child) == Ok((Value::Number(dims.0 as f64), Value::Number(dims.1 as f64))));
1782+
}
1783+
16961784
#[test]
1697-
fn table_row_count() {
1698-
let mathml = "<math><mtable><mtr><mtd>a</mtd></mtr></mtable></math>";
1699-
let package = parser::parse(mathml).expect("failed to parse XML");
1700-
let math_elem = get_element(&package);
1701-
let child = as_element(math_elem.children()[0]);
1702-
assert!(CountTableDims::count_table_dims(child) == Ok((Value::Number(1.0), Value::Number(1.0))));
1703-
1704-
let mathml = "<math><mtable><mtr><mtd colspan=\"3\">a</mtd><mtd>b</mtd></mtr><mtr><mtd></mtd></mtr></mtable></math>";
1705-
let package = parser::parse(mathml).expect("failed to parse XML");
1706-
let math_elem = get_element(&package);
1707-
let child = as_element(math_elem.children()[0]);
1708-
assert!(CountTableDims::count_table_dims(child) == Ok((Value::Number(2.0), Value::Number(4.0))));
1709-
1710-
let mathml = "<math><mtable><mlabeledtr><mtd>label</mtd><mtd>a</mtd><mtd>b</mtd></mlabeledtr><mtr><mtd>c</mtd><mtd>d</mtd></mtr></mtable></math>";
1711-
let package = parser::parse(mathml).expect("failed to parse XML");
1712-
let math_elem = get_element(&package);
1713-
let child = as_element(math_elem.children()[0]);
1714-
let ctd = CountTableDims::count_table_dims(child);
1715-
assert!(ctd == Ok((Value::Number(2.0), Value::Number(2.0))));
1785+
fn table_dim() {
1786+
check_table_dims("<math><mtable><mtr><mtd>a</mtd></mtr></mtable></math>", (1, 1));
1787+
check_table_dims("<math><mtable><mtr><mtd colspan=\"3\">a</mtd><mtd>b</mtd></mtr><mtr><mtd></mtd></mtr></mtable></math>", (2, 4));
1788+
1789+
check_table_dims("<math><mtable><mlabeledtr><mtd>label</mtd><mtd>a</mtd><mtd>b</mtd></mlabeledtr><mtr><mtd>c</mtd><mtd>d</mtd></mtr></mtable></math>", (2, 2));
1790+
// extended rows beyond the `mtr`s do *not* count towards the row count.
1791+
check_table_dims("<math><mtable><mtr><mtd rowspan=\"3\">a</mtd></mtr></mtable></math>", (1, 1));
1792+
1793+
check_table_dims("<math><mtable><mtr><mtd rowspan=\"3\">a</mtd></mtr>
1794+
<mtr><mtd columnspan=\"2\">b</mtd></mtr></mtable></math>", (2, 3));
1795+
17161796
}
17171797

17181798
#[test]

0 commit comments

Comments
 (0)