Skip to content

Commit 7b5c622

Browse files
authored
Merge pull request #94 from LaurenzV/3.2.0
Port to 3.2.0
2 parents 0621e70 + 16489e9 commit 7b5c622

File tree

7 files changed

+160
-78
lines changed

7 files changed

+160
-78
lines changed

examples/shape.rs

+45-34
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ OPTIONS:
3030
--no-clusters Do not output cluster indices
3131
--show-extents Output glyph extents
3232
--show-flags Output glyph flags
33+
--single-par Treat the input string as a single paragraph
3334
--ned No Extra Data; Do not output clusters or advances
3435
3536
ARGS:
@@ -59,6 +60,7 @@ struct Args {
5960
no_clusters: bool,
6061
show_extents: bool,
6162
show_flags: bool,
63+
single_par: bool,
6264
ned: bool,
6365
free: Vec<String>,
6466
}
@@ -95,6 +97,7 @@ fn parse_args() -> Result<Args, pico_args::Error> {
9597
no_clusters: args.contains("--no-clusters"),
9698
show_extents: args.contains("--show-extents"),
9799
show_flags: args.contains("--show-flags"),
100+
single_par: args.contains("--single-par"),
98101
ned: args.contains("--ned"),
99102
free: args
100103
.finish()
@@ -165,53 +168,61 @@ fn main() {
165168
std::process::exit(1);
166169
};
167170

168-
let mut buffer = rustybuzz::UnicodeBuffer::new();
169-
buffer.push_str(&text);
171+
let lines = if args.single_par {
172+
vec![text.as_str()]
173+
} else {
174+
text.split("\n").filter(|s| !s.is_empty()).collect()
175+
};
170176

171-
if let Some(d) = args.direction {
172-
buffer.set_direction(d);
173-
}
177+
for text in lines {
178+
let mut buffer = rustybuzz::UnicodeBuffer::new();
179+
buffer.push_str(&text);
174180

175-
buffer.set_language(args.language);
181+
if let Some(d) = args.direction {
182+
buffer.set_direction(d);
183+
}
176184

177-
if let Some(script) = args.script {
178-
buffer.set_script(script);
179-
}
185+
buffer.set_language(args.language.clone());
180186

181-
buffer.set_cluster_level(args.cluster_level);
187+
if let Some(script) = args.script {
188+
buffer.set_script(script);
189+
}
182190

183-
if !args.utf8_clusters {
184-
buffer.reset_clusters();
185-
}
191+
buffer.set_cluster_level(args.cluster_level);
186192

187-
let glyph_buffer = rustybuzz::shape(&face, &args.features, buffer);
193+
if !args.utf8_clusters {
194+
buffer.reset_clusters();
195+
}
188196

189-
let mut format_flags = rustybuzz::SerializeFlags::default();
190-
if args.no_glyph_names {
191-
format_flags |= rustybuzz::SerializeFlags::NO_GLYPH_NAMES;
192-
}
197+
let glyph_buffer = rustybuzz::shape(&face, &args.features, buffer);
193198

194-
if args.no_clusters || args.ned {
195-
format_flags |= rustybuzz::SerializeFlags::NO_CLUSTERS;
196-
}
199+
let mut format_flags = rustybuzz::SerializeFlags::default();
200+
if args.no_glyph_names {
201+
format_flags |= rustybuzz::SerializeFlags::NO_GLYPH_NAMES;
202+
}
197203

198-
if args.no_positions {
199-
format_flags |= rustybuzz::SerializeFlags::NO_POSITIONS;
200-
}
204+
if args.no_clusters || args.ned {
205+
format_flags |= rustybuzz::SerializeFlags::NO_CLUSTERS;
206+
}
201207

202-
if args.no_advances || args.ned {
203-
format_flags |= rustybuzz::SerializeFlags::NO_ADVANCES;
204-
}
208+
if args.no_positions {
209+
format_flags |= rustybuzz::SerializeFlags::NO_POSITIONS;
210+
}
205211

206-
if args.show_extents {
207-
format_flags |= rustybuzz::SerializeFlags::GLYPH_EXTENTS;
208-
}
212+
if args.no_advances || args.ned {
213+
format_flags |= rustybuzz::SerializeFlags::NO_ADVANCES;
214+
}
209215

210-
if args.show_flags {
211-
format_flags |= rustybuzz::SerializeFlags::GLYPH_FLAGS;
212-
}
216+
if args.show_extents {
217+
format_flags |= rustybuzz::SerializeFlags::GLYPH_EXTENTS;
218+
}
213219

214-
println!("{}", glyph_buffer.serialize(&face, format_flags));
220+
if args.show_flags {
221+
format_flags |= rustybuzz::SerializeFlags::GLYPH_FLAGS;
222+
}
223+
224+
println!("{}", glyph_buffer.serialize(&face, format_flags));
225+
}
215226
}
216227

217228
fn parse_unicodes(s: &str) -> Result<String, String> {

scripts/gen-shaping-tests.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@
1010
# There is no sane way to test them.
1111
IGNORE_TESTS = [
1212
'macos.tests',
13+
'coretext.tests',
14+
'directwrite.tests',
15+
'uniscribe.tests',
1316
]
1417

1518
IGNORE_TEST_CASES = [
@@ -115,10 +118,11 @@ def convert_test(hb_dir, hb_shape_exe, tests_name, file_name, idx, data, fonts):
115118
glyphs_expected = subprocess.run(options_list, check=True, stdout=subprocess.PIPE)\
116119
.stdout.decode()
117120

118-
glyphs_expected = glyphs_expected[1:-2] # remove `[..]\n`
121+
glyphs_expected = glyphs_expected.strip()[1:-1] # remove leading and trailing whitespaces and `[..]`
119122
glyphs_expected = glyphs_expected.replace('|', '|\\\n ')
120123

121124
options = options.replace('"', '\\"')
125+
options = options.replace(' --single-par', '')
122126

123127
fonts.add(os.path.split(fontfile_rs)[1])
124128

scripts/gen-tag-table.py

+9-2
Original file line numberDiff line numberDiff line change
@@ -330,11 +330,15 @@ def __init__(self):
330330
self.from_bcp_47 = collections.defaultdict(set)
331331
# Whether the parser is in a <td> element
332332
self._td = False
333+
# Whether the parser is after a <br> element within the current <tr> element
334+
self._br = False
333335
# The text of the <td> elements of the current <tr> element.
334336
self._current_tr = []
335337

336338
def handle_starttag(self, tag, attrs):
337-
if tag == 'meta':
339+
if tag == 'br':
340+
self._br = True
341+
elif tag == 'meta':
338342
for attr, value in attrs:
339343
if attr == 'name' and value == 'updated_at':
340344
self.header = self.get_starttag_text()
@@ -343,6 +347,7 @@ def handle_starttag(self, tag, attrs):
343347
self._td = True
344348
self._current_tr.append('')
345349
elif tag == 'tr':
350+
self._br = False
346351
self._current_tr = []
347352

348353
def handle_endtag(self, tag):
@@ -367,7 +372,7 @@ def handle_endtag(self, tag):
367372
self.ranks[tag] = rank
368373

369374
def handle_data(self, data):
370-
if self._td:
375+
if self._td and not self._br:
371376
self._current_tr[-1] += data
372377

373378
def handle_charref(self, name):
@@ -699,6 +704,8 @@ def get_name(self, lt):
699704
ot.remove_language_ot('MONT')
700705
ot.add_language('mnw', 'MONT')
701706

707+
ot.add_language ('mnw-TH', 'MONT')
708+
702709
ot.add_language('no', 'NOR')
703710

704711
ot.add_language('oc-provenc', 'PRO')

src/aat/metamorphosis.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -87,13 +87,13 @@ pub fn apply(plan: &ShapePlan, face: &Face, buffer: &mut Buffer) -> Option<()> {
8787
};
8888

8989
if reverse {
90-
buffer.reverse();
90+
buffer.reverse_graphemes();
9191
}
9292

9393
apply_subtable(&subtable.kind, buffer, face);
9494

9595
if reverse {
96-
buffer.reverse();
96+
buffer.reverse_graphemes();
9797
}
9898
}
9999
}

src/buffer.rs

+81-18
Original file line numberDiff line numberDiff line change
@@ -733,6 +733,60 @@ impl Buffer {
733733
}
734734
}
735735

736+
pub fn reverse_groups<F>(&mut self, group: F, merge_clusters: bool)
737+
where
738+
F: Fn(&GlyphInfo, &GlyphInfo) -> bool,
739+
{
740+
if self.is_empty() {
741+
return;
742+
}
743+
744+
let mut start = 0;
745+
746+
for i in 1..self.len {
747+
if !group(&self.info[i - 1], &self.info[i]) {
748+
if merge_clusters {
749+
self.merge_clusters(start, i);
750+
}
751+
752+
self.reverse_range(start, i);
753+
start = i;
754+
}
755+
756+
if merge_clusters {
757+
self.merge_clusters(start, i);
758+
}
759+
760+
self.reverse_range(start, i);
761+
762+
self.reverse();
763+
}
764+
}
765+
766+
pub fn reverse_graphemes(&mut self) {
767+
self.reverse_groups(
768+
_grapheme_group_func,
769+
self.cluster_level == BufferClusterLevel::MonotoneCharacters,
770+
)
771+
}
772+
773+
pub fn group_end<F>(&self, mut start: usize, group: F) -> usize
774+
where
775+
F: Fn(&GlyphInfo, &GlyphInfo) -> bool,
776+
{
777+
start += 1;
778+
779+
while start < self.len && group(&self.info[start - 1], &self.info[start]) {
780+
start += 1;
781+
}
782+
783+
start
784+
}
785+
786+
/// Reverses the buffer group-wise, treating glyphs accepted by
/// `_cluster_group_func` as one group. Clusters are never merged here.
pub fn reverse_clusters(&mut self) {
    let merge_clusters = false;
    self.reverse_groups(_cluster_group_func, merge_clusters);
}
789+
736790
#[inline]
737791
fn reset_clusters(&mut self) {
738792
for (i, info) in self.info.iter_mut().enumerate() {
@@ -1159,7 +1213,7 @@ impl Buffer {
11591213

11601214
fn unsafe_to_break_impl(&mut self, start: usize, end: usize) {
11611215
let mut cluster = core::u32::MAX;
1162-
cluster = Self::_unsafe_to_break_find_min_cluster(&self.info, start, end, cluster);
1216+
cluster = Self::_infos_find_min_cluster(&self.info, start, end, cluster);
11631217
let unsafe_to_break = Self::_unsafe_to_break_set_mask(&mut self.info, start, end, cluster);
11641218
if unsafe_to_break {
11651219
self.scratch_flags |= BufferScratchFlags::HAS_UNSAFE_TO_BREAK;
@@ -1176,9 +1230,8 @@ impl Buffer {
11761230
assert!(self.idx <= end);
11771231

11781232
let mut cluster = core::u32::MAX;
1179-
cluster =
1180-
Self::_unsafe_to_break_find_min_cluster(self.out_info(), start, self.out_len, cluster);
1181-
cluster = Self::_unsafe_to_break_find_min_cluster(&self.info, self.idx, end, cluster);
1233+
cluster = Self::_infos_find_min_cluster(self.out_info(), start, self.out_len, cluster);
1234+
cluster = Self::_infos_find_min_cluster(&self.info, self.idx, end, cluster);
11821235
let idx = self.idx;
11831236
let out_len = self.out_len;
11841237
let unsafe_to_break1 =
@@ -1341,7 +1394,7 @@ impl Buffer {
13411394
info.cluster = cluster;
13421395
}
13431396

1344-
fn _unsafe_to_break_find_min_cluster(
1397+
fn _infos_find_min_cluster(
13451398
info: &[GlyphInfo],
13461399
start: usize,
13471400
end: usize,
@@ -1452,16 +1505,32 @@ impl Buffer {
14521505
}
14531506
}
14541507

1508+
/// Grouping predicate: two neighbouring glyphs belong to the same group when
/// their `cluster` values are equal.
pub(crate) fn _cluster_group_func(a: &GlyphInfo, b: &GlyphInfo) -> bool {
    let (first, second) = (a.cluster, b.cluster);
    first == second
}
1511+
1512+
/// Grouping predicate: the second glyph stays in the group of the first one
/// whenever `b.is_continuation()` is `true`. The first glyph is not inspected.
pub(crate) fn _grapheme_group_func(_a: &GlyphInfo, b: &GlyphInfo) -> bool {
    let continues_group = b.is_continuation();
    continues_group
}
1515+
14551516
// TODO: to iter if possible
14561517

14571518
macro_rules! foreach_cluster {
1458-
($buffer:expr, $start:ident, $end:ident, $($body:tt)*) => {{
1519+
($buffer:expr, $start:ident, $end:ident, $($body:tt)*) => {
1520+
foreach_group!($buffer, $start, $end, crate::buffer::_cluster_group_func, $($body)*)
1521+
};
1522+
}
1523+
1524+
macro_rules! foreach_group {
1525+
($buffer:expr, $start:ident, $end:ident, $group_func:expr, $($body:tt)*) => {{
1526+
let count = $buffer.len;
14591527
let mut $start = 0;
1460-
let mut $end = $buffer.next_cluster(0);
1461-
while $start < $buffer.len {
1528+
let mut $end = if count > 0 { $buffer.group_end(0, $group_func) } else { 0 };
1529+
1530+
while $start < count {
14621531
$($body)*;
14631532
$start = $end;
1464-
$end = $buffer.next_cluster($start);
1533+
$end = $buffer.group_end($start, $group_func);
14651534
}
14661535
}};
14671536
}
@@ -1479,15 +1548,9 @@ macro_rules! foreach_syllable {
14791548
}
14801549

14811550
macro_rules! foreach_grapheme {
1482-
($buffer:expr, $start:ident, $end:ident, $($body:tt)*) => {{
1483-
let mut $start = 0;
1484-
let mut $end = $buffer.next_grapheme(0);
1485-
while $start < $buffer.len {
1486-
$($body)*;
1487-
$start = $end;
1488-
$end = $buffer.next_grapheme($start);
1489-
}
1490-
}};
1551+
($buffer:expr, $start:ident, $end:ident, $($body:tt)*) => {
1552+
foreach_group!($buffer, $start, $end, crate::buffer::_grapheme_group_func, $($body)*)
1553+
};
14911554
}
14921555

14931556
bitflags::bitflags! {

src/shape.rs

+5-13
Original file line numberDiff line numberDiff line change
@@ -480,22 +480,14 @@ fn ensure_native_direction(buffer: &mut Buffer) {
480480
}
481481
}
482482

483+
// TODO vertical:
484+
// The only BTT vertical script is Ogham, but it's not clear to me whether OpenType
485+
// Ogham fonts are supposed to be implemented BTT or not. Need to research that
486+
// first.
483487
if (dir.is_horizontal() && dir != hor && hor != Direction::Invalid)
484488
|| (dir.is_vertical() && dir != Direction::TopToBottom)
485489
{
486-
if buffer.cluster_level == BufferClusterLevel::MonotoneCharacters {
487-
foreach_grapheme!(buffer, start, end, {
488-
buffer.merge_clusters(start, end);
489-
buffer.reverse_range(start, end);
490-
});
491-
} else {
492-
foreach_grapheme!(buffer, start, end, {
493-
// form_clusters() merged clusters already, we don't merge.
494-
buffer.reverse_range(start, end);
495-
})
496-
}
497-
498-
buffer.reverse();
490+
buffer.reverse_graphemes();
499491
buffer.direction = buffer.direction.reverse();
500492
}
501493
}

0 commit comments

Comments
 (0)