Skip to content

Commit 36c0603

Browse files
author
bors-servo
authored
Auto merge of #35 - behnam:dev, r=mbrubeck
Fix a bug in reorder_line() and improve benchmarks. Summary of changes: * Fix a bug in `reorder_line()` where the optimized branch returned the full text (instead of only the line's text) after (supposedly) reordering. * Improve the benchmark logic to measure `reorder_line()` in isolation, and over the full text of each test. * Add new `basic` benchmarks to be able to measure perf for small, common text strings. * Move `serde_test` to `dev-dependencies`, as it's only used in the `test` profile. * Bump version to `0.3.2`. <!-- Reviewable:start --> --- This change is [<img src="https://reviewable.io/review_button.svg" height="34" align="absmiddle" alt="Reviewable"/>](https://reviewable.io/reviews/servo/unicode-bidi/35) <!-- Reviewable:end -->
2 parents d2d1180 + 6b20563 commit 36c0603

11 files changed

+173
-74
lines changed

Cargo.toml

+5-3
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "unicode-bidi"
3-
version = "0.3.1"
3+
version = "0.3.2"
44
authors = ["The Servo Project Developers"]
55
license = "MIT / Apache-2.0"
66
description = "Implementation of the Unicode Bidirectional Algorithm"
@@ -16,10 +16,12 @@ name = "unicode_bidi"
1616
[dependencies]
1717
matches = "0.1"
1818
serde = {version = ">=0.8, <2.0", optional = true}
19-
serde_test = {version = ">=0.8, <2.0", optional = true}
2019
serde_derive = {version = ">=0.8, <2.0", optional = true}
2120

21+
[dev-dependencies]
22+
serde_test = ">=0.8, <2.0"
23+
2224
[features]
2325
default = []
2426
unstable = [] # Use in benches/
25-
with_serde = ["serde", "serde_test", "serde_derive"]
27+
with_serde = ["serde", "serde_derive"]

benches/basic.rs

+69
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
// Copyright 2014 The html5ever Project Developers. See the
2+
// COPYRIGHT file at the top-level directory of this distribution.
3+
//
4+
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5+
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6+
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7+
// option. This file may not be copied, modified, or distributed
8+
// except according to those terms.
9+
10+
#![cfg(all(test, feature = "unstable"))]
11+
#![feature(test)]
12+
13+
extern crate test;
14+
extern crate unicode_bidi;
15+
16+
use test::Bencher;
17+
18+
use unicode_bidi::BidiInfo;
19+
20+
21+
const LTR_TEXTS: &[&str] = &[
22+
"abc\ndef\nghi",
23+
"abc 123\ndef 456\nghi 789",
24+
];
25+
26+
const BIDI_TEXTS: &[&str] = &[
27+
"ابجد\nهوز\nحتی",
28+
"ابجد ۱۲۳\nهوز ۴۵۶\nحتی ۷۸۹",
29+
];
30+
31+
32+
fn bench_bidi_info_new(b: &mut Bencher, texts: &[&str]) {
33+
for text in texts {
34+
b.iter(|| { BidiInfo::new(text, None); });
35+
}
36+
}
37+
38+
fn bench_reorder_line(b: &mut Bencher, texts: &[&str]) {
39+
for text in texts {
40+
let bidi_info = BidiInfo::new(text, None);
41+
b.iter(
42+
|| for para in &bidi_info.paragraphs {
43+
let line = para.range.clone();
44+
bidi_info.reorder_line(para, line);
45+
}
46+
);
47+
}
48+
}
49+
50+
51+
#[bench]
52+
fn bench_1_bidi_info_new_for_ltr_texts(b: &mut Bencher) {
53+
bench_bidi_info_new(b, LTR_TEXTS);
54+
}
55+
56+
#[bench]
57+
fn bench_2_bidi_info_new_for_bidi_texts(b: &mut Bencher) {
58+
bench_bidi_info_new(b, BIDI_TEXTS);
59+
}
60+
61+
#[bench]
62+
fn bench_3_reorder_line_for_ltr_texts(b: &mut Bencher) {
63+
bench_reorder_line(b, LTR_TEXTS);
64+
}
65+
66+
#[bench]
67+
fn bench_4_reorder_line_for_bidi_texts(b: &mut Bencher) {
68+
bench_reorder_line(b, BIDI_TEXTS);
69+
}

benches/udhr.rs

+32-33
Original file line numberDiff line numberDiff line change
@@ -17,17 +17,21 @@ use test::Bencher;
1717

1818
use unicode_bidi::BidiInfo;
1919

20+
2021
const LTR_TEXTS: &[&str] = &[
2122
include_str!("udhr_data/ltr/udhr_acu_1.txt"),
2223
include_str!("udhr_data/ltr/udhr_auc.txt"),
24+
include_str!("udhr_data/ltr/udhr_eng.txt"),
2325
include_str!("udhr_data/ltr/udhr_knc.txt"),
2426
include_str!("udhr_data/ltr/udhr_krl.txt"),
2527
include_str!("udhr_data/ltr/udhr_kwi.txt"),
2628
include_str!("udhr_data/ltr/udhr_lot.txt"),
29+
include_str!("udhr_data/ltr/udhr_mly_latn.txt"),
2730
include_str!("udhr_data/ltr/udhr_piu.txt"),
2831
include_str!("udhr_data/ltr/udhr_qug.txt"),
2932
include_str!("udhr_data/ltr/udhr_snn.txt"),
3033
include_str!("udhr_data/ltr/udhr_tiv.txt"),
34+
include_str!("udhr_data/ltr/udhr_uig_latn.txt"),
3135
];
3236

3337
const BIDI_TEXTS: &[&str] = &[
@@ -37,56 +41,51 @@ const BIDI_TEXTS: &[&str] = &[
3741
include_str!("udhr_data/bidi/udhr_pes_1.txt"),
3842
include_str!("udhr_data/bidi/udhr_skr.txt"),
3943
include_str!("udhr_data/bidi/udhr_urd.txt"),
40-
include_str!("udhr_data/bidi/udhr_eng.txt"),
41-
include_str!("udhr_data/bidi/udhr_mly_latn.txt"),
4244
include_str!("udhr_data/bidi/udhr_pes_2.txt"),
4345
include_str!("udhr_data/bidi/udhr_uig_arab.txt"),
4446
include_str!("udhr_data/bidi/udhr_urd_2.txt"),
4547
include_str!("udhr_data/bidi/udhr_heb.txt"),
4648
include_str!("udhr_data/bidi/udhr_pbu.txt"),
4749
include_str!("udhr_data/bidi/udhr_pnb.txt"),
48-
include_str!("udhr_data/bidi/udhr_uig_latn.txt"),
4950
include_str!("udhr_data/bidi/udhr_ydd.txt"),
5051
];
5152

53+
54+
fn bench_bidi_info_new(b: &mut Bencher, texts: &[&str]) {
55+
for text in texts {
56+
b.iter(|| { BidiInfo::new(text, None); });
57+
}
58+
}
59+
60+
fn bench_reorder_line(b: &mut Bencher, texts: &[&str]) {
61+
for text in texts {
62+
let bidi_info = BidiInfo::new(text, None);
63+
b.iter(
64+
|| for para in &bidi_info.paragraphs {
65+
let line = para.range.clone();
66+
bidi_info.reorder_line(para, line);
67+
}
68+
);
69+
}
70+
}
71+
72+
5273
#[bench]
53-
fn bench_bidi_info_new_for_ltr_texts(b: &mut Bencher) {
54-
b.iter(
55-
|| for text in LTR_TEXTS {
56-
BidiInfo::new(text, None);
57-
},
58-
);
74+
fn bench_1_bidi_info_new_for_ltr_texts(b: &mut Bencher) {
75+
bench_bidi_info_new(b, LTR_TEXTS);
5976
}
6077

6178
#[bench]
62-
fn bench_bidi_info_new_for_bidi_texts(b: &mut Bencher) {
63-
b.iter(
64-
|| for text in BIDI_TEXTS {
65-
BidiInfo::new(text, None);
66-
},
67-
);
79+
fn bench_2_bidi_info_new_for_bidi_texts(b: &mut Bencher) {
80+
bench_bidi_info_new(b, BIDI_TEXTS);
6881
}
6982

7083
#[bench]
71-
fn bench_bidi_info_new_and_reordered_for_ltr_texts(b: &mut Bencher) {
72-
b.iter(
73-
|| for text in LTR_TEXTS {
74-
let bidi_info = BidiInfo::new(text, None);
75-
let para = &bidi_info.paragraphs[0];
76-
let line = para.range.clone();
77-
bidi_info.reordered_levels(para, line);
78-
},
79-
);
84+
fn bench_3_reorder_line_for_ltr_texts(b: &mut Bencher) {
85+
bench_reorder_line(b, LTR_TEXTS);
8086
}
8187

8288
#[bench]
83-
fn bench_bidi_info_new_and_reordered_for_bidi_texts(b: &mut Bencher) {
84-
b.iter(
85-
|| for text in BIDI_TEXTS {
86-
let bidi_info = BidiInfo::new(text, None);
87-
let para = &bidi_info.paragraphs[0];
88-
let line = para.range.clone();
89-
bidi_info.reordered_levels(para, line);
90-
},
91-
);
89+
fn bench_4_reorder_line_for_bidi_texts(b: &mut Bencher) {
90+
bench_reorder_line(b, BIDI_TEXTS);
9291
}
File renamed without changes.

src/char_data/mod.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ fn bsearch_range_value_table(c: char, r: &'static [(char, char, BidiClass)]) ->
3939
Less
4040
} else {
4141
Greater
42-
},
42+
}
4343
) {
4444
Ok(idx) => {
4545
let (_, _, cat) = r[idx];

src/explicit.rs

+3-4
Original file line numberDiff line numberDiff line change
@@ -64,13 +64,12 @@ pub fn compute(
6464
overflow_embedding_count == 0 {
6565
let new_level = new_level.unwrap();
6666
stack.push(
67-
new_level,
68-
match initial_classes[i] {
67+
new_level, match initial_classes[i] {
6968
RLO => OverrideStatus::RTL,
7069
LRO => OverrideStatus::LTR,
7170
RLI | LRI | FSI => OverrideStatus::Isolate,
7271
_ => OverrideStatus::Neutral,
73-
},
72+
}
7473
);
7574
if is_isolate {
7675
valid_isolate_count += 1;
@@ -180,7 +179,7 @@ impl DirectionalStatusStack {
180179
Status {
181180
level: level,
182181
status: status,
183-
},
182+
}
184183
);
185184
}
186185

src/lib.rs

+57-27
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,7 @@ impl<'text> InitialInfo<'text> {
150150
range: para_start..para_end,
151151
// P3. If no character is found in p2, set the paragraph level to zero.
152152
level: para_level.unwrap_or(Level::ltr()),
153-
},
153+
}
154154
);
155155
// Reset state for the start of the next paragraph.
156156
para_start = para_end;
@@ -182,7 +182,7 @@ impl<'text> InitialInfo<'text> {
182182
Level::rtl()
183183
} else {
184184
Level::ltr()
185-
},
185+
}
186186
);
187187
}
188188
}
@@ -202,7 +202,7 @@ impl<'text> InitialInfo<'text> {
202202
ParagraphInfo {
203203
range: para_start..text.len(),
204204
level: para_level.unwrap_or(Level::ltr()),
205-
},
205+
}
206206
);
207207
}
208208
assert!(original_classes.len() == text.len());
@@ -301,8 +301,9 @@ impl<'text> BidiInfo<'text> {
301301
pub fn reorder_line(&self, para: &ParagraphInfo, line: Range<usize>) -> Cow<'text, str> {
302302
let (levels, runs) = self.visual_runs(para, line.clone());
303303
if runs.len() == 1 && levels[runs[0].start].is_ltr() {
304-
return self.text.into();
304+
return self.text[line.clone()].into();
305305
}
306+
306307
let mut result = String::with_capacity(line.len());
307308
for run in runs {
308309
if levels[run.start].is_rtl() {
@@ -666,47 +667,76 @@ mod tests {
666667
assert_eq!(BidiInfo::new("אבּג\n123", None).has_rtl(), true);
667668
}
668669

670+
fn reorder_paras(text: &str) -> Vec<Cow<str>> {
671+
let bidi_info = BidiInfo::new(text, None);
672+
bidi_info
673+
.paragraphs
674+
.iter()
675+
.map(|para| bidi_info.reorder_line(para, para.range.clone()))
676+
.collect()
677+
}
678+
669679
#[test]
670680
fn test_reorder_line() {
671-
fn reorder(text: &str) -> Cow<str> {
672-
let bidi_info = BidiInfo::new(text, None);
673-
let para = &bidi_info.paragraphs[0];
674-
let line = para.range.clone();
675-
bidi_info.reorder_line(para, line)
676-
}
681+
/// Bidi_Class: L L L B L L L B L L L
682+
assert_eq!(
683+
reorder_paras("abc\ndef\nghi"),
684+
vec!["abc\n", "def\n", "ghi"]
685+
);
686+
687+
/// Bidi_Class: L L EN B L L EN B L L EN
688+
assert_eq!(
689+
reorder_paras("ab1\nde2\ngh3"),
690+
vec!["ab1\n", "de2\n", "gh3"]
691+
);
692+
693+
/// Bidi_Class: L L L B AL AL AL
694+
assert_eq!(reorder_paras("abc\nابج"), vec!["abc\n", "جبا"]);
677695

678-
assert_eq!(reorder("abc123"), "abc123");
679-
assert_eq!(reorder("1.-2"), "1.-2");
680-
assert_eq!(reorder("1-.2"), "1-.2");
681-
assert_eq!(reorder("abc אבג"), "abc גבא");
696+
/// Bidi_Class: AL AL AL B L L L
697+
assert_eq!(reorder_paras("ابج\nabc"), vec!["\nجبا", "abc"]);
698+
699+
assert_eq!(reorder_paras("1.-2"), vec!["1.-2"]);
700+
assert_eq!(reorder_paras("1-.2"), vec!["1-.2"]);
701+
assert_eq!(reorder_paras("abc אבג"), vec!["abc גבא"]);
682702

683703
// Numbers being weak LTR characters, cannot reorder strong RTL
684-
assert_eq!(reorder("123 אבג"), "גבא 123");
704+
assert_eq!(reorder_paras("123 אבג"), vec!["גבא 123"]);
685705

686706
// Testing for RLE Character
687707
assert_eq!(
688-
reorder("\u{202B}abc אבג\u{202C}"),
689-
"\u{202B}\u{202C}גבא abc"
708+
reorder_paras("\u{202B}abc אבג\u{202C}"),
709+
vec!["\u{202B}\u{202C}גבא abc"]
690710
);
691711

692712
// Testing neutral characters
693-
assert_eq!(reorder("אבג? אבג"), "גבא ?גבא");
713+
assert_eq!(reorder_paras("אבג? אבג"), vec!["גבא ?גבא"]);
694714

695715
// Testing neutral characters with special case
696-
assert_eq!(reorder("A אבג?"), "A גבא?");
716+
assert_eq!(reorder_paras("A אבג?"), vec!["A גבא?"]);
697717

698718
// Testing neutral characters with Implicit RTL Marker
699-
// The given test highlights a possible non-conformance issue that will perhaps be fixed in
700-
// the subsequent steps.
701-
//assert_eq!(reorder("A אבג?\u{202f}"), "A \u{202f}?גבא");
702-
assert_eq!(reorder("אבג abc"), "abc גבא");
703719
assert_eq!(
704-
reorder("abc\u{2067}.-\u{2069}ghi"),
705-
"abc\u{2067}-.\u{2069}ghi"
720+
reorder_paras("A אבג?\u{200F}"),
721+
vec!["A \u{200F}?גבא"]
722+
);
723+
assert_eq!(reorder_paras("אבג abc"), vec!["abc גבא"]);
724+
assert_eq!(
725+
reorder_paras("abc\u{2067}.-\u{2069}ghi"),
726+
vec!["abc\u{2067}-.\u{2069}ghi"]
727+
);
728+
assert_eq!(
729+
reorder_paras("Hello, \u{2068}\u{202E}world\u{202C}\u{2069}!"),
730+
vec!["Hello, \u{2068}\u{202E}\u{202C}dlrow\u{2069}!"]
706731
);
732+
733+
// With mirrorable characters in RTL run
734+
assert_eq!(reorder_paras("א(ב)ג."), vec![".ג)ב(א"]);
735+
736+
// With mirrorable characters on level boundary
707737
assert_eq!(
708-
reorder("Hello, \u{2068}\u{202E}world\u{202C}\u{2069}!"),
709-
"Hello, \u{2068}\u{202E}\u{202C}dlrow\u{2069}!"
738+
reorder_paras("אב(גד[&ef].)gh"),
739+
vec!["ef].)gh&[דג(בא"]
710740
);
711741
}
712742
}

src/prepare.rs

+2-4
Original file line numberDiff line numberDiff line change
@@ -96,9 +96,7 @@ pub fn isolating_run_sequences(
9696
let level = levels[start];
9797

9898
// Get the level of the last non-removed char before the runs.
99-
let pred_level = match initial_classes[..start]
100-
.iter()
101-
.rposition(not_removed_by_x9) {
99+
let pred_level = match initial_classes[..start].iter().rposition(not_removed_by_x9) {
102100
Some(idx) => levels[idx],
103101
None => para_level,
104102
};
@@ -118,7 +116,7 @@ pub fn isolating_run_sequences(
118116
sos: max(level, pred_level).bidi_class(),
119117
eos: max(level, succ_level).bidi_class(),
120118
}
121-
},
119+
}
122120
)
123121
.collect();
124122
}

0 commit comments

Comments
 (0)