Skip to content

Commit 011d7cf

Browse files
author
bors-servo
authored
Auto merge of #31 - behnam:serde, r=mbrubeck
Add serde support and bump version to 0.3.0. Add `with_serde` feature, which implements serde for the new `struct Level`, mainly used in `servo`, supporting serde `0.8`, `0.9` and `1.0`. Add tests for the `with_serde` feature. The `serde_tests` modules only work for `serde:>=1.0`, though. `servo` has a dependency on the loose implementation of `visual_runs()`, which could not be improved without breaking the API, as it needs more information to process the levels correctly, and the call-site in `servo` does not have all the information needed and needs a non-trivial change to work with the new improved version. Therefore, I have moved the old version to a `deprecated` module, to be used for now until `servo` is fixed and we drop the old implementation. Bump version to `0.3.0`, as we are now ready for a release: can build `servo` (patch ready) and `idna` crates. <!-- Reviewable:start --> --- This change is [<img src="https://reviewable.io/review_button.svg" height="34" align="absmiddle" alt="Reviewable"/>](https://reviewable.io/reviews/servo/unicode-bidi/31) <!-- Reviewable:end -->
2 parents 0fa0cfe + efa3b2a commit 011d7cf

File tree

4 files changed

+189
-11
lines changed

4 files changed

+189
-11
lines changed

Cargo.toml

+8-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "unicode-bidi"
3-
version = "0.2.6"
3+
version = "0.3.0"
44
authors = ["The Servo Project Developers"]
55
license = "MIT / Apache-2.0"
66
description = "Implementation of the Unicode Bidirectional Algorithm"
@@ -15,3 +15,10 @@ name = "unicode_bidi"
1515

1616
[dependencies]
1717
matches = "0.1"
18+
serde = {version = ">=0.8, <2.0", optional = true}
19+
serde_test = {version = ">=0.8, <2.0", optional = true}
20+
serde_derive = {version = ">=0.8, <2.0", optional = true}
21+
22+
[features]
23+
default = []
24+
with_serde = ["serde", "serde_test", "serde_derive"]

src/deprecated.rs

+88
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
// Copyright 2015 The Servo Project Developers. See the
2+
// COPYRIGHT file at the top-level directory of this distribution.
3+
//
4+
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5+
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6+
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7+
// option. This file may not be copied, modified, or distributed
8+
// except according to those terms.
9+
10+
//! This module holds deprecated assets only.
11+
12+
use super::*;
13+
14+
/// Find the level runs within a line and return them in visual order.
15+
///
16+
/// NOTE: This implementation is incomplete. The algorithm needs information about the text,
17+
/// including original BidiClass property of each character, to be able to perform correctly.
18+
/// Please see [`BidiInfo::visual_runs()`](../struct.BidiInfo.html#method.visual_runs) for the
19+
/// improved implementation.
20+
///
21+
/// `line` is a range of byte indices within `levels`.
22+
///
/// The function first splits `line` into maximal runs of equal level (each pushed as a
/// `start..end` range), then repeatedly reverses every sequence of adjacent runs whose level
/// is at least the current `max_level`, working down to the lowest odd level.
///
/// # Panics
///
/// Panics if `line.start` or `line.end` is greater than `levels.len()`. Because the first
/// level is read with `levels[line.start]`, it also panics when `line.start == levels.len()`,
/// even though that value passes the assertions. The two `expect` calls panic if the `Level`
/// helpers they wrap report an error.
///
23+
/// http://www.unicode.org/reports/tr9/#Reordering_Resolved_Levels
24+
#[deprecated(since="0.3.0", note="please use `BidiInfo::visual_runs()` instead.")]
25+
pub fn visual_runs(line: Range<usize>, levels: &[Level]) -> Vec<LevelRun> {
26+
assert!(line.start <= levels.len());
27+
assert!(line.end <= levels.len());
28+
29+
let mut runs = Vec::new();
30+
31+
// Find consecutive level runs.
32+
let mut start = line.start;
33+
let mut level = levels[start];
34+
// Both extremes start at the first level and are only updated when the level changes.
let mut min_level = level;
35+
let mut max_level = level;
36+
37+
for i in (start + 1)..line.end {
38+
let new_level = levels[i];
39+
if new_level != level {
40+
// End of the previous run, start of a new one.
41+
runs.push(start..i);
42+
start = i;
43+
level = new_level;
44+
45+
min_level = min(level, min_level);
46+
max_level = max(level, max_level);
47+
}
48+
}
49+
// The final run always extends to the end of the line.
runs.push(start..line.end);
50+
51+
let run_count = runs.len();
52+
53+
// Re-order the odd runs.
54+
// http://www.unicode.org/reports/tr9/#L2
55+
56+
// Stop at the lowest *odd* level.
57+
min_level = min_level.new_lowest_ge_rtl().expect("Level error");
58+
59+
while max_level >= min_level {
60+
// Look for the start of a sequence of consecutive runs of max_level or higher.
61+
let mut seq_start = 0;
62+
while seq_start < run_count {
63+
// A run's level is looked up through the level stored at its first index.
if levels[runs[seq_start].start] < max_level {
64+
seq_start += 1;
65+
continue;
66+
}
67+
68+
// Found the start of a sequence. Now find the end.
69+
let mut seq_end = seq_start + 1;
70+
while seq_end < run_count {
71+
if levels[runs[seq_end].start] < max_level {
72+
break;
73+
}
74+
seq_end += 1;
75+
}
76+
77+
// Reverse the runs within this sequence.
78+
runs[seq_start..seq_end].reverse();
79+
80+
seq_start = seq_end;
81+
}
82+
// NOTE(review): presumably `lower` decrements `max_level` in place (the loop condition
// relies on it changing) -- confirm against `Level::lower`.
max_level
83+
.lower(1)
84+
.expect("Lowering embedding level below zero");
85+
}
86+
87+
runs
88+
}

src/level.rs

+30
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99

1010
//! Bidi Embedding Level
1111
//!
12+
//! See [`Level`](struct.Level.html) for more details.
13+
//!
1214
//! http://www.unicode.org/reports/tr9/#BD2
1315
1416
use std::convert::{From, Into};
@@ -26,6 +28,7 @@ use super::char_data::BidiClass;
2628
///
2729
/// http://www.unicode.org/reports/tr9/#BD2
2830
#[derive(Copy, Clone, Debug, Eq, Ord, PartialEq, PartialOrd)]
31+
#[cfg_attr(feature = "with_serde", derive(Serialize, Deserialize))]
2932
pub struct Level(u8);
3033

3134
pub const LTR_LEVEL: Level = Level(0);
@@ -338,3 +341,30 @@ mod tests {
338341
assert_ne!(Level::vec(&[0, 1, 4, 125]), vec!["0", "1", "5", "125"]);
339342
}
340343
}
344+
345+
#[cfg(all(feature = "with_serde", test))]
346+
// Serde token tests for `Level`; compiled only for test builds with the `with_serde` feature.
mod serde_tests {
347+
use serde_test::{Token, assert_tokens};
348+
use super::*;
349+
350+
// `Level::ltr()` and `Level::rtl()` are expected to produce a newtype struct named "Level"
// wrapping the `u8` values 0 and 1 respectively.
#[test]
351+
fn test_statics() {
352+
assert_tokens(
353+
&Level::ltr(),
354+
&[Token::NewtypeStruct { name: "Level" }, Token::U8(0)],
355+
);
356+
assert_tokens(
357+
&Level::rtl(),
358+
&[Token::NewtypeStruct { name: "Level" }, Token::U8(1)],
359+
);
360+
}
361+
362+
// A level built with `Level::new(42)` is expected to produce the same newtype wrapper
// around `U8(42)`.
#[test]
363+
fn test_new() {
364+
let level = Level::new(42).unwrap();
365+
assert_tokens(
366+
&level,
367+
&[Token::NewtypeStruct { name: "Level" }, Token::U8(42)],
368+
);
369+
}
370+
}

src/lib.rs

+63-10
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,14 @@
6060
#[macro_use]
6161
extern crate matches;
6262

63+
#[cfg(feature = "with_serde")]
64+
#[macro_use]
65+
extern crate serde_derive;
66+
67+
#[cfg(all(feature = "with_serde", test))]
68+
extern crate serde_test;
69+
70+
pub mod deprecated;
6371
pub mod format_chars;
6472
pub mod level;
6573

@@ -156,8 +164,8 @@ impl<'text> InitialInfo<'text> {
156164
match isolate_stack.last() {
157165
Some(&start) => {
158166
if original_classes[start] == FSI {
159-
// X5c. If the first strong character between FSI and its matching PDI
160-
// is R or AL, treat it as RLI. Otherwise, treat it as LRI.
167+
// X5c. If the first strong character between FSI and its matching
168+
// PDI is R or AL, treat it as RLI. Otherwise, treat it as LRI.
161169
for j in 0..chars::FSI.len_utf8() {
162170
original_classes[start + j] =
163171
if class == L { LRI } else { RLI };
@@ -166,8 +174,9 @@ impl<'text> InitialInfo<'text> {
166174
}
167175
None => {
168176
if para_level.is_none() {
169-
// P2. Find the first character of type L, AL, or R, while skipping any
170-
// characters between an isolate initiator and its matching PDI.
177+
// P2. Find the first character of type L, AL, or R, while skipping
178+
// any characters between an isolate initiator and its matching
179+
// PDI.
171180
para_level = Some(
172181
if class != L {
173182
Level::rtl()
@@ -208,8 +217,9 @@ impl<'text> InitialInfo<'text> {
208217

209218
/// Bidi information of the text
210219
///
211-
/// The `original_classes` and `levels` vectors are indexed by byte offsets into the text. If a character
212-
/// is multiple bytes wide, then its class and level will appear multiple times in these vectors.
220+
/// The `original_classes` and `levels` vectors are indexed by byte offsets into the text. If a
221+
/// character is multiple bytes wide, then its class and level will appear multiple times in these
222+
/// vectors.
213223
// TODO: Impl `struct StringProperty<T> { values: Vec<T> }` and use instead of Vec<T>
214224
#[derive(Debug, PartialEq)]
215225
pub struct BidiInfo<'text> {
@@ -232,8 +242,8 @@ pub struct BidiInfo<'text> {
232242
impl<'text> BidiInfo<'text> {
233243
/// Split the text into paragraphs and determine the bidi embedding levels for each paragraph.
234244
///
235-
/// TODO: In early steps, check for special cases that allow later steps to be skipped. like text
236-
/// that is entirely LTR. See the `nsBidi` class from Gecko for comparison.
245+
/// TODO: In early steps, check for special cases that allow later steps to be skipped. like
246+
/// text that is entirely LTR. See the `nsBidi` class from Gecko for comparison.
237247
///
238248
/// TODO: Support auto-RTL base direction
239249
pub fn new(text: &str, default_para_level: Option<Level>) -> BidiInfo {
@@ -358,9 +368,8 @@ impl<'text> BidiInfo<'text> {
358368
}
359369
}
360370

361-
let mut runs = Vec::new();
362-
363371
// Find consecutive level runs.
372+
let mut runs = Vec::new();
364373
let mut start = line.start;
365374
let mut level = levels[start];
366375
let mut min_level = level;
@@ -440,6 +449,7 @@ fn assign_levels_to_removed_chars(para_level: Level, classes: &[BidiClass], leve
440449
}
441450
}
442451

452+
443453
#[cfg(test)]
444454
mod tests {
445455
use super::*;
@@ -695,3 +705,46 @@ mod tests {
695705
);
696706
}
697707
}
708+
709+
710+
#[cfg(all(feature = "with_serde", test))]
711+
// Serde token tests for the `levels` vector produced by `BidiInfo`; compiled only for test
// builds with the `with_serde` feature.
mod serde_tests {
712+
use serde_test::{Token, assert_tokens};
713+
use super::*;
714+
715+
// The levels of a mixed Latin/Hebrew string are expected to serialize as a sequence with one
// `Level` newtype per byte of the text.
#[test]
716+
fn test_levels() {
717+
let text = "abc אבג";
718+
let bidi_info = BidiInfo::new(text, None);
719+
let levels = bidi_info.levels;
720+
// The text is 10 bytes long, and `levels` is checked to have one entry per byte.
assert_eq!(text.as_bytes().len(), 10);
721+
assert_eq!(levels.len(), 10);
722+
// Expected stream: four entries at level 0 followed by six entries at level 1.
assert_tokens(
723+
&levels,
724+
&[
725+
Token::Seq { len: Some(10) },
726+
Token::NewtypeStruct { name: "Level" },
727+
Token::U8(0),
728+
Token::NewtypeStruct { name: "Level" },
729+
Token::U8(0),
730+
Token::NewtypeStruct { name: "Level" },
731+
Token::U8(0),
732+
Token::NewtypeStruct { name: "Level" },
733+
Token::U8(0),
734+
Token::NewtypeStruct { name: "Level" },
735+
Token::U8(1),
736+
Token::NewtypeStruct { name: "Level" },
737+
Token::U8(1),
738+
Token::NewtypeStruct { name: "Level" },
739+
Token::U8(1),
740+
Token::NewtypeStruct { name: "Level" },
741+
Token::U8(1),
742+
Token::NewtypeStruct { name: "Level" },
743+
Token::U8(1),
744+
Token::NewtypeStruct { name: "Level" },
745+
Token::U8(1),
746+
Token::SeqEnd,
747+
],
748+
);
749+
}
750+
}

0 commit comments

Comments
 (0)