Skip to content

Commit c460c96

Browse files
committed
refactor datagen
1 parent 775be2f commit c460c96

File tree

5 files changed

+199
-191
lines changed

5 files changed

+199
-191
lines changed

provider/source/src/calendar/eras.rs

Lines changed: 116 additions & 117 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ use crate::cldr_serde::eras::EraData;
77
use crate::datetime::DatagenCalendar;
88
use crate::SourceDataProvider;
99
use icu::calendar::provider::*;
10-
use icu::calendar::types::Month;
1110
use icu::calendar::{AnyCalendar, Date};
1211
use icu_provider::prelude::*;
1312
use std::collections::BTreeMap;
@@ -22,7 +21,10 @@ impl SourceDataProvider {
2221
#[expect(clippy::type_complexity)]
2322
pub(crate) fn all_eras(
2423
&self,
25-
) -> Result<&BTreeMap<DatagenCalendar, Vec<(usize, EraData)>>, DataError> {
24+
) -> Result<
25+
&BTreeMap<DatagenCalendar, (Option<DatagenCalendar>, Vec<(usize, EraData)>)>,
26+
DataError,
27+
> {
2628
let cldr = self.cldr()?;
2729
cldr.calendar_eras
2830
.get_or_init(|| {
@@ -58,72 +60,76 @@ impl SourceDataProvider {
5860
]
5961
.into_iter()
6062
.map(|cal| {
61-
let mut vec = if cal == DatagenCalendar::JapaneseExtended
62-
|| cal == DatagenCalendar::JapaneseModern
63-
{
64-
era_dates_map[DatagenCalendar::Gregorian.cldr_name()]
65-
.clone()
66-
.eras
67-
.into_iter()
68-
.filter_map(|(id, data)| {
69-
data.code.as_ref()?;
70-
Some((id.parse::<usize>().ok()?, data))
63+
let inherit = era_dates_map[cal.cldr_name()]
64+
.inherit_eras
65+
.as_ref()
66+
.map(|c| DatagenCalendar::from_cldr_name(&c.calendar));
67+
68+
struct EmptyProvider;
69+
impl<M: DataMarker<DataStruct = JapaneseEras<'static>>> DataProvider<M> for EmptyProvider{
70+
fn load(&self, _req: DataRequest) -> Result<DataResponse<M>, DataError> {
71+
Ok(DataResponse {
72+
metadata: Default::default(),
73+
payload: DataPayload::from_owned(
74+
JapaneseEras {
75+
dates_to_eras: [(
76+
EraStartDate {
77+
year: 3000,
78+
month: 1,
79+
day: 1,
80+
},
81+
tinystr::tinystr!(
82+
16, "dummy"
83+
),
84+
)]
85+
.into_iter()
86+
.collect(),
87+
},
88+
),
7189
})
72-
.chain(
73-
era_dates_map[cal.cldr_name()]
74-
.clone()
75-
.eras
76-
.into_iter()
77-
.filter_map(|(key, mut data)| {
78-
let key = key.parse::<usize>().ok()?;
79-
if data.code.as_ref().is_none() {
80-
if cal == DatagenCalendar::JapaneseExtended {
81-
data.code =
82-
Some(crate::calendar::eras::era_to_code(
83-
japanese_names
84-
.get(&(key - 2).to_string())?,
85-
data.start?.year,
86-
));
87-
} else {
88-
None?;
89-
}
90-
}
91-
Some((key, data))
92-
}),
93-
)
94-
.collect::<Vec<_>>()
95-
} else {
96-
let calendar =
97-
AnyCalendar::try_new_unstable(self, cal.canonical_any_calendar_kind())
98-
.unwrap();
99-
100-
era_dates_map[cal.cldr_name()]
101-
.clone()
102-
.eras
103-
.into_iter()
104-
.filter_map(|(key, mut data)| {
105-
let code = data.code.as_deref()?;
106-
// Check what ICU4X returns for the date 1-1-1 era
107-
data.icu4x_era_index = Date::try_new_from_codes(
108-
Some(code),
109-
1,
110-
Month::new(1).code(),
111-
1,
112-
icu::calendar::Ref(&calendar),
113-
)
114-
.inspect_err(|e| {
115-
log::warn!("Era '{code}' unknown by icu::calendar ({e:?})");
116-
})
117-
.ok()?
118-
.year()
119-
.era()?
120-
.era_index;
121-
Some((key.parse::<usize>().ok()?, data))
122-
})
123-
.collect::<Vec<_>>()
124-
};
90+
}
91+
}
92+
93+
let any_cal = AnyCalendar::try_new_unstable(&EmptyProvider, cal.canonical_any_calendar_kind()).unwrap();
94+
95+
let mut vec = era_dates_map[cal.cldr_name()]
96+
.eras
97+
.iter()
98+
.filter_map(|(key, data)| {
99+
let mut data = data.clone();
100+
match cal {
101+
DatagenCalendar::JapaneseExtended => {
102+
if data.code.is_empty() {
103+
data.code = crate::calendar::eras::era_to_code(
104+
japanese_names.get(&key.to_string())?,
105+
data.start?.year,
106+
);
107+
}
108+
}
109+
_ => {
110+
if cal == DatagenCalendar::JapaneseModern && data.start.unwrap().year < 1868 {
111+
return None;
112+
}
113+
let date = data.start.or(data.end).unwrap();
114+
let era_year =
115+
Date::try_new_gregorian(date.year, date.month, date.day)
116+
.unwrap()
117+
.to_calendar(icu::calendar::Ref(&any_cal))
118+
.year()
119+
.era()
120+
.unwrap();
121+
if era_year.era != data.code {
122+
println!("mismatched era code {era_year:?} - {data:?}");
123+
}
124+
data.icu4x_era_index = era_year.era_index;
125+
}
126+
}
127+
128+
Some((key.parse::<usize>().unwrap(), data))
129+
})
130+
.collect::<Vec<_>>();
125131
vec.sort_by_key(|&(k, _)| k);
126-
(cal, vec)
132+
(cal, (inherit, vec))
127133
})
128134
.collect())
129135
})
@@ -140,8 +146,6 @@ fn process_era_dates_map(
140146
core::mem::take(&mut data.get_mut("japanese").unwrap().eras)
141147
.into_iter()
142148
.map(|(idx, mut era)| {
143-
// https://unicode-org.atlassian.net/browse/CLDR-18388 for why we need to do + 2
144-
let idx = (idx.parse::<usize>().unwrap() + 2).to_string();
145149
if let Some(start) = era.start.as_mut() {
146150
// All pre-Taisho start dates are known to be wrong, this at least makes them valid.
147151
// See https://unicode-org.atlassian.net/browse/CLDR-11400
@@ -160,47 +164,45 @@ fn process_era_dates_map(
160164
data
161165
}
162166

163-
impl SourceDataProvider {
164-
fn load_japanese_eras(
165-
&self,
166-
cal: DatagenCalendar,
167-
) -> Result<DataResponse<CalendarJapaneseModernV1>, DataError> {
168-
let mut dates_to_eras = BTreeMap::new();
169-
170-
for (_, data) in self.all_eras()?[&cal].iter().skip(2) {
171-
let start_date = data.start.unwrap();
172-
let code = data.code.as_deref().unwrap();
173-
let code = code.parse().map_err(|_| {
174-
DataError::custom("Era code does not fit int TinyStr16").with_display_context(&code)
175-
})?;
176-
177-
dates_to_eras.insert(start_date, code);
178-
}
167+
// Serves the `CalendarJapaneseModernV1` payload directly from the era table
// returned by `all_eras()`, keyed by `DatagenCalendar::JapaneseModern`.
impl DataProvider<CalendarJapaneseModernV1> for SourceDataProvider {
168+
// Builds a `JapaneseEras` value whose `dates_to_eras` pairs each era's start
// date with its parsed era code.
//
// Errors from `check_req` and from the first `all_eras()` call are propagated
// with `?`. Panics if an era that survives the filter has no start date, if
// its code fails to parse, or if the `all_eras()` call inside the closure
// returns an error.
fn load(&self, req: DataRequest) -> Result<DataResponse<CalendarJapaneseModernV1>, DataError> {
169+
self.check_req::<CalendarJapaneseModernV1>(req)?;
170+
171+
// Each table entry is `(optional calendar to inherit eras from, own eras)`.
let (inherit, ref eras) = self.all_eras()?[&DatagenCalendar::JapaneseModern];
172+
173+
// Eras of the inherited calendar (if any) come first, then this calendar's
// own eras; entries whose code is "bce" or "ce" are dropped.
let dates_to_eras = inherit
174+
.iter()
175+
.flat_map(|i| self.all_eras().unwrap()[i].1.iter())
176+
.chain(eras)
177+
.filter(|(_, data)| !matches!(data.code.as_str(), "bce" | "ce"))
178+
.map(|(_, data)| (data.start.unwrap(), data.code.parse().unwrap()))
179+
.collect();
179180

180181
Ok(DataResponse {
181182
metadata: Default::default(),
182-
payload: DataPayload::from_owned(JapaneseEras {
183-
dates_to_eras: dates_to_eras.into_iter().collect(),
184-
}),
183+
payload: DataPayload::from_owned(JapaneseEras { dates_to_eras }),
185184
})
186185
}
187186
}
188187

189-
impl DataProvider<CalendarJapaneseModernV1> for SourceDataProvider {
190-
fn load(&self, req: DataRequest) -> Result<DataResponse<CalendarJapaneseModernV1>, DataError> {
191-
self.check_req::<CalendarJapaneseModernV1>(req)?;
192-
self.load_japanese_eras(DatagenCalendar::JapaneseModern)
193-
}
194-
}
195-
196188
impl DataProvider<CalendarJapaneseExtendedV1> for SourceDataProvider {
197189
fn load(
198190
&self,
199191
req: DataRequest,
200192
) -> Result<DataResponse<CalendarJapaneseExtendedV1>, DataError> {
201193
self.check_req::<CalendarJapaneseExtendedV1>(req)?;
202-
let DataResponse { metadata, payload } =
203-
self.load_japanese_eras(DatagenCalendar::JapaneseExtended)?;
194+
195+
let (inherit, ref eras) = self.all_eras()?[&DatagenCalendar::JapaneseExtended];
196+
197+
let dates_to_eras = inherit
198+
.iter()
199+
.flat_map(|i| self.all_eras().unwrap()[i].1.iter())
200+
.chain(eras)
201+
.filter(|(_, data)| !matches!(data.code.as_str(), "bce" | "ce"))
202+
.map(|(_, data)| (data.start.unwrap(), data.code.parse().unwrap()))
203+
.collect();
204+
205+
let eras = JapaneseEras { dates_to_eras };
204206

205207
// Integrity check
206208
//
@@ -211,7 +213,7 @@ impl DataProvider<CalendarJapaneseExtendedV1> for SourceDataProvider {
211213
let snapshot: JapaneseEras = serde_json::from_str(JAPANEXT_FILE)
212214
.expect("Failed to parse the precached golden. This is a bug.");
213215

214-
if snapshot != *payload.get() {
216+
if snapshot != eras {
215217
return Err(DataError::custom(
216218
"Era data has changed! This can be for two reasons: Either the CLDR locale data for Japanese eras has \
217219
changed in an incompatible way, or there is a new Japanese era. Run \
@@ -224,8 +226,8 @@ impl DataProvider<CalendarJapaneseExtendedV1> for SourceDataProvider {
224226
}
225227

226228
Ok(DataResponse {
227-
metadata,
228-
payload: payload.cast(),
229+
metadata: Default::default(),
230+
payload: DataPayload::from_owned(eras),
229231
})
230232
}
231233
}
@@ -279,6 +281,7 @@ impl crate::IterableDataProviderCached<CalendarJapaneseExtendedV1> for SourceDat
279281
#[test]
280282
pub fn ethiopic_and_ethioaa_are_compatible() {
281283
use icu::calendar::cal::{Ethiopian, EthiopianEraStyle};
284+
use icu::calendar::types::Month;
282285
assert_eq!(
283286
Date::try_new_from_codes(
284287
Some("aa"),
@@ -306,22 +309,19 @@ pub fn ethiopic_and_ethioaa_are_compatible() {
306309
#[test]
307310
pub fn japanese_and_japanext_are_compatible() {
308311
let provider = SourceDataProvider::new_testing();
309-
let japanese = &provider.all_eras().unwrap()[&DatagenCalendar::JapaneseModern];
310-
let japanext = &provider.all_eras().unwrap()[&DatagenCalendar::JapaneseExtended];
311-
assert_eq!(
312-
japanext
313-
.iter()
314-
.take(2)
315-
.zip(japanese.iter().take(2))
316-
.find(|(e, a)| e != a),
317-
None,
318-
);
312+
let japanese = DataProvider::<CalendarJapaneseModernV1>::load(&provider, Default::default())
313+
.unwrap()
314+
.payload;
315+
let japanext = DataProvider::<CalendarJapaneseExtendedV1>::load(&provider, Default::default())
316+
.unwrap()
317+
.payload;
319318
assert_eq!(
320319
japanext
320+
.get()
321+
.dates_to_eras
321322
.iter()
322-
.skip(2)
323323
.rev()
324-
.zip(japanese.iter().skip(2).rev())
324+
.zip(japanese.get().dates_to_eras.iter().rev())
325325
.find(|(e, a)| e != a),
326326
None,
327327
"{japanext:?} - {japanese:?}"
@@ -381,10 +381,9 @@ fn test_calendar_eras() {
381381
// Check that code and aliases produce identical results
382382
for era in era
383383
.aliases
384-
.as_deref()
385-
.into_iter()
386-
.flat_map(|s| s.split(' '))
387-
.chain(era.code.as_deref())
384+
.split(' ')
385+
.chain(Some(era.code.as_str()))
386+
.filter(|s| !s.is_empty())
388387
{
389388
assert_eq!(
390389
Date::try_new_from_codes(
@@ -409,8 +408,8 @@ fn test_calendar_eras() {
409408
}
410409

411410
// Check that the correct era code is returned
412-
if let Some(code) = era.code.as_deref() {
413-
assert_eq!(era_year.era, code);
411+
if !era.code.is_empty() {
412+
assert_eq!(era_year.era, era.code);
414413
}
415414

416415
// Check that the start/end date uses year 1, and minimal/maximal month/day

provider/source/src/cldr_cache.rs

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,12 @@ pub(crate) struct CldrCache {
2727
dir_suffix: OnceLock<Result<&'static str, DataError>>,
2828
extended_locale_expander: OnceLock<Result<LocaleExpander, DataError>>,
2929
#[expect(clippy::type_complexity)]
30-
pub(crate) calendar_eras:
31-
OnceLock<Result<BTreeMap<DatagenCalendar, Vec<(usize, EraData)>>, DataError>>,
30+
pub(crate) calendar_eras: OnceLock<
31+
Result<
32+
BTreeMap<DatagenCalendar, (Option<DatagenCalendar>, Vec<(usize, EraData)>)>,
33+
DataError,
34+
>,
35+
>,
3236
#[cfg(feature = "experimental")]
3337
// used by transforms/mod.rs
3438
pub(crate) transforms: OnceLock<

provider/source/src/cldr_serde/eras.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -38,10 +38,10 @@ pub(crate) struct EraData {
3838
pub(crate) start: Option<EraStartDate>,
3939
#[serde(rename = "_end", default, deserialize_with = "parse_era_start_date")]
4040
pub(crate) end: Option<EraStartDate>,
41-
#[serde(rename = "_code")]
42-
pub(crate) code: Option<String>,
43-
#[serde(rename = "_aliases")]
44-
pub(crate) aliases: Option<String>,
41+
#[serde(rename = "_code", default)]
42+
pub(crate) code: String,
43+
#[serde(rename = "_aliases", default)]
44+
pub(crate) aliases: String,
4545
/// EraYear::era_index
4646
#[serde(skip)]
4747
pub(crate) icu4x_era_index: Option<u8>,

provider/source/src/datetime/mod.rs

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,27 @@ impl DatagenCalendar {
5353
}
5454
}
5555

56+
/// Maps a calendar name string to its `DatagenCalendar` variant.
///
/// The mapping is many-to-one: both Ethiopic names resolve to `Ethiopic`, and
/// all five `islamic*` names resolve to `Hijri`.
///
/// # Panics
///
/// Panics on any name not listed in the `match`; the panic message is the
/// unrecognised name itself.
pub(crate) fn from_cldr_name(s: &str) -> Self {
57+
use DatagenCalendar::*;
58+
match s {
59+
"buddhist" => Buddhist,
60+
"chinese" => Chinese,
61+
"coptic" => Coptic,
62+
"dangi" => Dangi,
63+
// Two names share the single `Ethiopic` variant.
"ethiopic" | "ethiopic-amete-alem" => Ethiopic,
64+
"gregorian" => Gregorian,
65+
"hebrew" => Hebrew,
66+
"indian" => Indian,
67+
// Every `islamic*` name collapses to `Hijri`.
"islamic" | "islamic-civil" | "islamic-umalqura" | "islamic-rgsa" | "islamic-tbla" => {
68+
Hijri
69+
}
70+
// NOTE(review): "japanese" resolves to `JapaneseExtended`, never
// `JapaneseModern` — confirm this is intended for every caller.
"japanese" => JapaneseExtended,
71+
"persian" => Persian,
72+
"roc" => Roc,
73+
// Any other name is treated as unrecoverable.
c => panic!("{c}"),
74+
}
75+
}
76+
5677
pub(crate) fn canonical_any_calendar_kind(self) -> AnyCalendarKind {
5778
use DatagenCalendar::*;
5879
match self {

0 commit comments

Comments
 (0)