diff --git a/components/calendar/src/cal/japanese.rs b/components/calendar/src/cal/japanese.rs index 7bf82229d86..6923b3ef954 100644 --- a/components/calendar/src/cal/japanese.rs +++ b/components/calendar/src/cal/japanese.rs @@ -73,7 +73,9 @@ pub struct Japanese { /// These eras are loaded from data, requiring a data provider capable of providing [`CalendarJapaneseExtendedV1`] /// data. #[derive(Clone, Debug, Default)] -pub struct JapaneseExtended(Japanese); +pub struct JapaneseExtended { + eras: DataPayload, +} impl Japanese { /// Creates a new [`Japanese`] using only modern eras (post-meiji) from compiled data. @@ -116,11 +118,11 @@ impl JapaneseExtended { /// [📚 Help choosing a constructor](icu_provider::constructors) #[cfg(feature = "compiled_data")] pub const fn new() -> Self { - Self(Japanese { + Self { eras: DataPayload::from_static_ref( crate::provider::Baked::SINGLETON_CALENDAR_JAPANESE_EXTENDED_V1, ), - }) + } } icu_provider::gen_buffer_data_constructors!(() -> error: DataError, @@ -135,9 +137,9 @@ impl JapaneseExtended { pub fn try_new_unstable + ?Sized>( provider: &D, ) -> Result { - Ok(Self(Japanese { - eras: provider.load(Default::default())?.payload.cast(), - })) + Ok(Self { + eras: provider.load(Default::default())?.payload, + }) } } @@ -168,6 +170,103 @@ const REIWA_START: EraStartDate = EraStartDate { }; impl GregorianYears for &'_ Japanese { + fn extended_from_era_year( + &self, + era: Option<&[u8]>, + year: i32, + ) -> Result { + if let Ok(g) = CeBce.extended_from_era_year(era, year) { + return Ok(g); + } + let Some(era) = era else { + // unreachable, handled by CeBce + return Err(UnknownEraError); + }; + + // Avoid linear search by trying well known eras + if era == b"reiwa" { + return Ok(year - 1 + REIWA_START.year); + } else if era == b"heisei" { + return Ok(year - 1 + HEISEI_START.year); + } else if era == b"showa" { + return Ok(year - 1 + SHOWA_START.year); + } else if era == b"taisho" { + return Ok(year - 1 + TAISHO_START.year); + } else if era == b"meiji" { + return Ok(year - 1 + MEIJI_START.year); + } + + let era_start = self + .eras + .get() + .dates_to_eras + .iter() + .rev() + .find_map(|(s, e)| (e.as_bytes() == era).then_some(s)) + .ok_or(UnknownEraError)?; + Ok(era_start.year + year - 1) + } + + fn era_year_from_extended(&self, year: i32, month: u8, day: u8) -> types::EraYear { + let date: EraStartDate = EraStartDate { year, month, day }; + + let (start, era) = if date >= MEIJI_START + && self + .eras + .get() + .dates_to_eras + .last() + .is_some_and(|(_, e)| e == tinystr!(16, "reiwa")) + { + // We optimize for the five "modern" post-Meiji eras, which are stored in a smaller + // array and also hardcoded. The hardcoded version is not used if data indicates the + // presence of newer eras. + if date >= REIWA_START { + (REIWA_START, tinystr!(16, "reiwa")) + } else if date >= HEISEI_START { + (HEISEI_START, tinystr!(16, "heisei")) + } else if date >= SHOWA_START { + (SHOWA_START, tinystr!(16, "showa")) + } else if date >= TAISHO_START { + (TAISHO_START, tinystr!(16, "taisho")) + } else { + (MEIJI_START, tinystr!(16, "meiji")) + } + } else { + let data = &self.eras.get().dates_to_eras; + match data.iter().rfind(|&(s, _)| date >= s) { + None => { + return types::EraYear { + // TODO: return era indices? + era_index: None, + ..CeBce.era_year_from_extended(year, month, day) + }; + } + Some((s, e)) => (s, e), + } + }; + + types::EraYear { + era, + era_index: None, + year: year - start.year + 1, + extended_year: year, + ambiguity: types::YearAmbiguity::CenturyRequired, + } + } + + fn debug_name(&self) -> &'static str { + "Japanese" + } + + fn calendar_algorithm(&self) -> Option { + Some(crate::preferences::CalendarAlgorithm::Japanese) + } +} + +impl_with_abstract_gregorian!(Japanese, JapaneseDateInner, Japanese, this, this); + +impl GregorianYears for &'_ JapaneseExtended { fn extended_from_era_year( &self, era: Option<&[u8]>, @@ -273,30 +372,20 @@ impl GregorianYears for &'_ Japanese { } fn debug_name(&self) -> &'static str { - if self.eras.get().dates_to_eras.len() > 10 { - "Japanese (historical era data)" - } else { - "Japanese" - } + "Japanese (historical era data)" } fn calendar_algorithm(&self) -> Option { - if self.eras.get().dates_to_eras.len() > 10 { - None - } else { - Some(crate::preferences::CalendarAlgorithm::Japanese) - } + None } } -impl_with_abstract_gregorian!(Japanese, JapaneseDateInner, Japanese, this, this); - impl_with_abstract_gregorian!( JapaneseExtended, JapaneseExtendedDateInner, Japanese, this, - &this.0 + this ); impl Date { @@ -411,7 +500,7 @@ impl Date { year, month, day, - &AbstractGregorian(&japanext_calendar.as_calendar().0), + &AbstractGregorian(japanext_calendar.as_calendar()), ) .map(ArithmeticDate::cast) .map(JapaneseExtendedDateInner) diff --git a/provider/source/src/calendar/eras.rs b/provider/source/src/calendar/eras.rs index cd174df7362..77e234e4aa4 100644 --- a/provider/source/src/calendar/eras.rs +++ b/provider/source/src/calendar/eras.rs @@ -7,7 +7,6 @@ use crate::cldr_serde::eras::EraData; use crate::datetime::DatagenCalendar; use crate::SourceDataProvider; use icu::calendar::provider::*; -use icu::calendar::types::Month; use icu::calendar::{AnyCalendar, Date}; use icu_provider::prelude::*; use std::collections::BTreeMap; @@ -22,7 +21,10 @@ impl SourceDataProvider { #[expect(clippy::type_complexity)] pub(crate) fn all_eras( &self, - ) -> Result<&BTreeMap>, DataError> { + ) -> Result< + &BTreeMap, Vec<(usize, EraData)>)>, + DataError, + > { let cldr = self.cldr()?; cldr.calendar_eras .get_or_init(|| { @@ -58,72 +60,65 @@ impl SourceDataProvider { ] .into_iter() .map(|cal| { - let mut vec = if cal == DatagenCalendar::JapaneseExtended - || cal == DatagenCalendar::JapaneseModern - { - era_dates_map[DatagenCalendar::Gregorian.cldr_name()] - .clone() - .eras - .into_iter() - .filter_map(|(id, data)| { - data.code.as_ref()?; - Some((id.parse::().ok()?, data)) + let inherit = era_dates_map[cal.cldr_name()] + .inherit_eras + .as_ref() + .map(|c| DatagenCalendar::from_cldr_name(&c.calendar)); + + struct EmptyProvider; + impl>> DataProvider for EmptyProvider { + fn load(&self, _req: DataRequest) -> Result, DataError> { + Ok(DataResponse { + metadata: Default::default(), + payload: DataPayload::from_owned( + JapaneseEras { + dates_to_eras: [(EraStartDate { year: 3000, month: 1, day: 1 }, tinystr::tinystr!(16, "dummy"))].into_iter().collect(), + }, + ), }) - .chain( - era_dates_map[cal.cldr_name()] - .clone() - .eras - .into_iter() - .filter_map(|(key, mut data)| { - let key = key.parse::().ok()?; - if data.code.as_ref().is_none() { - if cal == DatagenCalendar::JapaneseExtended { - data.code = - Some(crate::calendar::eras::era_to_code( - japanese_names - .get(&(key - 2).to_string())?, - data.start?.year, - )); - } else { - None?; - } - } - Some((key, data)) - }), - ) - .collect::>() - } else { - let calendar = - AnyCalendar::try_new_unstable(self, cal.canonical_any_calendar_kind()) - .unwrap(); - - era_dates_map[cal.cldr_name()] - .clone() - .eras - .into_iter() - .filter_map(|(key, mut data)| { - let code = data.code.as_deref()?; - // Check what ICU4X returns for the date 1-1-1 era - data.icu4x_era_index = Date::try_new_from_codes( - Some(code), - 1, - Month::new(1).code(), - 1, - icu::calendar::Ref(&calendar), - ) - .inspect_err(|e| { - log::warn!("Era '{code}' unknown by icu::calendar ({e:?})"); - }) - .ok()? - .year() - .era()? - .era_index; - Some((key.parse::().ok()?, data)) - }) - .collect::>() - }; + } + } + + let any_cal = AnyCalendar::try_new_unstable(&EmptyProvider, cal.canonical_any_calendar_kind()).unwrap(); + + let mut vec = era_dates_map[cal.cldr_name()] + .eras + .iter() + .filter_map(|(key, data)| { + let mut data = data.clone(); + match cal { + DatagenCalendar::JapaneseExtended => { + if data.code.is_empty() { + data.code = crate::calendar::eras::era_to_code( + japanese_names.get(&key.to_string())?, + data.start?.year, + ); + } + } + _ => { + if cal == DatagenCalendar::JapaneseModern && data.start.unwrap().year < 1868 { + return None; + } + let date = data.start.or(data.end).unwrap(); + let era_year = + Date::try_new_gregorian(date.year, date.month, date.day) + .unwrap() + .to_calendar(icu::calendar::Ref(&any_cal)) + .year() + .era() + .unwrap(); + if era_year.era != data.code { + println!("mismatched era code {era_year:?} - {data:?}"); + } + data.icu4x_era_index = era_year.era_index; + } + } + + Some((key.parse::().unwrap(), data)) + }) + .collect::>(); vec.sort_by_key(|&(k, _)| k); - (cal, vec) + (cal, (inherit, vec)) }) .collect()) }) @@ -140,8 +135,6 @@ fn process_era_dates_map( core::mem::take(&mut data.get_mut("japanese").unwrap().eras) .into_iter() .map(|(idx, mut era)| { - // https://unicode-org.atlassian.net/browse/CLDR-18388 for why we need to do + 2 - let idx = (idx.parse::().unwrap() + 2).to_string(); if let Some(start) = era.start.as_mut() { // All pre-Taisho start dates are known to be wrong, this at least makes them valid. // See https://unicode-org.atlassian.net/browse/CLDR-11400 @@ -160,47 +153,45 @@ fn process_era_dates_map( data } -impl SourceDataProvider { - fn load_japanese_eras( - &self, - cal: DatagenCalendar, - ) -> Result, DataError> { - let mut dates_to_eras = BTreeMap::new(); - - for (_, data) in self.all_eras()?[&cal].iter().skip(2) { - let start_date = data.start.unwrap(); - let code = data.code.as_deref().unwrap(); - let code = code.parse().map_err(|_| { - DataError::custom("Era code does not fit int TinyStr16").with_display_context(&code) - })?; - - dates_to_eras.insert(start_date, code); - } +impl DataProvider for SourceDataProvider { + fn load(&self, req: DataRequest) -> Result, DataError> { + self.check_req::(req)?; + + let (inherit, ref eras) = self.all_eras()?[&DatagenCalendar::JapaneseModern]; + + let dates_to_eras = inherit + .iter() + .flat_map(|i| self.all_eras().unwrap()[i].1.iter()) + .chain(eras) + .filter(|(_, data)| !matches!(data.code.as_str(), "bce" | "ce")) + .map(|(_, data)| (data.start.unwrap(), data.code.parse().unwrap())) + .collect(); Ok(DataResponse { metadata: Default::default(), - payload: DataPayload::from_owned(JapaneseEras { - dates_to_eras: dates_to_eras.into_iter().collect(), - }), + payload: DataPayload::from_owned(JapaneseEras { dates_to_eras }), }) } } -impl DataProvider for SourceDataProvider { - fn load(&self, req: DataRequest) -> Result, DataError> { - self.check_req::(req)?; - self.load_japanese_eras(DatagenCalendar::JapaneseModern) - } -} - impl DataProvider for SourceDataProvider { fn load( &self, req: DataRequest, ) -> Result, DataError> { self.check_req::(req)?; - let DataResponse { metadata, payload } = - self.load_japanese_eras(DatagenCalendar::JapaneseExtended)?; + + let (inherit, ref eras) = self.all_eras()?[&DatagenCalendar::JapaneseExtended]; + + let dates_to_eras = inherit + .iter() + .flat_map(|i| self.all_eras().unwrap()[i].1.iter()) + .chain(eras) + .filter(|(_, data)| !matches!(data.code.as_str(), "bce" | "ce")) + .map(|(_, data)| (data.start.unwrap(), data.code.parse().unwrap())) + .collect(); + + let eras = JapaneseEras { dates_to_eras }; // Integrity check // @@ -211,7 +202,7 @@ impl DataProvider for SourceDataProvider { let snapshot: JapaneseEras = serde_json::from_str(JAPANEXT_FILE) .expect("Failed to parse the precached golden. This is a bug."); - if snapshot != *payload.get() { + if snapshot != eras { return Err(DataError::custom( "Era data has changed! This can be for two reasons: Either the CLDR locale data for Japanese eras has \ changed in an incompatible way, or there is a new Japanese era. Run \ @@ -224,8 +215,8 @@ impl DataProvider for SourceDataProvider { } Ok(DataResponse { - metadata, - payload: payload.cast(), + metadata: Default::default(), + payload: DataPayload::from_owned(eras), }) } } @@ -279,6 +270,7 @@ impl crate::IterableDataProviderCached for SourceDat #[test] pub fn ethiopic_and_ethioaa_are_compatible() { use icu::calendar::cal::{Ethiopian, EthiopianEraStyle}; + use icu::calendar::types::Month; assert_eq!( Date::try_new_from_codes( Some("aa"), @@ -306,22 +298,19 @@ pub fn ethiopic_and_ethioaa_are_compatible() { #[test] pub fn japanese_and_japanext_are_compatible() { let provider = SourceDataProvider::new_testing(); - let japanese = &provider.all_eras().unwrap()[&DatagenCalendar::JapaneseModern]; - let japanext = &provider.all_eras().unwrap()[&DatagenCalendar::JapaneseExtended]; - assert_eq!( - japanext - .iter() - .take(2) - .zip(japanese.iter().take(2)) - .find(|(e, a)| e != a), - None, - ); + let japanese = DataProvider::::load(&provider, Default::default()) + .unwrap() + .payload; + let japanext = DataProvider::::load(&provider, Default::default()) + .unwrap() + .payload; assert_eq!( japanext + .get() + .dates_to_eras .iter() - .skip(2) .rev() - .zip(japanese.iter().skip(2).rev()) + .zip(japanese.get().dates_to_eras.iter().rev()) .find(|(e, a)| e != a), None, "{japanext:?} - {japanese:?}" @@ -381,10 +370,9 @@ fn test_calendar_eras() { // Check that code and aliases produce identical results for era in era .aliases - .as_deref() - .into_iter() - .flat_map(|s| s.split(' ')) - .chain(era.code.as_deref()) + .split(' ') + .chain(Some(era.code.as_str())) + .filter(|s| !s.is_empty()) { assert_eq!( Date::try_new_from_codes( @@ -409,8 +397,8 @@ fn test_calendar_eras() { } // Check that the correct era code is returned - if let Some(code) = era.code.as_deref() { - assert_eq!(era_year.era, code); + if !era.code.is_empty() { + assert_eq!(era_year.era, era.code); } // Check that the start/end date uses year 1, and minimal/maximal month/day diff --git a/provider/source/src/cldr_cache.rs b/provider/source/src/cldr_cache.rs index cd221bd3647..28dcd445253 100644 --- a/provider/source/src/cldr_cache.rs +++ b/provider/source/src/cldr_cache.rs @@ -27,8 +27,12 @@ pub(crate) struct CldrCache { dir_suffix: OnceLock>, extended_locale_expander: OnceLock>, #[expect(clippy::type_complexity)] - pub(crate) calendar_eras: - OnceLock>, DataError>>, + pub(crate) calendar_eras: OnceLock< + Result< + BTreeMap, Vec<(usize, EraData)>)>, + DataError, + >, + >, #[cfg(feature = "experimental")] // used by transforms/mod.rs pub(crate) transforms: OnceLock< diff --git a/provider/source/src/cldr_serde/eras.rs b/provider/source/src/cldr_serde/eras.rs index c84c6e72b14..bcebfa90317 100644 --- a/provider/source/src/cldr_serde/eras.rs +++ b/provider/source/src/cldr_serde/eras.rs @@ -38,10 +38,10 @@ pub(crate) struct EraData { pub(crate) start: Option, #[serde(rename = "_end", default, deserialize_with = "parse_era_start_date")] pub(crate) end: Option, - #[serde(rename = "_code")] - pub(crate) code: Option, - #[serde(rename = "_aliases")] - pub(crate) aliases: Option, + #[serde(rename = "_code", default)] + pub(crate) code: String, + #[serde(rename = "_aliases", default)] + pub(crate) aliases: String, /// EraYear::era_index #[serde(skip)] pub(crate) icu4x_era_index: Option, diff --git a/provider/source/src/datetime/mod.rs b/provider/source/src/datetime/mod.rs index 53d8ec82810..d59a7ba0a29 100644 --- a/provider/source/src/datetime/mod.rs +++ b/provider/source/src/datetime/mod.rs @@ -53,6 +53,27 @@ impl DatagenCalendar { } } + pub(crate) fn from_cldr_name(s: &str) -> Self { + use DatagenCalendar::*; + match s { + "buddhist" => Buddhist, + "chinese" => Chinese, + "coptic" => Coptic, + "dangi" => Dangi, + "ethiopic" | "ethiopic-amete-alem" => Ethiopic, + "gregorian" => Gregorian, + "hebrew" => Hebrew, + "indian" => Indian, + "islamic" | "islamic-civil" | "islamic-umalqura" | "islamic-rgsa" | "islamic-tbla" => { + Hijri + } + "japanese" => JapaneseExtended, + "persian" => Persian, + "roc" => Roc, + c => panic!("{c}"), + } + } + pub(crate) fn canonical_any_calendar_kind(self) -> AnyCalendarKind { use DatagenCalendar::*; match self { diff --git a/provider/source/src/datetime/neo.rs b/provider/source/src/datetime/neo.rs index 9f4b5d645c4..5174f1737b4 100644 --- a/provider/source/src/datetime/neo.rs +++ b/provider/source/src/datetime/neo.rs @@ -214,78 +214,42 @@ fn dayperiods_convert( }) } -fn eras_convert( - provider: &SourceDataProvider, +fn eras_collect<'a>( + provider: &'a SourceDataProvider, locale: &DataLocale, - eras: &ca::Eras, + eras: &'a ca::Eras, calendar: DatagenCalendar, length: Length, -) -> Result, DataError> { - let eras = eras.load(length); - let all_eras = &provider.all_eras()?[&calendar]; - if matches!( - calendar, - DatagenCalendar::JapaneseModern | DatagenCalendar::JapaneseExtended - ) { - let greg_eras = provider - .get_dates_resource(locale, Some(DatagenCalendar::Gregorian))? - .eras - .as_ref() - .expect("gregorian must have eras") - .load(length); - - let mut out_eras = BTreeMap::new(); - - for &(cldr, ref data) in all_eras { - if cldr == 0 { - out_eras.insert( - data.code.as_deref().unwrap(), - greg_eras - .get("0") - .expect("gregorian calendar must have 0 era") - .as_str(), - ); - } else if cldr == 1 { - out_eras.insert( - data.code.as_deref().unwrap(), - greg_eras - .get("1") - .expect("gregorian calendar must have 1 era") - .as_str(), - ); - } else { - // https://unicode-org.atlassian.net/browse/CLDR-18388 for why we need to do -2 - if let Some(name) = eras.get(&(cldr - 2).to_string()) { - out_eras.insert(data.code.as_deref().unwrap(), name); - } else { - panic!("Unknown japanese era number {cldr}"); - } - } - } - let keys: Vec<&PotentialUtf8> = out_eras - .keys() - .map(|k| PotentialUtf8::from_str(k)) - .collect(); - let values: Vec<&str> = out_eras.values().copied().collect(); - let kv = (keys, values); - let cow = VarZeroCow::from_encodeable(&kv); - Ok(YearNames::VariableEras(cow)) - } else { - let max_era_index = all_eras.iter().flat_map(|(_, e)| e.icu4x_era_index).max(); - let mut out_eras: Vec<&str> = - vec![""; max_era_index.map(|n| n + 1).unwrap_or_default() as usize]; - for &(cldr, ref era) in all_eras.iter() { - if let Some(name) = eras.get(&cldr.to_string()) { - if let Some(icu4x_hardcoded_index) = era.icu4x_era_index { - out_eras[icu4x_hardcoded_index as usize] = &**name; - } - } else { - panic!("Did not find era data for era index {cldr} for {calendar:?} and {locale}"); - } - } +) -> Result, DataError> { + let (inherit, ref all_eras) = provider.all_eras()?[&calendar]; + + let mut out = BTreeMap::new(); + + for &(cldr, ref era) in all_eras { + out.insert( + ( + era.code.as_str(), + era.icu4x_era_index.unwrap_or(u8::MAX) as usize, + ), + &*eras.load(length)[&cldr.to_string()], + ); + } - Ok(YearNames::FixedEras((&out_eras).into())) + if let Some(inherit) = inherit { + out.extend(eras_collect( + provider, + locale, + provider + .get_dates_resource(locale, Some(inherit))? + .eras + .as_ref() + .unwrap(), + inherit, + length, + )?); } + + Ok(out) } fn years_convert( datagen: &SourceDataProvider, @@ -302,7 +266,27 @@ fn years_convert( ); if let Some(ref eras) = data.eras { - eras_convert(datagen, locale, eras, calendar, length) + let eras = eras_collect(datagen, locale, eras, calendar, length)?; + + let max_icu4x_era_index = eras + .keys() + .map(|(_, idx)| idx + 1) + .max() + .unwrap_or_default(); + + if max_icu4x_era_index > 10 { + let kv = eras + .iter() + .map(|(&(k, _), &v)| (PotentialUtf8::from_str(k), v)) + .unzip::<_, _, Vec<_>, Vec<_>>(); + Ok(YearNames::VariableEras(VarZeroCow::from_encodeable(&kv))) + } else { + let mut out_eras = vec![""; max_icu4x_era_index]; + for ((_, idx), era) in eras { + out_eras[idx] = era; + } + Ok(YearNames::FixedEras((&out_eras).into())) + } } else if let Some(years) = data .cyclic_name_sets .as_ref()