From 8c13d41c2a62a9b018158fd74df3e3f5dcf441db Mon Sep 17 00:00:00 2001 From: Serhii Potapov Date: Thu, 16 Oct 2025 21:12:33 +0200 Subject: [PATCH 1/2] Upgrade to Rust edition 2024 --- Cargo.toml | 2 +- benches/example.rs | 2 +- src/alphabets/cyrillic.rs | 2 +- src/alphabets/detection.rs | 2 +- src/alphabets/latin.rs | 4 ++-- src/combined/mod.rs | 4 ++-- src/core/detect.rs | 5 +++-- src/core/detector.rs | 4 ++-- src/core/filter_list.rs | 4 ++-- src/dev.rs | 12 ++++++------ src/error.rs | 6 +++--- src/lib.rs | 4 ++-- src/scripts/detect.rs | 6 +----- src/scripts/mod.rs | 2 +- src/scripts/script.rs | 2 +- src/trigrams/detection.rs | 10 +++++----- src/trigrams/mod.rs | 2 +- src/trigrams/profiles.rs | 2 +- src/trigrams/utils.rs | 8 ++------ tests/detect.rs | 2 +- 20 files changed, 39 insertions(+), 46 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 2994d29..d0363bd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,7 +2,7 @@ name = "whatlang" version = "0.17.0" authors = ["Serhii Potapov "] -edition = "2021" +edition = "2024" description = "Fast and lightweight language identification library for Rust." keywords = ["language", "nlp", "lang", "whatlang", "text"] license = "MIT" diff --git a/benches/example.rs b/benches/example.rs index c044561..dadb877 100644 --- a/benches/example.rs +++ b/benches/example.rs @@ -4,7 +4,7 @@ extern crate bencher; use bencher::Bencher; use std::collections::HashMap; use whatlang::dev::{ - alphabet_cyrillic_calculate_scores, alphabet_latin_calculate_scores, FilterList, LowercaseText, + FilterList, LowercaseText, alphabet_cyrillic_calculate_scores, alphabet_latin_calculate_scores, }; use whatlang::{detect, detect_script}; diff --git a/src/alphabets/cyrillic.rs b/src/alphabets/cyrillic.rs index 0cc01ec..69ca5fd 100644 --- a/src/alphabets/cyrillic.rs +++ b/src/alphabets/cyrillic.rs @@ -1,5 +1,5 @@ -use super::common::{build_inverted_map, generic_alphabet_calculate_scores}; use super::RawOutcome; +use super::common::{build_inverted_map, generic_alphabet_calculate_scores}; use crate::core::{FilterList, LowercaseText}; use crate::{Lang, Script}; use std::sync::LazyLock; diff --git a/src/alphabets/detection.rs b/src/alphabets/detection.rs index 10c11a9..d88589e 100644 --- a/src/alphabets/detection.rs +++ b/src/alphabets/detection.rs @@ -1,7 +1,7 @@ use super::RawOutcome; use super::{cyrillic, latin}; -use crate::core::{calculate_confidence, FilterList, Info, InternalQuery, LowercaseText}; use crate::Lang; +use crate::core::{FilterList, Info, InternalQuery, LowercaseText, calculate_confidence}; pub fn detect(iquery: &InternalQuery) -> Option { let raw_outcome = raw_detect(iquery); diff --git a/src/alphabets/latin.rs b/src/alphabets/latin.rs index 267ac2f..5439647 100644 --- a/src/alphabets/latin.rs +++ b/src/alphabets/latin.rs @@ -1,7 +1,7 @@ use std::sync::LazyLock; -use super::common::{build_inverted_map, generic_alphabet_calculate_scores}; use super::RawOutcome; +use super::common::{build_inverted_map, generic_alphabet_calculate_scores}; use crate::core::{FilterList, LowercaseText}; use crate::{Lang, Script}; @@ -95,8 +95,8 @@ pub fn alphabet_calculate_scores(text: &LowercaseText, filter_list: &FilterList) #[cfg(test)] mod tests { use super::*; - use crate::utils::is_stop_char; use crate::Script; + use crate::utils::is_stop_char; // Old naive implementation, that is not very effective but easy to understand fn naive_alphabet_calculate_scores( diff --git a/src/combined/mod.rs b/src/combined/mod.rs index ed1ae89..bab3426 100644 --- a/src/combined/mod.rs +++ b/src/combined/mod.rs @@ -1,7 +1,7 @@ +use crate::Lang; use crate::alphabets; -use crate::core::{calculate_confidence, Info, InternalQuery}; +use crate::core::{Info, InternalQuery, calculate_confidence}; use crate::trigrams; -use crate::Lang; #[derive(Debug)] pub struct RawOutcome { diff --git a/src/core/detect.rs b/src/core/detect.rs index 3014a66..8fb4218 100644 --- a/src/core/detect.rs +++ b/src/core/detect.rs @@ -1,9 +1,10 @@ +use crate::Lang; use crate::core::{Info, Method, Options, Query}; use crate::scripts::{ + RawScriptInfo, Script, grouping::{MultiLangScript, ScriptLangGroup}, - raw_detect_script, RawScriptInfo, Script, + raw_detect_script, }; -use crate::Lang; use crate::{alphabets, combined, trigrams}; /// Detect only a language by a given text. diff --git a/src/core/detector.rs b/src/core/detector.rs index 626bc85..dd413fb 100644 --- a/src/core/detector.rs +++ b/src/core/detector.rs @@ -1,9 +1,9 @@ +use crate::Lang; use crate::core; use crate::core::FilterList; use crate::core::Info; use crate::core::Options; -use crate::scripts::{detect_script, Script}; -use crate::Lang; +use crate::scripts::{Script, detect_script}; /// Configurable structure that holds detection options and provides functions /// to detect language and script. diff --git a/src/core/filter_list.rs b/src/core/filter_list.rs index eed86d4..440f6f6 100644 --- a/src/core/filter_list.rs +++ b/src/core/filter_list.rs @@ -26,8 +26,8 @@ impl FilterList { pub fn is_allowed(&self, lang: Lang) -> bool { match self { Self::All => true, - Self::Allow(ref allowlist) => allowlist.contains(&lang), - Self::Deny(ref denylist) => !denylist.contains(&lang), + Self::Allow(allowlist) => allowlist.contains(&lang), + Self::Deny(denylist) => !denylist.contains(&lang), } } } diff --git a/src/dev.rs b/src/dev.rs index 915c245..eceb3f2 100644 --- a/src/dev.rs +++ b/src/dev.rs @@ -2,20 +2,20 @@ //! It exists only to enable tuning of the library with extra supporting tools (e.g. benchmarks). //! Developers are advised against relying on API. //! -pub use crate::alphabets::{raw_detect as alphabets_raw_detect, RawOutcome as RawAlphabetsInfo}; -pub use crate::combined::{raw_detect as combined_raw_detect, RawOutcome as RawCombinedInfo}; -pub use crate::core::{detect, detect_lang, detect_with_options, Detector, Info, Method, Options}; +pub use crate::alphabets::{RawOutcome as RawAlphabetsInfo, raw_detect as alphabets_raw_detect}; +pub use crate::combined::{RawOutcome as RawCombinedInfo, raw_detect as combined_raw_detect}; +pub use crate::core::{Detector, Info, Method, Options, detect, detect_lang, detect_with_options}; pub use crate::lang::Lang; -pub use crate::scripts::{detect_script, raw_detect_script, RawScriptInfo, Script}; -pub use crate::trigrams::{raw_detect as trigrams_raw_detect, RawOutcome as RawTrigramsInfo}; +pub use crate::scripts::{RawScriptInfo, Script, detect_script, raw_detect_script}; +pub use crate::trigrams::{RawOutcome as RawTrigramsInfo, raw_detect as trigrams_raw_detect}; pub use crate::alphabets::cyrillic::alphabet_calculate_scores as alphabet_cyrillic_calculate_scores; pub use crate::alphabets::latin::alphabet_calculate_scores as alphabet_latin_calculate_scores; pub use crate::core::{FilterList, LowercaseText}; // private imports -use crate::core::detect::detect_lang_base_on_mandarin_script; use crate::core::Query; +use crate::core::detect::detect_lang_base_on_mandarin_script; use crate::scripts::grouping::ScriptLangGroup; #[derive(Debug)] diff --git a/src/error.rs b/src/error.rs index 1b619d2..79cec51 100644 --- a/src/error.rs +++ b/src/error.rs @@ -11,13 +11,13 @@ pub enum ParseError { impl Display for ParseError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { - ParseError::Script(ref val) => { + ParseError::Script(val) => { write!(f, "Cannot parse str into whatlang::Script: {:?}", val) } - ParseError::Lang(ref val) => { + ParseError::Lang(val) => { write!(f, "Cannot parse str into whatlang::Lang: {:?}", val) } - ParseError::Method(ref val) => { + ParseError::Method(val) => { write!(f, "Cannot parse str into whatlang::Method: {:?}", val) } } diff --git a/src/lib.rs b/src/lib.rs index e48f39a..7249fb8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -53,6 +53,6 @@ mod utils; #[cfg(feature = "dev")] pub mod dev; -pub use crate::core::{detect, detect_lang, Detector, Info}; +pub use crate::core::{Detector, Info, detect, detect_lang}; pub use crate::lang::Lang; -pub use crate::scripts::{detect_script, Script}; +pub use crate::scripts::{Script, detect_script}; diff --git a/src/scripts/detect.rs b/src/scripts/detect.rs index 702f03a..b10e492 100644 --- a/src/scripts/detect.rs +++ b/src/scripts/detect.rs @@ -35,11 +35,7 @@ impl RawScriptInfo { // expect - is safe because self.counters is never expected to be empty // See raw_detect_script(). let pair = self.counters.first().expect("counters must not be empty"); - if pair.1 > 0 { - Some(pair.0) - } else { - None - } + if pair.1 > 0 { Some(pair.0) } else { None } } pub(crate) fn count(&self, script: Script) -> usize { diff --git a/src/scripts/mod.rs b/src/scripts/mod.rs index 8421f69..b9adf6e 100644 --- a/src/scripts/mod.rs +++ b/src/scripts/mod.rs @@ -5,5 +5,5 @@ mod lang_mapping; mod script; pub use self::detect::detect_script; -pub use self::detect::{raw_detect_script, RawScriptInfo}; +pub use self::detect::{RawScriptInfo, raw_detect_script}; pub use self::script::Script; diff --git a/src/scripts/script.rs b/src/scripts/script.rs index cd0e8ef..07e21f4 100644 --- a/src/scripts/script.rs +++ b/src/scripts/script.rs @@ -2,8 +2,8 @@ use std::fmt; use std::str::FromStr; use super::lang_mapping; -use crate::error::ParseError; use crate::Lang; +use crate::error::ParseError; /// Represents a writing system (Latin, Cyrillic, Arabic, etc). #[cfg_attr(feature = "enum-map", derive(::enum_map::Enum))] diff --git a/src/trigrams/detection.rs b/src/trigrams/detection.rs index 423b755..de5f322 100644 --- a/src/trigrams/detection.rs +++ b/src/trigrams/detection.rs @@ -1,12 +1,12 @@ use hashbrown::HashMap; -use super::utils::{get_trigrams_with_positions, TrigramsWithPositions}; -use super::{LangProfile, LangProfileList}; -use super::{Trigram, MAX_TOTAL_DISTANCE, MAX_TRIGRAM_DISTANCE}; +use super::utils::{TrigramsWithPositions, get_trigrams_with_positions}; use super::{ARABIC_LANGS, CYRILLIC_LANGS, DEVANAGARI_LANGS, HEBREW_LANGS, LATIN_LANGS}; -use crate::core::{calculate_confidence, FilterList, Info, InternalQuery, Text}; -use crate::scripts::grouping::MultiLangScript; +use super::{LangProfile, LangProfileList}; +use super::{MAX_TOTAL_DISTANCE, MAX_TRIGRAM_DISTANCE, Trigram}; use crate::Lang; +use crate::core::{FilterList, Info, InternalQuery, Text, calculate_confidence}; +use crate::scripts::grouping::MultiLangScript; #[derive(Debug)] pub struct RawOutcome { diff --git a/src/trigrams/mod.rs b/src/trigrams/mod.rs index 6a74c02..745037d 100644 --- a/src/trigrams/mod.rs +++ b/src/trigrams/mod.rs @@ -4,7 +4,7 @@ pub mod utils; pub use profiles::*; -pub use detection::{detect, raw_detect, RawOutcome}; +pub use detection::{RawOutcome, detect, raw_detect}; #[derive(Debug, Eq, PartialEq, Hash, Ord, PartialOrd, Clone, Copy)] pub struct Trigram(pub(crate) char, pub(crate) char, pub(crate) char); diff --git a/src/trigrams/profiles.rs b/src/trigrams/profiles.rs index 137ae4e..1049de2 100644 --- a/src/trigrams/profiles.rs +++ b/src/trigrams/profiles.rs @@ -1,8 +1,8 @@ // NOTE: // This file is generated automatically. -use crate::trigrams::Trigram; use crate::Lang; +use crate::trigrams::Trigram; pub type LangProfile = &'static [Trigram]; pub type LangProfileList = &'static [(Lang, LangProfile)]; diff --git a/src/trigrams/utils.rs b/src/trigrams/utils.rs index 7cf22ac..d63ebe5 100644 --- a/src/trigrams/utils.rs +++ b/src/trigrams/utils.rs @@ -1,7 +1,7 @@ use hashbrown::HashMap; -use super::Trigram; use super::TEXT_TRIGRAMS_SIZE; +use super::Trigram; use crate::core::LowercaseText; use crate::utils::is_stop_char; @@ -84,11 +84,7 @@ fn count(text: &LowercaseText) -> CountResult { // Convert punctuations and digits to a space. #[inline] fn to_trigram_char(ch: char) -> char { - if is_stop_char(ch) { - ' ' - } else { - ch - } + if is_stop_char(ch) { ' ' } else { ch } } // In order to improve performance, define the initial capacity for trigrams hash map, diff --git a/tests/detect.rs b/tests/detect.rs index ba8aa67..83953f4 100644 --- a/tests/detect.rs +++ b/tests/detect.rs @@ -1,7 +1,7 @@ extern crate serde_json; extern crate whatlang; -use whatlang::{detect, detect_lang, Lang, Script}; +use whatlang::{Lang, Script, detect, detect_lang}; use std::collections::HashMap; From 2bc5bf0718ea732b6697a280f865d5c36f8179e6 Mon Sep 17 00:00:00 2001 From: Serhii Potapov Date: Thu, 16 Oct 2025 21:14:44 +0200 Subject: [PATCH 2/2] Upgrade to Rust 2024 --- CHANGELOG.md | 5 ++++- Cargo.toml | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0624a34..cadfb5f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,8 @@ +### v0.18.0 - 2025-10-16 +* [BREAKING] Update to Rust edition 2024 + ### v0.17.0 - 2025-10-16 -* Support Welsh +* [BREAKING] Support Welsh * Bump hashbrown to 15 ### v0.16.4 - 2024-01-04 diff --git a/Cargo.toml b/Cargo.toml index d0363bd..8f9c1b9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "whatlang" -version = "0.17.0" +version = "0.18.0" authors = ["Serhii Potapov "] edition = "2024" description = "Fast and lightweight language identification library for Rust."