Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
### v0.18.0 - 2025-10-16
* [BREAKING] Update to Rust edition 2024

### v0.17.0 - 2025-10-16
* Support Welsh
* [BREAKING] Support Welsh
* Bump hashbrown to 15

### v0.16.4 - 2024-01-04
Expand Down
4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
[package]
name = "whatlang"
version = "0.17.0"
version = "0.18.0"
authors = ["Serhii Potapov <blake131313@gmail.com>"]
edition = "2021"
edition = "2024"
description = "Fast and lightweight language identification library for Rust."
keywords = ["language", "nlp", "lang", "whatlang", "text"]
license = "MIT"
Expand Down
2 changes: 1 addition & 1 deletion benches/example.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ extern crate bencher;
use bencher::Bencher;
use std::collections::HashMap;
use whatlang::dev::{
alphabet_cyrillic_calculate_scores, alphabet_latin_calculate_scores, FilterList, LowercaseText,
FilterList, LowercaseText, alphabet_cyrillic_calculate_scores, alphabet_latin_calculate_scores,
};
use whatlang::{detect, detect_script};

Expand Down
2 changes: 1 addition & 1 deletion src/alphabets/cyrillic.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use super::common::{build_inverted_map, generic_alphabet_calculate_scores};
use super::RawOutcome;
use super::common::{build_inverted_map, generic_alphabet_calculate_scores};
use crate::core::{FilterList, LowercaseText};
use crate::{Lang, Script};
use std::sync::LazyLock;
Expand Down
2 changes: 1 addition & 1 deletion src/alphabets/detection.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use super::RawOutcome;
use super::{cyrillic, latin};
use crate::core::{calculate_confidence, FilterList, Info, InternalQuery, LowercaseText};
use crate::Lang;
use crate::core::{FilterList, Info, InternalQuery, LowercaseText, calculate_confidence};

pub fn detect(iquery: &InternalQuery) -> Option<Info> {
let raw_outcome = raw_detect(iquery);
Expand Down
4 changes: 2 additions & 2 deletions src/alphabets/latin.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use std::sync::LazyLock;

use super::common::{build_inverted_map, generic_alphabet_calculate_scores};
use super::RawOutcome;
use super::common::{build_inverted_map, generic_alphabet_calculate_scores};
use crate::core::{FilterList, LowercaseText};
use crate::{Lang, Script};

Expand Down Expand Up @@ -95,8 +95,8 @@ pub fn alphabet_calculate_scores(text: &LowercaseText, filter_list: &FilterList)
#[cfg(test)]
mod tests {
use super::*;
use crate::utils::is_stop_char;
use crate::Script;
use crate::utils::is_stop_char;

// Old naive implementation that is not very efficient but is easy to understand
fn naive_alphabet_calculate_scores(
Expand Down
4 changes: 2 additions & 2 deletions src/combined/mod.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use crate::Lang;
use crate::alphabets;
use crate::core::{calculate_confidence, Info, InternalQuery};
use crate::core::{Info, InternalQuery, calculate_confidence};
use crate::trigrams;
use crate::Lang;

#[derive(Debug)]
pub struct RawOutcome {
Expand Down
5 changes: 3 additions & 2 deletions src/core/detect.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
use crate::Lang;
use crate::core::{Info, Method, Options, Query};
use crate::scripts::{
RawScriptInfo, Script,
grouping::{MultiLangScript, ScriptLangGroup},
raw_detect_script, RawScriptInfo, Script,
raw_detect_script,
};
use crate::Lang;
use crate::{alphabets, combined, trigrams};

/// Detect only a language by a given text.
Expand Down
4 changes: 2 additions & 2 deletions src/core/detector.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
use crate::Lang;
use crate::core;
use crate::core::FilterList;
use crate::core::Info;
use crate::core::Options;
use crate::scripts::{detect_script, Script};
use crate::Lang;
use crate::scripts::{Script, detect_script};

/// Configurable structure that holds detection options and provides functions
/// to detect language and script.
Expand Down
4 changes: 2 additions & 2 deletions src/core/filter_list.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ impl FilterList {
pub fn is_allowed(&self, lang: Lang) -> bool {
match self {
Self::All => true,
Self::Allow(ref allowlist) => allowlist.contains(&lang),
Self::Deny(ref denylist) => !denylist.contains(&lang),
Self::Allow(allowlist) => allowlist.contains(&lang),
Self::Deny(denylist) => !denylist.contains(&lang),
}
}
}
Expand Down
12 changes: 6 additions & 6 deletions src/dev.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,20 @@
//! It exists only to enable tuning of the library with extra supporting tools (e.g. benchmarks).
//! Developers are advised against relying on this API.
//!
pub use crate::alphabets::{raw_detect as alphabets_raw_detect, RawOutcome as RawAlphabetsInfo};
pub use crate::combined::{raw_detect as combined_raw_detect, RawOutcome as RawCombinedInfo};
pub use crate::core::{detect, detect_lang, detect_with_options, Detector, Info, Method, Options};
pub use crate::alphabets::{RawOutcome as RawAlphabetsInfo, raw_detect as alphabets_raw_detect};
pub use crate::combined::{RawOutcome as RawCombinedInfo, raw_detect as combined_raw_detect};
pub use crate::core::{Detector, Info, Method, Options, detect, detect_lang, detect_with_options};
pub use crate::lang::Lang;
pub use crate::scripts::{detect_script, raw_detect_script, RawScriptInfo, Script};
pub use crate::trigrams::{raw_detect as trigrams_raw_detect, RawOutcome as RawTrigramsInfo};
pub use crate::scripts::{RawScriptInfo, Script, detect_script, raw_detect_script};
pub use crate::trigrams::{RawOutcome as RawTrigramsInfo, raw_detect as trigrams_raw_detect};

pub use crate::alphabets::cyrillic::alphabet_calculate_scores as alphabet_cyrillic_calculate_scores;
pub use crate::alphabets::latin::alphabet_calculate_scores as alphabet_latin_calculate_scores;
pub use crate::core::{FilterList, LowercaseText};

// private imports
use crate::core::detect::detect_lang_base_on_mandarin_script;
use crate::core::Query;
use crate::core::detect::detect_lang_base_on_mandarin_script;
use crate::scripts::grouping::ScriptLangGroup;

#[derive(Debug)]
Expand Down
6 changes: 3 additions & 3 deletions src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,13 @@ pub enum ParseError {
impl Display for ParseError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
ParseError::Script(ref val) => {
ParseError::Script(val) => {
write!(f, "Cannot parse str into whatlang::Script: {:?}", val)
}
ParseError::Lang(ref val) => {
ParseError::Lang(val) => {
write!(f, "Cannot parse str into whatlang::Lang: {:?}", val)
}
ParseError::Method(ref val) => {
ParseError::Method(val) => {
write!(f, "Cannot parse str into whatlang::Method: {:?}", val)
}
}
Expand Down
4 changes: 2 additions & 2 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,6 @@ mod utils;
#[cfg(feature = "dev")]
pub mod dev;

pub use crate::core::{detect, detect_lang, Detector, Info};
pub use crate::core::{Detector, Info, detect, detect_lang};
pub use crate::lang::Lang;
pub use crate::scripts::{detect_script, Script};
pub use crate::scripts::{Script, detect_script};
6 changes: 1 addition & 5 deletions src/scripts/detect.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,7 @@ impl RawScriptInfo {
// `expect` is safe because self.counters is never expected to be empty.
// See raw_detect_script().
let pair = self.counters.first().expect("counters must not be empty");
if pair.1 > 0 {
Some(pair.0)
} else {
None
}
if pair.1 > 0 { Some(pair.0) } else { None }
}

pub(crate) fn count(&self, script: Script) -> usize {
Expand Down
2 changes: 1 addition & 1 deletion src/scripts/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,5 @@ mod lang_mapping;
mod script;

pub use self::detect::detect_script;
pub use self::detect::{raw_detect_script, RawScriptInfo};
pub use self::detect::{RawScriptInfo, raw_detect_script};
pub use self::script::Script;
2 changes: 1 addition & 1 deletion src/scripts/script.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ use std::fmt;
use std::str::FromStr;

use super::lang_mapping;
use crate::error::ParseError;
use crate::Lang;
use crate::error::ParseError;

/// Represents a writing system (Latin, Cyrillic, Arabic, etc).
#[cfg_attr(feature = "enum-map", derive(::enum_map::Enum))]
Expand Down
10 changes: 5 additions & 5 deletions src/trigrams/detection.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
use hashbrown::HashMap;

use super::utils::{get_trigrams_with_positions, TrigramsWithPositions};
use super::{LangProfile, LangProfileList};
use super::{Trigram, MAX_TOTAL_DISTANCE, MAX_TRIGRAM_DISTANCE};
use super::utils::{TrigramsWithPositions, get_trigrams_with_positions};
use super::{ARABIC_LANGS, CYRILLIC_LANGS, DEVANAGARI_LANGS, HEBREW_LANGS, LATIN_LANGS};
use crate::core::{calculate_confidence, FilterList, Info, InternalQuery, Text};
use crate::scripts::grouping::MultiLangScript;
use super::{LangProfile, LangProfileList};
use super::{MAX_TOTAL_DISTANCE, MAX_TRIGRAM_DISTANCE, Trigram};
use crate::Lang;
use crate::core::{FilterList, Info, InternalQuery, Text, calculate_confidence};
use crate::scripts::grouping::MultiLangScript;

#[derive(Debug)]
pub struct RawOutcome {
Expand Down
2 changes: 1 addition & 1 deletion src/trigrams/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ pub mod utils;

pub use profiles::*;

pub use detection::{detect, raw_detect, RawOutcome};
pub use detection::{RawOutcome, detect, raw_detect};

#[derive(Debug, Eq, PartialEq, Hash, Ord, PartialOrd, Clone, Copy)]
pub struct Trigram(pub(crate) char, pub(crate) char, pub(crate) char);
Expand Down
2 changes: 1 addition & 1 deletion src/trigrams/profiles.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
// NOTE:
// This file is generated automatically.

use crate::trigrams::Trigram;
use crate::Lang;
use crate::trigrams::Trigram;

pub type LangProfile = &'static [Trigram];
pub type LangProfileList = &'static [(Lang, LangProfile)];
Expand Down
8 changes: 2 additions & 6 deletions src/trigrams/utils.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use hashbrown::HashMap;

use super::Trigram;
use super::TEXT_TRIGRAMS_SIZE;
use super::Trigram;
use crate::core::LowercaseText;
use crate::utils::is_stop_char;

Expand Down Expand Up @@ -84,11 +84,7 @@ fn count(text: &LowercaseText) -> CountResult {
// Convert punctuation and digits to a space.
#[inline]
fn to_trigram_char(ch: char) -> char {
if is_stop_char(ch) {
' '
} else {
ch
}
if is_stop_char(ch) { ' ' } else { ch }
}

// In order to improve performance, define the initial capacity for trigrams hash map,
Expand Down
2 changes: 1 addition & 1 deletion tests/detect.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
extern crate serde_json;
extern crate whatlang;

use whatlang::{detect, detect_lang, Lang, Script};
use whatlang::{Lang, Script, detect, detect_lang};

use std::collections::HashMap;

Expand Down
Loading