Skip to content

Commit 472bddf

Browse files
committed
Spam filter and index configuration updates
1 parent fd37362 commit 472bddf

9 files changed

Lines changed: 100 additions & 45 deletions

File tree

.github/workflows/ci.yml

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -311,7 +311,7 @@ jobs:
311311
- name: Build
312312
run: |
313313
rustup target add ${{matrix.target}}
314-
cargo build --release --target ${{matrix.target}} -p stalwart --no-default-features --features "sqlite postgres mysql rocks elastic s3 redis azure nats enterprise"
314+
cargo build --release --target ${{matrix.target}} -p stalwart --no-default-features --features "sqlite postgres mysql rocks s3 redis azure nats enterprise"
315315
cargo build --release --target ${{matrix.target}} -p stalwart-cli
316316
mkdir -p artifacts
317317
mv ./target/${{matrix.target}}/release/stalwart.exe ./artifacts/stalwart.exe
@@ -349,14 +349,14 @@ jobs:
349349
# Get latest FoundationDB installer
350350
curl --retry 5 -Lso foundationdb.pkg "$(gh api -X GET /repos/apple/foundationdb/releases --jq '.[] | select(.prerelease == false) | .assets[] | select(.name | test("${{startsWith(matrix.target, 'x86') && 'x86_64' || 'arm64'}}" + ".pkg$")) | .browser_download_url' | head -n1)"
351351
sudo installer -allowUntrusted -dumplog -pkg foundationdb.pkg -target /
352-
cargo build --release --target ${{matrix.target}} -p stalwart --no-default-features --features "foundationdb elastic s3 redis nats enterprise"
352+
cargo build --release --target ${{matrix.target}} -p stalwart --no-default-features --features "foundationdb s3 redis nats enterprise"
353353
mkdir -p artifacts
354354
mv ./target/${{matrix.target}}/release/stalwart ./artifacts/stalwart-foundationdb
355355
356356
- name: Build
357357
run: |
358358
rustup target add ${{matrix.target}}
359-
cargo build --release --target ${{matrix.target}} -p stalwart --no-default-features --features "sqlite postgres mysql rocks elastic s3 redis azure nats enterprise"
359+
cargo build --release --target ${{matrix.target}} -p stalwart --no-default-features --features "sqlite postgres mysql rocks s3 redis azure nats enterprise"
360360
cargo build --release --target ${{matrix.target}} -p stalwart-cli
361361
mkdir -p artifacts
362362
mv ./target/${{matrix.target}}/release/stalwart ./artifacts/stalwart

Dockerfile.build

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -92,15 +92,15 @@ RUN \
9292
--mount=type=cache,target=/usr/local/cargo/git \
9393
source /env-cargo && \
9494
if [ ! -z "${FDB_ARCH}" ]; then \
95-
RUSTFLAGS="-L /usr/lib" cargo chef cook --recipe-path recipe.json --zigbuild --release --target ${TARGET} -p stalwart --no-default-features --features "foundationdb elastic s3 redis nats enterprise"; \
95+
RUSTFLAGS="-L /usr/lib" cargo chef cook --recipe-path recipe.json --zigbuild --release --target ${TARGET} -p stalwart --no-default-features --features "foundationdb s3 redis nats enterprise"; \
9696
fi
9797
RUN \
9898
--mount=type=secret,id=ACTIONS_RESULTS_URL,env=ACTIONS_RESULTS_URL \
9999
--mount=type=secret,id=ACTIONS_RUNTIME_TOKEN,env=ACTIONS_RUNTIME_TOKEN \
100100
--mount=type=cache,target=/usr/local/cargo/registry \
101101
--mount=type=cache,target=/usr/local/cargo/git \
102102
source /env-cargo && \
103-
cargo chef cook --recipe-path recipe.json --zigbuild --release --target ${TARGET} -p stalwart --no-default-features --features "sqlite postgres mysql rocks elastic s3 redis azure nats enterprise" && \
103+
cargo chef cook --recipe-path recipe.json --zigbuild --release --target ${TARGET} -p stalwart --no-default-features --features "sqlite postgres mysql rocks s3 redis azure nats enterprise" && \
104104
cargo chef cook --recipe-path recipe.json --zigbuild --release --target ${TARGET} -p stalwart-cli
105105
# Copy the source code
106106
COPY . .
@@ -114,7 +114,7 @@ RUN \
114114
--mount=type=cache,target=/usr/local/cargo/git \
115115
source /env-cargo && \
116116
if [ ! -z "${FDB_ARCH}" ]; then \
117-
RUSTFLAGS="-L /usr/lib" cargo zigbuild --release --target ${TARGET} -p stalwart --no-default-features --features "foundationdb elastic s3 redis nats enterprise" && \
117+
RUSTFLAGS="-L /usr/lib" cargo zigbuild --release --target ${TARGET} -p stalwart --no-default-features --features "foundationdb s3 redis nats enterprise" && \
118118
mv /app/target/${TARGET}/release/stalwart /app/artifact/stalwart-foundationdb; \
119119
fi
120120
# Build generic version
@@ -124,7 +124,7 @@ RUN \
124124
--mount=type=cache,target=/usr/local/cargo/registry \
125125
--mount=type=cache,target=/usr/local/cargo/git \
126126
source /env-cargo && \
127-
cargo zigbuild --release --target ${TARGET} -p stalwart --no-default-features --features "sqlite postgres mysql rocks elastic s3 redis azure nats enterprise" && \
127+
cargo zigbuild --release --target ${TARGET} -p stalwart --no-default-features --features "sqlite postgres mysql rocks s3 redis azure nats enterprise" && \
128128
cargo zigbuild --release --target ${TARGET} -p stalwart-cli && \
129129
mv /app/target/${TARGET}/release/stalwart /app/artifact/stalwart && \
130130
mv /app/target/${TARGET}/release/stalwart-cli /app/artifact/stalwart-cli

crates/common/src/config/jmap/settings.rs

Lines changed: 10 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -219,7 +219,7 @@ impl JmapConfig {
219219
let mut jmap = JmapConfig {
220220
default_language: Language::from_iso_639(
221221
config
222-
.value("storage.full-text.default-language")
222+
.value("storage.search-index.default-language")
223223
.unwrap_or("en"),
224224
)
225225
.unwrap_or(Language::English),
@@ -356,7 +356,9 @@ impl JmapConfig {
356356
calendar_parse_max_items: config
357357
.property("jmap.calendar.parse.max-items")
358358
.unwrap_or(10),
359-
index_batch_size: config.property("jmap.index.batch-size").unwrap_or(100),
359+
index_batch_size: config
360+
.property("storage.search-index.batch-size")
361+
.unwrap_or(100),
360362
index_fields: AHashMap::new(),
361363
default_folders,
362364
shared_folder,
@@ -379,14 +381,17 @@ impl JmapConfig {
379381
};
380382

381383
if !config
382-
.property_or_default::<bool>(&format!("jmap.index.{index_name}.enabled"), "true")
384+
.property_or_default::<bool>(
385+
&format!("storage.search-index.{index_name}.enabled"),
386+
"true",
387+
)
383388
.unwrap_or(true)
384389
{
385390
continue;
386391
}
387392

388-
for (_, field) in
389-
config.properties::<SearchField>(&format!("jmap.index.{index_name}.fields"))
393+
for (_, field) in config
394+
.properties::<SearchField>(&format!("storage.search-index.{index_name}.fields"))
390395
{
391396
fields.insert(field);
392397
}

crates/common/src/config/spamfilter.rs

Lines changed: 13 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -91,6 +91,8 @@ pub struct ClassifierConfig {
9191
pub auto_learn_ham_score: f32,
9292
pub hold_samples_for: u64,
9393
pub train_frequency: Option<u64>,
94+
pub log_scale: bool,
95+
pub l2_normalize: bool,
9496
}
9597

9698
#[derive(Debug, Clone, Default)]
@@ -454,7 +456,7 @@ impl ClassifierConfig {
454456
let ccfh = match config.value("spam-filter.classifier.model") {
455457
Some("ftrl-fh") | None => false,
456458
Some("ftrl-ccfh") => true,
457-
Some("disabled") => return None,
459+
Some("disabled" | "disable") => return None,
458460
Some(other) => {
459461
config.new_build_error(
460462
"spam-filter.classifier.model",
@@ -498,18 +500,24 @@ impl ClassifierConfig {
498500
.unwrap_or(Duration::from_secs(180 * 24 * 60 * 60))
499501
.as_secs(),
500502
min_ham_samples: config
501-
.property_or_default("spam-filter.classifier.samples.min-ham", "10")
502-
.unwrap_or(10),
503+
.property_or_default("spam-filter.classifier.samples.min-ham", "100")
504+
.unwrap_or(100),
503505
min_spam_samples: config
504-
.property_or_default("spam-filter.classifier.samples.min-spam", "10")
505-
.unwrap_or(10),
506+
.property_or_default("spam-filter.classifier.samples.min-spam", "100")
507+
.unwrap_or(100),
506508
train_frequency: config
507509
.property_or_default::<Option<Duration>>(
508510
"spam-filter.classifier.training.frequency",
509511
"12h",
510512
)
511513
.unwrap_or(Some(Duration::from_secs(12 * 60 * 60)))
512514
.map(|d| d.as_secs()),
515+
log_scale: config
516+
.property_or_default("spam-filter.classifier.features.log-scale", "true")
517+
.unwrap_or(true),
518+
l2_normalize: config
519+
.property_or_default("spam-filter.classifier.features.l2-normalize", "true")
520+
.unwrap_or(true),
513521
}
514522
.into()
515523
}

crates/http/src/management/spam.rs

Lines changed: 14 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,13 @@
44
* SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-SEL
55
*/
66

7-
use common::{Server, auth::AccessToken, config::spamfilter::SpamFilterAction, psl};
7+
use common::{
8+
Server,
9+
auth::AccessToken,
10+
config::spamfilter::SpamFilterAction,
11+
manager::{SPAM_CLASSIFIER_KEY, SPAM_TRAINER_KEY},
12+
psl,
13+
};
814
use directory::{
915
Permission,
1016
backend::internal::manage::{self, ManageDirectory},
@@ -87,7 +93,7 @@ impl ManageSpamHandler for Server {
8793
access_token: &AccessToken,
8894
) -> trc::Result<HttpResponse> {
8995
match (path.get(1).copied(), path.get(2).copied(), req.method()) {
90-
(Some("sample"), Some(class @ ("ham" | "spam")), &Method::POST) => {
96+
(Some("upload"), Some(class @ ("ham" | "spam")), &Method::POST) => {
9197
// Validate the access token
9298
access_token.assert_has_permission(Permission::SpamFilterTrain)?;
9399

@@ -166,6 +172,12 @@ impl ManageSpamHandler for Server {
166172
false
167173
}
168174
}
175+
Some("delete") => {
176+
for key in [SPAM_CLASSIFIER_KEY, SPAM_TRAINER_KEY] {
177+
self.blob_store().delete_blob(key).await?;
178+
}
179+
true
180+
}
169181
Some("status") => self.inner.ipc.train_task_controller.is_running(),
170182
_ => {
171183
return Err(trc::ResourceEvent::NotFound.into_err());

crates/migration/src/blob.rs

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -216,6 +216,10 @@ pub(crate) async fn migrate_blobs_v014(server: &Server) -> trc::Result<()> {
216216
);
217217
}
218218
OldType::Undelete { deleted_at, size } => {
219+
// SPDX-SnippetBegin
220+
// SPDX-FileCopyrightText: 2020 Stalwart Labs LLC <hello@stalw.art>
221+
// SPDX-License-Identifier: LicenseRef-SEL
222+
219223
#[cfg(feature = "enterprise")]
220224
{
221225
batch
@@ -244,6 +248,8 @@ pub(crate) async fn migrate_blobs_v014(server: &Server) -> trc::Result<()> {
244248
.caused_by(trc::location!())?,
245249
);
246250
}
251+
252+
// SPDX-SnippetEnd
247253
}
248254
OldType::Temp => {
249255
batch.set(

crates/nlp/src/classifier/feature.rs

Lines changed: 11 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -124,6 +124,7 @@ pub trait FeatureBuilder {
124124
&self,
125125
features_in: &HashMap<I, f32>,
126126
account_id: Option<u32>,
127+
l2_normalize: bool,
127128
) -> Vec<Self::Feature> {
128129
let mut features_out = Vec::with_capacity(features_in.len());
129130
let mut buf = Vec::with_capacity(2 + 4 + 63);
@@ -141,14 +142,16 @@ pub trait FeatureBuilder {
141142
}
142143

143144
// L2 normalization
144-
let sum_of_squares = features_out
145-
.iter()
146-
.map(|f| f.weight() as f64 * f.weight() as f64)
147-
.sum::<f64>();
148-
if sum_of_squares > 0.0 {
149-
let norm = sum_of_squares.sqrt() as f32;
150-
for feature in &mut features_out {
151-
*feature.weight_mut() /= norm;
145+
if l2_normalize {
146+
let sum_of_squares = features_out
147+
.iter()
148+
.map(|f| f.weight() as f64 * f.weight() as f64)
149+
.sum::<f64>();
150+
if sum_of_squares > 0.0 {
151+
let norm = sum_of_squares.sqrt() as f32;
152+
for feature in &mut features_out {
153+
*feature.weight_mut() /= norm;
154+
}
152155
}
153156
}
154157

crates/nlp/src/classifier/sgd.rs

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -410,7 +410,7 @@ pub mod tests {
410410
}
411411
builder.scale(&mut sample);
412412
samples.push(Sample {
413-
features: builder.build(&sample, 12345.into()),
413+
features: builder.build(&sample, 12345.into(), true),
414414
class: if *class { 1.0 } else { 0.0 },
415415
});
416416
}
@@ -431,7 +431,7 @@ pub mod tests {
431431
}
432432
builder.scale(&mut sample);
433433
samples.push(Sample {
434-
features: builder.build(&sample, 12345.into()),
434+
features: builder.build(&sample, 12345.into(), true),
435435
class: if *class { 1.0 } else { 0.0 },
436436
});
437437
}

crates/spam-filter/src/modules/classifier.rs

Lines changed: 37 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -367,16 +367,20 @@ impl SpamClassifier for Server {
367367

368368
match &task {
369369
TrainTask::Fh { builder, .. } => {
370-
builder.scale(&mut tokens);
370+
if config.log_scale {
371+
builder.scale(&mut tokens);
372+
}
371373
fh_samples.push(Sample::new(
372-
builder.build(&tokens, account_id),
374+
builder.build(&tokens, account_id, config.l2_normalize),
373375
sample.is_spam,
374376
));
375377
}
376378
TrainTask::Ccfh { builder, .. } => {
377-
builder.scale(&mut tokens);
379+
if config.log_scale {
380+
builder.scale(&mut tokens);
381+
}
378382
ccfh_samples.push(Sample::new(
379-
builder.build(&tokens, account_id),
383+
builder.build(&tokens, account_id, config.l2_normalize),
380384
sample.is_spam,
381385
));
382386
}
@@ -558,6 +562,9 @@ impl SpamClassifier for Server {
558562

559563
async fn spam_classify(&self, ctx: &mut SpamFilterContext<'_>) -> trc::Result<()> {
560564
let classifier = self.inner.data.spam_classifier.load_full();
565+
let Some(config) = &self.core.spam.classifier else {
566+
return Ok(());
567+
};
561568

562569
let started = Instant::now();
563570
match classifier.as_ref() {
@@ -566,7 +573,9 @@ impl SpamClassifier for Server {
566573
let mut has_prediction = false;
567574
let mut tokens = self.spam_build_tokens(ctx).await.0;
568575
let feature_builder = classifier.feature_builder();
569-
feature_builder.scale(&mut tokens);
576+
if config.log_scale {
577+
feature_builder.scale(&mut tokens);
578+
}
570579

571580
for rcpt in &ctx.input.env_rcpt_to {
572581
let prediction = if let Some(account_id) = self
@@ -577,9 +586,11 @@ impl SpamClassifier for Server {
577586
{
578587
has_prediction = true;
579588
classifier
580-
.predict_proba_sample(
581-
&feature_builder.build(&tokens, account_id.into()),
582-
)
589+
.predict_proba_sample(&feature_builder.build(
590+
&tokens,
591+
account_id.into(),
592+
config.l2_normalize,
593+
))
583594
.into()
584595
} else {
585596
None
@@ -591,8 +602,11 @@ impl SpamClassifier for Server {
591602
ctx.result.classifier_confidence = classifier_confidence;
592603
} else {
593604
// None of the recipients are local, default to global model prediction
594-
let prediction =
595-
classifier.predict_proba_sample(&feature_builder.build(&tokens, None));
605+
let prediction = classifier.predict_proba_sample(&feature_builder.build(
606+
&tokens,
607+
None,
608+
config.l2_normalize,
609+
));
596610
ctx.result.classifier_confidence =
597611
vec![prediction.into(); ctx.input.env_rcpt_to.len()];
598612
}
@@ -602,7 +616,9 @@ impl SpamClassifier for Server {
602616
let mut has_prediction = false;
603617
let mut tokens = self.spam_build_tokens(ctx).await.0;
604618
let feature_builder = classifier.feature_builder();
605-
feature_builder.scale(&mut tokens);
619+
if config.log_scale {
620+
feature_builder.scale(&mut tokens);
621+
}
606622

607623
for rcpt in &ctx.input.env_rcpt_to {
608624
let prediction = if let Some(account_id) = self
@@ -613,9 +629,11 @@ impl SpamClassifier for Server {
613629
{
614630
has_prediction = true;
615631
classifier
616-
.predict_proba_sample(
617-
&feature_builder.build(&tokens, account_id.into()),
618-
)
632+
.predict_proba_sample(&feature_builder.build(
633+
&tokens,
634+
account_id.into(),
635+
config.l2_normalize,
636+
))
619637
.into()
620638
} else {
621639
None
@@ -627,8 +645,11 @@ impl SpamClassifier for Server {
627645
ctx.result.classifier_confidence = classifier_confidence;
628646
} else {
629647
// None of the recipients are local, default to global model prediction
630-
let prediction =
631-
classifier.predict_proba_sample(&feature_builder.build(&tokens, None));
648+
let prediction = classifier.predict_proba_sample(&feature_builder.build(
649+
&tokens,
650+
None,
651+
config.l2_normalize,
652+
));
632653
ctx.result.classifier_confidence =
633654
vec![prediction.into(); ctx.input.env_rcpt_to.len()];
634655
}

0 commit comments

Comments
 (0)