Skip to content

Commit a4283c3

Browse files
authored
merge pull request #8 from mariinkys/swap_mobile_ocr
OCR Changes: This PR changes the models used for OCR; instead of the server variants, we now use the mobile ones, because the server that hosts the application can't handle the load of the server variants. It also changes the OCR processing function to filter out results with a confidence of 50% or less. Finally, it introduces the ability to disable OCR through an environment variable (`DISABLE_OCR`); for now, the app still loads the models, and the variable only disables the OCR button on our frontend. Skipping the model load entirely would be a good improvement in the future.
2 parents e162907 + 4e8c0f1 commit a4283c3

File tree

9 files changed

+50
-13
lines changed

9 files changed

+50
-13
lines changed

.gitignore

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,4 +22,6 @@ playwright/.cache/
2222
# ocr_models
2323
ocr_models/ppocrv5_dict.txt
2424
ocr_models/ppocrv5_server_det.onnx
25-
ocr_models/ppocrv5_server_rec.onnx
25+
ocr_models/ppocrv5_server_rec.onnx
26+
ocr_models/ppocrv5_mobile_det.onnx
27+
ocr_models/ppocrv5_mobile_rec.onnx

Dockerfile

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -46,15 +46,18 @@ COPY --from=builder /work/dictionaries /app/dictionaries
4646

4747
# Download OCR models and place them under /app/ocr_models
4848
RUN mkdir -p /app/ocr_models && \
49-
curl -L -o /app/ocr_models/ppocrv5_server_det.onnx \
50-
https://github.com/GreatV/oar-ocr/releases/download/v0.1.0/ppocrv5_server_det.onnx && \
51-
curl -L -o /app/ocr_models/ppocrv5_server_rec.onnx \
52-
https://github.com/GreatV/oar-ocr/releases/download/v0.1.0/ppocrv5_server_rec.onnx && \
49+
curl -L -o /app/ocr_models/ppocrv5_mobile_det.onnx \
50+
https://github.com/GreatV/oar-ocr/releases/download/v0.1.0/ppocrv5_mobile_det.onnx && \
51+
curl -L -o /app/ocr_models/ppocrv5_mobile_rec.onnx \
52+
https://github.com/GreatV/oar-ocr/releases/download/v0.1.0/ppocrv5_mobile_rec.onnx && \
5353
curl -L -o /app/ocr_models/ppocrv5_dict.txt \
5454
https://github.com/GreatV/oar-ocr/releases/download/v0.1.0/ppocrv5_dict.txt
5555

5656
ENV RUST_LOG="info"
5757
ENV LEPTOS_SITE_ADDR="0.0.0.0:8080"
5858
ENV LEPTOS_SITE_ROOT=./site
59+
# The name must match the DISABLE_OCR variable read by DelphinusConfig::get in src/config.rs.
ENV DISABLE_OCR="FALSE"
60+
5961
EXPOSE 8080
62+
6063
CMD ["/app/delphinus"]

ocr_models/README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,12 @@ Models used are:
77
### Text Detection Models
88
| Model Type | Version | Category | Model File | Size | Description |
99
|----------------|----------|----------|-----------------------------------------------------------------------------------------------------------------|---------|------------------------------------------------|
10-
| Text Detection | PP-OCRv5 | Server | [`ppocrv5_server_det.onnx`](https://github.com/GreatV/oar-ocr/releases/download/v0.1.0/ppocrv5_server_det.onnx) | 87.7MB | Server variant for high-precision requirements |
10+
| Text Detection | PP-OCRv5 | Mobile | [`ppocrv5_mobile_det.onnx`](https://github.com/GreatV/oar-ocr/releases/download/v0.1.0/ppocrv5_mobile_det.onnx) | 4.8MB | Mobile variant for real-time applications |
1111

1212
### Text Recognition Models
1313
| Model Type | Version | Language/Category | Model File | Size | Description |
1414
|------------------|----------|-------------------|-------------------------------------------------------------------------------------------------------------------------|--------|----------------------------------|
15-
| Text Recognition | PP-OCRv5 | Chinese/General | [`ppocrv5_server_rec.onnx`](https://github.com/GreatV/oar-ocr/releases/download/v0.1.0/ppocrv5_server_rec.onnx) | 84.1MB | Server variant |
15+
| Text Recognition | PP-OCRv5 | Chinese/General | [`ppocrv5_mobile_rec.onnx`](https://github.com/GreatV/oar-ocr/releases/download/v0.1.0/ppocrv5_mobile_rec.onnx) | 16.5MB | Mobile variant |
1616

1717
### Character Dictionaries
1818
| File Type | Version | Category | Model File | Size | Description |

src/app.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ use leptos_router::{
77

88
use crate::{
99
components::{NavbarComponent, ToastComponent},
10+
config::DelphinusConfig,
1011
pages::{FaqPage, GeneratorPage, HomePage, NotFound},
1112
};
1213

@@ -15,6 +16,11 @@ pub fn App() -> impl IntoView {
1516
// Provides context that manages stylesheets, titles, meta tags, etc.
1617
provide_meta_context();
1718

19+
// Provides the config as a context
20+
let (read, _write) =
21+
signal::<DelphinusConfig>(SharedValue::new(DelphinusConfig::get).into_inner());
22+
provide_context::<ReadSignal<DelphinusConfig>>(read);
23+
1824
view! {
1925
// injects a stylesheet into the document <head>
2026
// id=leptos means cargo-leptos will hot-reload this stylesheet

src/config.rs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
use serde::{Deserialize, Serialize};
2+
3+
/// Application configuration built by `DelphinusConfig::get`.
#[derive(Debug, Clone, Serialize, Deserialize)]
4+
pub struct DelphinusConfig {
5+
/// Set from the `DISABLE_OCR` environment variable; when `true`, the OCR
/// button on the generator page is rendered disabled.
pub disable_ocr: bool,
6+
}
7+
8+
impl DelphinusConfig {
9+
pub fn get() -> Self {
10+
let disable_ocr = std::env::var("DISABLE_OCR")
11+
.map(|v| v.to_lowercase() == "true")
12+
.unwrap_or(false);
13+
14+
Self { disable_ocr }
15+
}
16+
}

src/core/flashcard_generation/ocr.rs

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,15 @@ pub async fn ocr_image(bytes: Vec<u8>, separation_char: String) -> Result<String
1414

1515
let mut clean_result = Vec::new();
1616
for region in &result.text_regions {
17-
if let (Some(text), Some(_confidence)) = (&region.text, region.confidence) {
17+
if let (Some(text), Some(confidence)) = (&region.text, region.confidence) {
1818
let filtered = filter_cjk(text);
1919

2020
if !filtered.is_empty() {
21-
//println!("Text: {} (confidence: {:.2})", chinese_only, confidence);
22-
clean_result.push(filtered);
21+
// only those with more than 50% confidence
22+
//println!("Text: {} (confidence: {:.2})", filtered, confidence);
23+
if confidence > 0.5 {
24+
clean_result.push(filtered);
25+
}
2326
}
2427
}
2528
}

src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
pub mod app;
22
pub mod components;
3+
pub mod config;
34
pub mod core;
45
pub mod pages;
56

src/main.rs

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
11
pub mod components;
2+
pub mod config;
23
pub mod core;
34
pub mod pages;
45

56
#[cfg(feature = "ssr")]
67
#[actix_web::main]
78
async fn main() -> std::io::Result<()> {
8-
use crate::core::flashcard_generation::dictionaries;
9+
use crate::{config::DelphinusConfig, core::flashcard_generation::dictionaries};
910
use actix_files::Files;
1011
use actix_web::*;
1112
use delphinus::app::*;
@@ -31,14 +32,16 @@ async fn main() -> std::io::Result<()> {
3132

3233
let ocr_client = Arc::new(
3334
OAROCRBuilder::new(
34-
String::from("ocr_models/ppocrv5_server_det.onnx"),
35-
String::from("ocr_models/ppocrv5_server_rec.onnx"),
35+
String::from("ocr_models/ppocrv5_mobile_det.onnx"),
36+
String::from("ocr_models/ppocrv5_mobile_rec.onnx"),
3637
String::from("ocr_models/ppocrv5_dict.txt"),
3738
)
3839
.build()
3940
.expect("Failed to create OCR Client"),
4041
);
4142

43+
let _shared_config = SharedValue::new(DelphinusConfig::get);
44+
4245
println!("listening on http://{}", &addr);
4346

4447
HttpServer::new(move || {

src/pages/flashcard_generator.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ use crate::{
77
DialogComponent, PageTitleComponent, SelectOption, ToastType,
88
flashcard_generation::ModifyGeneratedFlashcards, toast::ToastMessage,
99
},
10+
config::DelphinusConfig,
1011
core::flashcard_generation::{
1112
entities::SeparationChar,
1213
flashcard::{Flashcard, remove_whitespace, search_dictionary},
@@ -16,6 +17,7 @@ use crate::{
1617
#[component]
1718
pub fn GeneratorPage() -> impl IntoView {
1819
let set_toast: WriteSignal<ToastMessage> = expect_context();
20+
let delphinus_config: ReadSignal<DelphinusConfig> = expect_context();
1921

2022
let (character_string, set_character_string) = signal(String::new());
2123
let (language, set_language) = signal("Chinese".to_string());
@@ -189,6 +191,7 @@ pub fn GeneratorPage() -> impl IntoView {
189191
<div class="max-w-4xl text-center m-auto p-2">
190192
<div class="flex justify-between my-3 items-center">
191193
<button
194+
disabled=move || {delphinus_config.get().disable_ocr}
192195
class="btn btn-sm btn-accent"
193196
on:click=move |_| {
194197
let _ = ocr_dialog_ref_node.get().unwrap().show_modal();

0 commit comments

Comments
 (0)