Skip to content

Commit 1acde10

Browse files
authored
Merge pull request #8250 from janhq/release/v0.8.2
2 parents 65838b1 + f95089d commit 1acde10

2 files changed

Lines changed: 21 additions & 18 deletions

File tree

Lines changed: 8 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,8 @@
1-
pub const MAX_PDF_FILE_SIZE: u64 = 20 * 1024 * 1024;
2-
pub const MAX_CSV_FILE_SIZE: u64 = 20 * 1024 * 1024;
3-
pub const MAX_SPREADSHEET_FILE_SIZE: u64 = 20 * 1024 * 1024;
4-
pub const MAX_PPTX_FILE_SIZE: u64 = 20 * 1024 * 1024;
5-
pub const MAX_DOCX_FILE_SIZE: u64 = 20 * 1024 * 1024;
1+
/// Absolute ceiling for in-process document parsing, in bytes.
2+
///
3+
/// The user-facing limit is the `max_file_size_mb` RAG setting (1–200MB),
4+
/// enforced on the frontend before any file reaches the parser. This backstop
5+
/// only guards the parser against pathological inputs, so it is pinned to the
6+
/// maximum that setting permits — a smaller value here would wrongly reject
7+
/// files the user explicitly allowed (e.g. a 21.5MB file with a 50MB setting).
8+
pub const MAX_PARSE_FILE_SIZE: u64 = 200 * (1 << 20); // 200 MiB = 209_715_200 bytes

src-tauri/plugins/tauri-plugin-rag/src/parser.rs

Lines changed: 13 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
use crate::{RagError, MAX_PDF_FILE_SIZE, MAX_SPREADSHEET_FILE_SIZE, MAX_CSV_FILE_SIZE, MAX_PPTX_FILE_SIZE, MAX_DOCX_FILE_SIZE};
1+
use crate::{RagError, MAX_PARSE_FILE_SIZE};
22
use std::borrow::Cow;
33
use std::fs;
44
use std::io::{Cursor, Read};
@@ -12,8 +12,8 @@ use zip::read::ZipArchive;
1212

1313
pub fn parse_pdf(file_path: &str) -> Result<String, RagError> {
1414
let metadata = fs::metadata(file_path)?;
15-
if metadata.len() > MAX_PDF_FILE_SIZE {
16-
return Err(RagError::ParseError("File too large (max 20MB)".to_string()));
15+
if metadata.len() > MAX_PARSE_FILE_SIZE {
16+
return Err(RagError::ParseError("File too large (max 200MB)".to_string()));
1717
}
1818
let bytes = fs::read(file_path)?;
1919
// pdf-extract can panic on some malformed PDFs; guard to avoid crashing the app
@@ -128,8 +128,8 @@ pub fn parse_document(file_path: &str, file_type: &str) -> Result<String, RagErr
128128

129129
fn parse_docx(file_path: &str) -> Result<String, RagError> {
130130
let metadata = std::fs::metadata(file_path)?;
131-
if metadata.len() > MAX_DOCX_FILE_SIZE {
132-
return Err(RagError::ParseError("File too large (max 20MB)".to_string()));
131+
if metadata.len() > MAX_PARSE_FILE_SIZE {
132+
return Err(RagError::ParseError("File too large (max 200MB)".to_string()));
133133
}
134134
let file = std::fs::File::open(file_path)?;
135135
let mut zip = ZipArchive::new(file).map_err(|e| RagError::ParseError(e.to_string()))?;
@@ -202,8 +202,8 @@ fn parse_docx(file_path: &str) -> Result<String, RagError> {
202202

203203
fn parse_csv(file_path: &str) -> Result<String, RagError> {
204204
let metadata = fs::metadata(file_path)?;
205-
if metadata.len() > MAX_CSV_FILE_SIZE {
206-
return Err(RagError::ParseError("File too large (max 20MB)".to_string()));
205+
if metadata.len() > MAX_PARSE_FILE_SIZE {
206+
return Err(RagError::ParseError("File too large (max 200MB)".to_string()));
207207
}
208208
let mut rdr = csv_crate::ReaderBuilder::new()
209209
.has_headers(false)
@@ -221,8 +221,8 @@ fn parse_csv(file_path: &str) -> Result<String, RagError> {
221221

222222
fn parse_spreadsheet(file_path: &str) -> Result<String, RagError> {
223223
let metadata = fs::metadata(file_path)?;
224-
if metadata.len() > MAX_SPREADSHEET_FILE_SIZE {
225-
return Err(RagError::ParseError("File too large (max 20MB)".to_string()));
224+
if metadata.len() > MAX_PARSE_FILE_SIZE {
225+
return Err(RagError::ParseError("File too large (max 200MB)".to_string()));
226226
}
227227
let mut workbook = open_workbook_auto(file_path)
228228
.map_err(|e| RagError::ParseError(e.to_string()))?;
@@ -255,8 +255,8 @@ fn parse_spreadsheet(file_path: &str) -> Result<String, RagError> {
255255

256256
fn parse_pptx(file_path: &str) -> Result<String, RagError> {
257257
let metadata = std::fs::metadata(file_path)?;
258-
if metadata.len() > MAX_PPTX_FILE_SIZE {
259-
return Err(RagError::ParseError("File too large (max 20MB)".to_string()));
258+
if metadata.len() > MAX_PARSE_FILE_SIZE {
259+
return Err(RagError::ParseError("File too large (max 200MB)".to_string()));
260260
}
261261
let file = std::fs::File::open(file_path)?;
262262
let mut zip = ZipArchive::new(file).map_err(|e| RagError::ParseError(e.to_string()))?;
@@ -333,8 +333,8 @@ fn parse_html(file_path: &str) -> Result<String, RagError> {
333333

334334
fn read_text_auto(file_path: &str) -> Result<String, RagError> {
335335
let metadata = fs::metadata(file_path)?;
336-
if metadata.len() > 50 * 1024 * 1024 {
337-
return Err(RagError::ParseError("File too large (max 50MB)".to_string()));
336+
if metadata.len() > MAX_PARSE_FILE_SIZE {
337+
return Err(RagError::ParseError("File too large (max 200MB)".to_string()));
338338
}
339339
let bytes = fs::read(file_path)?;
340340
// Detect encoding

0 commit comments

Comments
 (0)