Skip to content

Commit 66b7654

Browse files
authored
revert to previous pdf-extract; remove test for encrypted pdf support (spiceai#5355)
1 parent 8b388f6 commit 66b7654

3 files changed

Lines changed: 21 additions & 127 deletions

File tree

Cargo.lock

Lines changed: 20 additions & 71 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/document_parse/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@ version.workspace = true
1010

1111
[dependencies]
1212
docx-rs = { git = "https://github.com/spiceai/docx-rs", rev="2d86a60b58afb02b157f96b42a92626417e47f71"}
13-
pdf-extract = { git = "https://github.com/spiceai/pdf-extract.git", rev = "3989f973c061cd805ba6fe905f14b01074844dfc" }
13+
pdf-extract = { git = "https://github.com/spiceai/pdf-extract", rev = "3ca5e2c904cdf5897f3d671e3b2c4b62b1612b0c" }
1414
snafu.workspace = true
1515
bytes.workspace = true
1616
tokio = { workspace = true, features = ["sync"] }

crates/runtime/tests/s3/mod.rs

Lines changed: 0 additions & 55 deletions
Original file line number | Diff line number | Diff line change
@@ -127,61 +127,6 @@ async fn s3_federation() -> Result<(), anyhow::Error> {
127127
.await
128128
}
129129

130-
#[tokio::test]
131-
async fn s3_pdfs() -> Result<(), anyhow::Error> {
132-
let _tracing = init_tracing(Some("integration=debug,info"));
133-
134-
let mut dataset = Dataset::new("s3://spiceai-public-datasets/test_pdf_files", "pdfs");
135-
dataset.params = Some(Params::from_string_map(
136-
vec![
137-
("file_format".to_string(), "pdf".to_string()),
138-
("client_timeout".to_string(), "120s".to_string()),
139-
]
140-
.into_iter()
141-
.collect(),
142-
));
143-
144-
test_request_context()
145-
.scope(async {
146-
let app = AppBuilder::new("s3_pdfs").with_dataset(dataset).build();
147-
148-
let status = status::RuntimeStatus::new();
149-
let df = get_test_datafusion(Arc::clone(&status));
150-
151-
let rt = Runtime::builder()
152-
.with_datafusion(df)
153-
.with_app(app)
154-
.build()
155-
.await;
156-
let cloned_rt = Arc::new(rt.clone());
157-
158-
// Set a timeout for the test
159-
tokio::select! {
160-
() = tokio::time::sleep(std::time::Duration::from_secs(60)) => {
161-
return Err(anyhow::anyhow!("Timed out waiting for datasets to load"));
162-
}
163-
() = cloned_rt.load_components() => {}
164-
}
165-
166-
let mut query_result = rt
167-
.datafusion()
168-
.query_builder("SELECT * FROM pdfs")
169-
.build()
170-
.run()
171-
.await
172-
.map_err(|e| anyhow::anyhow!(e))?;
173-
let mut batches = vec![];
174-
while let Some(batch) = query_result.data.next().await {
175-
batches.push(batch?);
176-
}
177-
178-
assert_eq!(batches.len(), 2);
179-
180-
Ok(())
181-
})
182-
.await
183-
}
184-
185130
#[tokio::test]
186131
async fn s3_hive_partitioning() -> Result<(), anyhow::Error> {
187132
let _tracing = init_tracing(Some("integration=debug,info"));

0 commit comments

Comments
 (0)