Skip to content

Commit 1f7f83f

Browse files
committed
Using tpch script from datafusion-benchmarks
1 parent fe86387 commit 1f7f83f

28 files changed

+1938
-1751
lines changed

.github/workflows/rust.yml

+23-11
Original file line numberDiff line numberDiff line change
@@ -2,24 +2,36 @@ name: Rust
22

33
on:
44
push:
5-
branches: [ "main" ]
65
pull_request:
7-
branches: [ "main" ]
86

97
env:
108
CARGO_TERM_COLOR: always
9+
PYTHON_VERSION: 3.9
10+
TPCH_SAMPLING_RATE: "1" # (1/100)
11+
TPCH_TEST_PARTITIONS: "2"
12+
TPCH_DATA_PATH: "data"
1113

1214
jobs:
1315
build:
14-
1516
runs-on: ubuntu-latest
1617

1718
steps:
18-
- uses: actions/checkout@v3
19-
- name: Install protobuf compiler
20-
shell: bash
21-
run: sudo apt-get install protobuf-compiler
22-
- name: Build Rust code
23-
run: cargo build --verbose
24-
- name: Run tests
25-
run: cargo test --verbose
19+
- uses: actions/checkout@v3
20+
- name: Install protobuf compiler
21+
shell: bash
22+
run: sudo apt-get install protobuf-compiler
23+
- name: Build Rust code
24+
run: cargo build --verbose
25+
- name: Set up Python
26+
uses: actions/setup-python@v2
27+
with:
28+
python-version: ${{ env.PYTHON_VERSION }}
29+
- name: Install test dependencies
30+
run: |
31+
python -m pip install --upgrade pip
32+
pip install -r tpch/requirements.txt
33+
- name: Generate test data
34+
run: |
35+
./scripts/gen-test-data.sh
36+
- name: Run tests
37+
run: cargo test --verbose

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,4 @@ venv
55
*.so
66
*.log
77
results-sf*
8+
data

Cargo.toml

+5-1
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,10 @@ uuid = "1.2"
4545
rustc_version = "0.4.0"
4646
tonic-build = { version = "0.8", default-features = false, features = ["transport", "prost"] }
4747

48+
[dev-dependencies]
49+
anyhow = "1.0.89"
50+
pretty_assertions = "1.4.0"
51+
4852
[lib]
4953
name = "datafusion_ray"
5054
crate-type = ["cdylib", "rlib"]
@@ -54,4 +58,4 @@ name = "datafusion_ray._datafusion_ray_internal"
5458

5559
[profile.release]
5660
codegen-units = 1
57-
lto = true
61+
lto = true

scripts/gen-test-data.sh

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
#!/bin/bash
2+
3+
set -e
4+
mkdir -p data
5+
python -m tpch.tpchgen generate --scale-factor "$TPCH_SAMPLING_RATE" --partitions "$TPCH_TEST_PARTITIONS"
6+
python -m tpch.tpchgen convert --partitions "$TPCH_TEST_PARTITIONS"

src/planner.rs

+27-28
Original file line numberDiff line numberDiff line change
@@ -264,125 +264,124 @@ mod test {
264264
use super::*;
265265
use datafusion::physical_plan::displayable;
266266
use datafusion::prelude::{ParquetReadOptions, SessionConfig, SessionContext};
267-
use std::fs;
267+
use pretty_assertions::assert_eq;
268268
use std::path::Path;
269+
use std::{env, fs};
270+
type TestResult<T> = std::result::Result<T, anyhow::Error>;
269271

270272
#[tokio::test]
271-
async fn test_q1() -> Result<()> {
273+
async fn test_q1() -> TestResult<()> {
272274
do_test(1).await
273275
}
274276

275277
#[tokio::test]
276-
async fn test_q2() -> Result<()> {
278+
async fn test_q2() -> TestResult<()> {
277279
do_test(2).await
278280
}
279281

280282
#[tokio::test]
281-
async fn test_q3() -> Result<()> {
283+
async fn test_q3() -> TestResult<()> {
282284
do_test(3).await
283285
}
284286

285287
#[tokio::test]
286-
async fn test_q4() -> Result<()> {
288+
async fn test_q4() -> TestResult<()> {
287289
do_test(4).await
288290
}
289291

290292
#[tokio::test]
291-
async fn test_q5() -> Result<()> {
293+
async fn test_q5() -> TestResult<()> {
292294
do_test(5).await
293295
}
294296

295297
#[tokio::test]
296-
async fn test_q6() -> Result<()> {
298+
async fn test_q6() -> TestResult<()> {
297299
do_test(6).await
298300
}
299301

300302
#[tokio::test]
301-
async fn test_q7() -> Result<()> {
303+
async fn test_q7() -> TestResult<()> {
302304
do_test(7).await
303305
}
304306

305307
#[tokio::test]
306-
async fn test_q8() -> Result<()> {
308+
async fn test_q8() -> TestResult<()> {
307309
do_test(8).await
308310
}
309311

310312
#[tokio::test]
311-
async fn test_q9() -> Result<()> {
313+
async fn test_q9() -> TestResult<()> {
312314
do_test(9).await
313315
}
314316

315317
#[tokio::test]
316-
async fn test_q10() -> Result<()> {
318+
async fn test_q10() -> TestResult<()> {
317319
do_test(10).await
318320
}
319321

320322
#[tokio::test]
321-
async fn test_q11() -> Result<()> {
323+
async fn test_q11() -> TestResult<()> {
322324
do_test(11).await
323325
}
324326

325327
#[tokio::test]
326-
async fn test_q12() -> Result<()> {
328+
async fn test_q12() -> TestResult<()> {
327329
do_test(12).await
328330
}
329331

330332
#[tokio::test]
331-
async fn test_q13() -> Result<()> {
333+
async fn test_q13() -> TestResult<()> {
332334
do_test(13).await
333335
}
334336

335337
#[tokio::test]
336-
async fn test_q14() -> Result<()> {
338+
async fn test_q14() -> TestResult<()> {
337339
do_test(14).await
338340
}
339341

340342
#[ignore]
341343
#[tokio::test]
342-
async fn test_q15() -> Result<()> {
344+
async fn test_q15() -> TestResult<()> {
343345
do_test(15).await
344346
}
345347

346348
#[tokio::test]
347-
async fn test_q16() -> Result<()> {
349+
async fn test_q16() -> TestResult<()> {
348350
do_test(16).await
349351
}
350352

351353
#[tokio::test]
352-
async fn test_q17() -> Result<()> {
354+
async fn test_q17() -> TestResult<()> {
353355
do_test(17).await
354356
}
355357

356358
#[tokio::test]
357-
async fn test_q18() -> Result<()> {
359+
async fn test_q18() -> TestResult<()> {
358360
do_test(18).await
359361
}
360362

361363
#[tokio::test]
362-
async fn test_q19() -> Result<()> {
364+
async fn test_q19() -> TestResult<()> {
363365
do_test(19).await
364366
}
365367

366368
#[tokio::test]
367-
async fn test_q20() -> Result<()> {
369+
async fn test_q20() -> TestResult<()> {
368370
do_test(20).await
369371
}
370372

371373
#[tokio::test]
372-
async fn test_q21() -> Result<()> {
374+
async fn test_q21() -> TestResult<()> {
373375
do_test(21).await
374376
}
375377

376378
#[tokio::test]
377-
async fn test_q22() -> Result<()> {
379+
async fn test_q22() -> TestResult<()> {
378380
do_test(22).await
379381
}
380382

381-
async fn do_test(n: u8) -> Result<()> {
382-
let data_path = "/mnt/bigdata/tpch/sf10-parquet";
383-
if !Path::new(&data_path).exists() {
384-
return Ok(());
385-
}
383+
async fn do_test(n: u8) -> TestResult<()> {
384+
let data_path = env::var("TPCH_DATA_PATH")?;
386385
let file = format!("testdata/queries/q{n}.sql");
387386
let sql = fs::read_to_string(&file)?;
388387
let config = SessionConfig::new().with_target_partitions(4);

testdata/expected-plans/q1.txt

+20-20
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)