Skip to content

Commit 7447881

Browse files
committed
feat: add parquet examples to the readme
1 parent 6e11566 commit 7447881

3 files changed

Lines changed: 56 additions & 27 deletions

File tree

README.md

Lines changed: 54 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -15,27 +15,22 @@ using the [tpchgen](https://github.com/clflushopt/tpchgen-rs) crates.
1515

1616
## Usage
1717

18-
The `datafusion-tpch` crate offers two possible ways to register the TPCH individual
19-
table functions.
18+
The `datafusion-tpch` crate offers two possible ways to register the TPCH table
19+
functions.
2020

21-
You can register functions individually.
21+
You can register the individual UDTFs separately.
2222

2323
```rust
24+
use datafusion_tpch::register_tpch_udtfs;
25+
2426
#[tokio::main]
2527
async fn main() -> Result<()> {
2628
// create local execution context
2729
let ctx = SessionContext::new();
2830

2931
// Register all the UDTFs.
30-
ctx.register_udtf(TpchNation::name(), Arc::new(TpchNation {}));
31-
ctx.register_udtf(TpchCustomer::name(), Arc::new(TpchCustomer {}));
32-
ctx.register_udtf(TpchOrders::name(), Arc::new(TpchOrders {}));
33-
ctx.register_udtf(TpchLineitem::name(), Arc::new(TpchLineitem {}));
34-
ctx.register_udtf(TpchPart::name(), Arc::new(TpchPart {}));
35-
ctx.register_udtf(TpchPartsupp::name(), Arc::new(TpchPartsupp {}));
36-
ctx.register_udtf(TpchSupplier::name(), Arc::new(TpchSupplier {}));
37-
ctx.register_udtf(TpchRegion::name(), Arc::new(TpchRegion {}));
38-
32+
register_tpch_udtfs(&ctx);
33+
3934
// Generate the nation table with a scale factor of 1.
4035
let df = ctx
4136
.sql(format!("SELECT * FROM tpch_nation(1.0);").as_str())
@@ -45,8 +40,7 @@ async fn main() -> Result<()> {
4540
}
4641
```
4742

48-
Or use the helper function `register_tpch_udtfs` to register all of them
49-
at once (which is the preferred approach).
43+
Or you can register a single UDTF which generates all tables at once.
5044

5145
```rust
5246
use datafusion_tpch::register_tpch_udtfs;
@@ -57,17 +51,60 @@ async fn main() -> Result<()> {
5751
let ctx = SessionContext::new();
5852

5953
// Register the single UDTF that generates all the tables.
60-
register_tpch_udtfs(&ctx);
54+
register_tpch_udtf(&ctx);
6155

6256
// Generate all the TPCH tables with a scale factor of 1.
6357
let df = ctx
64-
.sql(format!("SELECT * FROM tpch_nation(1.0);").as_str())
58+
.sql(format!("SELECT * FROM tpch(1.0);").as_str())
6559
.await?;
6660
df.show().await?;
6761
Ok(())
6862
}
6963
```
7064

65+
## Examples
66+
67+
To keep things simple, we don't bundle writing to Parquet in the table provider
68+
but instead defer that to the user, who can use the `COPY` command.
69+
70+
71+
```rust
72+
use datafusion::prelude::{SessionConfig, SessionContext};
73+
use datafusion_tpch::{register_tpch_udtf, register_tpch_udtfs};
74+
75+
#[tokio::main]
76+
async fn main() -> datafusion::error::Result<()> {
77+
let ctx = SessionContext::new_with_config(SessionConfig::new().with_information_schema(true));
78+
register_tpch_udtf(&ctx);
79+
80+
let sql_df = ctx.sql(&format!("SELECT * FROM tpch(1.0);")).await?;
81+
sql_df.show().await?;
82+
83+
let sql_df = ctx.sql(&format!("SHOW TABLES;")).await?;
84+
sql_df.show().await?;
85+
86+
let sql_df = ctx
87+
.sql(&format!(
88+
"COPY nation TO './tpch_nation.parquet' STORED AS PARQUET"
89+
))
90+
.await?;
91+
sql_df.show().await?;
92+
93+
register_tpch_udtfs(&ctx)?;
94+
95+
let sql_df = ctx
96+
.sql(&format!(
97+
"COPY (SELECT * FROM tpch_lineitem(1.0)) TO './tpch_lineitem_sf_10.parquet' STORED AS PARQUET"
98+
))
99+
.await?;
100+
sql_df.show().await?;
101+
102+
Ok(())
103+
}
104+
```
105+
106+
You can find other examples in the [examples](examples/) directory.
107+
71108
## License
72109

73-
The project is licensed under the [APACHE 2.0](LICENSE) license.
110+
The project is licensed under the [APACHE 2.0](LICENSE) license.

examples/parquet.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
//! Example of using the datafusion-tpch extension to generate TPCH tables
22
//! and writing them to disk via `COPY`.
3-
43
use datafusion::prelude::{SessionConfig, SessionContext};
54
use datafusion_tpch::{register_tpch_udtf, register_tpch_udtfs};
65

src/lib.rs

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -33,15 +33,8 @@ macro_rules! define_tpch_udtf_provider {
3333
/// async fn main() -> Result<(), Error> {
3434
/// // create local execution context
3535
/// let ctx = SessionContext::new();
36-
/// // Register all the UDTFs.
37-
/// ctx.register_udtf(TpchNation::name(), Arc::new(TpchNation {}));
38-
/// ctx.register_udtf(TpchCustomer::name(), Arc::new(TpchCustomer {}));
39-
/// ctx.register_udtf(TpchOrders::name(), Arc::new(TpchOrders {}));
40-
/// ctx.register_udtf(TpchLineitem::name(), Arc::new(TpchLineitem {}));
41-
/// ctx.register_udtf(TpchPart::name(), Arc::new(TpchPart {}));
42-
/// ctx.register_udtf(TpchPartsupp::name(), Arc::new(TpchPartsupp {}));
43-
/// ctx.register_udtf(TpchSupplier::name(), Arc::new(TpchSupplier {}));
44-
/// ctx.register_udtf(TpchRegion::name(), Arc::new(TpchRegion {}));
36+
/// // Register all the UDTFs.
37+
/// register_tpch_udtfs(&ctx);
4538
/// // Generate the nation table with a scale factor of 1.
4639
/// let df = ctx
4740
/// .sql(format!("SELECT * FROM tpch_nation(1.0);").as_str())

0 commit comments

Comments
 (0)