Skip to content

Commit 91dfbe5

Browse files
committed
add truncate
1 parent 830c6ac commit 91dfbe5

6 files changed

Lines changed: 53 additions & 14 deletions

File tree

Cargo.lock

Lines changed: 7 additions & 7 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "flatterer"
3-
version = "0.19.16"
3+
version = "0.19.17"
44
authors = ["David Raznick <kindly@gmail.com>"]
55
edition = "2021"
66
license = "MIT"
@@ -14,9 +14,9 @@ serde_json = { version = "1.0.83", features = ["preserve_order"] }
1414
pyo3 = { version = "0.18.3", features = ["extension-module", "eyre"] }
1515
eyre = "0.6.8"
1616
#libflatterer={path = "../libflatterer"}
17-
libflatterer = "0.19.14"
17+
libflatterer = "0.19.16"
1818

19-
flatterer-web = "0.19.14"
19+
flatterer-web = "0.19.16"
2020
#flatterer-web={path = "../flatterer-web"}
2121

2222
env_logger = "0.10.1"

docs/changelog.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,14 @@ All notable changes to this project will be documented in this file.
44

55
and this project adheres to [Semantic Versioning](http://semver.org/).
66

7+
## [0.19.17] - 2024-06-18
8+
9+
### New
10+
- truncate postgres
11+
12+
### Fixed
13+
- timezone date types now accepted in postgres
14+
715
## [0.19.15] - 2024-05-09
816

917
### Fixed

docs/options.md

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,8 @@ Options:
6060
tables to fit data
6161
--drop When loading to postgres or sqlite, drop table
6262
if already exists.
63+
--truncate When loading to postgres or sqlite, truncate table
64+
if already exists.
6365
--id-prefix TEXT Prefix for all `_link` id fields
6466
--stats Produce stats about the data in the
6567
datapackage.json file
@@ -475,6 +477,26 @@ import flatterer
475477
flatterer.flatten('inputfile.json', 'ouput_dir', postgres='postgres://user:pass@host/dbname', drop=True)
476478
```
477479

480+
## Truncate Tables
481+
482+
**Warning: this could mean you lose data**
483+
484+
For postgres and sqlite. Truncate the existing table if it exists. This is useful if you want to load the data into a database with the schema pre-defined.
485+
486+
### CLI Usage
487+
488+
```bash
489+
flatterer --postgres='postgres://user:pass@host/dbname' --sqlite-path=sqlite.db INPUT_FILE OUTPUT_DIRECTORY --truncate
490+
```
491+
492+
### Python Usage
493+
494+
```python
495+
import flatterer
496+
497+
flatterer.flatten('inputfile.json', 'ouput_dir', postgres='postgres://user:pass@host/dbname', truncate=True)
498+
```
499+
478500
## Fields File
479501

480502
Path to fields CSV file. The fields file can be used for:

flatterer/__init__.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,7 @@ def flatten(
9797
gzip_input=False,
9898
json_path="",
9999
arrays_new_table=False,
100+
truncate=False,
100101
):
101102
global LOGGING_SETUP
102103
if not LOGGING_SETUP:
@@ -144,7 +145,8 @@ def flatten(
144145
table_prefix, id_prefix, emit_obj, force,
145146
schema, schema_titles, path, json_stream, ndjson,
146147
sqlite_path, threads, log_error, postgres, postgres_schema,
147-
drop, pushdown, sql_scripts, evolve, no_link, stats, low_disk, low_memory, gzip_input, json_path, arrays_new_table)
148+
drop, pushdown, sql_scripts, evolve, no_link, stats, low_disk, low_memory,
149+
gzip_input, json_path, arrays_new_table, truncate)
148150
elif method == 'iter':
149151
if path:
150152
raise AttributeError("path not allowed when supplying an iterator")
@@ -157,7 +159,7 @@ def flatten(
157159
table_prefix, id_prefix, emit_obj, force,
158160
schema, schema_titles, sqlite_path, threads, log_error,
159161
postgres, postgres_schema, drop, pushdown, sql_scripts, evolve,
160-
no_link, stats, low_disk, low_memory, gzip_input, json_path, arrays_new_table)
162+
no_link, stats, low_disk, low_memory, gzip_input, json_path, arrays_new_table, truncate)
161163
else:
162164
raise AttributeError("input needs to be a string or a generator of strings, dicts or bytes")
163165

@@ -241,6 +243,7 @@ def iterator_flatten(*args, **kw):
241243
@click.option('--postgres-schema', default="", help='When loading to postgres, put all tables into this schema.')
242244
@click.option('--evolve', is_flag=True, default=False, help='When loading to postgres or sqlite, evolve tables to fit data')
243245
@click.option('--drop', is_flag=True, default=False, help='When loading to postgres or sqlite, drop table if already exists.')
246+
@click.option('--truncate', is_flag=True, default=False, help='When loading to postgres or sqlite, truncate the table if it alraedy exists.')
244247
@click.option('--id-prefix', default="", help='Prefix for all `_link` id fields')
245248
@click.option('--stats', is_flag=True, default=False, help='Produce stats about the data in the datapackage.json file')
246249
@click.argument('inputs', required=False, nargs=-1)
@@ -280,6 +283,7 @@ def cli(
280283
stats=False,
281284
json_path="",
282285
arrays_new_table=False,
286+
truncate=False
283287
):
284288
if web:
285289
import pathlib
@@ -347,6 +351,7 @@ def cli(
347351
stats=stats,
348352
json_path=json_path,
349353
arrays_new_table=arrays_new_table,
354+
truncate=truncate,
350355
)
351356
except IOError:
352357
pass

src/lib.rs

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,8 @@ fn flatterer(_py: Python, m: &PyModule) -> PyResult<()> {
7777
low_memory:bool,
7878
gzip_input:bool,
7979
json_path_selector: String,
80-
arrays_new_table: bool
80+
arrays_new_table: bool,
81+
truncate: bool,
8182
) -> Result<()> {
8283

8384
let mut op = Options::default();
@@ -118,6 +119,7 @@ fn flatterer(_py: Python, m: &PyModule) -> PyResult<()> {
118119
op.gzip_input = gzip_input;
119120
op.json_path_selector = json_path_selector;
120121
op.arrays_new_table = arrays_new_table;
122+
op.truncate = truncate;
121123

122124

123125
if let Err(err) = flatten_all(input_files, output_dir, op) {
@@ -169,7 +171,8 @@ fn flatterer(_py: Python, m: &PyModule) -> PyResult<()> {
169171
low_memory:bool,
170172
gzip_input:bool,
171173
json_path_selector: String,
172-
arrays_new_table: bool
174+
arrays_new_table: bool,
175+
truncate: bool,
173176
) -> Result<()> {
174177
let mut options = Options::default();
175178

@@ -206,6 +209,7 @@ fn flatterer(_py: Python, m: &PyModule) -> PyResult<()> {
206209
options.gzip_input = gzip_input;
207210
options.json_path_selector = json_path_selector;
208211
options.arrays_new_table = arrays_new_table;
212+
options.truncate = truncate;
209213

210214
let final_output_path = PathBuf::from(output_dir);
211215
let parts_path = final_output_path.join("parts");

0 commit comments

Comments
 (0)