Skip to content

Commit e1b78c3

Browse files
authored
Adding quote option to writeCSV (#174)
* Adding quote option to writeCSV * Additional options for writeCSV * Adding NonZeroUsize
1 parent 6793fe5 commit e1b78c3

File tree

5 files changed

+75
-12
lines changed

5 files changed

+75
-12
lines changed

__tests__/dataframe.test.ts

+11-2
Original file line numberDiff line numberDiff line change
@@ -1560,12 +1560,21 @@ describe("io", () => {
15601560
const expected = "fooXbar\n1X6\n2X2\n9X8\n";
15611561
expect(actual).toEqual(expected);
15621562
});
1563+
test("writeCSV:string:quote", () => {
1564+
const df = pl.DataFrame({
1565+
bar: ["a,b,c", "d,e,f", "g,h,i"],
1566+
foo: [1, 2, 3],
1567+
});
1568+
const actual = df.writeCSV({ quote: "^" }).toString();
1569+
const expected = "bar,foo\n^a,b,c^,1.0\n^d,e,f^,2.0\n^g,h,i^,3.0\n";
1570+
expect(actual).toEqual(expected);
1571+
});
15631572
test("writeCSV:string:header", () => {
15641573
const actual = df
15651574
.clone()
1566-
.writeCSV({ sep: "X", includeHeader: false })
1575+
.writeCSV({ sep: "X", includeHeader: false, lineTerminator: "|" })
15671576
.toString();
1568-
const expected = "1X6\n2X2\n9X8\n";
1577+
const expected = "1X6|2X2|9X8|";
15691578
expect(actual).toEqual(expected);
15701579
});
15711580
test("writeCSV:stream", (done) => {

polars/dataframe.ts

+20-4
Original file line numberDiff line numberDiff line change
@@ -57,9 +57,25 @@ interface WriteMethods {
5757
* If no options are specified, it will return a new string containing the contents
5858
* ___
5959
* @param dest file or stream to write to
60-
* @param options
61-
* @param options.hasHeader - Whether or not to include header in the CSV output.
62-
* @param options.sep - Separate CSV fields with this symbol. _defaults to `,`_
60+
* @param options.includeBom - Whether to include UTF-8 BOM in the CSV output.
61+
* @param options.lineTerminator - String used to end each row.
62+
* @param options.includeHeader - Whether or not to include header in the CSV output.
63+
* @param options.sep - Separate CSV fields with this symbol. _defaults to `,`_
64+
* @param options.quote - Character to use for quoting. Default: \". Note: it will not be used when sep is used
65+
* @param options.batchSize - Number of rows that will be processed per thread.
66+
* @param options.datetimeFormat - A format string, with the specifiers defined by the
67+
* [chrono](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
68+
* Rust crate. If no format specified, the default fractional-second
69+
* precision is inferred from the maximum timeunit found in the frame's
70+
* Datetime cols (if any).
71+
* @param options.dateFormat - A format string, with the specifiers defined by the
72+
* [chrono](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
73+
* Rust crate.
74+
* @param options.timeFormat - A format string, with the specifiers defined by the
75+
* [chrono](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
76+
* Rust crate.
77+
* @param options.floatPrecision - Number of decimal places to write, applied to both `Float32` and `Float64` datatypes.
78+
* @param options.nullValue - A string representing null values (defaulting to the empty string).
6379
* @example
6480
* ```
6581
* > const df = pl.DataFrame({
@@ -86,7 +102,7 @@ interface WriteMethods {
86102
* ... callback(null);
87103
* ... }
88104
* ... });
89-
* > df.head(1).writeCSV(writeStream, {hasHeader: false});
105+
* > df.head(1).writeCSV(writeStream, {includeHeader: false});
90106
* writeStream: '1,6,a'
91107
* ```
92108
* @category IO

polars/types.ts

+9
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,17 @@ export interface ConcatOptions {
4343
* @category Options
4444
*/
4545
export interface WriteCsvOptions {
46+
includeBom?: boolean;
4647
includeHeader?: boolean;
4748
sep?: string;
49+
quote?: string;
50+
lineTerminator?: string;
51+
batchSize?: number;
52+
datetimeFormat?: string;
53+
dateFormat?: string;
54+
timeFormat?: string;
55+
floatPrecision?: number;
56+
nullValue?: string;
4857
}
4958
/**
5059
* Options for @see {@link LazyDataFrame.sinkCSV}

src/conversion.rs

+8-1
Original file line numberDiff line numberDiff line change
@@ -558,15 +558,22 @@ impl From<JsRowCount> for RowIndex {
558558

559559
#[napi(object)]
560560
pub struct WriteCsvOptions {
561+
pub include_bom: Option<bool>,
561562
pub include_header: Option<bool>,
562563
pub sep: Option<String>,
563564
pub quote: Option<String>,
565+
pub line_terminator: Option<String>,
566+
pub batch_size: Option<i64>,
567+
pub datetime_format: Option<String>,
568+
pub date_format: Option<String>,
569+
pub time_format: Option<String>,
570+
pub float_precision: Option<i64>,
571+
pub null_value: Option<String>
564572
}
565573

566574
#[napi(object)]
567575
pub struct SinkCsvOptions {
568576
pub include_header: Option<bool>,
569-
pub quote: Option<String>,
570577
pub include_bom: Option<bool>,
571578
pub separator: Option<String>,
572579
pub line_terminator: Option<String>,

src/dataframe.rs

+27-5
Original file line numberDiff line numberDiff line change
@@ -1312,11 +1312,17 @@ impl JsDataFrame {
13121312
env: Env,
13131313
) -> napi::Result<()> {
13141314
let include_header = options.include_header.unwrap_or(true);
1315-
let sep = options.sep.unwrap_or(",".to_owned());
1316-
let sep = sep.as_bytes()[0];
1317-
let quote = options.quote.unwrap_or(",".to_owned());
1318-
let quote = quote.as_bytes()[0];
1319-
1315+
let sep = options.sep.unwrap_or(",".to_owned()).as_bytes()[0];
1316+
let quote = options.quote.unwrap_or("\"".to_owned()).as_bytes()[0];
1317+
let include_bom = options.include_bom.unwrap_or(false);
1318+
let line_terminator = options.line_terminator.unwrap_or("\n".to_owned());
1319+
let batch_size = NonZeroUsize::new(options.batch_size.unwrap_or(1024) as usize);
1320+
let date_format = options.date_format;
1321+
let time_format = options.time_format;
1322+
let datetime_format = options.datetime_format;
1323+
let float_precision: Option<usize> = options.float_precision.map(|fp| fp as usize);
1324+
let null_value = options.null_value.unwrap_or(SerializeOptions::default().null);
1325+
13201326
match path_or_buffer.get_type()? {
13211327
ValueType::String => {
13221328
let path: napi::JsString = unsafe { path_or_buffer.cast() };
@@ -1325,8 +1331,16 @@ impl JsDataFrame {
13251331
let f = std::fs::File::create(path).unwrap();
13261332
let f = BufWriter::new(f);
13271333
CsvWriter::new(f)
1334+
.include_bom(include_bom)
13281335
.include_header(include_header)
13291336
.with_separator(sep)
1337+
.with_line_terminator(line_terminator)
1338+
.with_batch_size(batch_size.unwrap())
1339+
.with_datetime_format(datetime_format)
1340+
.with_date_format(date_format)
1341+
.with_time_format(time_format)
1342+
.with_float_precision(float_precision)
1343+
.with_null_value(null_value)
13301344
.with_quote_char(quote)
13311345
.finish(&mut self.df)
13321346
.map_err(JsPolarsErr::from)?;
@@ -1336,8 +1350,16 @@ impl JsDataFrame {
13361350
let writeable = JsWriteStream { inner, env: &env };
13371351

13381352
CsvWriter::new(writeable)
1353+
.include_bom(include_bom)
13391354
.include_header(include_header)
13401355
.with_separator(sep)
1356+
.with_line_terminator(line_terminator)
1357+
.with_batch_size(batch_size.unwrap())
1358+
.with_datetime_format(datetime_format)
1359+
.with_date_format(date_format)
1360+
.with_time_format(time_format)
1361+
.with_float_precision(float_precision)
1362+
.with_null_value(null_value)
13411363
.with_quote_char(quote)
13421364
.finish(&mut self.df)
13431365
.map_err(JsPolarsErr::from)?;

0 commit comments

Comments
 (0)