Skip to content

feat: Add built-in CSV loader #19167

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 51 additions & 0 deletions packages/bun-types/bun.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -610,6 +610,57 @@ declare module "bun" {
export function parse(input: string): object;
}

/**
 * CSV related APIs
 */
namespace CSV {
  /**
   * Options for parsing CSV strings.
   */
  export interface CSVParserOptions {
    /**
     * Whether the CSV has a header row.
     * If true, returns an array of objects. If false, returns an array of arrays.
     * @default true
     */
    header?: boolean;
    /**
     * The delimiter character to use.
     * @default ','
     */
    delimiter?: string;
    /**
     * Whether to allow comments in the CSV.
     * @default true
     */
    comments?: boolean;
    /**
     * Whether to trim whitespace from fields.
     * @default false
     */
    trim_whitespace?: boolean;
    /**
     * Whether to enable dynamic typing for fields.
     *
     * NOTE(review): the return types of {@link parse} always describe fields
     * as `string`; confirm whether enabling this option can produce
     * non-string values and widen the field type if so.
     * @default false
     */
    dynamic_typing?: boolean;
  }

  /**
   * Parse a CSV string that has no header row into an array of rows.
   *
   * This overload is selected only when `header` is statically known to be
   * the literal `false`.
   *
   * @category Utilities
   *
   * @param input The CSV string to parse
   * @param options Parsing options with `header: false`
   * @returns One `string[]` per row
   */
  export function parse(input: string, options: CSVParserOptions & { header: false }): string[][];
  /**
   * Parse a CSV string into a JavaScript array.
   *
   * When `header` is only known as `boolean` (not the literal `false`), the
   * declared type falls back to the object form; narrow the result yourself
   * if `header` may be `false` at runtime.
   *
   * @category Utilities
   *
   * @param input The CSV string to parse
   * @param options Parsing options
   * @returns If `header` is true (default), returns `Record<string, string>[]`; otherwise, `string[][]`
   */
  export function parse(input: string, options?: CSVParserOptions): Record<string, string>[];
}

/**
* Synchronously resolve a `moduleId` as though it were imported from `parent`
*
Expand Down
20 changes: 20 additions & 0 deletions packages/bun-types/extensions.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,23 @@ declare module "*.html" {
var contents: any;
export = contents;
}

/**
 * Importing a `.csv` file yields an array of string-to-string records
 * (presumably one per data row, keyed by the header row's column names).
 */
declare module "*.csv" {
  const rows: Record<string, string>[];
  export = rows;
}

/**
 * Importing a `.csv` file with the `?no_header` query yields an array of
 * string arrays (presumably one inner array per row, with no header handling).
 */
declare module "*.csv?no_header" {
  const rows: string[][];
  export = rows;
}

/**
 * Importing a `.tsv` file yields an array of string-to-string records
 * (presumably one per data row, keyed by the header row's column names).
 */
declare module "*.tsv" {
  const rows: Record<string, string>[];
  export = rows;
}

/**
 * Importing a `.tsv` file with the `?no_header` query yields an array of
 * string arrays (presumably one inner array per row, with no header handling).
 */
declare module "*.tsv?no_header" {
  const rows: string[][];
  export = rows;
}
4 changes: 4 additions & 0 deletions src/api/schema.zig
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,10 @@ pub const Api = struct {
file,
json,
toml,
csv,
csv_no_header,
tsv,
tsv_no_header,
wasm,
napi,
base64,
Expand Down
4 changes: 4 additions & 0 deletions src/bake/DevServer.zig
Original file line number Diff line number Diff line change
Expand Up @@ -5187,6 +5187,10 @@ const DirectoryWatchStore = struct {
.json,
.jsonc,
.toml,
.csv,
.csv_no_header,
.tsv,
.tsv_no_header,
.wasm,
.napi,
.base64,
Expand Down
7 changes: 7 additions & 0 deletions src/bun.js/api/BunObject.zig
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ pub const BunObject = struct {
pub const SHA512 = toJSGetter(Crypto.SHA512.getter);
pub const SHA512_256 = toJSGetter(Crypto.SHA512_256.getter);
pub const TOML = toJSGetter(Bun.getTOMLObject);
pub const CSV = toJSGetter(Bun.getCSVObject);
pub const Transpiler = toJSGetter(Bun.getTranspilerConstructor);
pub const argv = toJSGetter(Bun.getArgv);
pub const cwd = toJSGetter(Bun.getCWD);
Expand Down Expand Up @@ -116,6 +117,7 @@ pub const BunObject = struct {
@export(&BunObject.SHA512_256, .{ .name = getterName("SHA512_256") });

@export(&BunObject.TOML, .{ .name = getterName("TOML") });
@export(&BunObject.CSV, .{ .name = getterName("CSV") });
@export(&BunObject.Glob, .{ .name = getterName("Glob") });
@export(&BunObject.Transpiler, .{ .name = getterName("Transpiler") });
@export(&BunObject.argv, .{ .name = getterName("argv") });
Expand Down Expand Up @@ -1253,6 +1255,10 @@ pub fn getHashObject(globalThis: *JSC.JSGlobalObject, _: *JSC.JSObject) JSC.JSVa
return HashObject.create(globalThis);
}

/// Getter body for `BunObject.CSV` (wrapped with `toJSGetter`): builds the
/// object returned by `CSVObject.create` for the given global. The second
/// parameter is unused.
pub fn getCSVObject(globalThis: *JSC.JSGlobalObject, _: *JSC.JSObject) JSC.JSValue {
    const csv_object = CSVObject.create(globalThis);
    return csv_object;
}

/// Getter body for `BunObject.TOML` (wrapped with `toJSGetter`): builds the
/// object returned by `TOMLObject.create` for the given global. The second
/// parameter is unused.
pub fn getTOMLObject(globalThis: *JSC.JSGlobalObject, _: *JSC.JSObject) JSC.JSValue {
    const toml_object = TOMLObject.create(globalThis);
    return toml_object;
}
Expand Down Expand Up @@ -1328,6 +1334,7 @@ pub fn getUnsafe(globalThis: *JSC.JSGlobalObject, _: *JSC.JSObject) JSC.JSValue
pub const HashObject = @import("./HashObject.zig");
pub const UnsafeObject = @import("./UnsafeObject.zig");
pub const TOMLObject = @import("./TOMLObject.zig");
pub const CSVObject = @import("./CSVObject.zig");

const Debugger = JSC.Debugger;

Expand Down
122 changes: 122 additions & 0 deletions src/bun.js/api/CSVObject.zig
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
/// Builds the JS object exposed as the CSV namespace: an empty object with a
/// single `parse` property bound to the native `parse` function below
/// (declared with 2 arguments).
pub fn create(globalThis: *JSC.JSGlobalObject) JSC.JSValue {
    const csv_object = JSValue.createEmptyObject(globalThis, 1);

    // The same static name is used both as the property key and as the
    // function's own name.
    const parse_name = ZigString.static("parse");
    const parse_callback = JSC.createCallback(globalThis, parse_name, 2, parse);
    csv_object.put(globalThis, parse_name, parse_callback);

    return csv_object;
}

/// Native implementation of `parse(input, options?)` on the CSV object.
///
/// Arguments (read from `callframe`):
///   0. the CSV text; throws an invalid-arguments error when it is missing,
///      undefined or null. Any other value is converted with `toSlice`.
///   1. optional options object. Recognised keys are `header`, `delimiter`,
///      `comments`, `trim_whitespace` and `dynamic_typing`. A key whose value
///      has the wrong JS type is ignored and the parser default is kept.
///
/// Returns the parsed value. The parser's result is currently printed as JSON
/// text and re-parsed into a JS value. Parser and printer failures are thrown
/// as a JS value built from `log`.
pub fn parse(
    globalThis: *JSC.JSGlobalObject,
    callframe: *JSC.CallFrame,
) bun.JSError!JSC.JSValue {
    var arena = bun.ArenaAllocator.init(globalThis.allocator());
    const allocator = arena.allocator();
    defer arena.deinit();
    var log = logger.Log.init(default_allocator);
    // The log is backed by `default_allocator`, not the arena, so it has to be
    // released explicitly on every exit path.
    defer log.deinit();
    const arguments = callframe.arguments_old(2).slice();
    if (arguments.len == 0 or arguments[0].isEmptyOrUndefinedOrNull()) {
        return globalThis.throwInvalidArguments("Expected a string to parse", .{});
    }

    // Default parser options
    var parser_options = CSVParser.CSVParserOptions{};

    // Process options if provided
    if (arguments.len > 1 and !arguments[1].isEmptyOrUndefinedOrNull() and arguments[1].isObject()) {
        const options = arguments[1];

        // `try` propagates exceptions raised by the property lookup (e.g. a
        // throwing getter); a missing property yields null and is skipped.
        if (try options.get(globalThis, "header")) |header_value| {
            if (header_value.isBoolean()) {
                parser_options.header = header_value.asBoolean();
            }
        }

        if (try options.get(globalThis, "delimiter")) |delimiter_value| {
            if (delimiter_value.isString()) {
                // Convert through `toSlice` (the same conversion used for the
                // input text below) instead of reading the ZigString's raw
                // backing bytes, which are not meaningful byte-wise when the
                // JS string is stored as UTF-16.
                var delimiter_utf8 = try delimiter_value.toSlice(globalThis, allocator);
                defer delimiter_utf8.deinit();
                const delimiter_bytes = delimiter_utf8.slice();

                // NOTE(review): only the first byte is used; an empty string
                // keeps the default and longer / multi-byte delimiters are
                // silently truncated. Consider rejecting them explicitly.
                if (delimiter_bytes.len > 0) {
                    parser_options.delimiter = delimiter_bytes[0];
                }
            }
        }

        if (try options.get(globalThis, "comments")) |comments_value| {
            if (comments_value.isBoolean()) {
                parser_options.comments = comments_value.asBoolean();
            }
        }

        if (try options.get(globalThis, "trim_whitespace")) |trim_value| {
            if (trim_value.isBoolean()) {
                parser_options.trim_whitespace = trim_value.asBoolean();
            }
        }

        if (try options.get(globalThis, "dynamic_typing")) |typing_value| {
            if (typing_value.isBoolean()) {
                parser_options.dynamic_typing = typing_value.asBoolean();
            }
        }
    }

    var input_slice = try arguments[0].toSlice(globalThis, bun.default_allocator);
    defer input_slice.deinit();
    var source = logger.Source.initPathString("input.csv", input_slice.slice());

    // Parse the CSV data
    const parse_result = CSVParser.CSV.parse(&source, &log, allocator, false, parser_options) catch {
        return globalThis.throwValue(log.toJS(globalThis, default_allocator, "Failed to parse CSV"));
    };

    // for now: serialise the parsed expression to JSON text and let the JS
    // engine parse that text, rather than building JS values directly.
    const buffer_writer = js_printer.BufferWriter.init(allocator);
    var writer = js_printer.BufferPrinter.init(buffer_writer);
    _ = js_printer.printJSON(
        *js_printer.BufferPrinter,
        &writer,
        parse_result,
        &source,
        .{
            .mangled_props = null,
        },
    ) catch {
        return globalThis.throwValue(log.toJS(globalThis, default_allocator, "Failed to print csv"));
    };

    const slice = writer.ctx.buffer.slice();
    var out = bun.String.fromUTF8(slice);
    defer out.deref();

    return out.toJSByParseJSON(globalThis);
}

const CSVObject = @This();
const JSC = bun.JSC;
const JSValue = JSC.JSValue;
const JSGlobalObject = JSC.JSGlobalObject;
const JSObject = JSC.JSObject;
const std = @import("std");
const ZigString = JSC.ZigString;
const logger = bun.logger;
const bun = @import("bun");
const js_printer = bun.js_printer;
const default_allocator = bun.default_allocator;
const CSVParser = @import("../../csv/csv_parser.zig");
1 change: 1 addition & 0 deletions src/bun.js/bindings/BunObject+exports.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
macro(SHA512) \
macro(SHA512_256) \
macro(TOML) \
macro(CSV) \
macro(Transpiler) \
macro(ValkeyClient) \
macro(argv) \
Expand Down
1 change: 1 addition & 0 deletions src/bun.js/bindings/BunObject.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -714,6 +714,7 @@ JSC_DEFINE_HOST_FUNCTION(functionFileURLToPath, (JSC::JSGlobalObject * globalObj
SHA512 BunObject_getter_wrap_SHA512 DontDelete|PropertyCallback
SHA512_256 BunObject_getter_wrap_SHA512_256 DontDelete|PropertyCallback
TOML BunObject_getter_wrap_TOML DontDelete|PropertyCallback
CSV BunObject_getter_wrap_CSV DontDelete|PropertyCallback
Transpiler BunObject_getter_wrap_Transpiler DontDelete|PropertyCallback
embeddedFiles BunObject_getter_wrap_embeddedFiles DontDelete|PropertyCallback
S3Client BunObject_getter_wrap_S3Client DontDelete|PropertyCallback
Expand Down
Loading