Skip to content

Commit 47ee364

Browse files
df schema fix
1 parent 23ed006 commit 47ee364

File tree

2 files changed

+148
-8
lines changed

2 files changed

+148
-8
lines changed

__tests__/dataframe.test.ts

+81
Original file line numberDiff line numberDiff line change
@@ -1916,6 +1916,87 @@ describe("create", () => {
19161916

19171917
expect(df.toObject()).toEqual(expected);
19181918
});
1919+
// An explicit schema replaces inferred dtypes and renames the columns.
test("with schema", () => {
  const df = pl.DataFrame(
    {
      a: [1, 2, 3],
      b: ["1", "2", "3"],
    },
    {
      schema: {
        x: pl.Int32,
        y: pl.String,
      },
    },
  );
  expect(df.schema).toEqual({ x: pl.Int32, y: pl.String });
});
// Schema names are applied positionally: the first schema key names the
// first data column, and the column values themselves are left untouched.
test("with schema renames columns positionally and keeps the data", () => {
  const df = pl.DataFrame(
    {
      a: [1, 2, 3],
      b: ["1", "2", "3"],
    },
    {
      schema: {
        x: pl.Int32,
        y: pl.String,
      },
    },
  );
  expect(df.columns).toEqual(["x", "y"]);
  expect(df.toObject()).toEqual({ x: [1, 2, 3], y: ["1", "2", "3"] });
});
// Overrides only touch the listed columns; the others keep their inferred dtype.
test("with schema overrides", () => {
  const df = pl.DataFrame(
    {
      a: [1, 2, 3],
      b: ["1", "2", "3"],
    },
    {
      schemaOverrides: {
        a: pl.Int32,
      },
    },
  );
  expect(df.schema).toEqual({ a: pl.Int32, b: pl.String });
});
test("errors if schemaOverrides and schema are both specified", () => {
  const fn = () =>
    pl.DataFrame(
      {
        a: [1, 2, 3],
        b: ["1", "2", "3"],
      },
      {
        schema: {
          x: pl.Int32,
          y: pl.String,
        },
        schemaOverrides: {
          a: pl.Int32,
        },
      },
    );
  // Pin the message so an unrelated exception cannot make this test pass.
  expect(fn).toThrow("Cannot use both 'schema' and 'schemaOverrides'");
});
test("errors if schema mismatch", () => {
  const fn = () => {
    pl.DataFrame(
      {
        a: [1, 2, 3],
        b: ["1", "2", "3"],
      },
      {
        // Three schema entries for two data columns.
        schema: {
          a: pl.Int32,
          b: pl.String,
          c: pl.Int32,
        },
      },
    );
  };
  // Pin the message so an unrelated exception cannot make this test pass.
  expect(fn).toThrow(
    "The number of columns in the schema does not match the number of columns in the data",
  );
});
19192000
});
19202001
describe("arithmetic", () => {
19212002
test("add", () => {

polars/dataframe.ts

+67-8
Original file line numberDiff line numberDiff line change
@@ -2633,6 +2633,28 @@ export interface DataFrameConstructor extends Deserialize<DataFrame> {
26332633
(): DataFrame;
26342634
/**
26352635
* Create a DataFrame from a JavaScript object
2636+
*
2637+
* @param data - object or array of data
2638+
* @param options - options
2639+
* @param options.columns - column names
2640+
* @param options.orient - orientation of the data [row, col]
2641+
* Whether to interpret two-dimensional data as columns or as rows. If not specified, the orientation is inferred by matching the columns and data dimensions. If this does not yield conclusive results, column orientation is used.
2642+
* @param options.schema - The schema of the resulting DataFrame. The schema may be declared in several ways:
2643+
*
2644+
* - As an object of {name: type} pairs; if a type is null, it will be auto-inferred.
2645+
*
2646+
* - As a list of column names; in this case types are automatically inferred.
2647+
*
2648+
* - As a list of (name,type) pairs; this is equivalent to the dictionary form.
2649+
*
2650+
* If you supply a list of column names that does not match the names in the underlying data, the names given here will overwrite them. The number of names given in the schema should match the underlying data dimensions.
2651+
*
2652+
* If set to null (default), the schema is inferred from the data.
2653+
* @param options.schemaOverrides - Support type specification or override of one or more columns; note that any dtypes inferred from the schema param will be overridden.
2654+
*
2655+
* @param options.inferSchemaLength - The maximum number of rows to scan for schema inference. If set to null, the full data may be scanned (this can be slow). This parameter only applies if the input data is a sequence or generator of rows; other input is read as-is.
2656+
* Note on `options.schema`: the number of entries in the schema should match the underlying data dimensions, unless an array of row objects is being passed, in which case a partial schema can be declared to prevent specific fields from being loaded.
2657+
*
26362658
* @example
26372659
* ```
26382660
* data = {'a': [1n, 2n], 'b': [3, 4]}
@@ -2656,6 +2678,7 @@ export interface DataFrameConstructor extends Deserialize<DataFrame> {
26562678
columns?: any[];
26572679
orient?: "row" | "col";
26582680
schema?: Record<string, string | DataType>;
2681+
schemaOverrides?: Record<string, string | DataType>;
26592682
inferSchemaLength?: number;
26602683
},
26612684
): DataFrame;
@@ -2671,17 +2694,53 @@ function DataFrameConstructor(data?, options?): DataFrame {
26712694
return _DataFrame(arrayToJsDataFrame(data, options));
26722695
}
26732696

2674-
return _DataFrame(objToDF(data as any));
2697+
return _DataFrame(objToDF(data as any, options));
26752698
}
26762699

2677-
function objToDF(obj: Record<string, Array<any>>): any {
2678-
const columns = Object.entries(obj).map(([name, values]) => {
2679-
if (Series.isSeries(values)) {
2680-
return values.rename(name).inner();
2700+
/**
 * Convert a column-oriented object (`{ columnName: values }`) into a
 * `pli.JsDataFrame`.
 *
 * Column construction follows one of three modes:
 * - `options.schema` given: the schema's keys and dtypes are applied to the
 *   data columns **by position** (the i-th schema entry names and types the
 *   i-th value of `obj`), so the keys of `obj` are replaced by the schema keys.
 * - `options.schemaOverrides` given: columns keep their own names; a column
 *   listed in the overrides is built with the requested dtype, all others are
 *   built without an explicit dtype.
 * - neither given: every column is built without an explicit dtype.
 *
 * Values that are already a `Series` are renamed instead of being rebuilt,
 * and are cast when a dtype was requested for that column.
 *
 * @param obj - mapping of column name to column values
 * @param options - construction options; only `schema` and `schemaOverrides`
 *   are read here
 * @returns the result of `new pli.JsDataFrame(columns)`
 * @throws Error if both `schema` and `schemaOverrides` are supplied
 * @throws Error if the number of schema entries differs from the number of
 *   columns in `obj`
 */
function objToDF(
  obj: Record<string, Array<any>>,
  options?: {
    columns?: any[];
    orient?: "row" | "col";
    schema?: Record<string, string | DataType>;
    schemaOverrides?: Record<string, string | DataType>;
    inferSchemaLength?: number;
  },
): any {
  const schema = options?.schema;
  const schemaOverrides = options?.schemaOverrides;

  if (schema && schemaOverrides) {
    throw new Error("Cannot use both 'schema' and 'schemaOverrides'");
  }

  // Build the inner handle for a single column. An existing Series must not be
  // fed back through the `Series(name, values, dtype)` constructor as if it
  // were a plain array, so it is renamed and, when a dtype is requested, cast.
  const toColumn = (
    name: string,
    values: any,
    dtype?: string | DataType,
  ): any => {
    if (Series.isSeries(values)) {
      const renamed = values.rename(name);
      // `cast` is assumed to take a DataType instance (see Series.cast docs).
      return (dtype ? renamed.cast(dtype as DataType) : renamed).inner();
    }
    return (dtype ? Series(name, values, dtype) : Series(name, values)).inner();
  };

  // Explicit schema: names and dtypes are matched to the data by position.
  if (schema) {
    const schemaEntries = Object.entries(schema);
    const dataColumns = Object.values(obj);
    if (schemaEntries.length !== dataColumns.length) {
      throw new Error(
        "The number of columns in the schema does not match the number of columns in the data",
      );
    }
    return new pli.JsDataFrame(
      schemaEntries.map(([name, dtype], idx) =>
        toColumn(name, dataColumns[idx], dtype),
      ),
    );
  }

  // No schema: keep the data's own names, applying per-column overrides if any.
  return new pli.JsDataFrame(
    Object.entries(obj).map(([name, values]) =>
      toColumn(name, values, schemaOverrides?.[name]),
    ),
  );
}

0 commit comments

Comments
 (0)