Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

perf, memory: Improve performance and memory use for large datasets #5927

Open
wants to merge 10 commits into
base: main
Choose a base branch
from
217 changes: 134 additions & 83 deletions packages/table-core/src/core/row.ts
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,134 @@ export interface CoreRow<TData extends RowData> {
subRows: Row<TData>[]
}

// Holds one shared row prototype per table instance. Keys are held weakly, so an entry does not
// by itself keep its table alive.
const rowProtosByTable = new WeakMap<Table<any>, any>()

/**
 * Returns the table-specific row prototype: one object per table that holds the row methods
 * shared by all rows created with it as their prototype. The prototype is built on first
 * request and cached in `rowProtosByTable`; later calls for the same table return the same
 * object, so callers may extend it with additional shared methods.
 *
 * Every method reads per-row state through `this`, so it must be invoked on a row instance
 * (or with an explicit `this`), never as a detached function reference.
 *
 * @param table The table whose columns and options the shared row methods consult.
 * @returns The cached prototype object for `table`, typed as `CoreRow<TData>`.
 */
export function getRowProto<TData extends RowData>(table: Table<TData>) {
  let rowProto = rowProtosByTable.get(table)

  if (!rowProto) {
    // Own-property test that does not go through the probed object itself. The caches are keyed
    // by column id, so a column whose id is `hasOwnProperty` would otherwise shadow the builtin
    // after its first cached value and make the next lookup throw.
    const hasOwn = (target: object, key: string): boolean =>
      Object.prototype.hasOwnProperty.call(target, key)

    const obj: CoreRow<TData> = {
      // Placeholder props: present only to satisfy the `CoreRow` type; the real values are
      // expected to be set on each row instance at runtime.
      // NOTE(review): `subRows`, `_valuesCache` and `_uniqueValuesCache` below are single objects
      // shared through the prototype. A row that never receives its own copy would read (and, if
      // mutated in place, write) these shared objects; confirm every creation path assigns them.
      id: 'unused',
      depth: 0,
      index: -1,
      original: undefined as TData,
      subRows: [],
      _valuesCache: {},
      _uniqueValuesCache: {},

      // Returns the accessor value for `columnId`, computing it at most once per cache object.
      getValue(columnId: string) {
        if (hasOwn(this._valuesCache, columnId)) {
          return this._valuesCache[columnId]
        }

        const column = table.getColumn(columnId)

        // Unknown column, or a column without an accessor: nothing to compute and nothing cached.
        if (!column?.accessorFn) {
          return undefined
        }

        this._valuesCache[columnId] = column.accessorFn(
          this.original as TData,
          this.index
        )

        return this._valuesCache[columnId] as any
      },

      // Returns the unique values for `columnId`: the column's `getUniqueValues` result when the
      // column definition provides one, otherwise the single accessor value wrapped in an array.
      getUniqueValues(columnId: string) {
        if (!hasOwn(this, '_uniqueValuesCache')) {
          // lazy-init cache on the instance so the prototype's placeholder is never written to
          this._uniqueValuesCache = {}
        }

        if (hasOwn(this._uniqueValuesCache, columnId)) {
          return this._uniqueValuesCache[columnId]
        }

        const column = table.getColumn(columnId)

        if (!column?.accessorFn) {
          return undefined
        }

        if (!column.columnDef.getUniqueValues) {
          this._uniqueValuesCache[columnId] = [this.getValue(columnId)]
          return this._uniqueValuesCache[columnId]
        }

        this._uniqueValuesCache[columnId] = column.columnDef.getUniqueValues(
          this.original as TData,
          this.index
        )

        return this._uniqueValuesCache[columnId] as any
      },

      // Same as `getValue`, falling back to `table.options.renderFallbackValue` when the value is
      // null or undefined.
      renderValue(columnId: string) {
        return this.getValue(columnId) ?? table.options.renderFallbackValue
      },

      getLeafRows() {
        return flattenBy(this.subRows, d => d.subRows)
      },

      // Looks the parent up by id on the table; rows without a `parentId` have no parent.
      getParentRow() {
        return this.parentId ? table.getRow(this.parentId, true) : undefined
      },

      // Walks up the parent chain and returns the ancestors ordered from outermost to nearest.
      getParentRows() {
        const parentRows: Row<TData>[] = []
        let currentRow = this
        while (true) {
          const parentRow = currentRow.getParentRow()
          if (!parentRow) break
          parentRows.push(parentRow)
          currentRow = parentRow
        }
        return parentRows.reverse()
      },

      // NOTE(review): the two `memo` instances below are created once per table and shared by all
      // rows through the prototype. This presumes `memo` forwards `this` to the deps function and
      // that sharing one memo between rows is acceptable for caching; confirm against the `memo`
      // implementation.
      getAllCells: memo(
        function (this: Row<TData>) {
          return [this, table.getAllLeafColumns()]
        },
        (row, leafColumns) => {
          return leafColumns.map(column => {
            return createCell(table, row, column, column.id)
          })
        },
        getMemoOptions(table.options, 'debugRows', 'getAllCells')
      ),

      // Indexes the row's cells by column id.
      _getAllCellsByColumnId: memo(
        function (this: Row<TData>) {
          return [this.getAllCells()]
        },
        allCells => {
          return allCells.reduce(
            (acc, cell) => {
              acc[cell.column.id] = cell
              return acc
            },
            {} as Record<string, Cell<TData, unknown>>
          )
        },
        getMemoOptions(table.options, 'debugRows', 'getAllCellsByColumnId')
      ),
    }

    rowProtosByTable.set(table, obj)
    rowProto = obj
  }

  return rowProto as CoreRow<TData>
}

export const createRow = <TData extends RowData>(
table: Table<TData>,
id: string,
Expand All @@ -101,95 +229,18 @@ export const createRow = <TData extends RowData>(
subRows?: Row<TData>[],
parentId?: string
): Row<TData> => {
let row: CoreRow<TData> = {
const row: CoreRow<TData> = Object.create(getRowProto(table))
Object.assign(row, {
id,
index: rowIndex,
original,
depth,
parentId,
_valuesCache: {},
_uniqueValuesCache: {},
getValue: columnId => {
if (row._valuesCache.hasOwnProperty(columnId)) {
return row._valuesCache[columnId]
}

const column = table.getColumn(columnId)

if (!column?.accessorFn) {
return undefined
}

row._valuesCache[columnId] = column.accessorFn(
row.original as TData,
rowIndex
)

return row._valuesCache[columnId] as any
},
getUniqueValues: columnId => {
if (row._uniqueValuesCache.hasOwnProperty(columnId)) {
return row._uniqueValuesCache[columnId]
}

const column = table.getColumn(columnId)

if (!column?.accessorFn) {
return undefined
}

if (!column.columnDef.getUniqueValues) {
row._uniqueValuesCache[columnId] = [row.getValue(columnId)]
return row._uniqueValuesCache[columnId]
}

row._uniqueValuesCache[columnId] = column.columnDef.getUniqueValues(
row.original as TData,
rowIndex
)

return row._uniqueValuesCache[columnId] as any
},
renderValue: columnId =>
row.getValue(columnId) ?? table.options.renderFallbackValue,
subRows: subRows ?? [],
getLeafRows: () => flattenBy(row.subRows, d => d.subRows),
getParentRow: () =>
row.parentId ? table.getRow(row.parentId, true) : undefined,
getParentRows: () => {
let parentRows: Row<TData>[] = []
let currentRow = row
while (true) {
const parentRow = currentRow.getParentRow()
if (!parentRow) break
parentRows.push(parentRow)
currentRow = parentRow
}
return parentRows.reverse()
},
getAllCells: memo(
() => [table.getAllLeafColumns()],
leafColumns => {
return leafColumns.map(column => {
return createCell(table, row as Row<TData>, column, column.id)
})
},
getMemoOptions(table.options, 'debugRows', 'getAllCells')
),

_getAllCellsByColumnId: memo(
() => [row.getAllCells()],
allCells => {
return allCells.reduce(
(acc, cell) => {
acc[cell.column.id] = cell
return acc
},
{} as Record<string, Cell<TData, unknown>>
)
},
getMemoOptions(table.options, 'debugRows', 'getAllCellsByColumnId')
),
})

if (subRows) {
row.subRows = subRows
}

for (let i = 0; i < table._features.length; i++) {
Expand Down
27 changes: 18 additions & 9 deletions packages/table-core/src/features/ColumnFiltering.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { RowModel } from '..'
import { getRowProto, RowModel } from '..'
import { BuiltInFilterFn, filterFns } from '../filterFns'
import {
Column,
Expand Down Expand Up @@ -362,14 +362,6 @@ export const ColumnFiltering: TableFeature = {
}
},

createRow: <TData extends RowData>(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the core createRow function, we still call these feature.createRow functions if they exist, passing them the row and table instance. That should prevent breaking changes for existing custom features, but we may want to recommend custom features to take the same approach (i.e. extend the prototype). @KevinVandy what do you think about this?

I haven't thought all the details through but something like retaining a createRow function in each feature, and in the core createRow function both calling the feature.createRow function with the row and table instances (to prevent breaking changes for existing custom features), and also merging its prototype onto the core createRow prototype.

That way we could also retain the createRow functions in the core features, (just move the methods onto the prototype), and wouldn't need the getRowProto and Object.assign() approach I think.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

+1 to generally recommending people use the same approach for implementing custom features. I considered making things more explicit by adding methods like initRowProto() to TableFeature interface, but decided against it for simplicity's sake, plus this is more of an internal implementation detail than a public API.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This kind of pattern will be useful to think about in the alpha branch though

row: Row<TData>,
_table: Table<TData>
): void => {
row.columnFilters = {}
row.columnFiltersMeta = {}
},

createTable: <TData extends RowData>(table: Table<TData>): void => {
table.setColumnFilters = (updater: Updater<ColumnFiltersState>) => {
const leafColumns = table.getAllLeafColumns()
Expand Down Expand Up @@ -411,6 +403,23 @@ export const ColumnFiltering: TableFeature = {

return table._getFilteredRowModel()
}

Object.assign(getRowProto(table), {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

At one point, we had removed most usages of Object.assign in favor of direct assignment as a performance improvement at scale. Wonder if that's still applicable to consider here.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It wouldn't be an issue here since it's only called once per table anyway. Your question would apply more to createRow() in row.ts since we call it once per row there, but AFAIK, there are no known performance issues around Object.assign(). There were some many years ago, when it had just been introduced and browser support was fresh (plus there were polyfills), but that hasn't been the case for quite some time.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@KevinVandy beat me to it. I like the idea but am not a big fan of typing the prototype as CoreRow which is not strictly accurate, (and requires us to create these dummy values to keep typescript happy).

@mleibman-db did you try making the createRow function into a constructor function, adding the methods directly to the prototype? I haven't tried it myself but intuitively it feels like it should work. Would need to always call createRow with the new keyword I think.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. Typing the row proto as CoreRow is actually very useful since it provides type safety and makes sure the methods only access defined props there. The use of default unused values there doesn't strike me as concerning, but we could try to replace them with some purely TypeScript type annotations, though IMHO that would be more hacky.

  2. I'm not sure I understand what you're proposing. Could you elaborate?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

since it provides type safety

It's the wrong type though, isn't it? The prototype shouldn't have the instance properties on it.

Could you elaborate?

I am imagining something approximately like the below. I haven't tried but think it should work, happy to be corrected. The naming would be a bit weird though. createRow should probably become just Row, but that would be a breaking change - not sure what to do about that.

const createRow = <TData>(
  this: CoreRow<TData>,
  table: Table<TData>,
  id: string,
  original: TData,
  rowIndex: number,
  depth: number,
  subRows?: Row<TData>[],
  parentId?: string
) => {
  this.id = id
  this.original = original
  // etc.
}

createRow.prototype.getValue = (columnId: string) => {
      
    // ...

    return this._valuesCache[columnId] as any
}

elsewhere:

const row = new createRow(...)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Anywhere where we are thinking that an alternative would be cleaner, but it's a breaking change, can be reserved for a v9 pr. So far this PR looks mostly good. We don't have to assign dummy vars to the prototype just to satisfy TypeScript. A cast could be acceptable there.

If the Object.assign only gets called once, that is negligible and something we don't need to worry about. Direct assignment was a performance improvement in this pr that sped up rendering when creating 10k+ rows. This PR is solving the memory side of that same issue. In conclusion, I'm not worried about this after you explained more.

Copy link
Member

@tombuntus tombuntus Feb 21, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

so you can't just create the proto once at the module level

But I think you can merge the feature.createRow prototypes into the prototype of the object returned by the core createRow function at runtime, when new createRow() is called. In the same loop where we currently call feature.createRow in the core createRow() function body. I haven't tested this though. In this case the prototype's methods would be created at module level on each of the features' createRow functions.

vastly preferring classes

Personally I am not opposed to using a class if it makes typing easier.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(and just for anyone reading this ... the code snippet in this comment should be using function createRow() {}, not an arrow function!)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But happy to change if you feel strongly about it

I was actually agreeing with you that since it's not called many times, it wouldn't be likely to cause issues. I was just trying to explain the likely cause of perf issues - not due to Object.assign() itself, but rather the fact that it is often called like this:

Object.assign(
  targetObject, // <-- existing object
  {
    // new source object which will be garbage collected eventually
  },
)

If it's used this way in a loop with many thousands of iterations, you can run into perf issues due to garbage collection.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We don't have to assign dummy vars to the prototype just to satisfy TypeScript. A cast could be acceptable there.

Done.

get columnFilters() {
// Lazy-init the backing cache on the instance so we don't take up memory for rows that don't need it
return ((
this as { _columnFilters?: ColumnFiltersRow<any>['columnFilters'] }
)._columnFilters ??= {})
},
get columnFiltersMeta() {
// Lazy-init the backing cache on the instance so we don't take up memory for rows that don't need it
return ((
this as {
_columnFiltersMeta?: ColumnFiltersRow<any>['columnFiltersMeta']
}
)._columnFiltersMeta ??= {})
},
} as ColumnFiltersRow<any> & Row<any>)
},
}

Expand Down
49 changes: 28 additions & 21 deletions packages/table-core/src/features/ColumnGrouping.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { RowModel } from '..'
import { getRowProto, RowModel } from '..'
import { BuiltInAggregationFn, aggregationFns } from '../aggregationFns'
import {
AggregationFns,
Expand Down Expand Up @@ -353,31 +353,38 @@ export const ColumnGrouping: TableFeature = {

return table._getGroupedRowModel()
}
},

createRow: <TData extends RowData>(
row: Row<TData>,
table: Table<TData>
): void => {
row.getIsGrouped = () => !!row.groupingColumnId
row.getGroupingValue = columnId => {
if (row._groupingValuesCache.hasOwnProperty(columnId)) {
return row._groupingValuesCache[columnId]
}
Object.assign(getRowProto(table), {
get _groupingValuesCache() {
// Lazy-init the backing cache on the instance so we don't take up memory for rows that don't need it
return ((
this as {
__groupingValuesCache?: GroupingRow['_groupingValuesCache']
}
).__groupingValuesCache ??= {})
},

getIsGrouped() {
return !!this.groupingColumnId
},
getGroupingValue(columnId) {
if (this._groupingValuesCache.hasOwnProperty(columnId)) {
return this._groupingValuesCache[columnId]
}

const column = table.getColumn(columnId)
const column = table.getColumn(columnId)

if (!column?.columnDef.getGroupingValue) {
return row.getValue(columnId)
}
if (!column?.columnDef.getGroupingValue) {
return this.getValue(columnId)
}

row._groupingValuesCache[columnId] = column.columnDef.getGroupingValue(
row.original
)
this._groupingValuesCache[columnId] = column.columnDef.getGroupingValue(
this.original
)

return row._groupingValuesCache[columnId]
}
row._groupingValuesCache = {}
return this._groupingValuesCache[columnId]
},
} as GroupingRow & Row<any>)
},

createCell: <TData extends RowData, TValue>(
Expand Down
Loading