@@ -2,6 +2,7 @@ import { type IGlobalVariable } from '@plumber/types'
22
33import z from 'zod'
44
5+ import HttpError from '@/errors/http'
56import StepError from '@/errors/step'
67
78import WorkbookSession from '../../../common/workbook-session'
@@ -16,13 +17,116 @@ const msGraphResponseSchema = z
1617 headerSheetRowIndex : response . rowIndex ,
1718 } ) )
1819
20+ // Can get more info e.g. formulas, numberFormat, text for possible date manipulation but too complex for now
21+ const msGraphRangeResponseSchema = z
22+ . object ( {
23+ rowCount : z . number ( ) ,
24+ rowIndex : z . number ( ) ,
25+ values : z . array ( z . array ( z . coerce . string ( ) ) ) , // first row is the header
26+ } )
27+ . transform ( ( response ) => {
28+ if ( response . values . length === 0 ) {
29+ throw new Error ( 'Excel table is missing the header row' )
30+ }
31+ return {
32+ rowCount : response . rowCount ,
33+ headerSheetRowIndex : response . rowIndex ,
34+ headerRowValues : response . values [ 0 ] ,
35+ rowValues : response . values . slice ( 1 ) ,
36+ }
37+ } )
38+
1939interface GetTopNTableRowsResult {
2040 columns : string [ ]
2141 rows : string [ ] [ ]
2242 headerSheetRowIndex : number
2343}
2444
25- export default async function getTopNTableRows (
45+ /**
46+ * Using range endpoint to get both the header row and data rows
47+ * instead of headerRowRange and rows to avoid extra API call
48+ * Range endpoint API time taken (similar to the previous resized range method)
49+ * - Around 0.5s for 50k rows with 5 columns of data
50+ * - Around 0.25s for 25k rows with 3 columns of empty data
51+ * - Around 0.15s for 10k rows with 3 columns of empty data
52+ *
53+ * Also, rows endpoint uses pagination so the query takes longer for large tables
54+ * Rows endpoint API time taken:
55+ * - Around 5s for 50k rows with 5 columns of data
56+ * - Around 2s for 25k rows with 3 columns of empty data
57+ * - Around 1s for 10k rows with 3 columns of empty data
58+ *
59+ * Considered using columns API but there is no headerSheetRowIndex returned so
60+ * an extra API call has to be made to retrieve it
61+ * Reference: https://learn.microsoft.com/en-us/graph/api/table-range?view=graph-rest-1.0&tabs=http
62+ */
63+ export async function getTopNTableRows (
64+ $ : IGlobalVariable ,
65+ session : WorkbookSession ,
66+ tableId : string ,
67+ n : number ,
68+ ) : Promise < GetTopNTableRowsResult > {
69+ try {
70+ const rangeParseResult = msGraphRangeResponseSchema . safeParse (
71+ (
72+ await session . request (
73+ `/tables/${ tableId } /range?$select=values,rowCount,rowIndex` ,
74+ 'get' ,
75+ )
76+ ) . data ,
77+ )
78+
79+ if ( rangeParseResult . success === false ) {
80+ throw new StepError (
81+ 'Invalid table range' ,
82+ 'Check your Excel file and try again' ,
83+ $ . step . position ,
84+ $ . app . name ,
85+ )
86+ }
87+
88+ const { rowCount, headerSheetRowIndex, headerRowValues, rowValues } =
89+ rangeParseResult . data
90+
91+ // rowCount includes the header row
92+ if ( rowCount > n + 1 ) {
93+ throw new StepError (
94+ `Your Excel table has more than ${ n . toLocaleString ( ) } rows.` ,
95+ `Reduce the number of rows and try again.` ,
96+ $ . step . position ,
97+ $ . app . name ,
98+ )
99+ }
100+
101+ return {
102+ columns : headerRowValues ,
103+ rows : rowValues ,
104+ headerSheetRowIndex,
105+ }
106+ } catch ( error ) {
107+ if ( error instanceof HttpError ) {
108+ if (
109+ error . response . status === 400 &&
110+ error . response . data . error . code === 'RangeExceedsLimit'
111+ ) {
112+ throw new StepError (
113+ 'Your Excel table is too large' ,
114+ 'Reduce the number of rows or columns and try again.' ,
115+ $ . step . position ,
116+ $ . app . name ,
117+ error ,
118+ )
119+ }
120+ }
121+ throw error
122+ }
123+ }
124+
125+ /**
126+ * @deprecated Use getTopNTableRows instead
127+ */
128+ // Old method in case we need to rollback fast
129+ export async function getTopNTableRowsOld (
26130 // Typically, we should avoid making $ viral though the codebase, but this is
27131 // an exception because getTopNTableRows is not a common helper function.
28132 $ : IGlobalVariable ,
@@ -86,3 +190,5 @@ export default async function getTopNTableRows(
86190 headerSheetRowIndex : tableRows . headerSheetRowIndex ,
87191 }
88192}
193+
194+ export default getTopNTableRows
0 commit comments