@@ -19,7 +19,6 @@ import (
19
19
20
20
"github.com/rudderlabs/rudder-server/jsonrs"
21
21
"github.com/rudderlabs/rudder-server/utils/timeutil"
22
- "github.com/rudderlabs/rudder-server/warehouse/integrations/middleware/sqlquerywrapper"
23
22
"github.com/rudderlabs/rudder-server/warehouse/internal/model"
24
23
"github.com/rudderlabs/rudder-server/warehouse/internal/repo"
25
24
"github.com/rudderlabs/rudder-server/warehouse/logfield"
@@ -78,56 +77,89 @@ type schema struct {
78
77
fetchSchemaRepo fetchSchemaRepo
79
78
now func () time.Time
80
79
cachedSchema model.Schema
81
- cacheExpiry time.Time
82
80
cachedSchemaMu sync.RWMutex
83
81
}
84
82
85
83
func New (
86
- db * sqlquerywrapper. DB ,
84
+ ctx context. Context ,
87
85
warehouse model.Warehouse ,
88
86
conf * config.Config ,
89
- logger logger.Logger ,
87
+ slogger logger.Logger ,
90
88
statsFactory stats.Stats ,
91
89
fetchSchemaRepo fetchSchemaRepo ,
92
- ) Handler {
90
+ schemaRepo schemaRepo ,
91
+ stagingFileRepo stagingFileRepo ,
92
+ ) (Handler , error ) {
93
93
ttlInMinutes := conf .GetDurationVar (720 , time .Minute , "Warehouse.schemaTTLInMinutes" )
94
- schema := & schema {
94
+ sh := & schema {
95
95
warehouse : warehouse ,
96
- log : logger .Child ("schema" ),
96
+ log : slogger .Child ("schema" ),
97
97
ttlInMinutes : ttlInMinutes ,
98
- schemaRepo : repo . NewWHSchemas ( db ) ,
98
+ schemaRepo : schemaRepo ,
99
99
stagingFilesSchemaPaginationSize : conf .GetInt ("Warehouse.stagingFilesSchemaPaginationSize" , 100 ),
100
- stagingFileRepo : repo . NewStagingFiles ( db ) ,
100
+ stagingFileRepo : stagingFileRepo ,
101
101
fetchSchemaRepo : fetchSchemaRepo ,
102
102
enableIDResolution : conf .GetBool ("Warehouse.enableIDResolution" , false ),
103
103
now : timeutil .Now ,
104
104
}
105
- schema .stats .schemaSize = statsFactory .NewTaggedStat ("warehouse_schema_size" , stats .HistogramType , stats.Tags {
105
+ sh .stats .schemaSize = statsFactory .NewTaggedStat ("warehouse_schema_size" , stats .HistogramType , stats.Tags {
106
106
"module" : "warehouse" ,
107
107
"workspaceId" : warehouse .WorkspaceID ,
108
108
"destType" : warehouse .Destination .DestinationDefinition .Name ,
109
109
"sourceId" : warehouse .Source .ID ,
110
110
"destinationId" : warehouse .Destination .ID ,
111
111
})
112
- return schema
112
+ // cachedSchema can be computed in the constructor
113
+ // we need not worry about it getting expired in the middle of the job
114
+ // since we need the schema to be the same for the entire duration of the job
115
+ whSchema , err := sh .schemaRepo .GetForNamespace (
116
+ ctx ,
117
+ sh .warehouse .Source .ID ,
118
+ sh .warehouse .Destination .ID ,
119
+ sh .warehouse .Namespace ,
120
+ )
121
+ if err != nil {
122
+ return nil , fmt .Errorf ("getting schema for namespace: %w" , err )
123
+ }
124
+ if whSchema .Schema == nil {
125
+ sh .cachedSchema = model.Schema {}
126
+ return sh , nil
127
+ }
128
+ if whSchema .ExpiresAt .After (sh .now ()) {
129
+ sh .cachedSchema = whSchema .Schema
130
+ return sh , nil
131
+ }
132
+ sh .log .Infon ("Schema expired" , obskit .DestinationID (sh .warehouse .Destination .ID ), obskit .Namespace (sh .warehouse .Namespace ), logger .NewTimeField ("expiresAt" , whSchema .ExpiresAt ))
133
+ return sh , sh .fetchSchemaFromWarehouse (ctx )
113
134
}
114
135
115
- func (sh * schema ) IsSchemaEmpty (ctx context.Context ) bool {
116
- schema , err := sh .getSchema (ctx )
136
+ func (sh * schema ) fetchSchemaFromWarehouse (ctx context.Context ) error {
137
+ start := sh .now ()
138
+ warehouseSchema , err := sh .fetchSchemaRepo .FetchSchema (ctx )
139
+ if err != nil {
140
+ return fmt .Errorf ("fetching schema: %w" , err )
141
+ }
142
+ duration := math .Round ((sh .now ().Sub (start ).Minutes () * 1000 )) / 1000
143
+ sh .log .Infon ("Fetched schema from warehouse" , obskit .DestinationID (sh .warehouse .Destination .ID ), obskit .Namespace (sh .warehouse .Type ), logger .NewFloatField ("timeTakenInMinutes" , duration ))
144
+ removeDeprecatedColumns (warehouseSchema , sh .warehouse , sh .log )
145
+ err = sh .saveSchema (ctx , warehouseSchema )
117
146
if err != nil {
118
- sh .log .Warnn ("error getting schema" , obskit .Error (err ))
119
- return true
147
+ return fmt .Errorf ("saving schema: %w" , err )
120
148
}
121
- return len (schema ) == 0
149
+ sh .cachedSchema = warehouseSchema
150
+ return nil
151
+ }
152
+
153
+ func (sh * schema ) IsSchemaEmpty (ctx context.Context ) bool {
154
+ sh .cachedSchemaMu .RLock ()
155
+ defer sh .cachedSchemaMu .RUnlock ()
156
+ return len (sh .cachedSchema ) == 0
122
157
}
123
158
124
159
func (sh * schema ) GetTableSchema (ctx context.Context , tableName string ) model.TableSchema {
125
- schema , err := sh .getSchema (ctx )
126
- if err != nil {
127
- sh .log .Warnn ("error getting schema" , obskit .Error (err ))
128
- return model.TableSchema {}
129
- }
130
- return schema [tableName ]
160
+ sh .cachedSchemaMu .RLock ()
161
+ defer sh .cachedSchemaMu .RUnlock ()
162
+ return sh .cachedSchema [tableName ]
131
163
}
132
164
133
165
func (sh * schema ) UpdateSchema (ctx context.Context , updatedSchema model.Schema ) error {
@@ -136,32 +168,31 @@ func (sh *schema) UpdateSchema(ctx context.Context, updatedSchema model.Schema)
136
168
return fmt .Errorf ("marshaling schema: %w" , err )
137
169
}
138
170
sh .stats .schemaSize .Observe (float64 (len (updatedSchemaInBytes )))
139
- return sh .saveSchema (ctx , updatedSchema )
171
+ sh .cachedSchemaMu .Lock ()
172
+ defer sh .cachedSchemaMu .Unlock ()
173
+ err = sh .saveSchema (ctx , updatedSchema )
174
+ if err != nil {
175
+ return fmt .Errorf ("saving schema: %w" , err )
176
+ }
177
+ sh .cachedSchema = updatedSchema
178
+ return nil
140
179
}
141
180
142
181
func (sh * schema ) UpdateTableSchema (ctx context.Context , tableName string , tableSchema model.TableSchema ) error {
143
- schema , err := sh .getSchema (ctx )
144
- if err != nil {
145
- return fmt .Errorf ("getting schema: %w" , err )
146
- }
147
- schemaCopy := make (model.Schema )
148
- for k , v := range schema {
149
- schemaCopy [k ] = v
150
- }
151
- schemaCopy [tableName ] = tableSchema
152
- err = sh .saveSchema (ctx , schemaCopy )
182
+ sh .cachedSchemaMu .Lock ()
183
+ defer sh .cachedSchemaMu .Unlock ()
184
+ sh .cachedSchema [tableName ] = tableSchema
185
+ err := sh .saveSchema (ctx , sh .cachedSchema )
153
186
if err != nil {
154
187
return fmt .Errorf ("saving schema: %w" , err )
155
188
}
156
189
return nil
157
190
}
158
191
159
192
func (sh * schema ) GetColumnsCount (ctx context.Context , tableName string ) (int , error ) {
160
- schema , err := sh .getSchema (ctx )
161
- if err != nil {
162
- return 0 , fmt .Errorf ("getting schema: %w" , err )
163
- }
164
- return len (schema [tableName ]), nil
193
+ sh .cachedSchemaMu .RLock ()
194
+ defer sh .cachedSchemaMu .RUnlock ()
195
+ return len (sh .cachedSchema [tableName ]), nil
165
196
}
166
197
167
198
func (sh * schema ) ConsolidateStagingFilesSchema (ctx context.Context , stagingFiles []* model.StagingFile ) (model.Schema , error ) {
@@ -175,12 +206,10 @@ func (sh *schema) ConsolidateStagingFilesSchema(ctx context.Context, stagingFile
175
206
176
207
consolidatedSchema = consolidateStagingSchemas (consolidatedSchema , schemas )
177
208
}
178
- schema , err := sh .getSchema (ctx )
179
- if err != nil {
180
- return nil , fmt .Errorf ("getting schema: %v" , err )
181
- }
182
- consolidatedSchema = consolidateWarehouseSchema (consolidatedSchema , schema )
183
- consolidatedSchema = overrideUsersWithIdentifiesSchema (consolidatedSchema , sh .warehouse .Type , schema )
209
+ sh .cachedSchemaMu .RLock ()
210
+ defer sh .cachedSchemaMu .RUnlock ()
211
+ consolidatedSchema = consolidateWarehouseSchema (consolidatedSchema , sh .cachedSchema )
212
+ consolidatedSchema = overrideUsersWithIdentifiesSchema (consolidatedSchema , sh .warehouse .Type , sh .cachedSchema )
184
213
consolidatedSchema = enhanceDiscardsSchema (consolidatedSchema , sh .warehouse .Type )
185
214
consolidatedSchema = enhanceSchemaWithIDResolution (consolidatedSchema , sh .isIDResolutionEnabled (), sh .warehouse .Type )
186
215
@@ -192,23 +221,9 @@ func (sh *schema) isIDResolutionEnabled() bool {
192
221
}
193
222
194
223
func (sh * schema ) TableSchemaDiff (ctx context.Context , tableName string , tableSchema model.TableSchema ) (whutils.TableSchemaDiff , error ) {
195
- schema , err := sh .getSchema (ctx )
196
- if err != nil {
197
- return whutils.TableSchemaDiff {}, fmt .Errorf ("getting schema: %w" , err )
198
- }
199
- return tableSchemaDiff (tableName , schema , tableSchema ), nil
200
- }
201
-
202
- func (sh * schema ) fetchSchemaFromWarehouse (ctx context.Context ) (model.Schema , error ) {
203
- start := sh .now ()
204
- warehouseSchema , err := sh .fetchSchemaRepo .FetchSchema (ctx )
205
- if err != nil {
206
- return nil , fmt .Errorf ("fetching schema: %w" , err )
207
- }
208
- duration := math .Round ((sh .now ().Sub (start ).Minutes () * 1000 )) / 1000
209
- sh .log .Infon ("Fetched schema from warehouse" , obskit .DestinationID (sh .warehouse .Destination .ID ), obskit .Namespace (sh .warehouse .Type ), logger .NewFloatField ("timeTakenInMinutes" , duration ))
210
- removeDeprecatedColumns (warehouseSchema , sh .warehouse , sh .log )
211
- return warehouseSchema , sh .saveSchema (ctx , warehouseSchema )
224
+ sh .cachedSchemaMu .RLock ()
225
+ defer sh .cachedSchemaMu .RUnlock ()
226
+ return tableSchemaDiff (tableName , sh .cachedSchema , tableSchema ), nil
212
227
}
213
228
214
229
func (sh * schema ) saveSchema (ctx context.Context , newSchema model.Schema ) error {
@@ -224,48 +239,10 @@ func (sh *schema) saveSchema(ctx context.Context, newSchema model.Schema) error
224
239
if err != nil {
225
240
return fmt .Errorf ("inserting schema: %w" , err )
226
241
}
227
- sh .cachedSchemaMu .Lock ()
228
- sh .cachedSchema = newSchema
229
- sh .cachedSchemaMu .Unlock ()
230
- sh .cacheExpiry = expiresAt
242
+ sh .log .Infon ("Saved schema" , obskit .DestinationID (sh .warehouse .Destination .ID ), obskit .Namespace (sh .warehouse .Namespace ))
231
243
return nil
232
244
}
233
245
234
- func (sh * schema ) getSchema (ctx context.Context ) (model.Schema , error ) {
235
- sh .cachedSchemaMu .RLock ()
236
- if sh .cachedSchema != nil && sh .cacheExpiry .After (sh .now ()) {
237
- defer sh .cachedSchemaMu .RUnlock ()
238
- sh .log .Debugn ("Returning cached schema" , obskit .DestinationID (sh .warehouse .Destination .ID ), obskit .Namespace (sh .warehouse .Type ))
239
- return sh .cachedSchema , nil
240
- }
241
- sh .cachedSchemaMu .RUnlock ()
242
- whSchema , err := sh .schemaRepo .GetForNamespace (
243
- ctx ,
244
- sh .warehouse .Source .ID ,
245
- sh .warehouse .Destination .ID ,
246
- sh .warehouse .Namespace ,
247
- )
248
- if err != nil {
249
- return nil , fmt .Errorf ("getting schema for namespace: %w" , err )
250
- }
251
- if whSchema .Schema == nil {
252
- sh .cachedSchemaMu .Lock ()
253
- defer sh .cachedSchemaMu .Unlock ()
254
- sh .cachedSchema = model.Schema {}
255
- sh .cacheExpiry = sh .now ().Add (sh .ttlInMinutes )
256
- return sh .cachedSchema , nil
257
- }
258
- if whSchema .ExpiresAt .Before (sh .now ()) {
259
- sh .log .Infon ("Schema expired" , obskit .DestinationID (sh .warehouse .Destination .ID ), obskit .Namespace (sh .warehouse .Namespace ), logger .NewTimeField ("expiresAt" , whSchema .ExpiresAt ))
260
- return sh .fetchSchemaFromWarehouse (ctx )
261
- }
262
- sh .cachedSchemaMu .Lock ()
263
- defer sh .cachedSchemaMu .Unlock ()
264
- sh .cachedSchema = whSchema .Schema
265
- sh .cacheExpiry = whSchema .ExpiresAt
266
- return sh .cachedSchema , nil
267
- }
268
-
269
246
// consolidateStagingSchemas merges multiple schemas into one
270
247
// Prefer the type of the first schema, If the type is text, prefer text
271
248
func consolidateStagingSchemas (consolidatedSchema model.Schema , schemas []model.Schema ) model.Schema {
0 commit comments