|
/// Tuning options for a Druid native parallel batch (`index_parallel`) ingestion task.
/// Every field is optional; leaving a field `nil` defers to Druid's server-side default.
public struct IndexParallelTuningConfig: Codable, Hashable, Equatable {
    /// Row count that triggers an intermediate persist to disk. Rarely needs tuning unless
    /// rows are unusually small in bytes and a million rows would be too many to hold in
    /// memory. (Druid default: 1000000)
    public let maxRowsInMemory: Int?

    /// Heap-byte threshold (a rough estimate, not actual usage) that triggers an intermediate
    /// persist. Normally computed internally. Maximum heap use for indexing is
    /// maxBytesInMemory * (2 + maxPendingPersists). (Druid default: 1/6 of max JVM memory)
    public let maxBytesInMemory: Int?

    /// Caps the total column count across the set of segments merged in one publishing phase;
    /// exceeding it splits merging into multiple phases. Druid always merges at least 2
    /// segments per phase regardless of this value. (Druid default: -1, i.e. no limit)
    public let maxColumnsToMerge: Int?

    /// Hint controlling how much data each first-phase task reads; the input source
    /// implementation may ignore it. (Druid default: size-based split hint spec)
    public let splitHintSpec: SplitHintSpec?

    /// How data is partitioned within each timeChunk.
    public let partitionsSpec: PartitionsSpec?

    /// Segment storage format options applied at indexing time.
    public let indexSpec: IndexSpec?

    /// Storage format options for intermediate persisted temporary segments. Disabling
    /// dimension/metric compression here reduces memory needed for the final merge, at the
    /// cost of possibly higher page-cache use while intermediates await merging into the
    /// published segment.
    public let indexSpecForIntermediatePersists: IndexSpec?

    /// Number of persists allowed to be pending but not yet started; past this limit Druid
    /// blocks ingestion until the running persist finishes. One persist may run concurrently
    /// with ingestion with none queued. Maximum heap use for indexing scales as
    /// maxRowsInMemory * (2 + maxPendingPersists).
    public let maxPendingPersists: Int?

    /// Forces perfect rollup, optimizing segment size and query time at the cost of indexing
    /// time. When true, specify intervals in the granularitySpec and use a hashed or
    /// single_dim partitionsSpec. Incompatible with the IOConfig's appendToExisting flag.
    /// (Druid default: false)
    public let forceGuaranteedRollup: Bool?

    /// When true, parse errors are thrown and ingestion halts; when false, unparseable rows
    /// and fields are skipped. (Druid default: false)
    public let reportParseExceptions: Bool?

    /// Milliseconds to wait when pushing segments. Must be >= 0; 0 means wait forever.
    /// (Druid default: 0)
    public let pushTimeout: Int?

    // not implemented:
    // public let segmentWriteOutMediumFactory

    /// Upper bound on worker tasks run in parallel; the supervisor spawns workers up to this
    /// count regardless of available task slots. A value of 1 makes the supervisor task ingest
    /// on its own without spawning workers; too large a value can starve other ingestion of
    /// task slots. (Druid default: 1)
    public let maxNumConcurrentSubTasks: Int?

    /// Maximum retries on task failure. (Druid default: 3)
    public let maxRetry: Int?

    /// Upper bound on segments a single second-phase task merges at once. Only used with a
    /// hashed or single_dim partitionsSpec. (Druid default: 100)
    public let maxNumSegmentsToMerge: Int?

    /// Total task count for the segment-merge phase when partitionsSpec is hashed or
    /// single_dim. (Druid default: 10)
    public let totalNumMergeTasks: Int?

    /// Polling period, in milliseconds, for checking running task statuses.
    /// (Druid default: 1000)
    public let taskStatusCheckPeriodMs: Int?

    /// Timeout for worker tasks reporting their pushed segments. (Druid default: PT10S)
    public let chatHandlerTimeout: String?

    /// Retry count for worker tasks reporting their pushed segments. (Druid default: 5)
    public let chatHandlerNumRetries: Int?

    /// Milliseconds to wait, after ingestion completes, for the new segments to become
    /// queryable. If <= 0 there is no wait; if > 0 the task waits for the Coordinator to
    /// confirm availability, and on timeout exits successfully without that confirmation.
    /// (Druid default: 0)
    public let awaitSegmentAvailabilityTimeoutMillis: Int?

    /// Creates a tuning config. Any parameter left `nil` is omitted from the task spec and
    /// falls back to the corresponding Druid server-side default.
    public init(
        maxRowsInMemory: Int? = nil,
        maxBytesInMemory: Int? = nil,
        maxColumnsToMerge: Int? = nil,
        splitHintSpec: SplitHintSpec? = nil,
        partitionsSpec: PartitionsSpec? = nil,
        indexSpec: IndexSpec? = nil,
        indexSpecForIntermediatePersists: IndexSpec? = nil,
        maxPendingPersists: Int? = nil,
        forceGuaranteedRollup: Bool? = nil,
        reportParseExceptions: Bool? = nil,
        pushTimeout: Int? = nil,
        maxNumConcurrentSubTasks: Int? = nil,
        maxRetry: Int? = nil,
        maxNumSegmentsToMerge: Int? = nil,
        totalNumMergeTasks: Int? = nil,
        taskStatusCheckPeriodMs: Int? = nil,
        chatHandlerTimeout: String? = nil,
        chatHandlerNumRetries: Int? = nil,
        awaitSegmentAvailabilityTimeoutMillis: Int? = nil
    ) {
        self.maxRowsInMemory = maxRowsInMemory
        self.maxBytesInMemory = maxBytesInMemory
        self.maxColumnsToMerge = maxColumnsToMerge
        self.splitHintSpec = splitHintSpec
        self.partitionsSpec = partitionsSpec
        self.indexSpec = indexSpec
        self.indexSpecForIntermediatePersists = indexSpecForIntermediatePersists
        self.maxPendingPersists = maxPendingPersists
        self.forceGuaranteedRollup = forceGuaranteedRollup
        self.reportParseExceptions = reportParseExceptions
        self.pushTimeout = pushTimeout
        self.maxNumConcurrentSubTasks = maxNumConcurrentSubTasks
        self.maxRetry = maxRetry
        self.maxNumSegmentsToMerge = maxNumSegmentsToMerge
        self.totalNumMergeTasks = totalNumMergeTasks
        self.taskStatusCheckPeriodMs = taskStatusCheckPeriodMs
        self.chatHandlerTimeout = chatHandlerTimeout
        self.chatHandlerNumRetries = chatHandlerNumRetries
        self.awaitSegmentAvailabilityTimeoutMillis = awaitSegmentAvailabilityTimeoutMillis
    }
}
0 commit comments