Skip to content

java.lang.UnsupportedOperationException thrown while working with query that has lot of aggregates, including array and map operations #16158

Open
@anupam-pamboo

Description

@anupam-pamboo

Exception 1:

java.lang.UnsupportedOperationException: com.facebook.presto.spi.block.VariableWidthBlockBuilder
at com.facebook.presto.spi.block.BlockBuilder.appendStructureInternal(BlockBuilder.java:103)
at com.facebook.presto.spi.block.SingleArrayBlockWriter.appendStructureInternal(SingleArrayBlockWriter.java:111)
at com.facebook.presto.spi.block.AbstractArrayBlock.writePositionTo(AbstractArrayBlock.java:161)
at com.facebook.presto.spi.type.ArrayType.appendTo(ArrayType.java:158)
at com.facebook.presto.operator.aggregation.minmaxby.KeyAndBlockPositionValueStateSerializer.serialize(KeyAndBlockPositionValueStateSerializer.java:71)
at com.facebook.presto.operator.aggregation.minmaxby.KeyAndBlockPositionValueStateSerializer.serialize(KeyAndBlockPositionValueStateSerializer.java:30)
at com.facebook.presto.$gen.Array_varchar_Array_varchar_IntegerMaxByGroupedAccumulator_20210514_091030_848.evaluateIntermediate(Unknown Source)
at com.facebook.presto.operator.aggregation.builder.InMemoryHashAggregationBuilder$Aggregator.evaluate(InMemoryHashAggregationBuilder.java:444)
at com.facebook.presto.operator.aggregation.builder.InMemoryHashAggregationBuilder.lambda$buildResult$3(InMemoryHashAggregationBuilder.java:302)
at com.facebook.presto.operator.WorkProcessorUtils$ProcessWorkProcessor.process(WorkProcessorUtils.java:315)
at com.facebook.presto.operator.HashAggregationOperator.getOutput(HashAggregationOperator.java:480)
at com.facebook.presto.operator.Driver.processInternal(Driver.java:379)
at com.facebook.presto.operator.Driver.lambda$processFor$8(Driver.java:283)
at com.facebook.presto.operator.Driver.tryWithLock(Driver.java:675)
at com.facebook.presto.operator.Driver.processFor(Driver.java:276)
at com.facebook.presto.execution.SqlTaskExecution$DriverSplitRunner.processFor(SqlTaskExecution.java:1077)
at com.facebook.presto.execution.executor.PrioritizedSplitRunner.process(PrioritizedSplitRunner.java:162)
at com.facebook.presto.execution.executor.TaskExecutor$TaskRunner.run(TaskExecutor.java:483)
at com.facebook.presto.$gen.Presto_0_227____20210514_085209_1.run(Unknown Source)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)

Exception 2:
java.lang.UnsupportedOperationException: com.facebook.presto.spi.block.LongArrayBlockBuilder
at com.facebook.presto.spi.block.BlockBuilder.beginBlockEntry(BlockBuilder.java:67)
at com.facebook.presto.$gen.LongLongStateSerializer_20210514_101458_946.serialize(Unknown Source)
at com.facebook.presto.$gen.BigintBigintIntegerMaxByGroupedAccumulator_20210514_101458_949.evaluateIntermediate(Unknown Source)
at com.facebook.presto.operator.aggregation.builder.InMemoryHashAggregationBuilder$Aggregator.evaluate(InMemoryHashAggregationBuilder.java:444)
at com.facebook.presto.operator.aggregation.builder.InMemoryHashAggregationBuilder.lambda$buildResult$3(InMemoryHashAggregationBuilder.java:302)
at com.facebook.presto.operator.WorkProcessorUtils$ProcessWorkProcessor.process(WorkProcessorUtils.java:315)
at com.facebook.presto.operator.HashAggregationOperator.getOutput(HashAggregationOperator.java:480)
at com.facebook.presto.operator.Driver.processInternal(Driver.java:379)
at com.facebook.presto.operator.Driver.lambda$processFor$8(Driver.java:283)
at com.facebook.presto.operator.Driver.tryWithLock(Driver.java:675)
at com.facebook.presto.operator.Driver.processFor(Driver.java:276)
at com.facebook.presto.execution.SqlTaskExecution$DriverSplitRunner.processFor(SqlTaskExecution.java:1077)
at com.facebook.presto.execution.executor.PrioritizedSplitRunner.process(PrioritizedSplitRunner.java:162)
at com.facebook.presto.execution.executor.TaskExecutor$TaskRunner.run(TaskExecutor.java:483)
at com.facebook.presto.$gen.Presto_0_227____20210514_085211_1.run(Unknown Source)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)

Query:

WITH base_data as
(
SELECT player_id
, registration_date
, country_name
, operating_system_original
, current_date

FROM table_a
WHERE pp.registration_date <= <current_date>
)

, interim_data as
(
SELECT
current_date
, pds.player_id

, MAX_BY(a, pds.processdate) as a
, MAX_BY(b, pds.processdate) as b
, MAX_BY(c, pds.processdate) as c
, MAX_BY(d, pds.processdate) as d
, MAX_BY(e, pds.processdate) as e
, MAX_BY(f, pds.processdate) as f
, MAX_BY(g, pds.processdate) as g
, MAX_BY(h, pds.processdate) as h
, MAX_BY(i, pds.processdate) as i

, MAX_BY(clan_id_array, pds.processdate) as clan_id_array
, MAX_BY(clan_role_array, pds.processdate) as clan_role_array

, MAX_BY(j, pds.processdate) as j
, MAX_BY(k, pds.processdate) as k
, MAX_BY(l, pds.processdate) as l
, MAX_BY(m, pds.processdate) as m
, MAX_BY(n, pds.processdate) as n
, MAX_BY(o, pds.processdate) as o
, MAX_BY(p, pds.processdate) as p
, MAX_BY(q, pds.processdate) as q
, MAX_BY(r, pds.processdate) as r
, MAX_BY(s, pds.processdate) as s
, MAX_BY(t, pds.processdate) as t

, SUM(u) as u
, SUM(v) as v
, SUM(w) as w
, SUM(x) as x
, SUM(y) as y
, SUM(z) as z

, SUM(element_at(a1, 'normal')) as normal_a1
, SUM(element_at(a1, 'rainbow')) as rainbow_a1

, SUM(element_at(b1, 'normal')) as normal_b1
, SUM(element_at(b1, 'rainbow')) as rainbow_b1

, SUM(element_at(c1, 'normal')) as normal_c1
, SUM(element_at(c1, 'rainbow')) as rainbow_c1

, SUM(element_at(d1, 'normal')) as normal_d1
, SUM(element_at(d1, 'rainbow')) as rainbow_d1

, SUM(element_at(e1, 'normal')) as normal_e1
, SUM(element_at(e1, 'rainbow')) as rainbow_e1

, SUM(element_at(f1, 'normal')) as normal_f1
, SUM(element_at(f1, 'rainbow')) as rainbow_f1

, SUM(element_at(g1, 'normal')) as normal_g1
, SUM(element_at(g1, 'rainbow')) as rainbow_g1

, SUM(element_at(h1, 'normal')) as normal_h1
, SUM(element_at(h1, 'rainbow')) as rainbow_h1

, SUM(element_at(i1, 'normal')) as normal_i1
, SUM(element_at(i1, 'rainbow')) as rainbow_i1

, SUM(element_at(j1, 'normal')) as normal_j1
, SUM(element_at(j1, 'rainbow')) as rainbow_j1

, SUM(element_at(k1, 'normal')) as normal_k1
, SUM(element_at(k1, 'rainbow')) as rainbow_k1

FROM table_b
WHERE processdate <= <current_date>
GROUP BY 2,1
)

SELECT player_id
, registration_date
, country_name
, operating_system_original
, current_date

, a
, b
, c
, d
, e
, f
, g
, h
, i

, clan_id_array
, clan_role_array

, j
, k
, l
, m
, n
, o
, p
, q
, r
, s
, t

, u
, v
, w
, x
, y
, z

, map_from_entries(ARRAY[
('normal_a1', normal_a1)
, ('rainbow_a1', rainbow_a1)
, ('normal_b1', normal_b1)
, ('normal_b1', normal_b1)
, ('normal_c1', normal_c1)
, ('rainbow_c1', rainbow_c1)
, ('normal_d1', normal_d1)
, ('rainbow_d1', rainbow_d1)
, ('normal_e1', normal_e1)
, ('rainbow_e1', rainbow_e1)
, ('normal_f1', normal_f1)
, ('rainbow_f1', rainbow_f1)
, ('normal_g1', normal_g1)
, ('rainbow_g1', rainbow_g1)
, ('normal_h1', normal_h1)
, ('rainbow_h1', rainbow_h1)
, ('normal_i1', normal_i1)
, ('rainbow_i1', rainbow_i1)
, ('normal_j1', normal_j1)
, ('rainbow_j1', rainbow_j1)
, ('normal_k1', normal_k1)
, ('rainbow_k1', rainbow_k1)
]) as map_var

, bd.processdate

FROM base_data as bd
LEFT JOIN interim_data as id
ON bd.player_id=id.player_id

Query Comments
There are two tables:

  1. Table_a = This contains all the registered players. Player Snapshot table
  2. Table_b = This contains , on daily basis, all the relevant metrics for each player for that particular day. On any given day this table has 1 million rows
  3. The clan_id_array field of table_b contains all the unique clans for which a player was part of in a particular day. Each clan_id is a 36 character long string. Example, "f4becce9-13a7-4b53-8536-8de36885bd9d". A player can be part of multiple clans in any day.

The query above, given a current_date, scan over all the entries in table_b, and finds the most recent value of some metrics and sum of other metrics, until the current date.

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions