Skip to content

Commit ab85dd2

Browse files
authored
[Data] Remove LazyBlockList (ray-project#46054)
After ray-project#45860, LazyBlockList is dead code. Signed-off-by: Balaji Veeramani <balaji@anyscale.com>
1 parent 367dcb6 commit ab85dd2

File tree

4 files changed

+3
-528
lines changed

4 files changed

+3
-528
lines changed

python/ray/data/_internal/block_list.py

Lines changed: 2 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -8,9 +8,7 @@
88
class BlockList:
99
"""A list of blocks that may be computed or pending computation.
1010
11-
In the basic version of BlockList, all blocks are known ahead of time. In
12-
LazyBlockList, blocks are not yet computed, so the number of blocks may
13-
change after execution due to block splitting.
11+
All blocks are known ahead of time
1412
"""
1513

1614
def __init__(
@@ -69,7 +67,6 @@ def get_blocks(self) -> List[ObjectRef[Block]]:
6967
The length of this iterator is not known until execution.
7068
"""
7169
self._check_if_cleared()
72-
# Overriden in LazyBlockList for bulk evaluation.
7370
return list(self._blocks)
7471

7572
def get_blocks_with_metadata(self) -> List[Tuple[ObjectRef[Block], BlockMetadata]]:
@@ -78,7 +75,7 @@ def get_blocks_with_metadata(self) -> List[Tuple[ObjectRef[Block], BlockMetadata
7875
Prefer calling this instead of the iter form for performance if you
7976
don't need lazy evaluation.
8077
"""
81-
self.get_blocks() # Force bulk evaluation in LazyBlockList.
78+
self.get_blocks()
8279
return list(self.iter_blocks_with_metadata())
8380

8481
def iter_blocks_with_metadata(

python/ray/data/_internal/execution/legacy_compat.py

Lines changed: 0 additions & 59 deletions
Original file line number | Diff line number | Diff line change
@@ -11,21 +11,11 @@
1111
PhysicalOperator,
1212
RefBundle,
1313
)
14-
from ray.data._internal.lazy_block_list import LazyBlockList
15-
from ray.data._internal.logical.interfaces.logical_plan import LogicalPlan
16-
from ray.data._internal.logical.operators.read_operator import Read
1714
from ray.data._internal.logical.optimizers import get_execution_plan
18-
from ray.data._internal.logical.rules.set_read_parallelism import (
19-
compute_additional_split_factor,
20-
)
2115
from ray.data._internal.logical.util import record_operators_usage
2216
from ray.data._internal.plan import ExecutionPlan
23-
from ray.data._internal.planner.plan_read_op import (
24-
apply_output_blocks_handling_to_read_task,
25-
)
2617
from ray.data._internal.stats import DatasetStats
2718
from ray.data.block import Block, BlockMetadata, List
28-
from ray.data.context import DataContext
2919
from ray.types import ObjectRef
3020

3121
# Warn about tasks larger than this.
@@ -111,55 +101,6 @@ def execute_to_legacy_block_list(
111101
return block_list
112102

113103

114-
def get_legacy_lazy_block_list_read_only(
115-
plan: ExecutionPlan,
116-
) -> LazyBlockList:
117-
"""For a read-only plan, construct a LazyBlockList with ReadTasks from the
118-
input Datasource or Reader. Note that the plan and the underlying ReadTasks
119-
are not executed, only their known metadata is fetched.
120-
121-
Args:
122-
plan: The legacy plan to execute.
123-
124-
Returns:
125-
The output as a legacy LazyBlockList.
126-
"""
127-
assert plan.is_read_only(), "This function only supports read-only plans."
128-
assert isinstance(plan._logical_plan, LogicalPlan)
129-
read_logical_op = plan._logical_plan.dag
130-
assert isinstance(read_logical_op, Read)
131-
132-
# In the full dataset execution, the logic in ApplyAdditionalSplitToOutputBlocks
133-
# is normally executed as part of the MapOperator created in the
134-
# LogicalPlan -> PhysicalPlan plan translation. In this case, since we
135-
# get the ReadTasks directly from the Datasource or Reader,
136-
# we need to manually apply this logic in order to update the ReadTasks.
137-
ctx = DataContext.get_current()
138-
(parallelism, _, estimated_num_blocks, k,) = compute_additional_split_factor(
139-
read_logical_op._datasource_or_legacy_reader,
140-
read_logical_op._parallelism,
141-
read_logical_op._mem_size,
142-
ctx.target_max_block_size,
143-
cur_additional_split_factor=None,
144-
)
145-
read_tasks = read_logical_op._datasource_or_legacy_reader.get_read_tasks(
146-
parallelism
147-
)
148-
for read_task in read_tasks:
149-
apply_output_blocks_handling_to_read_task(read_task, k)
150-
151-
block_list = LazyBlockList(
152-
read_tasks,
153-
read_logical_op.name,
154-
ray_remote_args=read_logical_op._ray_remote_args,
155-
owned_by_consumer=False,
156-
)
157-
# Update the estimated number of blocks after applying optimizations
158-
# and fetching metadata (e.g. SetReadParallelismRule).
159-
block_list._estimated_num_blocks = estimated_num_blocks
160-
return block_list
161-
162-
163104
def _get_execution_dag(
164105
executor: Executor,
165106
plan: ExecutionPlan,

0 commit comments

Comments
 (0)