Skip to content

Commit 814c859

Browse files
authored
Merge pull request #294 from agrare/map_state_item_batcher
Add Map ItemBatcher/ItemSelector
2 parents dbe4b7d + eb89144 commit 814c859

File tree

5 files changed

+386
-4
lines changed

5 files changed

+386
-4
lines changed

lib/floe.rb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
require_relative "floe/workflow/choice_rule/and"
2121
require_relative "floe/workflow/choice_rule/data"
2222
require_relative "floe/workflow/context"
23+
require_relative "floe/workflow/item_batcher"
2324
require_relative "floe/workflow/item_processor"
2425
require_relative "floe/workflow/intrinsic_function"
2526
require_relative "floe/workflow/intrinsic_function/parser"

lib/floe/workflow/item_batcher.rb

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
# frozen_string_literal: true
2+
3+
module Floe
  class Workflow
    # Splits the items handed to a Map state into batches.
    #
    # Built from the Map state's "ItemBatcher" payload. Each batch produced by
    # #value is a Hash containing the evaluated "BatchInput" (when given) plus
    # an "Items" array holding that batch's slice of the input.
    #
    # "MaxInputBytesPerBatch" / "MaxInputBytesPerBatchPath" are parsed and
    # validated here but are not consulted when batching.
    class ItemBatcher
      include ValidationMixin

      attr_reader :name, :batch_input, :max_items_per_batch, :max_items_per_batch_path, :max_input_bytes_per_batch, :max_input_bytes_per_batch_path

      # @param payload [Hash] the "ItemBatcher" definition
      # @param name [Array<String>] name path of this element, e.g. ["Map", "ItemBatcher"]
      # Raises through the validation helpers when the payload is invalid.
      def initialize(payload, name)
        @name = name

        batch_input_payload = payload["BatchInput"]
        @batch_input = PayloadTemplate.new(batch_input_payload) if batch_input_payload

        @max_items_per_batch       = payload["MaxItemsPerBatch"]
        @max_input_bytes_per_batch = payload["MaxInputBytesPerBatch"]

        @max_items_per_batch_path       = optional_reference_path(payload, "MaxItemsPerBatchPath")
        @max_input_bytes_per_batch_path = optional_reference_path(payload, "MaxInputBytesPerBatchPath")

        validate!
      end

      # Groups +input+ into batches.
      #
      # @param context the workflow context passed through to path/template evaluation
      # @param input [Enumerable] the items to batch
      # @param state_input the input that "BatchInput" and "MaxItemsPerBatchPath"
      #   are evaluated against; falls back to +input+ when omitted
      # @return [Array<Hash>] one Hash per batch, each with an "Items" key
      def value(context, input, state_input = nil)
        state_input ||= input

        batch_template = batch_input ? batch_input.value(context, state_input) : {}
        batch_size     = max_items(context, state_input)

        input.each_slice(batch_size).map { |items| batch_template.merge("Items" => items) }
      end

      private

      # Wraps payload[key] in a ReferencePath, or returns nil when the key is absent.
      def optional_reference_path(payload, key)
        path = payload[key]
        ReferencePath.new(path) if path
      end

      # True only for Integers greater than zero.
      def positive_integer?(candidate)
        candidate.kind_of?(Integer) && candidate.positive?
      end

      # Batch size: the static "MaxItemsPerBatch", otherwise the value that
      # "MaxItemsPerBatchPath" points at in +state_input+.
      def max_items(context, state_input)
        return max_items_per_batch if max_items_per_batch
        return unless max_items_per_batch_path

        resolved = max_items_per_batch_path.value(context, state_input)
        return resolved if positive_integer?(resolved)

        # NOTE(review): runtime_field_error! presumably raises on its own, like the
        # other bang helpers used in validate! -- confirm; the leading raise is kept as-is.
        raise runtime_field_error!("MaxItemsPerBatchPath", resolved, "must be a positive integer")
      end

      # Checks the static parts of the definition at construction time.
      def validate!
        if max_items_per_batch.nil? && max_items_per_batch_path.nil?
          parser_error!("must have one of \"MaxItemsPerBatch\", \"MaxItemsPerBatchPath\"")
        end

        if max_items_per_batch && max_items_per_batch_path
          parser_error!("must not specify both \"MaxItemsPerBatch\" and \"MaxItemsPerBatchPath\"")
        end
        if max_input_bytes_per_batch && max_input_bytes_per_batch_path
          parser_error!("must not specify both \"MaxInputBytesPerBatch\" and \"MaxInputBytesPerBatchPath\"")
        end

        if max_items_per_batch && !positive_integer?(max_items_per_batch)
          invalid_field_error!("MaxItemsPerBatch", max_items_per_batch, "must be a positive integer")
        end
        if max_input_bytes_per_batch && !positive_integer?(max_input_bytes_per_batch)
          invalid_field_error!("MaxInputBytesPerBatch", max_input_bytes_per_batch, "must be a positive integer")
        end
      end
    end
  end
end

lib/floe/workflow/states/map.rb

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,8 @@ def initialize(workflow, name, payload)
3131
@item_processor = ItemProcessor.new(payload["ItemProcessor"], name)
3232
@items_path = ReferencePath.new(payload.fetch("ItemsPath", "$"))
3333
@item_reader = payload["ItemReader"]
34-
@item_selector = payload["ItemSelector"]
35-
@item_batcher = payload["ItemBatcher"]
34+
@item_selector = PayloadTemplate.new(payload["ItemSelector"]) if payload["ItemSelector"]
35+
@item_batcher = ItemBatcher.new(payload["ItemBatcher"], name + ["ItemBatcher"]) if payload["ItemBatcher"]
3636
@result_writer = payload["ResultWriter"]
3737
@max_concurrency = payload["MaxConcurrency"]&.to_i
3838
@tolerated_failure_percentage = payload["ToleratedFailurePercentage"]&.to_i
@@ -43,15 +43,30 @@ def initialize(workflow, name, payload)
4343

4444
# Builds the list the Map state iterates over.
#
# Takes the input produced by the parent implementation (super), narrows it
# with ItemsPath and, when an ItemBatcher is configured, groups the result
# into batches. The batcher also receives context.state["Input"] so its
# path/template fields can be evaluated against it.
#
# @param context the workflow context for the running state
# @return the selected items, or the batches built from them
def process_input(context)
  state_input = super
  items       = items_path.value(context, state_input)
  return items unless item_batcher

  item_batcher.value(context, items, context.state["Input"])
end
4850

4951
# Starts the Map state and prepares one child context per item.
#
# Each child context carries the parent execution id and a "Map" => "Item"
# entry holding the item's index and value. When an ItemSelector is
# configured it is evaluated with that child context and the state's
# "Input" to produce the child's input; otherwise the item itself is used.
# The resulting contexts are stored, as Hashes, under
# context.state["ItemProcessorContext"].
#
# @param context the workflow context for the running state
def start(context)
  super

  items = process_input(context)

  context.state["ItemProcessorContext"] = items.each_with_index.map do |item, index|
    # A fresh hash per item, so no two child contexts share structure.
    child_context = {
      "Execution" => {"Id" => context.execution["Id"]},
      "Map"       => {"Item" => {"Index" => index, "Value" => item}}
    }

    child_input = item_selector ? item_selector.value(child_context, context.state["Input"]) : item

    Context.new(child_context, :input => child_input.to_json).to_h
  end
end
5671

5772
def end?

spec/workflow/item_batcher_spec.rb

Lines changed: 208 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,208 @@
1+
# Specs for Floe::Workflow::ItemBatcher: payload validation in #initialize and
# batching behaviour in #value. Examples covering the MaxInputBytesPerBatch*
# fields are marked pending.
RSpec.describe Floe::Workflow::ItemBatcher do
  subject { described_class.new(payload, batcher_name) }

  # Name path given to every ItemBatcher under test.
  let(:batcher_name) { ["Map", "ItemBatcher"] }

  describe "#initialize" do
    context "with no MaxItems or MaxInputBytes" do
      let(:payload) { {} }

      it "raises an exception" do
        expect { subject }
          .to raise_error(
            Floe::InvalidWorkflowError,
            "Map.ItemBatcher must have one of \"MaxItemsPerBatch\", \"MaxItemsPerBatchPath\""
          )
      end
    end

    context "with a BatchInput field" do
      let(:payload) { {"BatchInput" => "foo", "MaxItemsPerBatch" => 10} }

      it "returns an ItemBatcher" do
        expect(subject).to be_kind_of(described_class)
      end

      it "sets the BatchInput to a PayloadTemplate" do
        expect(subject.batch_input).to be_kind_of(Floe::Workflow::PayloadTemplate)
      end
    end

    context "with MaxItemsPerBatch" do
      let(:payload) { {"MaxItemsPerBatch" => 10} }

      it "returns an ItemBatcher" do
        expect(subject).to be_kind_of(described_class)
      end

      it "sets max_items_per_batch" do
        expect(subject.max_items_per_batch).to eq(payload["MaxItemsPerBatch"])
      end

      context "that is an invalid value" do
        it "raises an exception" do
          {
            0   => "Map.ItemBatcher field \"MaxItemsPerBatch\" value \"0\" must be a positive integer",
            -1  => "Map.ItemBatcher field \"MaxItemsPerBatch\" value \"-1\" must be a positive integer",
            2.5 => "Map.ItemBatcher field \"MaxItemsPerBatch\" value \"2.5\" must be a positive integer",
            "1" => "Map.ItemBatcher field \"MaxItemsPerBatch\" value \"1\" must be a positive integer"
          }.each do |invalid_value, message|
            expect { described_class.new({"MaxItemsPerBatch" => invalid_value}, batcher_name) }
              .to raise_error(Floe::InvalidWorkflowError, message)
          end
        end
      end
    end

    context "with MaxInputBytesPerBatch" do
      let(:payload) { {"MaxInputBytesPerBatch" => 1_024} }

      it "returns an ItemBatcher" do
        pending "implement MaxInputBytesPerBatch"
        expect(subject).to be_kind_of(described_class)
      end

      it "sets max_input_bytes_per_batch" do
        pending "implement MaxInputBytesPerBatch"

        expect(subject.max_input_bytes_per_batch).to eq(payload["MaxInputBytesPerBatch"])
      end

      context "that is an invalid value" do
        it "raises an exception" do
          pending "implement MaxInputBytesPerBatch"

          {
            0   => "Map.ItemBatcher field \"MaxInputBytesPerBatch\" value \"0\" must be a positive integer",
            -1  => "Map.ItemBatcher field \"MaxInputBytesPerBatch\" value \"-1\" must be a positive integer",
            2.5 => "Map.ItemBatcher field \"MaxInputBytesPerBatch\" value \"2.5\" must be a positive integer",
            "1" => "Map.ItemBatcher field \"MaxInputBytesPerBatch\" value \"1\" must be a positive integer"
          }.each do |invalid_value, message|
            expect { described_class.new({"MaxInputBytesPerBatch" => invalid_value}, batcher_name) }
              .to raise_error(Floe::InvalidWorkflowError, message)
          end
        end
      end
    end

    context "with MaxItemsPerBatchPath" do
      let(:payload) { {"MaxItemsPerBatchPath" => "$.maxBatchItems"} }

      it "returns an ItemBatcher" do
        expect(subject).to be_kind_of(described_class)
      end

      it "sets max_items_per_batch_path" do
        expect(subject.max_items_per_batch_path).to be_kind_of(Floe::Workflow::ReferencePath)
        expect(subject.max_items_per_batch_path).to have_attributes(:path => ["maxBatchItems"])
      end
    end

    context "with MaxInputBytesPerBatchPath" do
      let(:payload) { {"MaxInputBytesPerBatchPath" => "$.batchSize"} }

      it "returns an ItemBatcher" do
        pending "implement MaxInputBytesPerBatchPath"

        expect(subject).to be_kind_of(described_class)
      end

      it "sets max_input_bytes_per_batch_path" do
        pending "implement MaxInputBytesPerBatchPath"

        expect(subject.max_input_bytes_per_batch_path).to be_kind_of(Floe::Workflow::ReferencePath)
        expect(subject.max_input_bytes_per_batch_path).to have_attributes(:path => ["batchSize"])
      end
    end

    context "with MaxItemsPerBatch and MaxItemsPerBatchPath" do
      let(:payload) { {"MaxItemsPerBatch" => 10, "MaxItemsPerBatchPath" => "$.maxBatchItems"} }

      it "raises an exception" do
        expect { subject }
          .to raise_error(Floe::InvalidWorkflowError, "Map.ItemBatcher must not specify both \"MaxItemsPerBatch\" and \"MaxItemsPerBatchPath\"")
      end
    end

    context "with MaxInputBytesPerBatch and MaxInputBytesPerBatchPath" do
      let(:payload) { {"MaxInputBytesPerBatch" => 1_024, "MaxInputBytesPerBatchPath" => "$.batchSize"} }

      it "raises an exception" do
        pending "implement MaxInputBytesPerBatchPath"

        expect { subject }
          .to raise_error(Floe::InvalidWorkflowError, "Map.ItemBatcher must not specify both \"MaxInputBytesPerBatch\" and \"MaxInputBytesPerBatchPath\"")
      end
    end
  end

  describe "#value" do
    let(:context) { {} }
    let(:input)   { %w[a b c d e] }

    # The five input items split into slices of at most two.
    let(:batches_of_two) do
      [
        {"Items" => %w[a b]},
        {"Items" => %w[c d]},
        {"Items" => %w[e]}
      ]
    end

    context "with MaxItemsPerBatch" do
      let(:payload) { {"MaxItemsPerBatch" => 2} }

      it "returns in batches of 2" do
        expect(subject.value(context, input)).to eq(batches_of_two)
      end
    end

    context "with MaxItemsPerBatchPath" do
      let(:payload)     { {"MaxItemsPerBatchPath" => "$.batchSize"} }
      let(:state_input) { {"batchSize" => 2, "items" => input} }

      it "returns in batches of 2" do
        expect(subject.value(context, input, state_input)).to eq(batches_of_two)
      end

      context "with an invalid value in input" do
        it "raises an exception" do
          {
            0   => "Map.ItemBatcher field \"MaxItemsPerBatchPath\" value \"0\" must be a positive integer",
            -1  => "Map.ItemBatcher field \"MaxItemsPerBatchPath\" value \"-1\" must be a positive integer",
            2.5 => "Map.ItemBatcher field \"MaxItemsPerBatchPath\" value \"2.5\" must be a positive integer",
            "1" => "Map.ItemBatcher field \"MaxItemsPerBatchPath\" value \"1\" must be a positive integer"
          }.each do |invalid_size, message|
            expect { subject.value(context, input, {"batchSize" => invalid_size, "items" => input}) }
              .to raise_error(Floe::ExecutionError, message)
          end
        end
      end
    end

    context "with MaxInputBytesPerBatch" do
      let(:payload) { {"MaxInputBytesPerBatch" => 1_024} }

      it "returns in batches of 2" do
        pending "support max bytes per batch"

        expect(subject.value(context, input)).to eq(batches_of_two)
      end
    end

    context "with MaxInputBytesPerBatchPath" do
      let(:payload)     { {"MaxInputBytesPerBatchPath" => "$.bytesPerBatch"} }
      let(:state_input) { {"bytesPerBatch" => 1_024, "items" => input} }

      it "returns in batches of 2" do
        pending "support max bytes per batch"

        expect(subject.value(context, input, state_input)).to eq(batches_of_two)
      end

      context "with an invalid value in input" do
        let(:state_input) { {"bytesPerBatch" => 0, "items" => input} }

        it "raises an exception" do
          pending "support max bytes per batch"

          expect { subject.value(context, input, state_input) }
            .to raise_error(Floe::ExecutionError, "Map.ItemBatcher field \"MaxInputBytesPerBatchPath\" value \"0\" must be a positive integer")
        end
      end
    end

    context "with BatchInput" do
      let(:payload)     { {"BatchInput" => {"foo.$" => "$.bar"}, "MaxItemsPerBatch" => 2} }
      let(:state_input) { {"bar" => "bar", "items" => input} }

      it "merges BatchInput with payloads" do
        expect(subject.value(context, input, state_input)).to eq(
          [
            {"foo" => "bar", "Items" => %w[a b]},
            {"foo" => "bar", "Items" => %w[c d]},
            {"foo" => "bar", "Items" => %w[e]}
          ]
        )
      end
    end
  end
end

0 commit comments

Comments
 (0)