Skip to content

Commit 2ec34d7

Browse files
authored
Allow csv_collection to pass options to CSV parser (#1001)
1 parent 705b8b2 commit 2ec34d7

File tree

10 files changed

+89
-13
lines changed

10 files changed

+89
-13
lines changed

README.md

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -191,8 +191,8 @@ module Maintenance
191191
end
192192
```
193193

194+
`posts.csv`:
194195
```csv
195-
# posts.csv
196196
title,content
197197
My Title,Hello World!
198198
```
@@ -211,6 +211,38 @@ def count(task)
211211
end
212212
```
213213

214+
#### CSV options
215+
216+
Tasks can pass [options for Ruby's CSV parser][csv-parse-options] by adding
217+
keyword arguments to `csv_collection`:
218+
219+
[csv-parse-options]: https://ruby-doc.org/3.3.0/stdlibs/csv/CSV.html#class-CSV-label-Options+for+Parsing
220+
221+
```ruby
222+
# app/tasks/maintenance/import_posts_task.rb
223+
224+
module Maintenance
225+
class ImportPostsTask < MaintenanceTasks::Task
226+
csv_collection(skip_lines: /^#/, converters: ->(field) { field.strip })
227+
228+
def process(row)
229+
Post.create!(title: row["title"], content: row["content"])
230+
end
231+
end
232+
end
233+
```
234+
235+
These options instruct Ruby's CSV parser to skip lines that start with a `#`,
236+
and to remove the leading and trailing spaces from any field, so that the
237+
following file will be processed identically to the previous example:
238+
239+
`posts.csv`:
240+
```csv
241+
# A comment
242+
title,content
243+
My Title ,Hello World!
244+
```
245+
214246
#### Batch CSV Tasks
215247

216248
Tasks can process CSVs in batches. Add the `in_batches` option to your task’s

app/models/maintenance_tasks/batch_csv_collection_builder.rb

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,16 +12,17 @@ class BatchCsvCollectionBuilder < CsvCollectionBuilder
1212
# Initialize a BatchCsvCollectionBuilder with a batch size.
1313
#
1414
# @param batch_size [Integer] the number of CSV rows in a batch.
15-
def initialize(batch_size)
15+
# @param csv_options [Hash] options to pass to the CSV parser.
16+
def initialize(batch_size, **csv_options)
1617
@batch_size = batch_size
17-
super()
18+
super(**csv_options)
1819
end
1920

2021
# Defines the collection to be iterated over, based on the provided CSV.
2122
# Includes the CSV and the batch size.
2223
def collection(task)
2324
BatchCsv.new(
24-
csv: CSV.new(task.csv_content, headers: true),
25+
csv: CSV.new(task.csv_content, **@csv_options),
2526
batch_size: @batch_size,
2627
)
2728
end

app/models/maintenance_tasks/csv_collection_builder.rb

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,18 @@
55
module MaintenanceTasks
66
# Strategy for building a Task that processes CSV files.
77
#
8+
# @param csv_options [Hash] options to pass to the CSV parser.
89
# @api private
910
class CsvCollectionBuilder
11+
def initialize(**csv_options)
12+
@csv_options = csv_options
13+
end
14+
1015
# Defines the collection to be iterated over, based on the provided CSV.
1116
#
12-
# @return [CSV] the CSV object constructed from the specified CSV content,
13-
# with headers.
17+
# @return [CSV] the CSV object constructed from the specified CSV content.
1418
def collection(task)
15-
CSV.new(task.csv_content, headers: true)
19+
CSV.new(task.csv_content, **@csv_options)
1620
end
1721

1822
# The number of rows to be processed.
@@ -21,7 +25,7 @@ def collection(task)
2125
#
2226
# @return [Integer] the approximate number of rows to process.
2327
def count(task)
24-
CSV.new(task.csv_content, headers: true).count
28+
CSV.new(task.csv_content, **@csv_options).count
2529
end
2630

2731
# Return that the Task processes CSV content.

app/models/maintenance_tasks/task.rb

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -65,20 +65,24 @@ def available_tasks
6565
# Make this Task a task that handles CSV.
6666
#
6767
# @param in_batches [Integer] optionally, supply a batch size if the CSV
68-
# should be processed in batches.
68+
# should be processed in batches.
69+
# @param csv_options [Hash] optionally, supply options for the CSV parser.
70+
# The <code>headers</code> option defaults to <code>true</code> unless it is given explicitly.
71+
# @see https://ruby-doc.org/3.3.0/stdlibs/csv/CSV.html#class-CSV-label-Options+for+Parsing
6972
#
7073
# An input to upload a CSV will be added in the form to start a Run. The
7174
# collection and count method are implemented.
72-
def csv_collection(in_batches: nil)
75+
def csv_collection(in_batches: nil, **csv_options)
7376
unless defined?(ActiveStorage)
7477
raise NotImplementedError, "Active Storage needs to be installed\n" \
7578
"To resolve this issue run: bin/rails active_storage:install"
7679
end
7780

81+
csv_options[:headers] = true unless csv_options.key?(:headers)
7882
self.collection_builder_strategy = if in_batches
79-
BatchCsvCollectionBuilder.new(in_batches)
83+
BatchCsvCollectionBuilder.new(in_batches, **csv_options)
8084
else
81-
CsvCollectionBuilder.new
85+
CsvCollectionBuilder.new(**csv_options)
8286
end
8387
end
8488

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# frozen_string_literal: true
2+
3+
module Maintenance
4+
class ImportPostsWithOptionsTask < MaintenanceTasks::Task
5+
csv_collection(skip_lines: /^#/, converters: ->(field) { field.upcase })
6+
7+
def process(row)
8+
Post.create!(title: row["title"], content: row["content"])
9+
end
10+
end
11+
end

test/models/maintenance_tasks/csv_collection_builder_test.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ class CsvCollectionBuilderTest < ActiveSupport::TestCase
2626
3
2727
CSV
2828

29-
assert_equal(3, @builder.count(@task))
29+
assert_equal(4, @builder.count(@task))
3030
end
3131

3232
test "#has_csv_content?" do

test/models/maintenance_tasks/csv_task_test.rb

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,27 @@ class CsvTaskTest < ActiveSupport::TestCase
1919
assert_equal "Hello World 1!", first_row["content"]
2020
end
2121

22+
test ".collection passes options to the CSV parser" do
23+
csv_file = file_fixture("sample.csv")
24+
csv = csv_file.read
25+
csv.prepend("# Comment\n")
26+
csv.concat("# Another comment\n")
27+
28+
csv_task = Maintenance::ImportPostsWithOptionsTask.new
29+
csv_task.csv_content = csv
30+
collection = csv_task.collection
31+
32+
assert CSV, collection.class
33+
assert collection.headers
34+
35+
all_rows = collection.to_a
36+
assert_equal 5, all_rows.count
37+
38+
first_row = all_rows.first
39+
assert_equal "MY TITLE 1", first_row["title"]
40+
assert_equal "HELLO WORLD 1!", first_row["content"]
41+
end
42+
2243
test ".count returns the number of rows to process, excluding headers and assuming a trailing newline" do
2344
csv_file = file_fixture("sample.csv")
2445

test/models/maintenance_tasks/task_data_index_test.rb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ class TaskDataIndexTest < ActiveSupport::TestCase
1313
"Maintenance::EnqueueErrorTask",
1414
"Maintenance::ErrorTask",
1515
"Maintenance::ImportPostsTask",
16+
"Maintenance::ImportPostsWithOptionsTask",
1617
"Maintenance::Nested::NestedMore::NestedMoreTask",
1718
"Maintenance::Nested::NestedTask",
1819
"Maintenance::NoCollectionTask",

test/models/maintenance_tasks/task_test.rb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ class TaskTest < ActiveSupport::TestCase
1313
"Maintenance::EnqueueErrorTask",
1414
"Maintenance::ErrorTask",
1515
"Maintenance::ImportPostsTask",
16+
"Maintenance::ImportPostsWithOptionsTask",
1617
"Maintenance::Nested::NestedMore::NestedMoreTask",
1718
"Maintenance::Nested::NestedTask",
1819
"Maintenance::NoCollectionTask",

test/system/maintenance_tasks/tasks_test.rb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ class TasksTest < ApplicationSystemTestCase
2828
"Maintenance::CustomEnumeratingTask\nNew",
2929
"Maintenance::EnqueueErrorTask\nNew",
3030
"Maintenance::ErrorTask\nNew",
31+
"Maintenance::ImportPostsWithOptionsTask\nNew",
3132
"Maintenance::Nested::NestedMore::NestedMoreTask\nNew",
3233
"Maintenance::Nested::NestedTask\nNew",
3334
"Maintenance::ParamsTask\nNew",

0 commit comments

Comments
 (0)