-
Notifications
You must be signed in to change notification settings - Fork 22
Expand file tree
/
Copy pathcsv-mapper.rb
More file actions
143 lines (129 loc) · 5.73 KB
/
Copy pathcsv-mapper.rb
File metadata and controls
143 lines (129 loc) · 5.73 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
# Put this file's directory on the load path (once) so that files relative to
# it can be required by name.
lib_dir = File.dirname(__FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)

require 'rubygems'

# FasterCSV compatibility shim, slightly modified from Gregory Brown's
# solution on the Ruport Blaag:
# http://ruport.blogspot.com/2008/03/fastercsv-api-shim-for-19.html
#
# RUBY_VERSION is compared as a String. For versions that do not sort after
# "1.9" the fastercsv library is required; otherwise the csv library is
# required and exposed under the FasterCSV name (both the constant and the
# conversion method), unless FCSV is already defined.
if RUBY_VERSION <= "1.9"
  require "fastercsv"
else
  require "csv"
  if !defined?(FCSV)
    class Object
      alias_method :FasterCSV, :CSV
      FasterCSV = CSV
    end
  end
end
# This module provides the main interface for importing CSV files & data to mapped Ruby objects.
# = Usage
# Including CsvMapper will provide two methods:
# - +import+
# - +map_csv+
#
# See csv-mapper.rb[link:files/lib/csv-mapper_rb.html] for method docs.
#
# === Import From File
# results = import('/path/to/file.csv') do
# # declare mapping here
# end
#
# === Import From String or IO
# results = import(csv_data, :type => :io) do
# # declare mapping here
# end
#
# === Mapping
# Mappings are built inside blocks. All three of CsvMapper's main API methods accept a block containing a mapping.
# Maps are defined by using +map_to+, +start_at_row+, +before_row+, and +after_row+ (methods on CsvMapper::RowMap) and
# by defining your own mapping attributes.
# A mapping block uses an internal cursor to keep track of the order the mapping attributes are declared and uses that order to
# know the corresponding CSV column index to associate with the attribute.
#
# ===== The Basics
# * +map_to+ - Override the default Struct target. Accepts a class and an optional hash of default attribute names and values.
# * +start_at_row+ - Specify what row to begin parsing at. Use this to skip headers.
# * +before_row+ - Accepts an Array of method name symbols or lambdas to be invoked before parsing each row.
# * +after_row+ - Accepts an Array of method name symbols or lambdas to be invoked after parsing each row.
# * +delimited_by+ - Accepts a character to be used to delimit columns. Use this to specify pipe-delimited files.
# * <tt>\_SKIP_</tt> - Use as a placeholder to skip a CSV column index.
# * +parser_options+ - Accepts a hash of FasterCSV options. Can be anything FasterCSV::new()[http://fastercsv.rubyforge.org/classes/FasterCSV.html#M000018] understands
#
# ===== Attribute Mappings
# Attribute mappings are created by using the name of the attribute to be mapped to.
# The order in which attribute mappings are declared determines the index of the corresponding CSV row.
# All mappings begin at the 0th index of the CSV row.
# foo # maps the 0th CSV row position value to the value of the 'foo' attribute on the target object.
# bar # maps the 1st row position to 'bar'
# This could also be a nice one liner for easy CSV format conversion
# [foo, bar] # creates the same attribute maps as above.
# The mapping index may be specifically declared in two additional ways:
# foo(2) # maps the 2nd CSV row position value to 'foo' and moves the cursor to 3
# bar # maps the 3rd CSV row position to 'bar' due to the current cursor position
# baz.at(0) # maps the 0th CSV row position to 'baz' but only increments the cursor 1 position to 4
# Each attribute mapping may be configured to parse the record using a lambda or a method name
# foo.map lambda{|row| row[2].strip } # maps the 2nd row position value with leading and trailing whitespace removed to 'foo'.
# bar.map :clean_bar # maps the result of the clean_bar method to 'bar'. clean_bar must accept the row as a parameter.
# Attribute mapping declarations and "modifiers" may be chained
# foo.at(4).map :some_transform
#
# === Create Reusable Mappings
# The +import+ method accepts an instance of RowMap as an optional mapping parameter.
# The easiest way to create an instance of a RowMap is by using +map_csv+.
# a_row_map = map_csv do
# # declare mapping here
# end
# Then you can reuse the mapping
# results = import(some_string, :type => :io, :map => a_row_map)
# other_results = import('/path/to/file.csv', :map => a_row_map)
#
module CsvMapper
  # Create a new RowMap instance from the definition in the given block.
  #
  # The RowMap is built with the receiver as its context and +map_block+ as
  # the mapping definition.
  def map_csv(&map_block)
    CsvMapper::RowMap.new(self, &map_block)
  end

  # Load CSV data and map the values according to the definition in the given block.
  # Accepts either a file path, String, or IO as +data+. Defaults to file path.
  #
  # The following +options+ may be used:
  # <tt>:type</tt>:: defaults to <tt>:file_path</tt>. Use <tt>:io</tt> to specify data as String or IO.
  # <tt>:map</tt>:: Specify an instance of a RowMap to take precedence over a given block definition.
  #
  # Returns an Array holding the parsed result of every row that falls inside
  # the map's start/stop row window.
  def import(data, options={}, &map_block)
    results = []
    build_results(data, map_block, options) { |result| results << result }
    results
  end

  # Process works the same way as import except that it returns a count of
  # imported rows instead of the results. This is useful for processing large
  # files.
  def process(data, options={}, &map_block)
    count = 0
    build_results(data, map_block, options) { |_result| count += 1 }
    count
  end

  protected

  # Create a new RowMap instance from the definition in the given block and pass the csv_data.
  def map_csv_with_data(csv_data, &map_block) # :nodoc:
    CsvMapper::RowMap.new(self, csv_data, &map_block)
  end

  private

  # Process each row and yield result to caller.
  #
  # +data+ is used as-is when <tt>options[:type]</tt> is <tt>:io</tt>; otherwise
  # it is treated as a path and opened for reading. A file opened here is
  # always closed before returning, even if parsing or the caller's block
  # raises. Data supplied by the caller is never closed.
  #
  # +map_block+ is the mapping definition as a Proc (or nil). A RowMap given
  # via <tt>options[:map]</tt> is used instead of the one built from +map_block+.
  def build_results(data, map_block, options={})
    opened_here = options[:type] != :io
    csv_data = opened_here ? File.new(data, 'r') : data
    begin
      # The block-defined map is always built; an explicit :map entry (even a
      # nil one) overrides it.
      default_map = map_csv_with_data(csv_data, &map_block)
      map = options.fetch(:map, default_map)

      # NOTE(review): the parser options are passed as a positional Hash.
      # Ruby 3's stdlib CSV.new takes them as keyword arguments, so this call
      # may need `**` there -- confirm against the Ruby versions this library
      # must support.
      FasterCSV.new(csv_data, map.parser_options).each_with_index do |row, i|
        if i >= map.start_at_row && i <= map.stop_at_row
          yield map.parse(row)
        end
      end
    ensure
      # Only release the handle this method created.
      csv_data.close if opened_here && !csv_data.closed?
    end
  end

  extend self
end
require 'csv-mapper/row_map'