Skip to content
This repository was archived by the owner on Nov 7, 2018. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion lib/data_magic.rb
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,9 @@ def self.search(terms, options = {})
# each result looks like this:
# {"city"=>["Springfield"], "address"=>["742 Evergreen Terrace"]}

found.keys.each { |key| found[key] = found[key][0] }
found.keys.each { |key|
found[key] = found[key].size === 1 ? found[key][0] : found[key]
}
# now it should look like this (single-element arrays are unwrapped,
# any other array is kept as is):
# {"city"=>"Springfield", "address"=>"742 Evergreen Terrace"}

Expand Down Expand Up @@ -234,6 +236,10 @@ def self.es_field_types(field_types)
index_analyzer: 'autocomplete_index',
search_analyzer: 'autocomplete_search'
},
'multivalue' => {
type: 'string',
position_offset_gap: 100
}
}
field_types.each_with_object({}) do |(key, type), result|
result[key] = custom_type[type]
Expand Down
15 changes: 15 additions & 0 deletions lib/data_magic/config.rb
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ def init_ivars
@csv_column_types = nil
@field_mapping = nil
@calculated_field_list = nil
@multivalue_field_list = nil
@field_types = nil
end

Expand Down Expand Up @@ -246,6 +247,20 @@ def calculated_field_list
@calculated_field_list
end

# Names (as strings) of every dictionary field whose :type is 'multivalue'.
# Dictionary entries that are not hashes are ignored. The list is built on
# the first call and cached in @multivalue_field_list afterwards.
def multivalue_field_list
  return @multivalue_field_list unless @multivalue_field_list.nil?

  @multivalue_field_list = []
  dictionary.each do |field_name, info|
    next unless info.is_a?(Hash) && info[:type] == 'multivalue'

    @multivalue_field_list << field_name.to_s
  end
  @multivalue_field_list
end

# Looks up the entry stored under +field_name+ in field_types
# (nil when field_types has no such key and no default).
def field_type(field_name)
  known_types = field_types
  known_types[field_name]
end
Expand Down
2 changes: 1 addition & 1 deletion lib/data_magic/index.rb
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def self.import_with_dictionary(options = {})

private
# Returns the list of type names this module treats as valid.
# NOTE(review): two array literals appear back to back below; only the last
# expression is returned, so the effective list is the one that includes
# 'multivalue'. The first literal looks like the pre-change line of a diff
# view -- confirm against the real file.
def self.valid_types
%w[integer float string literal name autocomplete boolean]
%w[integer float string literal name autocomplete boolean multivalue]
end

end # module DataMagic
24 changes: 24 additions & 0 deletions lib/data_magic/index/document_builder.rb
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ def build(row, builder_data, config)
field_values = map_field_names(csv_row, fields, options)
end
field_values.merge!(calculated_fields(csv_row, config))
field_values.merge!(multivalue_fields(csv_row, config))
field_values.merge!(lowercase_columns(field_values, config.column_field_types))
field_values.merge!(additional) if additional
doc = NestedHash.new.add(field_values)
Expand All @@ -48,6 +49,16 @@ def calculated_fields(row, config)
result
end

private

# Builds a hash of field name => parsed value for every field listed in
# config.multivalue_field_list, delegating the parsing of each field to
# parse_multivalue.
#
# row:    the source row handed through to parse_multivalue
# config: must respond to #multivalue_field_list
def multivalue_fields(row, config)
  config.multivalue_field_list.each_with_object({}) do |name, parsed|
    parsed[name] = parse_multivalue(name, row, config)
  end
end

# row: a hash (keys may be strings or symbols)
# valid_types: an array of allowed types
# field_types: hash field_name : type (float, integer, string)
Expand Down Expand Up @@ -128,6 +139,19 @@ def parse_boolean(value)
end
end

# Splits the delimited source column of a 'multivalue' dictionary field into
# an array of whitespace-stripped strings. Currently only string values are
# produced.
#
# field_name: dictionary key of the multivalue field (string or symbol)
# row:        hash of source column name => raw value for the current row
# config:     object responding to #dictionary and #null_value
#
# Returns an array of strings, or nil when the source column is missing or
# holds one of the configured null values ('NULL' when none is configured).
# Raises RuntimeError when the field is not in the dictionary or has no
# source column.
def parse_multivalue(field_name, row, config)
  dictionary = config.dictionary
  # accept symbol or string keys, the same way calculate does
  item = dictionary[field_name.to_sym] || dictionary[field_name.to_s]
  fail "multivalue: field not found in dictionary #{field_name.inspect}" if item.nil?

  source = item[:source] || item['source']
  fail "multivalue: no source column for #{field_name.inspect}" if source.nil?

  row_value = row[source.to_sym]
  row_value = row[source.to_s] if row_value.nil?

  # `[*nil] || ['NULL']` can never fall back ([] is truthy), so test for an
  # unset null_value explicitly; an explicitly configured list is respected.
  null_values = config.null_value.nil? ? ['NULL'] : Array(config.null_value)
  return nil if row_value.nil? || null_values.include?(row_value)

  separator = item[:separator] || item['separator'] || ','
  row_value.to_s.split(separator.to_s).map(&:strip)
end

# currently we just support 'or' operations on two columns
def calculate(field_name, row, dictionary)
item = dictionary[field_name.to_s] || dictionary[field_name.to_sym]
Expand Down
24 changes: 24 additions & 0 deletions spec/lib/data_magic/config_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,30 @@
end
end

context ".multivalue_field_list" do
  let(:config) { DataMagic::Config.new(load_datayaml: false) }

  # Two float fields plus a single 'multivalue' field; only the latter
  # should be reported by multivalue_field_list.
  let(:stubbed_dictionary) do
    {
      one: { source: 'column1', type: 'float' },
      two: { source: 'column2', type: 'float' },
      names: {
        source: 'THING_NAMES',
        type: 'multivalue',
        description: 'something with multiple names'
      }
    }
  end

  before { allow(config).to receive(:dictionary).and_return(stubbed_dictionary) }

  it "finds fields with 'multivalue' property" do
    expect(config.multivalue_field_list).to eq(['names'])
  end
end

context ".only_field_list" do
let(:config) { DataMagic::Config.new(load_datayaml: false) }
let(:simple_fields) do
Expand Down
45 changes: 45 additions & 0 deletions spec/lib/data_magic/index/document_builder_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,51 @@
it_correctly "creates a document"
end
end

# Covers documents built from a dictionary field of type 'multivalue':
# a delimited source string is expected to end up as an array of values.
context "with multivalue type" do
# Stub the config so THING_NAMES reports the 'multivalue' column type and
# names_for_thing is the only multivalue field.
before do
allow(config).to receive(:csv_column_type).with(:THING_NAMES).and_return('multivalue')
allow(config).to receive(:multivalue_field_list).and_return(['names_for_thing'])
end

# No separator in the dictionary entry, so the input is comma-delimited.
describe "with default comma separator" do
let(:fields) { config.field_mapping }
context "stores multi value string as array" do
before do
config.dictionary = {
names_for_thing: {
source: 'THING_NAMES',
type: 'multivalue',
description: 'a field that has multiple names'
}
}
end
# NOTE(review): `subject` and `expected_document` are presumably consumed
# by the "creates a document" shared example, which is defined outside
# this view -- confirm.
subject {{ THING_NAMES: 'foo,bar,foo bar' }}
let(:expected_document) {{ 'names_for_thing' => ['foo', 'bar', 'foo bar'] }}
it_correctly "creates a document"
end
end

# Same expectation, but the dictionary entry declares '|' as separator.
describe "with a specified separator" do
before do
config.dictionary = {
names_for_thing: {
source: 'THING_NAMES',
type: 'multivalue',
separator: '|',
description: 'a field that has multiple names'
}
}
end
let(:fields) { config.field_mapping }
context "and stores multi value string as array" do
subject {{ THING_NAMES: 'foo|bar|foo bar' }}
let(:expected_document) {{ 'names_for_thing' => ['foo', 'bar', 'foo bar'] }}
it_correctly "creates a document"
end
end

end
end

describe "boolean expressions with integer inputs" do
Expand Down