diff --git a/lib/data_magic.rb b/lib/data_magic.rb
index 1000d5cf..eb3b984a 100644
--- a/lib/data_magic.rb
+++ b/lib/data_magic.rb
@@ -105,7 +105,10 @@ def self.search(terms, options = {})
       # each result looks like this:
       # {"city"=>["Springfield"], "address"=>["742 Evergreen Terrace"]}
 
-      found.keys.each { |key| found[key] = found[key][0] }
+      # unwrap single-element arrays; longer (multivalue) arrays are kept as-is
+      found.keys.each do |key|
+        found[key] = found[key].size == 1 ? found[key][0] : found[key]
+      end
 
       # now it should look like this:
       # {"city"=>"Springfield", "address"=>"742 Evergreen Terrace}
@@ -234,6 +237,11 @@ def self.es_field_types(field_types)
         index_analyzer: 'autocomplete_index',
         search_analyzer: 'autocomplete_search'
       },
+      # position gap between array values keeps phrase queries from matching across them
+      'multivalue' => {
+        type: 'string',
+        position_offset_gap: 100
+      }
     }
     field_types.each_with_object({}) do |(key, type), result|
       result[key] = custom_type[type]
diff --git a/lib/data_magic/config.rb b/lib/data_magic/config.rb
index df4ab0f0..2fd03a90 100644
--- a/lib/data_magic/config.rb
+++ b/lib/data_magic/config.rb
@@ -17,6 +17,7 @@ def init_ivars
       @csv_column_types = nil
       @field_mapping = nil
       @calculated_field_list = nil
+      @multivalue_field_list = nil
       @field_types = nil
     end
 
@@ -246,6 +247,15 @@ def calculated_field_list
       @calculated_field_list
     end
 
+    # Names (as strings) of the dictionary fields whose hash entry declares
+    # type 'multivalue' (symbol or string key). Computed once, then memoized.
+    def multivalue_field_list
+      @multivalue_field_list ||= dictionary.each_with_object([]) do |(field_name, info), list|
+        next unless info.is_a?(Hash)
+        list << field_name.to_s if (info[:type] || info['type']) == 'multivalue'
+      end
+    end
+
     def field_type(field_name)
       field_types[field_name]
     end
diff --git a/lib/data_magic/index.rb b/lib/data_magic/index.rb
index c8bd0d1f..080a3030 100644
--- a/lib/data_magic/index.rb
+++ b/lib/data_magic/index.rb
@@ -67,7 +67,7 @@ def self.import_with_dictionary(options = {})
 
   private
   def self.valid_types
-    %w[integer float string literal name autocomplete boolean]
+    %w[integer float string literal name autocomplete boolean multivalue]
   end
 
 end # module DataMagic
diff --git a/lib/data_magic/index/document_builder.rb b/lib/data_magic/index/document_builder.rb
index 3564e403..6b765dc1 100644
--- a/lib/data_magic/index/document_builder.rb
+++ b/lib/data_magic/index/document_builder.rb
@@ -24,6 +24,7 @@ def build(row, builder_data, config)
             field_values = map_field_names(csv_row, fields, options)
           end
           field_values.merge!(calculated_fields(csv_row, config))
+          field_values.merge!(multivalue_fields(csv_row, config))
           field_values.merge!(lowercase_columns(field_values, config.column_field_types))
           field_values.merge!(additional) if additional
           doc = NestedHash.new.add(field_values)
@@ -48,6 +49,16 @@ def calculated_fields(row, config)
           result
         end
 
+        private
+
+        # Returns a hash of field name => parsed value (array of strings, or
+        # nil) for every field in config.multivalue_field_list.
+        def multivalue_fields(row, config)
+          config.multivalue_field_list.each_with_object({}) do |field_name, result|
+            result[field_name] = parse_multivalue(field_name, row, config)
+          end
+        end
+
         # row: a hash (keys may be strings or symbols)
         # valid_types: an array of allowed types
         # field_types: hash field_name : type (float, integer, string)
@@ -128,6 +139,22 @@ def parse_boolean(value)
           end
         end
 
+        # Splits the source column of a multivalue field into an array of
+        # stripped strings (currently only string values are accepted).
+        # Returns nil when the cell is missing or equals a configured null
+        # value; the separator defaults to ',' and fails on an unknown field.
+        def parse_multivalue(field_name, row, config)
+          item = config.dictionary[field_name.to_sym] || config.dictionary[field_name.to_s]
+          fail "multivalue: field not found in dictionary #{field_name.inspect}" if item.nil?
+          row_value = row[item[:source].to_sym]
+          # Array(nil) is [], which is truthy, so `|| ['NULL']` would never apply
+          null_values = Array(config.null_value)
+          null_values = ['NULL'] if null_values.empty?
+          return nil if row_value.nil? || null_values.include?(row_value)
+          separator = item[:separator] || item['separator'] || ','
+          row_value.to_s.split(separator.to_s).map(&:strip)
+        end
+
         # currently we just support 'or' operations on two columns
         def calculate(field_name, row, dictionary)
           item = dictionary[field_name.to_s] || dictionary[field_name.to_sym]
diff --git a/spec/lib/data_magic/config_spec.rb b/spec/lib/data_magic/config_spec.rb
index 9f8dad56..a74c8e37 100644
--- a/spec/lib/data_magic/config_spec.rb
+++ b/spec/lib/data_magic/config_spec.rb
@@ -162,6 +162,30 @@
     end
   end
 
+  context ".multivalue_field_list" do
+    let(:config) { DataMagic::Config.new(load_datayaml: false) }
+    it "finds fields with 'multivalue' property" do
+      allow(config).to receive(:dictionary).and_return(
+        {
+          one: {
+            source: 'column1',
+            type: 'float'
+          },
+          two: {
+            source: 'column2',
+            type: 'float'
+          },
+          names: {
+            source: 'THING_NAMES',
+            type: 'multivalue',
+            description: 'something with multiple names'
+          }
+        }
+      )
+      expect(config.multivalue_field_list).to eq(['names'])
+    end
+  end
+
   context ".only_field_list" do
     let(:config) { DataMagic::Config.new(load_datayaml: false) }
     let(:simple_fields) do
diff --git a/spec/lib/data_magic/index/document_builder_spec.rb b/spec/lib/data_magic/index/document_builder_spec.rb
index 744f3909..67c84f98 100644
--- a/spec/lib/data_magic/index/document_builder_spec.rb
+++ b/spec/lib/data_magic/index/document_builder_spec.rb
@@ -113,6 +113,51 @@
         it_correctly "creates a document"
       end
     end
+
+    context "with multivalue type" do
+      before do
+        allow(config).to receive(:csv_column_type).with(:THING_NAMES).and_return('multivalue')
+        allow(config).to receive(:multivalue_field_list).and_return(['names_for_thing'])
+      end
+
+      describe "with default comma separator" do
+        let(:fields) { config.field_mapping }
+        context "stores multi value string as array" do
+          before do
+            config.dictionary = {
+              names_for_thing: {
+                source: 'THING_NAMES',
+                type: 'multivalue',
+                description: 'a field that has multiple names'
+              }
+            }
+          end
+          subject {{ THING_NAMES: 'foo,bar,foo bar' }}
+          let(:expected_document) {{ 'names_for_thing' => ['foo', 'bar', 'foo bar'] }}
+          it_correctly "creates a document"
+        end
+      end
+
+      describe "with a specified separator" do
+        before do
+          config.dictionary = {
+            names_for_thing: {
+              source: 'THING_NAMES',
+              type: 'multivalue',
+              separator: '|',
+              description: 'a field that has multiple names'
+            }
+          }
+        end
+        let(:fields) { config.field_mapping }
+        context "and stores multi value string as array" do
+          subject {{ THING_NAMES: 'foo|bar|foo bar' }}
+          let(:expected_document) {{ 'names_for_thing' => ['foo', 'bar', 'foo bar'] }}
+          it_correctly "creates a document"
+        end
+      end
+
+    end
   end
 
   describe "boolean expressions with integer inputs" do