@@ -20,49 +20,58 @@ def self.import_csv(data, options={})
2020 Index ::Importer . process ( data , options )
2121 end
2222
23- # pre-condition: index is already created w/ config
24- def self . index_with_dictionary ( options = { } )
23+ def self . log_index_start
2524 start_time = Time . now
26- Config . logger . debug "--- index_with_dictionary, starting at #{ start_time } "
25+ Config . logger . debug "--- Indexing Begins, starting at #{ start_time } "
26+ start_time
27+ end
28+
29+ def self . log_index_end ( start_time )
30+ end_time = Time . now
31+ logger . debug "indexing complete: #{ distance_of_time_in_words ( end_time , start_time ) } "
32+ logger . debug "duration: #{ end_time - start_time } "
33+ end
2734
28- logger . info "files: #{ self . config . files } "
35+ def self . index_file_process ( options = { } , filepath )
36+ begin
37+ logger . debug "--" *40
38+ logger . debug "-- #{ filepath } "
39+ logger . debug "--" *40
40+ file_start = Time . now
41+ data = config . read_path ( filepath )
42+ rows , _ = DataMagic . import_csv ( data , options )
43+ file_end = Time . now
44+ logger . debug "imported #{ rows } rows in #{ distance_of_time_in_words ( file_end , file_start ) } , ms: #{ file_end - file_start } "
45+ rescue DataMagic ::InvalidData => e
46+ Config . logger . debug "Error: skipping #{ filepath } , #{ e . message } "
47+ end
48+ end
2949
50+ # pre-condition: index is already created w/ config
51+ def self . index_with_dictionary ( options = { } )
52+ start_time = log_index_start
3053 # optionally continue importing from a named file (see import.rake)
3154 starting_from = 0
3255 if options [ :continue ]
3356 starting_from = config . files . find_index { |file | file . match ( /#{ options [ :continue ] } / ) }
3457 logger . info "Indexing continues with file: #{ options [ :continue ] } " unless starting_from . nil?
3558 end
36-
59+ logger . info "files: #{ self . config . files [ starting_from . to_i ..- 1 ] } "
3760 config . files [ starting_from . to_i ..-1 ] . each_with_index do |filepath , index |
3861 fname = filepath . split ( '/' ) . last
3962 logger . debug "indexing #{ fname } #{ starting_from + index } file config:#{ config . additional_data_for_file ( starting_from + index ) . inspect } "
4063 options [ :add_data ] = config . additional_data_for_file ( starting_from + index )
64+ options [ :root ] = config . info_for_file ( starting_from + index , :root )
4165 options [ :only ] = config . info_for_file ( starting_from + index , :only )
4266 options [ :nest ] = config . info_for_file ( starting_from + index , :nest )
43- begin
44- logger . debug "--" *40
45- logger . debug "-- #{ filepath } "
46- logger . debug "--" *40
47- file_start = Time . now
48- data = config . read_path ( filepath )
49- rows , _ = DataMagic . import_csv ( data , options )
50- file_end = Time . now
51- logger . debug "imported #{ rows } rows in #{ distance_of_time_in_words ( file_end , file_start ) } , ms: #{ file_end - file_start } "
52- rescue DataMagic ::InvalidData => e
53- Config . logger . debug "Error: skipping #{ filepath } , #{ e . message } "
54- end
67+ index_file_process ( options , filepath )
5568 end
56- end_time = Time . now
57- logger . debug "indexing complete: #{ distance_of_time_in_words ( end_time , start_time ) } "
58- logger . debug "duration: #{ end_time - start_time } "
69+ log_index_end ( start_time )
5970 end
6071
6172 def self . import_with_dictionary ( options = { } )
62- #logger.debug("field_mapping: #{field_mapping.inspect}")
6373 options [ :mapping ] = config . field_mapping
6474 options = options . merge ( config . options )
65-
6675 es_index_name = self . config . load_datayaml ( options [ :data_path ] )
6776 unless config . index_exists? ( es_index_name )
6877 logger . info "creating #{ es_index_name } " # TO DO: fix #14
@@ -73,6 +82,51 @@ def self.import_with_dictionary(options = {})
7382
7483 end # import_with_dictionary
7584
85+ def self . index_with_delta ( options = { } )
86+ # delta updates the current index with a single file
87+ if options [ :delta_original ]
88+ start_time = log_index_start
89+ # find the index of the delta file from the config by the :delta_only key (see delta.rake)
90+ original_file_index = nil
91+ config . files . each_with_index do |file , index |
92+ if config . info_for_file ( index , :delta_only )
93+ original_file_index = index
94+ end
95+ end
96+
97+ unless original_file_index
98+ raise ArgumentError , "delta_original file must contiain :delta_only key in data.yaml. No :delta_only key found."
99+ end
100+
101+ # use specified :delta_update filename, or fall back to :delta_original if not provided
102+ delta_filename = options [ :delta_update ] || options [ :delta_original ]
103+ config . files [ original_file_index ..original_file_index ] . each do |filepath |
104+ original_fname = filepath . split ( '/' ) . last
105+ # update filepath to use a "delta" subdirectory within DATA_PATH (e.g, <DATA_PATH>/delta/<CSV_FILE> )
106+ delta_filepath = filepath . gsub ( /#{ original_fname } / , "delta/#{ delta_filename } " )
107+ logger . debug "delta update with #{ delta_filename } file config:#{ config . additional_data_for_file ( original_file_index ) . inspect } "
108+ options [ :add_data ] = config . additional_data_for_file ( original_file_index )
109+ # Append the :delta_only array as our :only fields
110+ options [ :only ] = config . info_for_file ( original_file_index , :delta_only )
111+ options [ :nest ] = config . info_for_file ( original_file_index , :nest )
112+ options [ :root ] = false # we are not creating new documents
113+ options [ :nest ] [ :parent_missing ] = 'skip' # we allow skips
114+ index_file_process ( options , delta_filepath )
115+ end
116+ log_index_end ( start_time )
117+ else
118+ raise ArgumentError , "delta.rake requires 'delta_original' argument to be a filename from the config. No option[:delta_original] provided."
119+ end
120+ end
121+
122+ # pre-condition: index is already created w/ config
123+ def self . import_with_delta ( options = { } )
124+ options [ :mapping ] = config . field_mapping
125+ options = options . merge ( config . options )
126+ es_index_name = self . config . load_datayaml ( options [ :data_path ] )
127+ index_with_delta ( options )
128+ end # import_with_delta
129+
76130private
77131 def self . valid_types
78132 %w[ integer float string literal name autocomplete boolean ]
0 commit comments