Skip to content
This repository was archived by the owner on Nov 7, 2018. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,8 @@ end
# Padrino Stable Gem
gem 'padrino', '0.12.5'

gem 'pry', :group => 'development'
gem 'pry-byebug', :group => 'development'
gem 'pry', :group => ['development', 'test']
gem 'pry-byebug', :group => ['development', 'test']
gem 'newrelic_rpm'

# Or Padrino Edge
Expand Down
3 changes: 0 additions & 3 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,3 @@ DEPENDENCIES
safe_yaml
sass
stretchy

BUNDLED WITH
1.10.6
65 changes: 45 additions & 20 deletions app/controllers.rb
Original file line number Diff line number Diff line change
Expand Up @@ -47,27 +47,49 @@
data.to_json
end

get :index, with: :endpoint, provides: [:json, :csv] do
options = get_search_args_from_params(params)
endpoint = options[:endpoint]
content_type options[:format].to_sym if options[:format]
DataMagic.logger.debug "-----> APP GET #{params.inspect}"

unless DataMagic.config.api_endpoints.keys.include? endpoint
halt 404, {
error: 404,
message: "#{endpoint} not found. Available endpoints: #{DataMagic.config.api_endpoints.keys.join(',')}"
}.to_json
end
get :index, with: ':endpoint/:command', provides: [:json] do
process_params
end

data = DataMagic.search(params, options)
halt 400, data.to_json if data.key?(:errors)
get :index, with: ':endpoint', provides: [:json, :csv] do
process_params
end
end

if content_type == :csv
output_data_as_csv(data['results'])
else
data.to_json
end
def process_params
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I like how you've refactored this, thank you!

options = get_search_args_from_params(params)
DataMagic.logger.debug "-----> APP GET #{params.inspect} with options #{options.inspect}"

check_endpoint!(options)
set_content_type(options)
search_and_respond(options)
end

def search_and_respond(options)
data = DataMagic.search(params, options)
halt 400, data.to_json if data.key?(:errors)

if content_type == :csv
output_data_as_csv(data['results'])
else
data.to_json
end
end

def check_endpoint!(options)
unless DataMagic.config.api_endpoints.keys.include? options[:endpoint]
halt 404, {
error: 404,
message: "#{options[:endpoint]} not found. Available endpoints: #{DataMagic.config.api_endpoints.keys.join(',')}"
}.to_json
end
end

def set_content_type(options)
if options[:command] == 'stats'
content_type :json
else
content_type(options[:format].nil? ? :json : options[:format].to_sym)
end
end

Expand All @@ -76,14 +98,17 @@
# see comment in method body
def get_search_args_from_params(params)
options = {}
%w(sort fields zip distance page per_page debug).each do |opt|
%w(metrics sort fields zip distance page per_page debug).each do |opt|
options[opt.to_sym] = params.delete("_#{opt}")
# TODO: remove next line to end support for un-prefixed option parameters
options[opt.to_sym] ||= params.delete(opt)
end
options[:endpoint] = params.delete("endpoint") # these two params are
options[:format] = params.delete("format") # supplied by Padrino
options[:fields] = (options[:fields] || "").split(',')
options[:command] = params.delete("command")

options[:metrics] = options[:metrics].split(/\s*,\s*/) if options[:metrics]
options
end

Expand Down
28 changes: 27 additions & 1 deletion lib/data_magic.rb
Original file line number Diff line number Diff line change
Expand Up @@ -76,12 +76,20 @@ def self.search(terms, options = {})
body: query_body
}

# Per https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-body.html:
# "the search_type and the query_cache must be passed as query-string parameters"
if options[:command] == 'stats'
full_query.merge! :search_type => 'count'
end

logger.info "FULL_QUERY: #{full_query.inspect}"

time_start = Time.now.to_f
result = client.search full_query

search_time = Time.now.to_f - time_start
logger.info "ES query time (ms): #{result["took"]} ; Query fetch time (s): #{search_time} ; result: #{result.inspect[0..500]}"

hits = result["hits"]
total = hits["total"]
results = []
Expand All @@ -97,7 +105,7 @@ def self.search(terms, options = {})

found.keys.each { |key| found[key] = found[key][0] }
# now it should look like this:
# {"city"=>"Springfield", "address"=>"742 Evergreen Terrace
# {"city"=>"Springfield", "address"=>"742 Evergreen Terrace}

# re-insert null fields that didn't get returned by ES
query_body[:fields].each do |field|
Expand All @@ -118,10 +126,28 @@ def self.search(terms, options = {})
end

# assemble a simpler json document to return
simple_result =
{
"metadata" => metadata,
"results" => results
}

if options[:command] == 'stats'
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should be broken out into its own method. (We should refactor the rest of search() anyway, it doesn't have to be done now)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I'm guessing each of the 3 if blocks could be a method, handling a specific aspect of the search request... but I thought the code was still on this side of becoming unwieldy within one method.

# Remove metrics that weren't requested.
aggregations = result['aggregations']
aggregations.each do |f_name, values|
if options[:metrics] && options[:metrics].size > 0
aggregations[f_name] = values.reject { |k, v| !(options[:metrics].include? k) }
else
# Keep everything is no metric list is provided
aggregations[f_name] = values
end
end

simple_result.merge!({"aggregations" => aggregations})
end

simple_result
end

private
Expand Down
13 changes: 12 additions & 1 deletion lib/data_magic/error_checker.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@ module DataMagic
module ErrorChecker
class << self
def check(params, options, config)
report_nonexistent_params(params, config) +
report_required_params_absent(options) +
report_nonexistent_params(params, config) +
report_nonexistent_operators(params) +
report_nonexistent_fields(options[:fields], config) +
report_bad_range_argument(params) +
Expand All @@ -11,6 +12,14 @@ def check(params, options, config)

private

def report_required_params_absent(options)
if options[:command] == 'stats' && options[:fields].length == 0
[build_error(error: 'invalid_or_incomplete_parameters', input: options[:command])]
else
[]
end
end

def report_nonexistent_params(params, config)
return [] unless config.dictionary_only_search?
params.keys.reject { |p| config.field_type(strip_op(p)) }.
Expand Down Expand Up @@ -66,6 +75,8 @@ def report_wrong_field_type(params, config)
def build_error(opts)
opts[:message] =
case opts[:error]
when 'invalid_or_incomplete_parameters'
"The command #{opts[:input]} requires a fields parameter."
when 'parameter_not_found'
"The input parameter '#{opts[:input]}' is not known in this dataset."
when 'field_not_found'
Expand Down
22 changes: 21 additions & 1 deletion lib/data_magic/query_builder.rb
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,15 @@ def from_params(params, options, config)
per_page = DataMagic::MAX_PAGE_SIZE if per_page > DataMagic::MAX_PAGE_SIZE
query_hash = {
from: page * per_page,
size: per_page
size: per_page,
}

query_hash[:query] = generate_squery(params, options, config).to_search

if options[:command] == 'stats'
query_hash.merge! add_aggregations(params, options, config)
end

if options[:fields] && !options[:fields].empty?
query_hash[:fields] = get_restrict_fields(options)
query_hash[:_source] = false
Expand All @@ -31,6 +37,20 @@ def generate_squery(params, options, config)
search_fields_and_ranges(squery, params, config)
end

# Wrapper for Stretchy aggregation clause builder (which wraps ElasticSearch (ES) :aggs parameter)
# Extracts all extended_stats aggregations from ES, to be filtered later
# Is a no-op if no fields are specified, or none of them are numeric
def add_aggregations(params, options, config)
agg_hash = options[:fields].inject({}) do |memo, f|
if config.column_field_types[f.to_s] && ["integer", "float"].include?(config.column_field_types[f.to_s])
memo[f.to_s] = { extended_stats: { "field" => f.to_s } }
end
memo
end

agg_hash.empty? ? {} : { aggs: agg_hash }
end

def get_restrict_fields(options)
options[:fields].map(&:to_s)
end
Expand Down
3 changes: 2 additions & 1 deletion sample-data/data.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,8 @@ dictionary:
type: integer
location.lat: INTPTLAT
location.lon: INTPTLONG
area.land:
land_area:
source: ALAND_SQMI
description: Land Area (square miles)
source: ALAND_SQMI
type: float
Expand Down
61 changes: 61 additions & 0 deletions spec/features/api_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -344,6 +344,67 @@
end
end
end

describe "With residents CSV data" do
before do
ENV['DATA_PATH'] = './spec/fixtures/numeric_data'
DataMagic.init(load_now: false)
num_rows, fields = DataMagic.import_csv(address_data)
end

after do
DataMagic.destroy
end

let(:all_aggregs) do
{ "aggregations" => {
"age"=>{"count"=>2, "min"=>14.0, "max"=>70.0, "avg"=>42.0, "sum"=>84.0, "sum_of_squares"=>5096.0, "variance"=>784.0, "std_deviation"=>28.0, "std_deviation_bounds"=>{"upper"=>98.0, "lower"=>-14.0}},
"height"=>{"count"=>2, "min"=>2.0, "max"=>142.0, "avg"=>72.0, "sum"=>144.0, "sum_of_squares"=>20168.0, "variance"=>4900.0, "std_deviation"=>70.0, "std_deviation_bounds"=>{"upper"=>212.0, "lower"=>-68.0}}
}
}
end

let(:max_avg_aggregs) do
{ "aggregations" => {
"age" => { "max" => 70.0, "avg" => 42.0},
"height" => { "max" => 142.0, "avg" => 72.0}
}
}
end

let(:stats_envelope) do
{ "metadata" => { "total" => 2,
"page" => 0,
"per_page" => DataMagic::DEFAULT_PAGE_SIZE
},
"results" => []
}
end

it "/stats returns the correct results for Springfield residents" do
get '/v1/cities/stats?city=Springfield&_fields=address,age,height&_metrics=max,avg'
expect(last_response).to be_ok
json_response["results"] = json_response["results"].sort_by { |k| k["age"] }
expect(json_response).to eq(stats_envelope.merge(max_avg_aggregs))
end

it "/stats returns all metrics when none are specified" do
get '/v1/cities/stats?city=Springfield&_fields=address,age,height'
expect(last_response).to be_ok
json_response["results"] = json_response["results"].sort_by { |k| k["age"] }

age_expected = stats_envelope.merge(all_aggregs)["aggregations"]["age"]
expect(json_response["aggregations"]["age"]["std_deviation"]).to eq(age_expected["std_deviation"])

height_expected = stats_envelope.merge(all_aggregs)["aggregations"]["height"]
expect(json_response["aggregations"]["height"]["std_deviation"]).to eq(height_expected["std_deviation"])
end

it "/stats requires fields option" do
get '/v1/cities/stats?city=Springfield&_metrics=max,avg'
expect(last_response.status).to eq(400)
end
end

describe "deprecated option syntax" do
before do
Expand Down
22 changes: 11 additions & 11 deletions spec/fixtures/data.rb
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@


# Ages adjusted for Springfield residents to average to 42
# Heights randomly set to generate a max of 142
def address_data
@address_data ||= StringIO.new <<-eos
name,address,city
Paul,15 Penny Lane,Liverpool
Michelle,600 Pennsylvania Avenue,Washington
Marilyn,1313 Mockingbird Lane,Springfield
Sherlock,221B Baker Street,London
Clark,66 Lois Lane,Smallville
Bart,742 Evergreen Terrace,Springfield
Paul,19 N Square,Boston
Peter,66 Parker Lane,New York
name,address,city,age,height
Paul,15 Penny Lane,Liverpool,10,142
Michelle,600 Pennsylvania Avenue,Washington,12,1
Marilyn,1313 Mockingbird Lane,Springfield,14,2
Sherlock,221B Baker Street,London,16,123
Clark,66 Lois Lane,Smallville,18,141
Bart,742 Evergreen Terrace,Springfield,70,142
Paul,19 N Square,Boston,70,55.2
Peter,66 Parker Lane,New York,74,11.5123
eos
@address_data.rewind
@address_data
Expand Down
21 changes: 21 additions & 0 deletions spec/fixtures/numeric_data/data.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# cities100.txt
# Test YAML file
index: numeric-data
api: cities

dictionary:
name:
source: name
type: string
address:
source: address
type: string
city:
source: city
type: string
age:
source: age
type: integer
height:
source: height
type: float
3 changes: 1 addition & 2 deletions spec/lib/data_magic/config_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,6 @@
"version" => "cities100-2010",
"index" => "city-data", "api" => "cities",
"files" => [{ "name" => "cities100.csv" }],
"data_path" => "./sample-data",
"options" => {:search=>"dictionary_only"},
"unique" => ["name"],
"data_path" => "./sample-data"
Expand All @@ -89,7 +88,7 @@
dictionary = config.data.delete 'dictionary'

expect(dictionary.keys.sort).to eq %w(id code name state population
location.lat location.lon area.land area.water).sort
location.lat location.lon land_area area.water).sort
categories = config.data.delete 'categories'
expect(categories.keys.sort).to eq %w(general general2 general3 general4 general5 geographic).sort
expect(config.data).to eq(default_config)
Expand Down
Loading