Skip to content

Muhammad Owais solution # 2 in Ruby #318

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,13 @@ build-iPhoneSimulator/
# unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
.rvmrc
.DS_Store



# Python virtual environments
*venv/
*.venv/

# python cache
*__pycache__/
*.pytest_cache
1 change: 1 addition & 0 deletions moya_ruby_solution/.rspec
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
--require spec_helper
1 change: 1 addition & 0 deletions moya_ruby_solution/.ruby-version
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
3.4.1
9 changes: 9 additions & 0 deletions moya_ruby_solution/Gemfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# frozen_string_literal: true

# Bundler manifest for the solution: declares where gems are fetched from and
# which gems the project depends on.

# Default gem server for every `gem` declaration below.
source "https://rubygems.org"

# Teach Bundler to expand a `github: "owner/repo"` option into an HTTPS clone URL.
git_source(:github) { |repo_name| "https://github.com/#{repo_name}" }

# gem "rails"
# rspec is pinned to the 3.x series; nokogiri is declared without a version constraint.
gem 'rspec', '~> 3.0'
gem 'nokogiri'
51 changes: 51 additions & 0 deletions moya_ruby_solution/Gemfile.lock
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
GEM
remote: https://rubygems.org/
specs:
diff-lcs (1.6.1)
nokogiri (1.18.7-aarch64-linux-gnu)
racc (~> 1.4)
nokogiri (1.18.7-aarch64-linux-musl)
racc (~> 1.4)
nokogiri (1.18.7-arm-linux-gnu)
racc (~> 1.4)
nokogiri (1.18.7-arm-linux-musl)
racc (~> 1.4)
nokogiri (1.18.7-arm64-darwin)
racc (~> 1.4)
nokogiri (1.18.7-x86_64-darwin)
racc (~> 1.4)
nokogiri (1.18.7-x86_64-linux-gnu)
racc (~> 1.4)
nokogiri (1.18.7-x86_64-linux-musl)
racc (~> 1.4)
racc (1.8.1)
rspec (3.13.0)
rspec-core (~> 3.13.0)
rspec-expectations (~> 3.13.0)
rspec-mocks (~> 3.13.0)
rspec-core (3.13.3)
rspec-support (~> 3.13.0)
rspec-expectations (3.13.3)
diff-lcs (>= 1.2.0, < 2.0)
rspec-support (~> 3.13.0)
rspec-mocks (3.13.2)
diff-lcs (>= 1.2.0, < 2.0)
rspec-support (~> 3.13.0)
rspec-support (3.13.2)

PLATFORMS
aarch64-linux-gnu
aarch64-linux-musl
arm-linux-gnu
arm-linux-musl
arm64-darwin
x86_64-darwin
x86_64-linux-gnu
x86_64-linux-musl

DEPENDENCIES
nokogiri
rspec (~> 3.0)

BUNDLED WITH
2.6.7
27 changes: 27 additions & 0 deletions moya_ruby_solution/bin/rspec
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#!/usr/bin/env ruby
# frozen_string_literal: true

#
# This file was generated by Bundler.
#
# The application 'rspec' is installed as part of a gem, and
# this file is here to facilitate running it.
#

# Point Bundler at the Gemfile one directory above this script, unless the
# caller has already set BUNDLE_GEMFILE.
ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../Gemfile", __dir__)

# Location a `bundle` binstub would have if it sat next to this script.
bundle_binstub = File.expand_path("bundle", __dir__)

# If such a file exists, load it only when its first 300 bytes contain
# Bundler's generated-file marker; otherwise abort with instructions.
if File.file?(bundle_binstub)
if File.read(bundle_binstub, 300).include?("This file was generated by Bundler")
load(bundle_binstub)
else
abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.
Replace `bin/bundle` by running `bundle binstubs bundler --force`, then run this command again.")
end
end

require "rubygems"
require "bundler/setup"

# Resolve the `rspec` executable shipped in the rspec-core gem and run it.
load Gem.bin_path("rspec-core", "rspec")
77 changes: 77 additions & 0 deletions moya_ruby_solution/lib/parser.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
# Google Artworks parser solution for the SerpApi code challenge
# Author: Muhammad Owais
# Date: 10-April-2025


require 'nokogiri'

# Extracts artwork entries (name, link, extensions, image) from the HTML of a
# Google results page.
#
# Apart from #initialize, every method relies only on the +css+ / +at_css+ /
# +text+ / +[]+ interface of the parsed document and its nodes.
class GoogleArtworksParser
  # Host prepended to links that do not already carry a URI scheme.
  GOOGLE_HOST = 'https://www.google.com'.freeze

  # An href beginning with a URI scheme ("https://...") is already absolute.
  ABSOLUTE_URL = %r{\A[a-z][a-z0-9+.-]*://}i

  # Inline script assignment that carries an image payload: var s = '...'
  INLINE_IMAGE = /var\s+s\s*=\s*'([^']+)'/

  # A JavaScript "\xNN" escape sequence.
  HEX_ESCAPE = /\\x([0-9a-fA-F]{2})/

  # @param html_content [String, nil] markup handed unchanged to Nokogiri::HTML
  def initialize(html_content = nil)
    @doc = Nokogiri::HTML(html_content)
  end

  # @return [Hash] +{ artworks: [...] }+ with one Hash per artwork container
  def parse
    { artworks: extract_all_artworks }
  end

  # Visits every artwork container (+div.iELo6+) in document order.
  #
  # @return [Array<Hash>] hashes with :name, :link, :extensions and :image keys
  def extract_all_artworks
    @doc.css('div.iELo6').map do |artwork_div|
      {
        name: extract_artwork_name(artwork_div),
        link: extract_artwork_link(artwork_div),
        extensions: extract_artwork_extensions(artwork_div),
        image: extract_artwork_image(artwork_div)
      }
    end
  end

  # @param artwork_div [#at_css] one artwork container
  # @return [String, nil] text of the +div.pgNMRc+ node, or nil when it is absent
  def extract_artwork_name(artwork_div)
    artwork_div.at_css('div.pgNMRc')&.text
  end

  # @param artwork_div [#at_css] one artwork container
  # @return [String, nil] href of the first anchor; nil when there is no anchor
  #   or no href. Hrefs that already carry a scheme are returned untouched
  #   (so links to other hosts are not mangled); anything else is prefixed
  #   with GOOGLE_HOST.
  def extract_artwork_link(artwork_div)
    link = artwork_div.at_css('a')&.[]('href')
    return nil unless link
    return link if link.match?(ABSOLUTE_URL)

    "#{GOOGLE_HOST}#{link}"
  end

  # @param artwork_div [#at_css] one artwork container
  # @return [Array<String>] single-element array with the text of the
  #   +div.cxzHyb+ node, or an empty array when the artwork has none
  def extract_artwork_extensions(artwork_div)
    extensions_div = artwork_div.at_css('div.cxzHyb')
    extensions_div ? [extensions_div.text] : []
  end

  # Resolves the image of an artwork from its +img.taFZJe+ node.
  #
  # @param artwork_div [#at_css] one artwork container
  # @return [String, nil] the inline payload registered in a script for the
  #   image's id when one is found; otherwise the +data-src+ attribute; nil
  #   when the container has no such image
  def extract_artwork_image(artwork_div)
    img = artwork_div.at_css('img.taFZJe')
    return nil unless img

    data_src = img['data-src']
    img_id = img['id']
    return data_src unless img_id

    # An id without a matching inline payload still falls back to data-src.
    extract_img_base64_str(img_id) || data_src
  end

  # Searches the document's scripts for the payload belonging to an image id.
  #
  # @param attr_id_value [String] value of the image's +id+ attribute
  # @return [String, nil] payload with "\xNN" escapes decoded, or nil when no
  #   script mentioning the id contains a +var s = '...'+ assignment
  def extract_img_base64_str(attr_id_value)
    quoted_id = /(["'])#{Regexp.escape(attr_id_value)}\1/

    # First pass: exact quoted id, so "img1" is not satisfied by "img10".
    # Second pass: plain substring search, as a lenient fallback.
    [quoted_id, attr_id_value].each do |needle|
      script_texts.each do |content|
        position = content.index(needle)
        next unless position

        payload = payload_near(content, position)
        return unescape_hex_chars(payload) if payload
      end
    end
    nil
  end

  # @param encoded_string [String] text containing JavaScript "\xNN" escapes
  # @return [String] copy with every "\xNN" replaced by the byte it denotes
  def unescape_hex_chars(encoded_string)
    encoded_string.gsub(HEX_ESCAPE) { [Regexp.last_match(1)].pack('H*') }
  end

  private

  # Text of every <script> element, read once and reused for all artworks.
  def script_texts
    @script_texts ||= @doc.css('script').map(&:text)
  end

  # Picks the +var s = '...'+ payload that starts closest before +position+,
  # so a script holding several images yields the one next to the id. When no
  # assignment precedes the position, the first one in the script is used.
  def payload_near(content, position)
    start = content.rindex(INLINE_IMAGE, position) || 0
    match = content.match(INLINE_IMAGE, start)
    match && match[1]
  end
end


# Example usage: when this file is executed directly (rather than required),
# parse the bundled Van Gogh sample page and print the resulting hash.
if $PROGRAM_NAME == __FILE__
  sample_html = File.read("sample_html_files/van-gogh-paintings.html", encoding: "utf-8")
  puts GoogleArtworksParser.new(sample_html).parse
end
Loading