Skip to content

Commit b87435f

Browse files
committed
fix poster fetching
Follows up on 137d375: poster fetching now uses Selenium too.
1 parent 1b24465 commit b87435f

File tree

1 file changed

+17
-5
lines changed

1 file changed

+17
-5
lines changed

www/lib/tasks/movies.rake

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,16 @@ namespace :movies do
251251
agent.read_timeout = 5
252252
agent.open_timeout = 5
253253

254+
options = ::Selenium::WebDriver::Options.chrome
255+
options.add_argument("--headless=new")
256+
options.add_argument("user-agent=mozilla/5.0 (x11; ubuntu; linux x86_64; rv:147.0) gecko/20100101 firefox/147.0")
257+
options.timeouts = {
258+
page_load: 5_000, # 5 seconds
259+
script: 5_000 # 5 seconds
260+
}
261+
options.page_load_strategy = :none
262+
driver = ::Selenium::WebDriver.for(:chrome, options: options)
263+
254264
out_dir = Rails.root.join("public/posters/original")
255265
out_dir_100 = Rails.root.join("public/posters/100")
256266
out_dir_300 = Rails.root.join("public/posters/300")
@@ -280,11 +290,13 @@ namespace :movies do
280290

281291
records.where.not(wiki_id: no_posters).find_each do |movie|
282292
wiki_id = movie.wiki_id
283-
284293
imdb_id = movie.imdb_id
285-
page = agent.get("https://www.imdb.com/title/#{imdb_id}")
286-
url = URI(page.search("meta[property='og:image']").first[:content])
287-
filename = url.path.split("/").last
294+
295+
driver.get("https://www.imdb.com/title/#{imdb_id}")
296+
wait = ::Selenium::WebDriver::Wait.new(timeout: 5)
297+
element = wait.until { driver.find_element(css: "meta[property='og:image']") }
298+
url = element.attribute("content")
299+
filename = url.split("/").last
288300

289301
if filename == "imdb_logo.png"
290302
print "X"
@@ -307,7 +319,7 @@ namespace :movies do
307319

308320
print "."
309321
errors.delete(wiki_id)
310-
rescue Mechanize::ResponseCodeError, Net::ReadTimeout, Net::OpenTimeout => error
322+
rescue Mechanize::ResponseCodeError, Net::ReadTimeout, Net::OpenTimeout, Selenium::WebDriver::Error::TimeoutError => error
311323
print "F"
312324
pp ["Get failed", { wiki_id:, imdb_id:, error: error }]
313325
errors << wiki_id

0 commit comments

Comments
 (0)