@@ -251,6 +251,16 @@ namespace :movies do
251251 agent . read_timeout = 5
252252 agent . open_timeout = 5
253253
254+ options = ::Selenium ::WebDriver ::Options . chrome
255+ options . add_argument ( "--headless=new" )
256+ options . add_argument ( "user-agent=mozilla/5.0 (x11; ubuntu; linux x86_64; rv:147.0) gecko/20100101 firefox/147.0" )
257+ options . timeouts = {
258+ page_load : 5_000 , # 5 seconds
259+ script : 5_000 # 5 seconds
260+ }
261+ options . page_load_strategy = :none
262+ driver = ::Selenium ::WebDriver . for ( :chrome , options : options )
263+
254264 out_dir = Rails . root . join ( "public/posters/original" )
255265 out_dir_100 = Rails . root . join ( "public/posters/100" )
256266 out_dir_300 = Rails . root . join ( "public/posters/300" )
@@ -280,11 +290,13 @@ namespace :movies do
280290
281291 records . where . not ( wiki_id : no_posters ) . find_each do |movie |
282292 wiki_id = movie . wiki_id
283-
284293 imdb_id = movie . imdb_id
285- page = agent . get ( "https://www.imdb.com/title/#{ imdb_id } " )
286- url = URI ( page . search ( "meta[property='og:image']" ) . first [ :content ] )
287- filename = url . path . split ( "/" ) . last
294+
295+ driver . get ( "https://www.imdb.com/title/#{ imdb_id } " )
296+ wait = ::Selenium ::WebDriver ::Wait . new ( timeout : 5 )
297+ element = wait . until { driver . find_element ( css : "meta[property='og:image']" ) }
298+ url = element . attribute ( "content" )
299+ filename = url . split ( "/" ) . last
288300
289301 if filename == "imdb_logo.png"
290302 print "X"
@@ -307,7 +319,7 @@ namespace :movies do
307319
308320 print "."
309321 errors . delete ( wiki_id )
310- rescue Mechanize ::ResponseCodeError , Net ::ReadTimeout , Net ::OpenTimeout => error
322+ rescue Mechanize ::ResponseCodeError , Net ::ReadTimeout , Net ::OpenTimeout , Selenium :: WebDriver :: Error :: TimeoutError => error
311323 print "F"
312324 pp [ "Get failed" , { wiki_id :, imdb_id :, error : error } ]
313325 errors << wiki_id
0 commit comments