-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathfetch.rb
61 lines (48 loc) · 1.38 KB
/
fetch.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
require 'open-uri'
require 'nokogiri'
# Raised by the fetch loop when a search page still fails with an HTTP error
# after its retry budget has been used up.
class CatchUpError < StandardError; end
# Raised by the fetch loop when a search results page yields fewer than ten
# description nodes; the loop logs the page and moves on.
class NODecsError < StandardError; end
# Scrapes GitHub repository search results and appends every repository whose
# master branch serves app/controllers/application_controller.rb to list.md
# as a Markdown table row. Progress and problems are appended to ./log.
#
# Raises CatchUpError when a search page keeps answering with an HTTP error
# after three retries. Raises Errno::ENOENT when list_head.md is missing.

# Pre-encoded query string appended to every search URL.
CATCH_OPTIONS = %q(q=language%3Aruby+stars%3A%3E500+fork%3Atrue&ref=advsearch&type=Repositories&utf8=%E2%9C%93)

# Appends one line to a file with plain Ruby I/O. Scraped text (repo names,
# descriptions) must never be interpolated into a shell command: characters
# such as `$(...)`, backticks or double quotes would be interpreted by the shell.
append_line = ->(path, line) { File.open(path, 'a') { |f| f.puts(line) } }

total = 1 # 1-based running number of the next repository to be printed
page_range = 1..100

# Start from an empty log and a list.md seeded with the table header.
File.delete('log') if File.exist?('log')
IO.copy_stream('list_head.md', 'list.md')

page_range.each do |n|
  fetch_retry_times = 3
  begin
    # URI.open (not Kernel#open) is required on Ruby 3.0+; the block form
    # closes the response handle once the document has been parsed.
    page = URI.open("https://github.com/search?l=&p=#{n}&#{CATCH_OPTIONS}") do |io|
      Nokogiri::HTML(io)
    end
    repos = page.css('.repo-list-name a')
    descriptions = page.css('.repo-list-description')
    raise NODecsError if descriptions.length < 10
  rescue OpenURI::HTTPError
    raise CatchUpError if fetch_retry_times.zero?

    fetch_retry_times -= 1
    sleep 3
    retry
  rescue NODecsError
    append_line.call('log', "Error -- page:#{n} repos:#{repos.map(&:text).join(' ')}")
    # NOTE: a skipped page moves straight on to the next one without the pause below.
    next
  end

  # At most ten results per page; `first(10)` also copes with a page that
  # lists fewer than ten repository links instead of crashing on nil.
  repos.first(10).each_with_index do |repo, num|
    # Captures from the first word character up to (not including) the last
    # whitespace character on that line of the description text.
    m = /(\w.+)\s/.match(descriptions[num].text)
    description = m ? m[1] : ""
    url = repo['href']
    name = repo.text

    puts "#{total}: #{name}"
    append_line.call('log', "#{total}: #{name}")
    total += 1

    begin
      # Existence probe only: an HTTP error means the file is absent. The
      # empty block makes open-uri close the response handle immediately.
      URI.open("https://github.com#{url}/blob/master/app/controllers/application_controller.rb") { |_io| nil }
    rescue OpenURI::HTTPError
      next
    end

    append_line.call('list.md', "| [#{name}](https://github.com#{url}) | #{description} |")
    append_line.call('log', "Matched!")
  end

  # Pause between pages to stay gentle on the remote server.
  sleep 3
end