Skip to content

Commit 0dd0ad8

Browse files
authored
Merge pull request #2201 from jessevanherk/godot_4.2
Update Godot docs to include v4.2 and fix older version scraping
2 parents b99e565 + 7026706 commit 0dd0ad8

File tree

6 files changed

+126
-28
lines changed

6 files changed

+126
-28
lines changed

lib/docs/filters/godot/clean_html.rb

+20-3
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,13 @@ def call
66
at_css('h1').content = 'Godot Engine'
77
at_css('.admonition.note').remove
88
end
9+
css('.admonition-grid').remove
910

10-
css('ul[id].simple li:first-child:last-child').each do |node|
11+
css('p[id]').each do |node|
1112
heading = Nokogiri::XML::Node.new 'h3', doc.document
12-
heading['id'] = node.parent['id']
13+
heading['id'] = node['id']
1314
heading.children = node.children
14-
node.parent.before(heading).remove
15+
node.before(heading).remove
1516
end
1617

1718
css('h3 strong').each do |node|
@@ -20,6 +21,22 @@ def call
2021

2122
css('a.reference').remove_attr('class')
2223

24+
# flatten gdscript+C# example blocks and add language name.
25+
css('div[role="tabpanel"]').each do |node|
26+
language_label = Nokogiri::XML::Node.new 'strong', doc.document
27+
language_name = 'GDScript' if node.at_css('div.highlight-gdscript')
28+
language_name = 'C#' if node.at_css('div.highlight-csharp')
29+
language_label.content = language_name.to_s
30+
31+
node.before(language_label)
32+
node.before(node.children).remove
33+
end
34+
35+
css('div.sphinx-tabs [role="tablist"]').remove
36+
37+
# remove the remotely hosted "percent-translated" badge
38+
css('a[href^="https://hosted.weblate"]').remove if root_page?
39+
2340
doc
2441
end
2542
end

lib/docs/filters/godot/clean_html_v2.rb

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ class CleanHtmlV2Filter < Filter
44
def call
55
if root_page?
66
at_css('h1').content = 'Godot Engine'
7-
at_css('.admonition.tip').remove
7+
at_css('.admonition.caution').remove
88
end
99

1010
css('ul[id].simple li:first-child:last-child').each do |node|
lib/docs/filters/godot/clean_html_v3.rb

+27
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
module Docs
  class Godot
    # HTML clean-up filter referenced by the scraper as 'godot/clean_html_v3'.
    #
    # It retitles the root page, turns single-item id-carrying lists into
    # <h3> headings, unwraps <strong> inside those headings and strips the
    # `class` attribute from reference links.
    class CleanHtmlV3Filter < Filter
      # Rewrites the document in place.
      #
      # @return the filter's `doc`, after modification
      def call
        if root_page?
          # Upstream markup changes between releases, so either element may be
          # missing; a missing node must not abort the whole scrape.
          title = at_css('h1')
          title.content = 'Godot Engine' if title

          caution = at_css('.admonition.caution')
          caution.remove if caution
        end

        # Matches an <li> that is the only element child of its list, inside a
        # `ul.simple` that has an id. The list is replaced by an <h3> that
        # keeps the id, so existing anchors still resolve.
        # NOTE(review): presumably this is how member anchors are rendered
        # upstream -- confirm against a scraped class page.
        css('ul[id].simple li:first-child:last-child').each do |item|
          list = item.parent

          heading = Nokogiri::XML::Node.new 'h3', doc.document
          heading['id'] = list['id']
          heading.children = item.children

          # Insert the heading before the list, then drop the emptied list.
          list.before(heading)
          list.remove
        end

        # Unwrap <strong> inside headings: keep its children, drop the tag.
        css('h3 strong').each do |strong|
          strong.before(strong.children)
          strong.remove
        end

        css('a.reference').remove_attr('class')

        doc
      end
    end
  end
end

lib/docs/filters/godot/entries.rb

+4-2
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ def get_type
1111
if slug.start_with?('getting_started')
1212
# Getting started sections are different even between different minor
1313
# versions from v3 so we're programmatically generating them instead.
14-
"Getting started: " + slug.split('/')[1].tr_s('_', ' ').capitalize
14+
'Getting started: ' + slug.split('/')[1].tr_s('_', ' ').capitalize
1515
else
1616
name
1717
end
@@ -20,9 +20,10 @@ def get_type
2020
def additional_entries
2121
return [] unless slug.start_with?('classes')
2222

23-
css('.simple[id]').each_with_object [] do |node, entries|
23+
css('p[id]').each_with_object [] do |node, entries|
2424
name = node.at_css('strong').content
2525
next if name == self.name
26+
2627
name.prepend "#{self.name}."
2728
name << '()'
2829
entries << [name, node['id']] unless entries.any? { |entry| entry[0] == name }
@@ -32,6 +33,7 @@ def additional_entries
3233
def include_default_entry?
3334
return false if subpath.start_with?('getting_started') && subpath.end_with?('index.html')
3435
return false if subpath == 'classes/index.html'
36+
3537
true
3638
end
3739
end

lib/docs/filters/godot/entries_v3.rb

+39
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
module Docs
  class Godot
    # Entries filter referenced by the scraper as 'godot/entries_v3'.
    #
    # It derives the page name and type, and for class pages emits one extra
    # index entry per `.simple[id]` element.
    class EntriesV3Filter < Docs::EntriesFilter
      # @return [String] text of the page's <h1> with the pilcrow removed
      def get_name
        title = at_css('h1').content
        title.remove! "\u{00B6}" # Remove the pilcrow
        title
      end

      # @return [String] "Getting started: <Section>" for getting-started
      #   pages, otherwise the page name
      def get_type
        if slug.start_with?('getting_started')
          # Getting started sections are different even between different minor
          # versions from v3 so we're programmatically generating them instead.
          'Getting started: ' + slug.split('/')[1].tr_s('_', ' ').capitalize
        else
          name
        end
      end

      # Builds extra entries for class pages (slug starting with 'classes').
      #
      # Each `.simple[id]` element that contains a <strong> yields an entry
      # named "<page name>.<strong text>()" pointing at the element's id.
      # Elements whose <strong> text equals the page name are skipped, and
      # only the first entry for a given name is kept.
      #
      # @return [Array<Array(String, String)>] [name, id] pairs
      def additional_entries
        return [] unless slug.start_with?('classes')

        css('.simple[id]').each_with_object [] do |node, entries|
          label = node.at_css('strong')
          # A matching element without a <strong> has no usable name; skip it
          # rather than raise NoMethodError on nil.
          next unless label

          member = label.content
          next if member == name

          entry_name = "#{name}.#{member}()"
          entries << [entry_name, node['id']] unless entries.any? { |entry| entry[0] == entry_name }
        end
      end

      # Suppresses the default entry for getting-started index pages and for
      # the classes index page.
      #
      # @return [Boolean]
      def include_default_entry?
        return false if subpath.start_with?('getting_started') && subpath.end_with?('index.html')
        return false if subpath == 'classes/index.html'

        true
      end
    end
  end
end

lib/docs/scrapers/godot.rb

+35-22
Original file line numberDiff line numberDiff line change
@@ -5,59 +5,72 @@ class Godot < UrlScraper
55
home: 'https://godotengine.org/',
66
code: 'https://github.com/godotengine/godot'
77
}
8+
# godot docs since 3.5 don't link everything from the index.
9+
self.initial_paths = %w[
10+
getting_started/introduction/index.html
11+
getting_started/step_by_step/index.html
12+
classes/index.html
13+
]
814

9-
options[:container] = '.document .section'
10-
15+
options[:container] = '.document > [itemprop="articleBody"]'
1116
options[:download_images] = false
12-
options[:only_patterns] = [/\Agetting_started\//, /\Aclasses\//]
17+
options[:only_patterns] = [%r{\Agetting_started/}, %r{\Aclasses/}]
18+
19+
options[:attribution] = <<-HTML
20+
&copy; 2014&ndash;present Juan Linietsky, Ariel Manzur and the Godot community<br>
21+
Licensed under the Creative Commons Attribution Unported License v3.0.
22+
HTML
1323

14-
options[:attribution] = ->(filter) do
15-
if filter.subpath.start_with?('classes')
16-
<<-HTML
17-
&copy; 2014&ndash;2022 Juan Linietsky, Ariel Manzur, Godot Engine contributors<br>
18-
Licensed under the MIT License.
19-
HTML
20-
else
21-
<<-HTML
22-
&copy; 2014&ndash;2022 Juan Linietsky, Ariel Manzur and the Godot community<br>
23-
Licensed under the Creative Commons Attribution Unported License v3.0.
24-
HTML
25-
end
24+
version '4.2' do
25+
self.release = '4.2.2'
26+
self.base_url = "https://docs.godotengine.org/en/#{self.version}/"
27+
html_filters.push 'godot/entries', 'godot/clean_html', 'sphinx/clean_html'
2628
end
2729

2830
version '3.5' do
29-
self.release = '3.5.1'
31+
self.release = '3.5.3'
3032
self.base_url = "https://docs.godotengine.org/en/#{self.version}/"
31-
options[:container] = '.document > [itemprop="articleBody"] > section[id]'
33+
34+
# godot 3.5 upstream docs are formatted like godot4
3235
html_filters.push 'godot/entries', 'godot/clean_html', 'sphinx/clean_html'
3336
end
3437

3538
version '3.4' do
3639
self.release = '3.4.5'
3740
self.base_url = "https://docs.godotengine.org/en/#{self.version}/"
41+
3842
options[:container] = '.document > [itemprop="articleBody"] > section[id]'
39-
html_filters.push 'godot/entries', 'godot/clean_html', 'sphinx/clean_html'
43+
html_filters.push 'godot/entries_v3', 'godot/clean_html_v3', 'sphinx/clean_html'
4044
end
4145

4246
version '3.3' do
4347
self.release = '3.3.0'
4448
self.base_url = "https://docs.godotengine.org/en/#{self.version}/"
45-
html_filters.push 'godot/entries', 'godot/clean_html', 'sphinx/clean_html'
49+
self.initial_paths = %w[/index.html]
50+
51+
options[:only_patterns] = [%r{\Aclasses/}]
52+
options[:container] = '.document .section'
53+
html_filters.push 'godot/entries_v3', 'godot/clean_html_v3', 'sphinx/clean_html'
4654
end
4755

4856
version '3.2' do
4957
self.release = '3.2.3'
5058
self.base_url = "https://docs.godotengine.org/en/#{self.version}/"
51-
html_filters.push 'godot/entries', 'godot/clean_html', 'sphinx/clean_html'
59+
self.initial_paths = %w[/index.html]
60+
61+
options[:only_patterns] = [%r{\Aclasses/}]
62+
options[:container] = '.document .section'
63+
html_filters.push 'godot/entries_v3', 'godot/clean_html_v3', 'sphinx/clean_html'
5264
end
5365

5466
version '2.1' do
5567
self.release = '2.1.6'
5668
self.base_url = "https://docs.godotengine.org/en/#{self.version}/"
69+
self.initial_paths = %w[/index.html]
5770

5871
options[:skip] = %w(classes/class_@global\ scope.html)
59-
options[:only_patterns] = [/\Alearning\//, /\Aclasses\//]
60-
72+
options[:only_patterns] = [%r{\Alearning/}, %r{\Aclasses/}]
73+
options[:container] = '.document .section'
6174
html_filters.push 'godot/entries_v2', 'godot/clean_html_v2', 'sphinx/clean_html'
6275
end
6376

0 commit comments

Comments
 (0)