Skip to content

Commit

Permalink
Merge pull request #2201 from jessevanherk/godot_4.2
Browse files Browse the repository at this point in the history
Update Godot docs to include v4.2 and fix older version scraping
  • Loading branch information
simon04 authored Jul 28, 2024
2 parents b99e565 + 7026706 commit 0dd0ad8
Show file tree
Hide file tree
Showing 6 changed files with 126 additions and 28 deletions.
23 changes: 20 additions & 3 deletions lib/docs/filters/godot/clean_html.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,13 @@ def call
at_css('h1').content = 'Godot Engine'
at_css('.admonition.note').remove
end
css('.admonition-grid').remove

css('ul[id].simple li:first-child:last-child').each do |node|
css('p[id]').each do |node|
heading = Nokogiri::XML::Node.new 'h3', doc.document
heading['id'] = node.parent['id']
heading['id'] = node['id']
heading.children = node.children
node.parent.before(heading).remove
node.before(heading).remove
end

css('h3 strong').each do |node|
Expand All @@ -20,6 +21,22 @@ def call

css('a.reference').remove_attr('class')

# flatten gdscript+C# example blocks and add language name.
css('div[role="tabpanel"]').each do |node|
language_label = Nokogiri::XML::Node.new 'strong', doc.document
language_name = 'GDScript' if node.at_css('div.highlight-gdscript')
language_name = 'C#' if node.at_css('div.highlight-csharp')
language_label.content = language_name.to_s

node.before(language_label)
node.before(node.children).remove
end

css('div.sphinx-tabs [role="tablist"]').remove

# remove the remotely hosted "percent-translated" badge
css('a[href^="https://hosted.weblate"]').remove if root_page?

doc
end
end
Expand Down
2 changes: 1 addition & 1 deletion lib/docs/filters/godot/clean_html_v2.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ class CleanHtmlV2Filter < Filter
def call
if root_page?
at_css('h1').content = 'Godot Engine'
at_css('.admonition.tip').remove
at_css('.admonition.caution').remove
end

css('ul[id].simple li:first-child:last-child').each do |node|
Expand Down
27 changes: 27 additions & 0 deletions lib/docs/filters/godot/clean_html_v3.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
module Docs
  class Godot
    # HTML clean-up filter for Godot documentation pages.
    #
    # Rewrites the parsed page in place and hands the document back from #call.
    class CleanHtmlV3Filter < Filter
      # Runs every clean-up step against the current page.
      #
      # @return the filter's `doc`, after modification
      def call
        if root_page?
          # Give the landing page a fixed title and drop its caution admonition.
          title = at_css('h1')
          title.content = 'Godot Engine'

          caution = at_css('.admonition.caution')
          caution.remove
        end

        # A `ul[id].simple` holding exactly one `li` is replaced by an <h3>
        # that carries the list's id and takes over the item's child nodes.
        css('ul[id].simple li:first-child:last-child').each do |item|
          list = item.parent

          heading = Nokogiri::XML::Node.new('h3', doc.document)
          heading['id'] = list['id']
          heading.children = item.children

          list.before(heading)
          list.remove
        end

        # Unwrap <strong> inside <h3>: keep its children, drop the tag itself.
        css('h3 strong').each do |strong|
          strong.before(strong.children)
          strong.remove
        end

        # Strip the class attribute from `a.reference` links.
        css('a.reference').remove_attr('class')

        doc
      end
    end
  end
end
6 changes: 4 additions & 2 deletions lib/docs/filters/godot/entries.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def get_type
if slug.start_with?('getting_started')
# Getting started sections are different even between different minor
# versions from v3 so we're programmatically generating them instead.
"Getting started: " + slug.split('/')[1].tr_s('_', ' ').capitalize
'Getting started: ' + slug.split('/')[1].tr_s('_', ' ').capitalize
else
name
end
Expand All @@ -20,9 +20,10 @@ def get_type
def additional_entries
return [] unless slug.start_with?('classes')

css('.simple[id]').each_with_object [] do |node, entries|
css('p[id]').each_with_object [] do |node, entries|
name = node.at_css('strong').content
next if name == self.name

name.prepend "#{self.name}."
name << '()'
entries << [name, node['id']] unless entries.any? { |entry| entry[0] == name }
Expand All @@ -32,6 +33,7 @@ def additional_entries
def include_default_entry?
return false if subpath.start_with?('getting_started') && subpath.end_with?('index.html')
return false if subpath == 'classes/index.html'

true
end
end
Expand Down
39 changes: 39 additions & 0 deletions lib/docs/filters/godot/entries_v3.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
module Docs
  class Godot
    # Entries filter for Godot documentation pages: derives each page's name
    # and type, plus extra per-member entries for pages under `classes`.
    class EntriesV3Filter < Docs::EntriesFilter
      # @return [String] the text of the page's <h1>, minus the pilcrow
      def get_name
        at_css('h1').content.tap do |title|
          title.remove! "\u{00B6}" # Remove the pilcrow
        end
      end

      # @return [String] the type under which the page is grouped
      def get_type
        return name unless slug.start_with?('getting_started')

        # Getting started sections are different even between different minor
        # versions from v3 so we're programmatically generating them instead.
        section = slug.split('/')[1].tr_s('_', ' ').capitalize
        "Getting started: #{section}"
      end

      # Builds one `[name, id]` pair per `.simple[id]` element on class pages.
      # The name is "<page name>.<strong text>()"; an element whose <strong>
      # text equals the page name is skipped, and only the first occurrence of
      # each resulting name is kept.
      #
      # @return [Array<Array>] list of `[name, id]` pairs (empty off class pages)
      def additional_entries
        return [] unless slug.start_with?('classes')

        members = []

        css('.simple[id]').each do |list|
          member = list.at_css('strong').content
          next if member == name

          qualified = "#{name}.#{member}()"
          next if members.any? { |existing, _id| existing == qualified }

          members << [qualified, list['id']]
        end

        members
      end

      # @return [Boolean] false for `getting_started` index pages and for
      #   `classes/index.html`, true for everything else
      def include_default_entry?
        getting_started_index =
          subpath.start_with?('getting_started') && subpath.end_with?('index.html')

        !(getting_started_index || subpath == 'classes/index.html')
      end
    end
  end
end
57 changes: 35 additions & 22 deletions lib/docs/scrapers/godot.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,59 +5,72 @@ class Godot < UrlScraper
home: 'https://godotengine.org/',
code: 'https://github.com/godotengine/godot'
}
# godot docs since 3.5 don't link everything from the index.
self.initial_paths = %w[
getting_started/introduction/index.html
getting_started/step_by_step/index.html
classes/index.html
]

options[:container] = '.document .section'

options[:container] = '.document > [itemprop="articleBody"]'
options[:download_images] = false
options[:only_patterns] = [/\Agetting_started\//, /\Aclasses\//]
options[:only_patterns] = [%r{\Agetting_started/}, %r{\Aclasses/}]

options[:attribution] = <<-HTML
&copy; 2014&ndash;present Juan Linietsky, Ariel Manzur and the Godot community<br>
Licensed under the Creative Commons Attribution Unported License v3.0.
HTML

options[:attribution] = ->(filter) do
if filter.subpath.start_with?('classes')
<<-HTML
&copy; 2014&ndash;2022 Juan Linietsky, Ariel Manzur, Godot Engine contributors<br>
Licensed under the MIT License.
HTML
else
<<-HTML
&copy; 2014&ndash;2022 Juan Linietsky, Ariel Manzur and the Godot community<br>
Licensed under the Creative Commons Attribution Unported License v3.0.
HTML
end
version '4.2' do
self.release = '4.2.2'
self.base_url = "https://docs.godotengine.org/en/#{self.version}/"
html_filters.push 'godot/entries', 'godot/clean_html', 'sphinx/clean_html'
end

version '3.5' do
self.release = '3.5.1'
self.release = '3.5.3'
self.base_url = "https://docs.godotengine.org/en/#{self.version}/"
options[:container] = '.document > [itemprop="articleBody"] > section[id]'

# godot 3.5 upstream docs are formatted like godot4
html_filters.push 'godot/entries', 'godot/clean_html', 'sphinx/clean_html'
end

version '3.4' do
self.release = '3.4.5'
self.base_url = "https://docs.godotengine.org/en/#{self.version}/"

options[:container] = '.document > [itemprop="articleBody"] > section[id]'
html_filters.push 'godot/entries', 'godot/clean_html', 'sphinx/clean_html'
html_filters.push 'godot/entries_v3', 'godot/clean_html_v3', 'sphinx/clean_html'
end

version '3.3' do
self.release = '3.3.0'
self.base_url = "https://docs.godotengine.org/en/#{self.version}/"
html_filters.push 'godot/entries', 'godot/clean_html', 'sphinx/clean_html'
self.initial_paths = %w[/index.html]

options[:only_patterns] = [%r{\Aclasses/}]
options[:container] = '.document .section'
html_filters.push 'godot/entries_v3', 'godot/clean_html_v3', 'sphinx/clean_html'
end

version '3.2' do
self.release = '3.2.3'
self.base_url = "https://docs.godotengine.org/en/#{self.version}/"
html_filters.push 'godot/entries', 'godot/clean_html', 'sphinx/clean_html'
self.initial_paths = %w[/index.html]

options[:only_patterns] = [%r{\Aclasses/}]
options[:container] = '.document .section'
html_filters.push 'godot/entries_v3', 'godot/clean_html_v3', 'sphinx/clean_html'
end

version '2.1' do
self.release = '2.1.6'
self.base_url = "https://docs.godotengine.org/en/#{self.version}/"
self.initial_paths = %w[/index.html]

options[:skip] = %w(classes/class_@global\ scope.html)
options[:only_patterns] = [/\Alearning\//, /\Aclasses\//]

options[:only_patterns] = [%r{\Alearning/}, %r{\Aclasses/}]
options[:container] = '.document .section'
html_filters.push 'godot/entries_v2', 'godot/clean_html_v2', 'sphinx/clean_html'
end

Expand Down

0 comments on commit 0dd0ad8

Please sign in to comment.