forked from discourse/discourse
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathurl_helper.rb
166 lines (132 loc) · 4.88 KB
/
url_helper.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
# frozen_string_literal: true
# Stateless helpers for parsing, encoding, normalizing and rewriting URLs.
# Every method is a class method; the class is never instantiated here.
class UrlHelper
  # URLs longer than this are rejected by `normalized_encode`.
  MAX_URL_LENGTH = 2_000

  # Whole-string match against Ruby's generic URI pattern. Built once here
  # rather than on every `normalized_encode` call.
  ABSOLUTE_URI_REGEXP = /\A#{URI.regexp}\z/
  private_constant :ABSOLUTE_URI_REGEXP

  # At the moment this handles invalid URLs that the browser address bar accepts,
  # where a second # is not encoded.
  #
  # Longer term we can add support of simpleidn and encode unicode domains.
  #
  # @param url [String] the URL to parse
  # @return [URI::Generic, nil] the parsed URI, or nil when a URI::Error is raised
  def self.relaxed_parse(url)
    url, fragment = url.split("#", 2)
    uri = URI.parse(url)

    if fragment&.include?("#")
      # Addressable::URI::CharacterClasses::UNRESERVED is used here because without it
      # the # in the fragment is not encoded
      fragment =
        Addressable::URI.encode_component(
          fragment,
          Addressable::URI::CharacterClasses::UNRESERVED,
        )
    end

    uri.fragment = fragment
    uri
  rescue URI::Error
    nil
  end

  # Encodes `url` with Addressable and parses the result with Ruby's URI.
  #
  # @param url [String]
  # @return [URI::Generic]
  def self.encode_and_parse(url)
    URI.parse(encode(url))
  end

  # Thin wrapper around Addressable::URI.encode.
  def self.encode(url)
    Addressable::URI.encode(url)
  end

  # Thin wrapper around Addressable::URI.unencode.
  def self.unencode(url)
    Addressable::URI.unencode(url)
  end

  # Thin wrapper around Addressable::URI.encode_component.
  def self.encode_component(url_component)
    Addressable::URI.encode_component(url_component)
  end

  # Decides whether `url` points at this site. A present URL is local when any
  # of the following holds:
  # * the store reports it as uploaded,
  # * it starts with "<base_path>/assets/", "<base_path>/plugins/" or "<base_path>/images/",
  # * it starts with the asset host (or, when none is set, the base URL).
  #
  # @param url [String, nil]
  # @return [Boolean] falsy for blank input
  def self.is_local(url)
    url.present? &&
      (
        Discourse.store.has_been_uploaded?(url) ||
          # \A (not ^) so that only the very start of the string is considered;
          # ^ would also match after an embedded newline. base_path is escaped
          # so it is always treated literally.
          %r{\A#{Regexp.escape(Discourse.base_path.to_s)}/(assets|plugins|images)/}.match?(url) ||
          # NOTE(review): plain prefix match; a host that merely begins with the
          # base URL (e.g. "<base>.other.tld") would also pass - confirm intended.
          url.start_with?(Discourse.asset_host || Discourse.base_url_no_prefix)
      )
  end

  # Turns a root-relative path ("/x", but not "//x") into an absolute URL by
  # prefixing `cdn`, or the base URL when `cdn` is nil. Anything else is
  # returned unchanged. A protocol-relative `cdn` ("//host") gets "https:" prepended.
  #
  # @param url [String, nil]
  # @param cdn [String, nil] defaults to Discourse.asset_host
  # @return [String, nil]
  def self.absolute(url, cdn = Discourse.asset_host)
    cdn = "https:#{cdn}" if cdn&.start_with?("//")
    return url unless %r{\A/[^/]}.match?(url)

    (cdn || Discourse.base_url_no_prefix) + url
  end

  # Same as `absolute`, but always prefixes with the base URL, never the CDN.
  def self.absolute_without_cdn(url)
    absolute(url, nil)
  end

  # Drops a leading "http:" (case-insensitive). Other schemes, including
  # "https:", are left in place.
  def self.schemaless(url)
    url.sub(/\Ahttp:/i, "")
  end

  # Maps an upload URL through Upload.secure_uploads_url_from_upload_url and
  # makes the result absolute against the base URL (no CDN).
  def self.secure_proxy_without_cdn(url)
    absolute(Upload.secure_uploads_url_from_upload_url(url), nil)
  end

  # Returns a normalized, encoded string form of `uri`.
  #
  # @param uri [#to_s]
  # @return [String]
  # @raise [ArgumentError] when the string form exceeds MAX_URL_LENGTH
  def self.normalized_encode(uri)
    url = uri.to_s

    if url.length > MAX_URL_LENGTH
      raise ArgumentError, "URL starting with #{url[0..100]} is too long"
    end

    # Ideally we will jump straight to `Addressable::URI.normalized_encode`. However,
    # that implementation has some edge-case issues like https://github.com/sporkmonger/addressable/issues/472.
    # To temporarily work around those issues for the majority of cases, we try parsing with `::URI`.
    # If that fails (e.g. due to non-ascii characters) then we will fall back to addressable.
    # Hopefully we can simplify this back to `Addressable::URI.normalized_encode` in the future.

    # edge case where we expect mailto:test%40test.com to normalize to mailto:test@test.com
    return normalize_with_addressable(url) if url.start_with?("mailto:")

    # If it doesn't pass the regexp, it's definitely not gonna parse with URI.parse. Skip
    # to addressable
    return normalize_with_addressable(url) unless ABSOLUTE_URI_REGEXP.match?(url)

    begin
      normalize_with_ruby_uri(url)
    rescue URI::Error
      normalize_with_addressable(url)
    end
  end

  # Resolves the path of `url` to a Rails route via
  # Rails.application.routes.recognize_path.
  #
  # @param url [String]
  # @return [Object, nil] whatever recognize_path returns, or nil when the URL
  #   cannot be encoded or parsed
  def self.rails_route_from_url(url)
    path = URI.parse(encode(url)).path
    Rails.application.routes.recognize_path(path)
  rescue Addressable::URI::InvalidURIError, URI::Error
    # URI.parse raises URI::InvalidURIError, so rescue the common parent
    # URI::Error (which also covers URI::InvalidComponentError).
    nil
  end

  # Rewrites a local URL into the form that should be served.
  #
  # * Non-local URLs are returned untouched.
  # * With SiteSetting.secure_uploads on and `secure` true, the URL goes through
  #   `secure_proxy_without_cdn`.
  # * Otherwise it is made absolute without the CDN and then passed through the
  #   store's CDN, unless it is an attachment (not supported media) and
  #   login_required or prevent_anons_from_downloading_files is set.
  #
  # The returned URL has any leading "http:" removed.
  #
  # @param url [String]
  # @param secure [Boolean]
  # @param local [Boolean, nil] skips the `is_local` check when not nil
  # @return [String]
  def self.cook_url(url, secure: false, local: nil)
    is_secure = SiteSetting.secure_uploads && secure
    local = is_local(url) if local.nil?
    return url unless local

    url = is_secure ? secure_proxy_without_cdn(url) : absolute_without_cdn(url)

    # we always want secure uploads to come from
    # Discourse.base_url_no_prefix/secure-uploads
    # to avoid asset_host mixups
    return schemaless(url) if is_secure

    # PERF: avoid parsing url except for extreme conditions
    # this is a hot path used on home page
    filename = url
    if url.include?("?")
      uri = URI.parse(url)
      filename = File.basename(uri.path)
    end

    # this technically requires a filename, but will work with a URL as long as it ends with the
    # extension and has no query params
    is_attachment = !FileHelper.is_supported_media?(filename)
    no_cdn = SiteSetting.login_required || SiteSetting.prevent_anons_from_downloading_files

    if !(is_attachment && no_cdn)
      url = Discourse.store.cdn_url(url)
      url = local_cdn_url(url) if Discourse.store.external?
    end

    schemaless(url)
  rescue URI::Error
    # Parsing the query-string URL failed; return the URL as rewritten so far.
    url
  end

  # Points `url` at the asset host. Returns `url` unchanged when no asset host
  # is configured. Upload paths get the asset host prepended; otherwise the
  # first occurrence of the base URL is replaced with the asset host.
  def self.local_cdn_url(url)
    return url if Discourse.asset_host.blank?

    if url.start_with?("/#{Discourse.store.upload_path}/")
      "#{Discourse.asset_host}#{url}"
    else
      url.sub(Discourse.base_url_no_prefix, Discourse.asset_host)
    end
  end

  # NOTE(review): a bare `private` does not affect `def self.` methods, so the two
  # helpers below are still publicly callable. Visibility is left unchanged here to
  # avoid breaking any existing caller; switch to `private_class_method` once it
  # is confirmed nothing outside this class calls them.
  private

  # Normalizes via Addressable and converts a non-ASCII host to its ASCII
  # form with Addressable::IDNA.to_ascii.
  def self.normalize_with_addressable(url)
    u = Addressable::URI.normalized_encode(url, Addressable::URI)
    u.host = ::Addressable::IDNA.to_ascii(u.host) if u.host && !u.host.ascii_only?
    u.to_s
  end

  # Parses with Ruby's URI and lower-cases the scheme and host.
  # Raises URI::Error subclasses when `url` cannot be parsed.
  def self.normalize_with_ruby_uri(url)
    u = URI.parse(url)
    u.scheme = u.scheme.downcase if u.scheme && u.scheme != u.scheme.downcase
    u.host = u.host.downcase if u.host && u.host != u.host.downcase
    u.to_s
  end
end