Skip to content

Commit 6d3aa57

Browse files
authored
Merge pull request #5108 from quarto-dev/bugfix/5089-2
Improved fix for 5089
2 parents a949164 + 913292c commit 6d3aa57

File tree

3 files changed

+45
-5
lines changed

3 files changed

+45
-5
lines changed

src/resources/filters/common/url.lua

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,4 +12,13 @@ function urldecode(url)
1212
return url
1313
end
1414

15-
15+
-- Decode the url until it is fully decoded: urldecode is applied
-- repeatedly (not in a single pass) until a pass returns its input
-- unchanged, and that stable value is returned.
--
-- If urldecode returns nil for a nil input, nil is returned as well,
-- so callers can keep checking the result against nil.
function fullyUrlDecode(url)
  -- keep the working values local; assigning to an undeclared name here
  -- would create (and overwrite) a global on every call
  local current = url
  while true do
    local decoded = urldecode(current)
    if decoded == current then
      -- another pass changed nothing, so decoding is complete
      return decoded
    end
    current = decoded
  end
end

src/resources/filters/quarto-post/pdf-images.lua

Lines changed: 35 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,19 @@ local function convert_svg(path)
2020
end
2121
end
2222

23+
-- Lookup table from image MIME type to the matching file extension
-- (stored without the leading dot)
local mimeImgExts = {
  ["image/avif"] = "avif",
  ["image/bmp"] = "bmp",
  ["image/gif"] = "gif",
  ["image/jpeg"] = "jpg",
  ["image/png"] = "png",
  ["image/svg+xml"] = "svg",
  ["image/tiff"] = "tif",
  ["image/vnd.microsoft.icon"] = "ico",
  ["image/webp"] = "webp"
}
34+
35+
2336
-- A cache of image urls that we've resolved into the mediabag
2437
-- keyed by {url: mediabagpath}
2538
local resolvedUrls = {}
@@ -109,13 +122,31 @@ function pdfImages()
109122
else
110123
local relativePath = image.src:match('https?://[%w%.%:]+/(.+)')
111124
if relativePath then
112-
125+
113126
local imgMt, imgContents = pandoc.mediabag.fetch(image.src)
114-
local decodedSrc = urldecode(image.src)
127+
local decodedSrc = fullyUrlDecode(image.src)
115128
if decodedSrc == nil then
116129
decodedSrc = "unknown"
117130
end
118-
local filename = windows_safe_filename(tex_safe_filename(pandoc.path.filename(decodedSrc)))
131+
132+
-- Derive the filename to use for a fetched image from its mime type.
-- When the mime type has an entry in mimeImgExts, the extension of
-- `filename` is swapped for that entry, which lets pandoc recognize the
-- image type even if the source URI is hard to parse. Any other mime
-- type leaves `filename` exactly as it was passed in.
local function filenameFromMimeType(filename, imgMt)
  local extension = mimeImgExts[imgMt]
  if not extension then
    -- no known extension for this mime type
    return filename
  end
  -- only the stem is needed; the existing extension is discarded
  local base = pandoc.path.split_extension(filename)
  return base .. '.' .. extension
end
144+
145+
-- compute the filename for this file
146+
local basefilename = pandoc.path.filename(decodedSrc)
147+
local safefilename = windows_safe_filename(tex_safe_filename(basefilename))
148+
local filename = filenameFromMimeType(safefilename, imgMt)
149+
119150
if imgMt ~= nil then
120151
local existingMt = pandoc.mediabag.lookup(filename)
121152
local counter = 1
@@ -139,3 +170,4 @@ function pdfImages()
139170
}
140171
end
141172

173+

tests/docs/smoke-all/2023/04/04/5089.qmd

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ format: pdf
77

88
![](https://substackcdn.com/image/fetch/f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fbucketeer-e05bbc84-baa3-437e-9518-adb32be77984.s3.amazonaws.com%2Fpublic%2Fimages%2F9b7345d9-5f62-46dc-8062-d704c2c014a5_289x174.jpeg)
99

10-
1110
## Simple Url
1211

1312
![](https://quarto.org/quarto.png)

0 commit comments

Comments
 (0)