diff --git a/CHANGES.rst b/CHANGES.rst index a7e37f11e32..29c2853e5e0 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -177,6 +177,8 @@ Bugs fixed Patch by Adam Turner. * #13328: Fix parsing of PEP 695 functions with return annotations. Patch by Bénédikt Tran. Initial work by Arash Badie-Modiri. +* #13355: Don't include escaped title content in the search index. + Patch by Will Lachance. Testing ------- diff --git a/sphinx/builders/html/__init__.py b/sphinx/builders/html/__init__.py index 5e6acdeaf9d..6b173caf503 100644 --- a/sphinx/builders/html/__init__.py +++ b/sphinx/builders/html/__init__.py @@ -679,8 +679,9 @@ def write_doc(self, docname: str, doctree: nodes.document) -> None: def write_doc_serialized(self, docname: str, doctree: nodes.document) -> None: self.imgpath = relative_uri(self.get_target_uri(docname), self.imagedir) self.post_process_images(doctree) + # get title as plain text title_node = self.env.longtitles.get(docname) - title = self.render_partial(title_node)['title'] if title_node else '' + title = title_node.astext() if title_node else '' self.index_page(docname, doctree, title) def finish(self) -> None: diff --git a/sphinx/themes/basic/static/searchtools.js b/sphinx/themes/basic/static/searchtools.js index 91f4be57fc8..e63bcfa8ec6 100644 --- a/sphinx/themes/basic/static/searchtools.js +++ b/sphinx/themes/basic/static/searchtools.js @@ -58,6 +58,15 @@ const _removeChildren = (element) => { const _escapeRegExp = (string) => string.replace(/[.*+\-?^${}()|[\]\\]/g, "\\$&"); // $& means the whole matched string +const _escapeHTML = (text) => { + return text + .replaceAll("&", "&") + .replaceAll("<", "<") + .replaceAll(">", ">") + .replaceAll('"', """) + .replaceAll("'", "'"); +} + const _displayItem = (item, searchTerms, highlightTerms) => { const docBuilder = DOCUMENTATION_OPTIONS.BUILDER; const docFileSuffix = DOCUMENTATION_OPTIONS.FILE_SUFFIX; @@ -90,10 +99,10 @@ const _displayItem = (item, searchTerms, highlightTerms) => { let linkEl = listItem.appendChild(document.createElement("a")); linkEl.href = linkUrl + anchor; linkEl.dataset.score = score; - linkEl.innerHTML = title; + linkEl.innerHTML = _escapeHTML(title); if (descr) { listItem.appendChild(document.createElement("span")).innerHTML = - " (" + descr + ")"; + _escapeHTML(" (" + descr + ")"); // highlight search terms in the description if (SPHINX_HIGHLIGHT_ENABLED) // set in sphinx_highlight.js highlightTerms.forEach((term) => _highlightText(listItem, term, "highlighted")); diff --git a/tests/js/fixtures/cpp/searchindex.js b/tests/js/fixtures/cpp/searchindex.js index 42adb88db92..f48a3ec1e0e 100644 --- a/tests/js/fixtures/cpp/searchindex.js +++ b/tests/js/fixtures/cpp/searchindex.js @@ -1 +1 @@ -Search.setIndex({"alltitles":{},"docnames":["index"],"envversion":{"sphinx":65,"sphinx.domains.c":3,"sphinx.domains.changeset":1,"sphinx.domains.citation":1,"sphinx.domains.cpp":9,"sphinx.domains.index":1,"sphinx.domains.javascript":3,"sphinx.domains.math":2,"sphinx.domains.python":4,"sphinx.domains.rst":2,"sphinx.domains.std":2},"filenames":["index.rst"],"indexentries":{"sphinx (c++ class)":[[0,"_CPPv46Sphinx",false]]},"objects":{"":[[0,0,1,"_CPPv46Sphinx","Sphinx"]]},"objnames":{"0":["cpp","class","C++ class"]},"objtypes":{"0":"cpp:class"},"terms":{"The":0,"becaus":0,"c":0,"can":0,"cardin":0,"challeng":0,"charact":0,"class":0,"descript":0,"drop":0,"engin":0,"fixtur":0,"frequent":0,"gener":0,"i":0,"index":0,"inflat":0,"mathemat":0,"occur":0,"often":0,"project":0,"punctuat":0,"queri":0,"relat":0,"sampl":0,"search":0,"size":0,"sphinx":0,"term":0,"thei":0,"thi":0,"token":0,"us":0,"web":0,"would":0},"titles":["<no title>"],"titleterms":{}}) \ No newline at end of file +Search.setIndex({"alltitles":{},"docnames":["index"],"envversion":{"sphinx":65,"sphinx.domains.c":3,"sphinx.domains.changeset":1,"sphinx.domains.citation":1,"sphinx.domains.cpp":9,"sphinx.domains.index":1,"sphinx.domains.javascript":3,"sphinx.domains.math":2,"sphinx.domains.python":4,"sphinx.domains.rst":2,"sphinx.domains.std":2},"filenames":["index.rst"],"indexentries":{"sphinx (c++ class)":[[0,"_CPPv46Sphinx",false]]},"objects":{"":[[0,0,1,"_CPPv46Sphinx","Sphinx"]]},"objnames":{"0":["cpp","class","C++ class"]},"objtypes":{"0":"cpp:class"},"terms":{"The":0,"becaus":0,"c":0,"can":0,"cardin":0,"challeng":0,"charact":0,"class":0,"descript":0,"drop":0,"engin":0,"fixtur":0,"frequent":0,"gener":0,"i":0,"index":0,"inflat":0,"mathemat":0,"occur":0,"often":0,"project":0,"punctuat":0,"queri":0,"relat":0,"sampl":0,"search":0,"size":0,"sphinx":0,"term":0,"thei":0,"thi":0,"token":0,"us":0,"web":0,"would":0},"titles":[""],"titleterms":{}}) \ No newline at end of file diff --git a/tests/js/searchtools.spec.js b/tests/js/searchtools.spec.js index 809fd19d0f4..bafbc4b42d2 100644 --- a/tests/js/searchtools.spec.js +++ b/tests/js/searchtools.spec.js @@ -34,7 +34,7 @@ describe('Basic html theme search', function() { hits = [[ "index", - "<no title>", + "", "", null, 5, diff --git a/tests/roots/test-search/escapedtitle.rst b/tests/roots/test-search/escapedtitle.rst new file mode 100644 index 00000000000..70f4c982d5b --- /dev/null +++ b/tests/roots/test-search/escapedtitle.rst @@ -0,0 +1,4 @@ +`escaped` title with < and > in it +================================== + +this document has escaped content in the title but also the characters < and > in it \ No newline at end of file diff --git a/tests/test_search.py b/tests/test_search.py index 22fa6ab7616..5fab7a9d167 100644 --- a/tests/test_search.py +++ b/tests/test_search.py @@ -155,8 +155,17 @@ def test_term_in_heading_and_section(app: SphinxTestApp) -> None: # if search term is in the title of one doc and in the text of another # both documents should be a hit in the search index as a title, # respectively text hit - assert '"textinhead":2' in searchindex - assert '"textinhead":0' in searchindex + assert '"textinhead":3' in searchindex + assert '"textinhead":1' in searchindex + + +@pytest.mark.sphinx('html', testroot='search') +def test_escaped_title(app: SphinxTestApp) -> None: + app.build(force_all=True) + searchindex = load_searchindex(app.outdir / 'searchindex.js') + print(searchindex) + assert 'escapedtitle' in searchindex['docnames'] + assert 'escaped title with < and > in it' in searchindex['titles'] @pytest.mark.sphinx('html', testroot='search') @@ -398,7 +407,7 @@ def test_search_index_gen_zh(app: SphinxTestApp) -> None: def test_nosearch(app: SphinxTestApp) -> None: app.build() index = load_searchindex(app.outdir / 'searchindex.js') - assert index['docnames'] == ['index', 'nosearch', 'tocitem'] + assert index['docnames'] == ['escapedtitle', 'index', 'nosearch', 'tocitem'] # latex is in 'nosearch.rst', and nowhere else assert 'latex' not in index['terms'] # cat is in 'index.rst' but is marked with the 'no-search' class @@ -406,7 +415,7 @@ def test_nosearch(app: SphinxTestApp) -> None: # bat is indexed from 'index.rst' and 'tocitem.rst' (document IDs 0, 2), and # not from 'nosearch.rst' (document ID 1) assert 'bat' in index['terms'] - assert index['terms']['bat'] == [0, 2] + assert index['terms']['bat'] == [1, 3] @pytest.mark.sphinx( @@ -418,7 +427,7 @@ def test_nosearch(app: SphinxTestApp) -> None: def test_parallel(app: SphinxTestApp) -> None: app.build() index = load_searchindex(app.outdir / 'searchindex.js') - assert index['docnames'] == ['index', 'nosearch', 'tocitem'] + assert index['docnames'] == ['escapedtitle', 'index', 'nosearch', 'tocitem'] @pytest.mark.sphinx('html', testroot='search')