|
| 1 | +# All rights reserved. |
| 2 | +# |
| 3 | +# Redistribution and use in source and binary forms, with or without |
| 4 | +# modification, are permitted provided that the following conditions are |
| 5 | +# met: |
| 6 | +# |
| 7 | +# * Redistributions of source code must retain the above copyright notice, |
| 8 | +# this list of conditions and the following disclaimer. |
| 9 | +# * Redistributions in binary form must reproduce the above copyright notice, |
| 10 | +# this list of conditions and the following disclaimer in the documentation |
| 11 | +# and/or other materials provided with the distribution. |
| 12 | +# * The name of the author may not be used to endorse or promote products |
| 13 | +# derived from this software without specific prior written permission. |
| 14 | +# |
| 15 | +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| 16 | +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 17 | +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| 18 | +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
| 19 | +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| 20 | +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| 21 | +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| 22 | +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| 23 | +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| 24 | +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| 25 | +# POSSIBILITY OF SUCH DAMAGE. |
| 26 | + |
| 27 | + |
| 28 | +# This module contains code used by _writer.py to track links in pages |
| 29 | +# being added to the writer until the links can be resolved. |
| 30 | + |
| 31 | +from typing import TYPE_CHECKING, List, Optional, Tuple, Union, cast |
| 32 | + |
| 33 | +from . import ArrayObject, DictionaryObject, IndirectObject, PdfObject, TextStringObject |
| 34 | + |
| 35 | +if TYPE_CHECKING: |
| 36 | + from .._page import PageObject |
| 37 | + from .._reader import PdfReader |
| 38 | + from .._writer import PdfWriter |
| 39 | + |
| 40 | + |
class NamedReferenceLink:
    """A link that targets a named destination, kept until it can be resolved.

    The destination name is looked up in the source document to find the page
    it leads to, and can later be registered in the writer receiving the link.
    """

    def __init__(self, reference: TextStringObject, source_pdf: "PdfReader") -> None:
        """Remember the destination name and the document it was read from.

        reference: TextStringObject with named reference
        source_pdf: PdfReader whose named destinations are searched for it
        """
        self._source_pdf = source_pdf
        self._reference = reference

    def find_referenced_page(self) -> Union[IndirectObject, None]:
        """Return the page of the named destination in the source document.

        None is returned when the source document yields no (or a falsy)
        entry for the name.
        """
        name = str(self._reference)
        target = self._source_pdf.named_destinations.get(name)
        if not target:
            return None
        return target.page

    def patch_reference(self, target_pdf: "PdfWriter", new_page: IndirectObject) -> None:
        """Make the destination name in target_pdf lead to new_page.

        target_pdf: PdfWriter which the new link went into
        new_page: page whose ``page_number`` the named destination is given

        A name that target_pdf already knows about is left untouched.
        """
        name = str(self._reference)
        if name in target_pdf.named_destinations:
            return
        # point named destination in new PDF to the new page
        target_pdf.add_named_destination(name, new_page.page_number)
| 58 | + |
| 59 | + |
class DirectReferenceLink:
    """A link whose destination array names its page directly, kept until that page can be swapped."""

    # Position of the page entry inside the destination array.
    _PAGE_SLOT = 0

    def __init__(self, reference: ArrayObject) -> None:
        """Keep hold of the destination array.

        reference: an ArrayObject whose first element is the Page indirect object
        """
        self._reference = reference

    def find_referenced_page(self) -> IndirectObject:
        """Return the first element of the destination array."""
        destination = self._reference
        return destination[self._PAGE_SLOT]

    def patch_reference(self, target_pdf: "PdfWriter", new_page: IndirectObject) -> None:
        """Overwrite the first element of the destination array with new_page.

        target_pdf: PdfWriter which the new link went into (not used by this
            method)
        new_page: the page the destination array should now refer to
        """
        destination = self._reference
        destination[self._PAGE_SLOT] = new_page
| 73 | + |
| 74 | + |
# Either kind of pending link: one resolved through a destination name, or one
# whose destination array refers to the page directly.
ReferenceLink = Union[NamedReferenceLink, DirectReferenceLink]
| 76 | + |
| 77 | + |
def extract_links(new_page: "PageObject", old_page: "PageObject") -> List[Tuple[ReferenceLink, ReferenceLink]]:
    """Pair up the links of two pages that are assumed to be copies of each other.

    The "/Annots" entries of both pages are converted with _build_link and
    matched by position. Only positions where both pages produced a link are
    kept, giving one list of (new link, old link) tuples.
    """
    # Convert every annotation of each page first, so pairing works purely by position.
    converted_new = []
    for annotation in new_page.get("/Annots", []):
        converted_new.append(_build_link(annotation, new_page))

    converted_old = []
    for annotation in old_page.get("/Annots", []):
        converted_old.append(_build_link(annotation, old_page))

    pairs: List[Tuple[ReferenceLink, ReferenceLink]] = []
    for fresh, original in zip(converted_new, converted_old):
        # Skip positions where either side is not a link we track.
        if not (fresh and original):
            continue
        pairs.append((fresh, original))
    return pairs
| 90 | + |
| 91 | + |
def _build_link(indirect_object: IndirectObject, page: "PageObject") -> Optional[ReferenceLink]:
    """Turn one page annotation into a pending link, if it is an internal link.

    indirect_object: an entry of the page's "/Annots" array
    page: the page the annotation belongs to; its ``pdf`` attribute is passed
        on as the source document of the link

    Returns the result of _create_link for a "/Link" annotation that carries
    either a "/GoTo" action with a "/D" destination or a "/Dest" entry, and
    None for every other annotation.
    """
    src = cast("PdfReader", page.pdf)
    link = cast(DictionaryObject, indirect_object.get_object())
    if (not isinstance(link, DictionaryObject)) or link.get("/Subtype") != "/Link":
        return None

    if "/A" in link:
        action = cast(DictionaryObject, link["/A"])
        if action.get("/S") != "/GoTo":
            return None

        # A GoTo action is supposed to carry /D, but malformed files may omit
        # it; treat such a link as nothing to track instead of raising KeyError.
        if "/D" not in action:
            return None

        return _create_link(action["/D"], src)

    if "/Dest" in link:
        return _create_link(link["/Dest"], src)

    return None  # Nothing to do here
| 109 | + |
| 110 | + |
def _create_link(reference: PdfObject, source_pdf: "PdfReader") -> Optional[ReferenceLink]:
    """Wrap a destination value in the matching pending-link class.

    reference: the destination value taken from a link annotation or its action
    source_pdf: the document the destination came from; only handed to
        NamedReferenceLink

    Returns a NamedReferenceLink for a TextStringObject, a DirectReferenceLink
    for an ArrayObject, and None for any other object.
    """
    pending: Optional[ReferenceLink] = None
    if isinstance(reference, TextStringObject):
        pending = NamedReferenceLink(reference, source_pdf)
    elif isinstance(reference, ArrayObject):
        pending = DirectReferenceLink(reference)
    return pending
0 commit comments