Fixes issue #173

JorjMcKie · JorjMcKie · commit fa4efca7767d · 2018-05-27T10:32:58.000-04:00
also massaged some demo scripts
diff --git a/README.md b/README.md
@@ -1,4 +1,4 @@
-# PyMuPDF 1.13.5 [![Build Status](https://travis-ci.org/rk700/PyMuPDF.svg?branch=master)](https://travis-ci.org/rk700/PyMuPDF)
+# PyMuPDF 1.13.6 [![Build Status](https://travis-ci.org/rk700/PyMuPDF.svg?branch=master)](https://travis-ci.org/rk700/PyMuPDF)
 
 ![logo](https://github.com/rk700/PyMuPDF/blob/master/demo/pymupdf.jpg)
 
@@ -10,7 +10,7 @@ Release date: May 23, 2018
 
 # Introduction
 
-This is **version 1.13.5 of PyMuPDF (formerly python-fitz)**, a Python binding with support for [MuPDF 1.13.0](http://mupdf.com/) - "a lightweight PDF and XPS viewer".
+This is **version 1.13.6 of PyMuPDF (formerly python-fitz)**, a Python binding with support for [MuPDF 1.13.0](http://mupdf.com/) - "a lightweight PDF and XPS viewer".
 
 MuPDF can access files in PDF, XPS, OpenXPS, CBZ, EPUB and FB2 (e-books) formats, and it is known for its top performance and high rendering quality.
 
diff --git a/demo/curly-polygon.py b/demo/curly-polygon.py
@@ -49,6 +49,7 @@
     img.drawSquiggle(points[i], points[i+1], breadth = breadth)
 
 img.finish(color = (0,0,1), fill = (1,1,0), closePath = False)
+
 # adjust visible page to dimensions of the drawing
 page.setCropBox(img.rect)
 img.commit()
diff --git a/demo/demo-lowlevel.py b/demo/demo-lowlevel.py
@@ -44,17 +44,10 @@ def olTraversal(root):
     print('Outline of the document')
     olTraversal(ol)
 
-# we can also save a table of contents as a XML or TXT file
-    ol.saveXML(sys.argv[1]+'.xml')
-    ol.saveText(sys.argv[1]+'.txt')
-else:
-    print('No outline available')
-
 # get the page number, which should start from 0
 pn = int(sys.argv[2])-1
 if pn > doc.pageCount:
-    print '%s has %d pages only' % (sys.argv[1], doc.pageCount)
-    exit(1)
+    raise SystemExit('%s has %d pages only' % (sys.argv[1], doc.pageCount))
 
 # get the page
 page = doc.loadPage(pn)
diff --git a/demo/demo.py b/demo/demo.py
@@ -16,7 +16,7 @@
 Remarks
 --------
 This demo version uses the normal user interface of PyMuPDF. If you are
-interested in more low level information, have a look at "demo_lowlevel.py" in
+interested in more low level information, have a look at "demo-lowlevel.py" in
 the same directory.
 
 """
@@ -45,31 +45,31 @@
     if doc.metadata[key]:
         print(' %s: %s' % (key.title(), doc.metadata[key]))
 print("")
+
 # here we print out the outline of the document(if any)
-# first, we define a function for traversal
 toc = doc.getToC()
 if len(toc) == 0:
-    print('No outline available')
+    print('No Table of Contents available')
 else:
     print("Table of Contents:")
     print("------------------")
     for t in toc:
         print("  " * (t[0] - 1), t[0], t[1], "page", t[2])
 
 print("")
+
 # get the page number, which should start from 0
 pn = int(sys.argv[2])-1
 if pn > doc.pageCount:
-    print('%s has %d pages only' % (sys.argv[1], doc.pageCount))
-    raise SystemExit
+    raise SystemExit('%s has %d pages only' % (sys.argv[1], doc.pageCount))
 
 # get the page
 page = doc[pn]
 
 # we can also get all the links in the current page
 links = page.getLinks()
 if len(links) == 0:
-    print("Page has no links")
+    print("No links on page", (pn + 1))
 else:
     print("Links on page %i:" % (pn + 1))
     print("------------------")
@@ -95,6 +95,7 @@
 # now we are ready for search, with max hit count limited to 16
 # the return result is a list of hit box rectangles
 res = page.searchFor(sys.argv[6], hit_max = 16)
+print("search text '%s' found %i on the page" % (sys.argv[6], len(res)))
 for r in res:
     # we invert the pixmap at the hit irect to highlight the search result
     pm.invertIRect(r.round())
diff --git a/demo/draw-sines.py b/demo/draw-sines.py
@@ -171,6 +171,7 @@ def rot_points(pnts, pb, alfa):
     img.insertTextbox(r1, "sine", color = red, fontsize = 8, morph = morph)
     r2 = fitz.Rect(rect.x0 + 15, rect.y1 - 10, rect.br)
     img.insertTextbox(r2, "cosine", color = blue, fontsize = 8, morph = morph)
+    
     img.commit()                  # commit with overlay = True
     
     doc.save("draw-sines.pdf")
diff --git a/demo/fitz-logo.py b/demo/fitz-logo.py
@@ -21,8 +21,8 @@
 (2) SVG-based logos are not always shown correctly. Use a different
     PDF converter like svglib if that occurs.
 
-(3) Logos / watermarks are transparent for non-image document types.
-    If you need transparency for images, you must convert it to a PDF
+(3) Logos / watermarks are transparent for all document types except images.
+    If you need transparency for an image, you must convert it to a PDF
     "manually" like so:
 
     pix = fitz.Pixmap(logo_fn)
diff --git a/demo/list-fields.py b/demo/list-fields.py
@@ -25,7 +25,7 @@
         a = a.next
 
 """
-Above script may produce the following output:
+Example output of above script:
 
 Showing the form fields of page 0
 --------------------------------------------------------------------------------
diff --git a/demo/splitter.py b/demo/splitter.py
@@ -3,7 +3,7 @@
 PyMuPDF Example Script:
 ------------------------
 
-Split a given PDF into separate files with one page each.
+Split a given PDF into separate files of one page each.
 For "input.pdf" the generated files are named "input-%i.pdf".
 
 PyMuPDF license
diff --git a/demo/svg-logo.py b/demo/svg-logo.py
@@ -18,9 +18,9 @@
 svglib, reportlab
 '''
 from __future__ import print_function
+import sys
 import fitz
 from svglib.svglib import svg2rlg
-import sys
 doc_fn = sys.argv[1]                        # name of PDF file
 svg_fn = sys.argv[2]                        # name of SVG image file
 
diff --git a/fitz/fitz.i b/fitz/fitz.i
@@ -147,6 +147,7 @@ struct fz_document_s
             self.isClosed    = False
             self.isEncrypted = 0
             self.metadata    = None
+            self.stream      = stream       # keep a reference so the stream is not garbage-collected
             self.openErrCode = 0
             self.openErrMsg  = ''
             self.FontInfos   = []
@@ -158,7 +159,6 @@ struct fz_document_s
                 self.openErrCode = self._getGCTXerrcode()
                 self.openErrMsg  = self._getGCTXerrmsg()
                 self.thisown = True
-                self.isClosed    = False
                 if self.needsPass:
                     self.isEncrypted = 1
                 else: # we won't init until doc is decrypted
@@ -219,6 +219,7 @@ struct fz_document_s
                 self._outline = None
             self._reset_page_refs()
             self.metadata    = None
+            self.stream      = None
             self.isClosed    = True
             self.openErrCode = 0
             self.openErrMsg  = ''
@@ -312,7 +313,7 @@ struct fz_document_s
                 limits = pdf_dict_get(gctx, efiles, PDF_NAME_Limits);
                 limit1 = NULL;
                 limit2 = NULL;
-                if (limits)                     // have name limits?
+                if (limits)                      // have name limits?
                     {
                         limit1 = pdf_to_utf8(gctx, pdf_array_get(gctx, limits, 0));
                         limit2 = pdf_to_utf8(gctx, pdf_array_get(gctx, limits, 1));
@@ -374,7 +375,6 @@ struct fz_document_s
         %feature("autodoc","Retrieve embedded file information given its entry number or name.") embeddedFileInfo;
         PyObject *embeddedFileInfo(PyObject *id)
         {
-            PyObject *infodict = PyDict_New();
             pdf_document *pdf = pdf_document_from_fz_document(gctx, $self);
             Py_ssize_t name_len = 0;
             int n = -1;
@@ -383,22 +383,23 @@ struct fz_document_s
             fz_try(gctx)
             {
                 assert_PDF(pdf);
-                n = FindEmbedded(gctx, id, pdf);
+                n = JM_FindEmbedded(gctx, id, pdf);
                 if (n < 0) THROWMSG("entry not found");
             }
             fz_catch(gctx) return NULL;
 
+            PyObject *infodict = PyDict_New();
             // name of file entry
             name = pdf_to_utf8(gctx, pdf_portfolio_entry_name(gctx, pdf, n));
             PyDict_SetItemString(infodict, "name", 
-                   JM_UNICODE(name, strlen(name)));
+                                 JM_UNICODE(name, strlen(name)));
             pdf_obj *o = pdf_portfolio_entry_obj(gctx, pdf, n);
             name = pdf_to_utf8(gctx, pdf_dict_get(gctx, o, PDF_NAME_F));
             PyDict_SetItemString(infodict, "file", 
-                   JM_UNICODE(name, strlen(name)));
+                                 JM_UNICODE(name, strlen(name)));
             name = pdf_to_utf8(gctx, pdf_dict_get(gctx, o, PDF_NAME_Desc));
             PyDict_SetItemString(infodict, "desc", 
-                   JM_UNICODE(name, strlen(name)));
+                                 JM_UNICODE(name, strlen(name)));
             pdf_obj *olen = pdf_dict_getl(gctx, o, PDF_NAME_EF, PDF_NAME_F,
                                           PDF_NAME_Length, NULL);
             int len = -1;
@@ -423,8 +424,8 @@ struct fz_document_s
                 int flen = 0, dlen = 0;
                 if (filename) flen = (int) strlen(filename);
                 if (desc)     dlen = (int) strlen(desc);
-                if ((flen == 0) && (dlen == 0)) THROWMSG("nothing to change");
-                int n = FindEmbedded(gctx, id, pdf);
+                if ((flen < 1) && (dlen < 1)) THROWMSG("nothing to change");
+                int n = JM_FindEmbedded(gctx, id, pdf);
                 if (n < 0) THROWMSG("entry not found");
                 pdf_obj *entry = pdf_portfolio_entry_obj(gctx, pdf, n);
                 
@@ -452,13 +453,13 @@ struct fz_document_s
         %feature("autodoc","Retrieve embedded file content given its entry number or name.") embeddedFileGet;
         PyObject *embeddedFileGet(PyObject *id)
         {
-            PyObject *cont = PyBytes_FromString("");
+            PyObject *cont = NULL;
             pdf_document *pdf = pdf_document_from_fz_document(gctx, $self);
             fz_buffer *buf = NULL;
             fz_try(gctx)
             {
                 assert_PDF(pdf);
-                int i = FindEmbedded(gctx, id, pdf);
+                int i = JM_FindEmbedded(gctx, id, pdf);
                 if (i < 0) THROWMSG("entry not found");
                 unsigned char *data;
                 buf = pdf_portfolio_entry(gctx, pdf, i);
@@ -492,12 +493,12 @@ struct fz_document_s
                 assert_PDF(pdf);
             }
             fz_catch(gctx) return -1;
-            if (file_len == 0)                  // no filename given
+            if (file_len == 0)              // no filename given
                 {
                    f = name;                // take the name
                    file_len = name_len;
                 }
-            if (desc_len == 0)                  // no description given
+            if (desc_len == 0)              // no description given
                 {
                     d = name;               // take the name
                     desc_len = name_len;
@@ -668,6 +669,7 @@ struct fz_document_s
                     THROWMSG("decrypted file - save to new");
                 pdf_finish_edit(gctx, pdf);
                 pdf_save_document(gctx, pdf, filename, &opts);
+                pdf->dirty = 0;
                 }
             fz_catch(gctx) return NULL;
             return NONE;
@@ -715,6 +717,7 @@ struct fz_document_s
                 res = fz_new_buffer(gctx, 1024);
                 out = fz_new_output_with_buffer(gctx, res);
                 pdf_write_document(gctx, pdf, out, &opts);
+                pdf->dirty = 0;
                 len = fz_buffer_storage(gctx, res, &c);
                 r = PyBytes_FromStringAndSize(c, len);
             }
@@ -1612,6 +1615,7 @@ if links:
                     self.thisown = False
                     self.__swig_destroy__(self)
                 self.Graftmaps = {}
+                self.stream    = None
                 self._reset_page_refs = DUMMY
                 self.__swig_destroy__ = DUMMY
                 self.isClosed = True
diff --git a/fitz/fitz.py b/fitz/fitz.py
@@ -102,9 +102,9 @@ class _object:
 
 
 VersionFitz = "1.13.0"
-VersionBind = "1.13.5"
-VersionDate = "2018-05-23 12:56:02"
-version = (VersionBind, VersionFitz, "20180523125602")
+VersionBind = "1.13.6"
+VersionDate = "2018-05-27 05:27:32"
+version = (VersionBind, VersionFitz, "20180527052732")
 
 
 #------------------------------------------------------------------------------
@@ -507,6 +507,7 @@ def __init__(self, filename=None, stream=None, filetype=None):
         self.isClosed    = False
         self.isEncrypted = 0
         self.metadata    = None
+        self.stream      = stream
         self.openErrCode = 0
         self.openErrMsg  = ''
         self.FontInfos   = []
diff --git a/fitz/fitz_wrap.c b/fitz/fitz_wrap.c
diff --git a/fitz/helper-portfolio.i b/fitz/helper-portfolio.i
diff --git a/fitz/version.i b/fitz/version.i
diff --git a/setup.py b/setup.py