jsvine
diff --git a/‎README.md‎
Lines changed: 1 addition & 0 deletions b/‎README.md‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎pdfplumber/display.py‎
Lines changed: 6 additions & 1 deletion b/‎pdfplumber/display.py‎
Lines changed: 6 additions & 1 deletion
diff --git a/‎pdfplumber/page.py‎
Lines changed: 14 additions & 2 deletions b/‎pdfplumber/page.py‎
Lines changed: 14 additions & 2 deletions
diff --git a/‎pdfplumber/pdf.py‎
Lines changed: 18 additions & 3 deletions b/‎pdfplumber/pdf.py‎
Lines changed: 18 additions & 3 deletions
diff --git a/‎pdfplumber/utils/exceptions.py‎
Lines changed: 6 additions & 0 deletions b/‎pdfplumber/utils/exceptions.py‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎tests/pdfs/from-oss-fuzz/load/4591020179783680.pdf‎
131 KB b/‎tests/pdfs/from-oss-fuzz/load/4591020179783680.pdf‎
131 KB
diff --git a/‎tests/pdfs/from-oss-fuzz/load/4652594248613888.pdf‎
123 KB b/‎tests/pdfs/from-oss-fuzz/load/4652594248613888.pdf‎
123 KB
diff --git a/‎tests/pdfs/from-oss-fuzz/load/4691742750474240.pdf‎
1.54 KB b/‎tests/pdfs/from-oss-fuzz/load/4691742750474240.pdf‎
1.54 KB
diff --git a/‎tests/pdfs/from-oss-fuzz/load/4715311080734720.pdf‎
137 KB b/‎tests/pdfs/from-oss-fuzz/load/4715311080734720.pdf‎
137 KB
diff --git a/‎tests/pdfs/from-oss-fuzz/load/4736668896133120.pdf‎
4.64 KB b/‎tests/pdfs/from-oss-fuzz/load/4736668896133120.pdf‎
4.64 KB
@@ -570,6 +570,7 @@ Many thanks to the following users who've contributed ideas, features, and fixes
 - [@wodny](https://github.com/wodny)
 - [Michal Stolarczyk](https://github.com/stolarczyk)
 - [Brandon Roberts](https://github.com/brandonrobertz)
+- [@ennamarie19](https://github.com/ennamarie19)
 
 ## Contributing
 
 
@@ -9,6 +9,7 @@
 from . import utils
 from ._typing import T_bbox, T_num, T_obj, T_obj_list, T_point, T_seq
 from .table import T_table_settings, Table, TableFinder, TableSettings
+from .utils.exceptions import MalformedPDFException
 
 if TYPE_CHECKING:  # pragma: nocover
     import pandas as pd
@@ -52,7 +53,11 @@ def get_page_image(
         stream.seek(0)
         src = stream
 
-    pdfium_doc = pypdfium2.PdfDocument(src, password=password)
+    try:
+        pdfium_doc = pypdfium2.PdfDocument(src, password=password)
+    except pypdfium2._helpers.misc.PdfiumError as e:
+        raise MalformedPDFException(e)
+
     pdfium_page = pdfium_doc.get_page(page_ix)
 
     img: PIL.Image.Image = pdfium_page.render(
 
@@ -1,3 +1,4 @@
+import numbers
 import re
 from functools import lru_cache
 from typing import (
@@ -35,6 +36,7 @@
 from .structure import PDFStructTree, StructTreeMissing
 from .table import T_table_settings, Table, TableFinder, TableSettings
 from .utils import decode_text, resolve_all, resolve_and_decode
+from .utils.exceptions import MalformedPDFException, PdfminerException
 from .utils.text import TextMap
 
 lt_pat = re.compile(r"^LT")
@@ -184,6 +186,10 @@ def _normalize_box(box_raw: T_bbox, rotation: T_num = 0) -> T_bbox:
     # conventionally specified by their lower-left and upperright
     # corners, it is acceptable to specify any two diagonally opposite
     # corners."
+    if not all(isinstance(x, numbers.Number) for x in box_raw):
+        raise MalformedPDFException(
+            f"Bounding box contains non-number coordinate(s): {box_raw}"
+        )
     x0, x1 = sorted((box_raw[0], box_raw[2]))
     y0, y1 = sorted((box_raw[1], box_raw[3]))
     if rotation in [90, 270]:
@@ -276,7 +282,10 @@ def layout(self) -> LTPage:
             laparams=self.pdf.laparams,
         )
         interpreter = PDFPageInterpreter(self.pdf.rsrcmgr, device)
-        interpreter.process_page(self.page_obj)
+        try:
+            interpreter.process_page(self.page_obj)
+        except Exception as e:
+            raise PdfminerException(e)
         self._layout: LTPage = device.get_result()
         return self._layout
 
@@ -339,7 +348,10 @@ def parse(annot: T_obj) -> T_obj:
             parsed["data"] = annot
             return parsed
 
-        raw = resolve_all(self.page_obj.annots) or []
+        try:
+            raw = resolve_all(self.page_obj.annots) or []
+        except RecursionError:
+            raise MalformedPDFException("Annotations are infinitely recursive.")
         parsed = list(map(parse, raw))
         if isinstance(self, CroppedPage):
             return self._crop_fn(parsed)
 
@@ -3,7 +3,7 @@
 import pathlib
 from io import BufferedReader, BytesIO
 from types import TracebackType
-from typing import Any, Dict, List, Literal, Optional, Tuple, Type, Union
+from typing import Any, Dict, Generator, List, Literal, Optional, Tuple, Type, Union
 
 from pdfminer.layout import LAParams
 from pdfminer.pdfdocument import PDFDocument
@@ -18,6 +18,7 @@
 from .repair import T_repair_setting, _repair
 from .structure import PDFStructTree, StructTreeMissing
 from .utils import resolve_and_decode
+from .utils.exceptions import PdfminerException
 
 logger = logging.getLogger(__name__)
 
@@ -46,7 +47,10 @@ def __init__(
         self.unicode_norm = unicode_norm
         self.raise_unicode_errors = raise_unicode_errors
 
-        self.doc = PDFDocument(PDFParser(stream), password=password or "")
+        try:
+            self.doc = PDFDocument(PDFParser(stream), password=password or "")
+        except Exception as e:
+            raise PdfminerException(e)
         self.rsrcmgr = PDFResourceManager()
         self.metadata = {}
 
@@ -146,7 +150,18 @@ def pages(self) -> List[Page]:
         doctop: T_num = 0
         pp = self.pages_to_parse
         self._pages: List[Page] = []
-        for i, page in enumerate(PDFPage.create_pages(self.doc)):
+
+        def iter_pages() -> Generator[PDFPage, None, None]:
+            """Yield the pages produced by PDFPage.create_pages(self.doc).
+
+            Any exception raised while the next page is being produced is
+            re-raised as PdfminerException, chained to the original error.
+            """
+            gen = PDFPage.create_pages(self.doc)
+            while True:
+                # Only the next() call sits inside the try, so that just the
+                # errors raised while producing a page get wrapped -- not
+                # anything thrown into this generator at the yield below.
+                try:
+                    page = next(gen)
+                except StopIteration:
+                    return
+                except Exception as e:
+                    # "from e" records the original error as __cause__.
+                    raise PdfminerException(e) from e
+                yield page
+
+        for i, page in enumerate(iter_pages()):
             page_number = i + 1
             if pp is not None and page_number not in pp:
                 continue
 
@@ -0,0 +1,6 @@
+class MalformedPDFException(Exception):
+    """Raised when the contents of a PDF are found to be invalid."""
+
+
+class PdfminerException(Exception):
+    """Raised to wrap an exception caught while calling into pdfminer."""
-Original file line number
+Diff line change
@@ @@ -0,0 +1,6 @@ @@
 +class MalformedPDFException(Exception):
 +    pass
++
++
 +class PdfminerException(Exception):
 +    pass