From d44aa98545e4c480d20dc10a372659dbb6385908 Mon Sep 17 00:00:00 2001 From: Mostafa Khalil Date: Sun, 6 Oct 2024 06:58:55 +0300 Subject: [PATCH 1/6] feat: Optional max_file_size when parsing form --- docs/requests.md | 7 +++++++ starlette/formparsers.py | 8 ++++++++ starlette/requests.py | 11 ++++++++--- tests/test_formparsers.py | 33 +++++++++++++++++++++++++++++++++ 4 files changed, 56 insertions(+), 3 deletions(-) diff --git a/docs/requests.md b/docs/requests.md index 9140bb964..4046540d3 100644 --- a/docs/requests.md +++ b/docs/requests.md @@ -122,6 +122,13 @@ async with request.form(max_files=1000, max_fields=1000): ... ``` +You can configure maximum size per file uploaded with the parameter `max_file_size`: + +```python +async with request.form(max_file_size=100*1024*1024): # 100 MB limit per file + ... +``` + !!! info These limits are for security reasons, allowing an unlimited number of fields or files could lead to a denial of service attack by consuming a lot of CPU and memory parsing too many empty fields. diff --git a/starlette/formparsers.py b/starlette/formparsers.py index 48b0f2aad..b072ecff7 100644 --- a/starlette/formparsers.py +++ b/starlette/formparsers.py @@ -126,12 +126,14 @@ def __init__( *, max_files: int | float = 1000, max_fields: int | float = 1000, + max_part_file_size: int | float | None = None, ) -> None: assert multipart is not None, "The `python-multipart` library must be installed to use form parsing." self.headers = headers self.stream = stream self.max_files = max_files self.max_fields = max_fields + self.max_part_file_size = max_part_file_size self.items: list[tuple[str, str | UploadFile]] = [] self._current_files = 0 self._current_fields = 0 @@ -247,6 +249,12 @@ async def parse(self) -> FormData: # the main thread. for part, data in self._file_parts_to_write: assert part.file # for type checkers + if ( + self.max_part_file_size is not None + and part.file.size is not None + and part.file.size + len(data) > self.max_part_file_size + ): + raise MultiPartException(f"File exceeds maximum size of {self.max_part_file_size} bytes.") await part.file.write(data) for part in self._file_parts_to_finish: assert part.file # for type checkers diff --git a/starlette/requests.py b/starlette/requests.py index 9c6776f0c..1ebec2b61 100644 --- a/starlette/requests.py +++ b/starlette/requests.py @@ -245,7 +245,9 @@ async def json(self) -> typing.Any: self._json = json.loads(body) return self._json - async def _get_form(self, *, max_files: int | float = 1000, max_fields: int | float = 1000) -> FormData: + async def _get_form( + self, *, max_files: int | float = 1000, max_fields: int | float = 1000, max_file_size: int | None + ) -> FormData: if self._form is None: assert ( parse_options_header is not None @@ -260,6 +262,7 @@ async def _get_form(self, *, max_files: int | float = 1000, max_fields: int | fl self.stream(), max_files=max_files, max_fields=max_fields, + max_part_file_size=max_file_size, ) self._form = await multipart_parser.parse() except MultiPartException as exc: @@ -274,9 +277,11 @@ async def _get_form(self, *, max_files: int | float = 1000, max_fields: int | fl return self._form def form( - self, *, max_files: int | float = 1000, max_fields: int | float = 1000 + self, *, max_files: int | float = 1000, max_fields: int | float = 1000, max_file_size: int | None = None ) -> AwaitableOrContextManager[FormData]: - return AwaitableOrContextManagerWrapper(self._get_form(max_files=max_files, max_fields=max_fields)) + return AwaitableOrContextManagerWrapper( + self._get_form(max_files=max_files, max_fields=max_fields, max_file_size=max_file_size) + ) async def close(self) -> None: if self._form is not None: # pragma: no branch diff --git a/tests/test_formparsers.py b/tests/test_formparsers.py index 61c1bede1..4cff626df 100644 --- a/tests/test_formparsers.py +++ b/tests/test_formparsers.py @@ -4,6 +4,7 @@ import typing from contextlib import nullcontext as does_not_raise from pathlib import Path +from secrets import token_bytes import pytest @@ -127,6 +128,14 @@ async def app(scope: Scope, receive: Receive, send: Send) -> None: return app +def make_app_max_file_size(max_file_size: int) -> ASGIApp: + async def app(scope: Scope, receive: Receive, send: Send) -> None: + request = Request(scope, receive) + await request.form(max_file_size=max_file_size) + + return app + + def test_multipart_request_data(tmpdir: Path, test_client_factory: TestClientFactory) -> None: client = test_client_factory(app) response = client.post("/", data={"some": "data"}, files=FORCE_MULTIPART) @@ -580,6 +589,30 @@ def test_too_many_files_and_fields_raise( assert res.text == "Too many files. Maximum number of files is 1000." +@pytest.mark.parametrize( + "app,expectation", + [ + (make_app_max_file_size(1024), pytest.raises(MultiPartException)), + (Starlette(routes=[Mount("/", app=make_app_max_file_size(1024))]), does_not_raise()), + ], +) +def test_max_part_file_size_raise( + tmpdir: Path, + app: ASGIApp, + expectation: typing.ContextManager[Exception], + test_client_factory: TestClientFactory, +) -> None: + path = os.path.join(tmpdir, "test.txt") + with open(path, "wb") as file: + file.write(token_bytes(1024 + 1)) + + client = test_client_factory(app) + with open(path, "rb") as f, expectation: + response = client.post("/", files={"test": f}) + assert response.status_code == 400 + assert response.text == "File exceeds maximum size of 1024 bytes." + + @pytest.mark.parametrize( "app,expectation", [ From 412851e923c79bbfc2e19f1cd7305d5d5d3278d6 Mon Sep 17 00:00:00 2001 From: Mostafa Khalil Date: Sun, 15 Dec 2024 09:05:13 +0200 Subject: [PATCH 2/6] feat: make form part and file size limits configurable --- starlette/formparsers.py | 27 ++++++++++++++------------- starlette/requests.py | 38 ++++++++++++++++++++++++++++++++++---- 2 files changed, 48 insertions(+), 17 deletions(-) diff --git a/starlette/formparsers.py b/starlette/formparsers.py index 9255d1e95..d304abc01 100644 --- a/starlette/formparsers.py +++ b/starlette/formparsers.py @@ -122,8 +122,9 @@ async def parse(self) -> FormData: class MultiPartParser: - max_file_size = 1024 * 1024 # 1MB - max_part_size = 1024 * 1024 # 1MB + default_max_field_size = 1024 * 1024 # 1MB + default_max_file_mem_size = 1024 * 1024 # 1MB + default_max_file_disk_size = 1024 * 1024 * 1024 # 1GB def __init__( self, @@ -132,14 +133,18 @@ def __init__( *, max_files: int | float = 1000, max_fields: int | float = 1000, - max_part_file_size: int | float | None = None, + max_field_size: int | float | None = None, + max_file_mem_size: int | float | None = None, + max_file_disk_size: int | float | None = None, ) -> None: assert multipart is not None, "The `python-multipart` library must be installed to use form parsing." self.headers = headers self.stream = stream self.max_files = max_files self.max_fields = max_fields - self.max_part_file_size = max_part_file_size + self.max_field_size: int | float = max_field_size or self.default_max_field_size + self.max_file_mem_size: int | float = max_file_mem_size or self.default_max_file_mem_size + self.max_file_disk_size: int | float = max_file_disk_size or self.default_max_file_disk_size self.items: list[tuple[str, str | UploadFile]] = [] self._current_files = 0 self._current_fields = 0 @@ -157,8 +162,8 @@ def on_part_begin(self) -> None: def on_part_data(self, data: bytes, start: int, end: int) -> None: message_bytes = data[start:end] if self._current_part.file is None: - if len(self._current_part.data) + len(message_bytes) > self.max_part_size: - raise MultiPartException(f"Part exceeded maximum size of {int(self.max_part_size / 1024)}KB.") + if len(self._current_part.data) + len(message_bytes) > self.max_field_size: + raise MultiPartException(f"Part exceeded maximum size of {int(self.max_field_size / 1024)}KB.") self._current_part.data.extend(message_bytes) else: self._file_parts_to_write.append((self._current_part, message_bytes)) @@ -203,7 +208,7 @@ def on_headers_finished(self) -> None: if self._current_files > self.max_files: raise MultiPartException(f"Too many files. Maximum number of files is {self.max_files}.") filename = _user_safe_decode(options[b"filename"], self._charset) - tempfile = SpooledTemporaryFile(max_size=self.max_file_size) + tempfile = SpooledTemporaryFile(max_size=self.max_file_mem_size) self._files_to_close_on_error.append(tempfile) self._current_part.file = UploadFile( file=tempfile, # type: ignore[arg-type] @@ -257,12 +262,8 @@ async def parse(self) -> FormData: # the main thread. for part, data in self._file_parts_to_write: assert part.file # for type checkers - if ( - self.max_part_file_size is not None - and part.file.size is not None - and part.file.size + len(data) > self.max_part_file_size - ): - raise MultiPartException(f"File exceeds maximum size of {self.max_part_file_size} bytes.") + if part.file.size is not None and part.file.size + len(data) > self.max_file_disk_size: + raise MultiPartException(f"File exceeds maximum size of {self.max_file_disk_size} bytes.") await part.file.write(data) for part in self._file_parts_to_finish: assert part.file # for type checkers diff --git a/starlette/requests.py b/starlette/requests.py index a662e2bdd..06492d4f5 100644 --- a/starlette/requests.py +++ b/starlette/requests.py @@ -250,7 +250,13 @@ async def json(self) -> typing.Any: return self._json async def _get_form( - self, *, max_files: int | float = 1000, max_fields: int | float = 1000, max_file_size: int | None + self, + *, + max_files: int | float = 1000, + max_fields: int | float = 1000, + max_field_size: int | float | None, + max_file_mem_size: int | float | None, + max_file_disk_size: int | float | None, ) -> FormData: if self._form is None: assert ( @@ -266,7 +272,9 @@ async def _get_form( self.stream(), max_files=max_files, max_fields=max_fields, - max_part_file_size=max_file_size, + max_field_size=max_field_size, + max_file_mem_size=max_file_mem_size, + max_file_disk_size=max_file_disk_size, ) self._form = await multipart_parser.parse() except MultiPartException as exc: @@ -281,10 +289,32 @@ async def _get_form( return self._form def form( - self, *, max_files: int | float = 1000, max_fields: int | float = 1000, max_file_size: int | None = None + self, + *, + max_files: int | float = 1000, + max_fields: int | float = 1000, + max_field_size: int | float | None = None, + max_file_mem_size: int | float | None = None, + max_file_disk_size: int | float | None = None, ) -> AwaitableOrContextManager[FormData]: + """ + Return a FormData instance, representing the form data in the request. + + :param max_files: The maximum number of files that can be parsed. + :param max_fields: The maximum number of fields that can be parsed. + :param max_field_size: The maximum size of each field part in bytes. + :param max_file_mem_size: The maximum memory size for each file part in bytes. + :param max_file_disk_size: The maximum disk size for each file part in bytes. + https://docs.python.org/3/library/tempfile.html#tempfile.SpooledTemporaryFile + """ return AwaitableOrContextManagerWrapper( - self._get_form(max_files=max_files, max_fields=max_fields, max_file_size=max_file_size) + self._get_form( + max_files=max_files, + max_fields=max_fields, + max_field_size=max_field_size, + max_file_mem_size=max_file_mem_size, + max_file_disk_size=max_file_disk_size, + ) ) async def close(self) -> None: From 2fd523d06ac842f023dec5422e48cf61724416df Mon Sep 17 00:00:00 2001 From: Mostafa Khalil Date: Sun, 23 Feb 2025 16:36:23 +0200 Subject: [PATCH 3/6] feat: make form file memory size limit configurable --- starlette/formparsers.py | 2 ++ starlette/requests.py | 10 +++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/starlette/formparsers.py b/starlette/formparsers.py index 4551d6887..1602de424 100644 --- a/starlette/formparsers.py +++ b/starlette/formparsers.py @@ -135,6 +135,7 @@ def __init__( max_files: int | float = 1000, max_fields: int | float = 1000, max_part_size: int = 1024 * 1024, # 1MB + spool_max_size: int = 1024 * 1024, # 1MB ) -> None: assert multipart is not None, "The `python-multipart` library must be installed to use form parsing." self.headers = headers @@ -152,6 +153,7 @@ def __init__( self._file_parts_to_finish: list[MultipartPart] = [] self._files_to_close_on_error: list[SpooledTemporaryFile[bytes]] = [] self.max_part_size = max_part_size + self.spool_max_size = spool_max_size def on_part_begin(self) -> None: self._current_part = MultipartPart() diff --git a/starlette/requests.py b/starlette/requests.py index 7dc04a746..244012153 100644 --- a/starlette/requests.py +++ b/starlette/requests.py @@ -255,6 +255,7 @@ async def _get_form( max_files: int | float = 1000, max_fields: int | float = 1000, max_part_size: int = 1024 * 1024, + max_file_memory_size: int = 1024 * 1024, ) -> FormData: if self._form is None: # pragma: no branch assert parse_options_header is not None, ( @@ -271,6 +272,7 @@ async def _get_form( max_files=max_files, max_fields=max_fields, max_part_size=max_part_size, + spool_max_size=max_file_memory_size, ) self._form = await multipart_parser.parse() except MultiPartException as exc: @@ -290,9 +292,15 @@ def form( max_files: int | float = 1000, max_fields: int | float = 1000, max_part_size: int = 1024 * 1024, + max_file_memory_size: int = 1024 * 1024, ) -> AwaitableOrContextManager[FormData]: return AwaitableOrContextManagerWrapper( - self._get_form(max_files=max_files, max_fields=max_fields, max_part_size=max_part_size) + self._get_form( + max_files=max_files, + max_fields=max_fields, + max_part_size=max_part_size, + max_file_memory_size=max_file_memory_size, + ) ) async def close(self) -> None: From 9caf25bdb8cdea95e2e9fed4f7f70446ce87616c Mon Sep 17 00:00:00 2001 From: Mostafa Khalil Date: Sun, 23 Feb 2025 16:46:54 +0200 Subject: [PATCH 4/6] feat: make spool file memory size limit configurable --- docs/requests.md | 4 ++-- starlette/requests.py | 8 ++++---- tests/test_formparsers.py | 8 ++++---- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/docs/requests.md b/docs/requests.md index 04932def4..a8b648c88 100644 --- a/docs/requests.md +++ b/docs/requests.md @@ -122,10 +122,10 @@ async with request.form(max_files=1000, max_fields=1000, max_part_size=1024*1024 ... ``` -You can configure maximum size per file uploaded with the parameter `max_file_size`: +You can configure maximum [spooled file](https://docs.python.org/3/library/tempfile.html#tempfile.SpooledTemporaryFile) size per file uploaded with the parameter `max_file_size`: ```python -async with request.form(max_file_size=100*1024*1024): # 100 MB limit per file +async with request.form(max_file_spool_size=100*1024*1024): # 100 MB limit per file ... ``` diff --git a/starlette/requests.py b/starlette/requests.py index 244012153..5504dbe5b 100644 --- a/starlette/requests.py +++ b/starlette/requests.py @@ -255,7 +255,7 @@ async def _get_form( max_files: int | float = 1000, max_fields: int | float = 1000, max_part_size: int = 1024 * 1024, - max_file_memory_size: int = 1024 * 1024, + max_file_spool_size: int = 1024 * 1024, ) -> FormData: if self._form is None: # pragma: no branch assert parse_options_header is not None, ( @@ -272,7 +272,7 @@ async def _get_form( max_files=max_files, max_fields=max_fields, max_part_size=max_part_size, - spool_max_size=max_file_memory_size, + spool_max_size=max_file_spool_size, ) self._form = await multipart_parser.parse() except MultiPartException as exc: @@ -292,14 +292,14 @@ def form( max_files: int | float = 1000, max_fields: int | float = 1000, max_part_size: int = 1024 * 1024, - max_file_memory_size: int = 1024 * 1024, + max_file_spool_size: int = 1024 * 1024, ) -> AwaitableOrContextManager[FormData]: return AwaitableOrContextManagerWrapper( self._get_form( max_files=max_files, max_fields=max_fields, max_part_size=max_part_size, - max_file_memory_size=max_file_memory_size, + max_file_spool_size=max_file_spool_size, ) ) diff --git a/tests/test_formparsers.py b/tests/test_formparsers.py index e10af975c..84f3fc735 100644 --- a/tests/test_formparsers.py +++ b/tests/test_formparsers.py @@ -128,10 +128,10 @@ async def app(scope: Scope, receive: Receive, send: Send) -> None: return app -def make_app_max_file_size(max_file_size: int) -> ASGIApp: +def make_app_max_spool_file_size(max_file_size: int) -> ASGIApp: async def app(scope: Scope, receive: Receive, send: Send) -> None: request = Request(scope, receive) - await request.form(max_file_size=max_file_size) + await request.form(max_file_spool_size=max_file_size) return app @@ -592,8 +592,8 @@ def test_too_many_files_and_fields_raise( @pytest.mark.parametrize( "app,expectation", [ - (make_app_max_file_size(1024), pytest.raises(MultiPartException)), - (Starlette(routes=[Mount("/", app=make_app_max_file_size(1024))]), does_not_raise()), + (make_app_max_spool_file_size(1024), pytest.raises(MultiPartException)), + (Starlette(routes=[Mount("/", app=make_app_max_spool_file_size(1024))]), does_not_raise()), ], ) def test_max_part_file_size_raise( From 05d883108c38bee8ac943a734f777460db402dd1 Mon Sep 17 00:00:00 2001 From: Mostafa Khalil Date: Sun, 23 Feb 2025 17:15:10 +0200 Subject: [PATCH 5/6] remove test for max file size on disk --- tests/test_formparsers.py | 33 --------------------------------- 1 file changed, 33 deletions(-) diff --git a/tests/test_formparsers.py b/tests/test_formparsers.py index 84f3fc735..b18fd6c40 100644 --- a/tests/test_formparsers.py +++ b/tests/test_formparsers.py @@ -4,7 +4,6 @@ import typing from contextlib import nullcontext as does_not_raise from pathlib import Path -from secrets import token_bytes import pytest @@ -128,14 +127,6 @@ async def app(scope: Scope, receive: Receive, send: Send) -> None: return app -def make_app_max_spool_file_size(max_file_size: int) -> ASGIApp: - async def app(scope: Scope, receive: Receive, send: Send) -> None: - request = Request(scope, receive) - await request.form(max_file_spool_size=max_file_size) - - return app - - def test_multipart_request_data(tmpdir: Path, test_client_factory: TestClientFactory) -> None: client = test_client_factory(app) response = client.post("/", data={"some": "data"}, files=FORCE_MULTIPART) @@ -589,30 +580,6 @@ def test_too_many_files_and_fields_raise( assert res.text == "Too many files. Maximum number of files is 1000." -@pytest.mark.parametrize( - "app,expectation", - [ - (make_app_max_spool_file_size(1024), pytest.raises(MultiPartException)), - (Starlette(routes=[Mount("/", app=make_app_max_spool_file_size(1024))]), does_not_raise()), - ], -) -def test_max_part_file_size_raise( - tmpdir: Path, - app: ASGIApp, - expectation: typing.ContextManager[Exception], - test_client_factory: TestClientFactory, -) -> None: - path = os.path.join(tmpdir, "test.txt") - with open(path, "wb") as file: - file.write(token_bytes(1024 + 1)) - - client = test_client_factory(app) - with open(path, "rb") as f, expectation: - response = client.post("/", files={"test": f}) - assert response.status_code == 400 - assert response.text == "File exceeds maximum size of 1024 bytes." - - @pytest.mark.parametrize( "app,expectation", [ From 59464458f62a6e563aa6804ad867da0eb806ca74 Mon Sep 17 00:00:00 2001 From: Mostafa Khalil Date: Sun, 23 Feb 2025 17:20:06 +0200 Subject: [PATCH 6/6] naming change --- docs/requests.md | 2 +- starlette/formparsers.py | 4 ++-- starlette/requests.py | 8 ++++---- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/requests.md b/docs/requests.md index a8b648c88..abc7effd1 100644 --- a/docs/requests.md +++ b/docs/requests.md @@ -125,7 +125,7 @@ async with request.form(max_files=1000, max_fields=1000, max_part_size=1024*1024 You can configure maximum [spooled file](https://docs.python.org/3/library/tempfile.html#tempfile.SpooledTemporaryFile) size per file uploaded with the parameter `max_file_size`: ```python -async with request.form(max_file_spool_size=100*1024*1024): # 100 MB limit per file +async with request.form(max_spool_size=100*1024*1024): # 100 MB limit per file ... ``` diff --git a/starlette/formparsers.py b/starlette/formparsers.py index 1602de424..d05963c9a 100644 --- a/starlette/formparsers.py +++ b/starlette/formparsers.py @@ -135,7 +135,7 @@ def __init__( max_files: int | float = 1000, max_fields: int | float = 1000, max_part_size: int = 1024 * 1024, # 1MB - spool_max_size: int = 1024 * 1024, # 1MB + max_spool_size: int = 1024 * 1024, # 1MB ) -> None: assert multipart is not None, "The `python-multipart` library must be installed to use form parsing." self.headers = headers @@ -153,7 +153,7 @@ def __init__( self._file_parts_to_finish: list[MultipartPart] = [] self._files_to_close_on_error: list[SpooledTemporaryFile[bytes]] = [] self.max_part_size = max_part_size - self.spool_max_size = spool_max_size + self.spool_max_size = max_spool_size def on_part_begin(self) -> None: self._current_part = MultipartPart() diff --git a/starlette/requests.py b/starlette/requests.py index 5504dbe5b..8a134de18 100644 --- a/starlette/requests.py +++ b/starlette/requests.py @@ -255,7 +255,7 @@ async def _get_form( max_files: int | float = 1000, max_fields: int | float = 1000, max_part_size: int = 1024 * 1024, - max_file_spool_size: int = 1024 * 1024, + max_spool_size: int = 1024 * 1024, ) -> FormData: if self._form is None: # pragma: no branch assert parse_options_header is not None, ( @@ -272,7 +272,7 @@ async def _get_form( max_files=max_files, max_fields=max_fields, max_part_size=max_part_size, - spool_max_size=max_file_spool_size, + max_spool_size=max_spool_size, ) self._form = await multipart_parser.parse() except MultiPartException as exc: @@ -292,14 +292,14 @@ def form( max_files: int | float = 1000, max_fields: int | float = 1000, max_part_size: int = 1024 * 1024, - max_file_spool_size: int = 1024 * 1024, + max_spool_size: int = 1024 * 1024, ) -> AwaitableOrContextManager[FormData]: return AwaitableOrContextManagerWrapper( self._get_form( max_files=max_files, max_fields=max_fields, max_part_size=max_part_size, - max_file_spool_size=max_file_spool_size, + max_spool_size=max_spool_size, ) )