Add an opt-in ``strict`` flag to the HTTP filesystem existence checks.

With ``strict=False`` (the default) the existing behaviour of ``_exists`` and
``exists`` is unchanged.  With ``strict=True`` a not-found response still
yields ``False``, while other failures are re-raised to the caller; the
accompanying tests expect a 401 reply to surface as
``aiohttp.ClientResponseError`` in the async implementation and as
``requests.exceptions.HTTPError`` in the sync one.  The test server's GET
handler gains an ``/unauthorized`` path that is answered with 401.

NOTE(review): indentation and blank context lines below were reconstructed
from a whitespace-collapsed copy using the hunk line counts; confirm against
the target tree (or apply with whitespace tolerance) before relying on it.

diff --git a/fsspec/implementations/http.py b/fsspec/implementations/http.py
index 093fa29be..41a49cc3d 100644
--- a/fsspec/implementations/http.py
+++ b/fsspec/implementations/http.py
@@ -326,7 +326,7 @@ async def gen_chunks():
         async with meth(self.encode_url(rpath), data=gen_chunks(), **kw) as resp:
             self._raise_not_found_for_status(resp, rpath)
 
-    async def _exists(self, path, **kwargs):
+    async def _exists(self, path, strict=False, **kwargs):
         kw = self.kwargs.copy()
         kw.update(kwargs)
         try:
@@ -334,8 +334,14 @@ async def _exists(self, path, **kwargs):
             session = await self.set_session()
             r = await session.get(self.encode_url(path), **kw)
             async with r:
+                if strict:
+                    self._raise_not_found_for_status(r, path)
                 return r.status < 400
+        except FileNotFoundError:
+            return False
         except aiohttp.ClientError:
+            if strict:
+                raise
             return False
 
     async def _isfile(self, path, **kwargs):
diff --git a/fsspec/implementations/http_sync.py b/fsspec/implementations/http_sync.py
index 08799f20a..a67ea3ea5 100644
--- a/fsspec/implementations/http_sync.py
+++ b/fsspec/implementations/http_sync.py
@@ -463,14 +463,20 @@ def _process_limits(self, url, start, end):
                 end -= 1  # bytes range is inclusive
         return f"bytes={start}-{end}"
 
-    def exists(self, path, **kwargs):
+    def exists(self, path, strict=False, **kwargs):
         kw = self.kwargs.copy()
         kw.update(kwargs)
         try:
             logger.debug(path)
             r = self.session.get(self.encode_url(path), **kw)
+            if strict:
+                self._raise_not_found_for_status(r, path)
             return r.status_code < 400
+        except FileNotFoundError:
+            return False
         except Exception:
+            if strict:
+                raise
             return False
 
     def isfile(self, path, **kwargs):
diff --git a/fsspec/implementations/tests/test_http.py b/fsspec/implementations/tests/test_http.py
index d014d1155..856b51811 100644
--- a/fsspec/implementations/tests/test_http.py
+++ b/fsspec/implementations/tests/test_http.py
@@ -163,6 +163,14 @@ def test_exists(server):
         h.cat(server.address + "/notafile")
 
 
+def test_exists_strict(server):
+    h = fsspec.filesystem("http")
+    assert not h.exists(server.address + "/notafile", strict=True)
+    with pytest.raises(aiohttp.ClientResponseError) as e:
+        h.exists(server.address + "/unauthorized", strict=True)
+    assert e.value.status == 401
+
+
 def test_read(server):
     h = fsspec.filesystem("http")
     out = server.realfile
diff --git a/fsspec/implementations/tests/test_http_sync.py b/fsspec/implementations/tests/test_http_sync.py
index 330cf4d07..5b0efab41 100644
--- a/fsspec/implementations/tests/test_http_sync.py
+++ b/fsspec/implementations/tests/test_http_sync.py
@@ -6,7 +6,7 @@
 import pytest
 
 import fsspec.utils
-from fsspec.tests.conftest import data, reset_files, server, win  # noqa: F401
+from fsspec.tests.conftest import data, requests, reset_files, server, win  # noqa: F401
 
 
 @pytest.fixture()
@@ -147,6 +147,14 @@ def test_exists(server, sync):
         h.cat(server.address + "/notafile")
 
 
+def test_exists_strict(server, sync):
+    h = fsspec.filesystem("http")
+    assert not h.exists(server.address + "/notafile", strict=True)
+    with pytest.raises(requests.exceptions.HTTPError) as e:
+        h.exists(server.address + "/unauthorized", strict=True)
+    assert e.value.response.status_code == 401
+
+
 def test_read(server, sync):
     h = fsspec.filesystem("http")
     out = server.address + "/index/realfile"
diff --git a/fsspec/tests/conftest.py b/fsspec/tests/conftest.py
index ccf7bdccf..0a56521ed 100644
--- a/fsspec/tests/conftest.py
+++ b/fsspec/tests/conftest.py
@@ -54,6 +54,7 @@ class HTTPTestHandler(BaseHTTPRequestHandler):
         "/simple/file": data,
         "/simple/dir/": _make_listing("/simple/dir/file"),
         "/simple/dir/file": data,
+        "/unauthorized": AssertionError("shouldn't access"),
     }
     dynamic_files = {}
 
@@ -85,6 +86,8 @@ def do_GET(self):
         if "redirect" in self.headers and file_path != "/index/realfile":
             new_url = _make_realfile(baseurl)
             return self._respond(301, {"Location": new_url})
+        if file_path == "/unauthorized":
+            return self._respond(401)
         if file_data is None:
             return self._respond(404)
 