scrapy-plugins
diff --git a/‎.bumpversion.cfg‎
Lines changed: 1 addition & 1 deletion b/‎.bumpversion.cfg‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.github/workflows/tests.yml‎
Lines changed: 5 additions & 0 deletions b/‎.github/workflows/tests.yml‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎.gitignore‎
Lines changed: 5 additions & 0 deletions b/‎.gitignore‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎README.md‎
Lines changed: 73 additions & 7 deletions b/‎README.md‎
Lines changed: 73 additions & 7 deletions
diff --git a/‎docs/changelog.md‎
Lines changed: 13 additions & 0 deletions b/‎docs/changelog.md‎
Lines changed: 13 additions & 0 deletions
diff --git a/‎examples/books.py‎
Lines changed: 2 additions & 1 deletion b/‎examples/books.py‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎examples/contexts.py‎
Lines changed: 2 additions & 1 deletion b/‎examples/contexts.py‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎examples/max_pages.py‎
Lines changed: 2 additions & 1 deletion b/‎examples/max_pages.py‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎examples/storage.py‎
Lines changed: 2 additions & 1 deletion b/‎examples/storage.py‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎scrapy_playwright/__init__.py‎
Lines changed: 1 addition & 1 deletion b/‎scrapy_playwright/__init__.py‎
Lines changed: 1 addition & 1 deletion
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.0.37
+current_version = 0.0.39
 commit = True
 tag = True
 
 
@@ -25,6 +25,11 @@ jobs:
       with:
         python-version: ${{ matrix.python-version }}
 
+    - name: Set up node
+      uses: actions/setup-node@v4
+      with:
+        node-version: 18
+
     - name: Install tox
       run: pip install tox
 
 
@@ -17,3 +17,8 @@ coverage.xml
 coverage-*.xml
 coverage-asyncio/
 coverage-twisted/
+
+# nodejs stuff
+node_modules/
+package-lock.json
+package.json
@@ -168,14 +168,17 @@ Type `Optional[str]`, default `None`
 The endpoint of a remote Chromium browser to connect using the
 [Chrome DevTools Protocol](https://chromedevtools.github.io/devtools-protocol/),
 via [`BrowserType.connect_over_cdp`](https://playwright.dev/python/docs/api/class-browsertype#browser-type-connect-over-cdp).
+
+```python
+PLAYWRIGHT_CDP_URL = "http://localhost:9222"
+```
+
 If this setting is used:
 * all non-persistent contexts will be created on the connected remote browser
 * the `PLAYWRIGHT_LAUNCH_OPTIONS` setting is ignored
 * the `PLAYWRIGHT_BROWSER_TYPE` setting must not be set to a value different than "chromium"
 
-```python
-PLAYWRIGHT_CDP_URL = "http://localhost:9222"
-```
+**This setting CANNOT be used at the same time as `PLAYWRIGHT_CONNECT_URL`**
 
 ### `PLAYWRIGHT_CDP_KWARGS`
 Type `dict[str, Any]`, default `{}`
@@ -192,6 +195,41 @@ PLAYWRIGHT_CDP_KWARGS = {
 }
 ```
 
+### `PLAYWRIGHT_CONNECT_URL`
+Type `Optional[str]`, default `None`
+
+URL of a remote Playwright browser instance to connect using
+[`BrowserType.connect`](https://playwright.dev/python/docs/api/class-browsertype#browser-type-connect).
+
+From the upstream Playwright docs:
+> When connecting to another browser launched via
+> [`BrowserType.launchServer`](https://playwright.dev/docs/api/class-browsertype#browser-type-launch-server)
+> in Node.js, the major and minor version needs to match the client version (1.2.3 → is compatible with 1.2.x).
+
+```python
+PLAYWRIGHT_CONNECT_URL = "ws://localhost:35477/ae1fa0bc325adcfd9600d9f712e9c733"
+```
+
+If this setting is used:
+* all non-persistent contexts will be created on the connected remote browser
+* the `PLAYWRIGHT_LAUNCH_OPTIONS` setting is ignored
+
+**This setting CANNOT be used at the same time as `PLAYWRIGHT_CDP_URL`**
+
+### `PLAYWRIGHT_CONNECT_KWARGS`
+Type `dict[str, Any]`, default `{}`
+
+Additional keyword arguments to be passed to
+[`BrowserType.connect`](https://playwright.dev/python/docs/api/class-browsertype#browser-type-connect)
+when using `PLAYWRIGHT_CONNECT_URL`. The `ws_endpoint` key is always ignored;
+`PLAYWRIGHT_CONNECT_URL` is used instead.
+
+```python
+PLAYWRIGHT_CONNECT_KWARGS = {
+    "slow_mo": 1000,
+    "timeout": 10 * 1000
+}
+```
 
 ### `PLAYWRIGHT_CONTEXTS`
 Type `dict[str, dict]`, default `{}`
@@ -286,6 +324,17 @@ def custom_headers(
 PLAYWRIGHT_PROCESS_REQUEST_HEADERS = custom_headers
 ```
 
+### `PLAYWRIGHT_RESTART_DISCONNECTED_BROWSER`
+Type `bool`, default `True`
+
+Whether the browser will be restarted if it gets disconnected, for instance if the local
+browser crashes or a remote connection times out.
+Implemented by listening to the
+[`disconnected` Browser event](https://playwright.dev/python/docs/api/class-browser#browser-event-disconnected);
+for this reason it does not apply to persistent contexts, since
+[`BrowserType.launch_persistent_context`](https://playwright.dev/python/docs/api/class-browsertype#browser-type-launch-persistent-context)
+returns the context directly.
+
 ### `PLAYWRIGHT_MAX_PAGES_PER_CONTEXT`
 Type `int`, defaults to the value of Scrapy's `CONCURRENT_REQUESTS` setting
 
@@ -459,14 +508,16 @@ This key could be used in conjunction with `playwright_include_page` to make a c
 requests using the same page. For instance:
 
 ```python
+from playwright.async_api import Page
+
 def start_requests(self):
     yield scrapy.Request(
         url="https://httpbin.org/get",
         meta={"playwright": True, "playwright_include_page": True},
     )
 
 def parse(self, response, **kwargs):
-    page = response.meta["playwright_page"]
+    page: Page = response.meta["playwright_page"]
     yield scrapy.Request(
         url="https://httpbin.org/headers",
         callback=self.parse_headers,
@@ -507,6 +558,20 @@ def parse(self, response, **kwargs):
     # {'issuer': 'DigiCert TLS RSA SHA256 2020 CA1', 'protocol': 'TLS 1.3', 'subjectName': 'www.example.org', 'validFrom': 1647216000, 'validTo': 1678838399}
 ```
 
+### `playwright_suggested_filename`
+Type `Optional[str]`, read only
+
+The value of the [`Download.suggested_filename`](https://playwright.dev/python/docs/api/class-download#download-suggested-filename)
+attribute when the response is the binary contents of a
+[download](https://playwright.dev/python/docs/downloads) (e.g. a PDF file).
+Only available for responses that only caused a download. Can be accessed
+in the callback via `response.meta['playwright_suggested_filename']`.
+
+```python
+def parse(self, response, **kwargs):
+    print(response.meta["playwright_suggested_filename"])
+    # 'sample_file.pdf'
+```
 
 ## Receiving Page objects in callbacks
 
@@ -525,6 +590,7 @@ necessary the spider job could get stuck because of the limit set by the
 `PLAYWRIGHT_MAX_PAGES_PER_CONTEXT` setting.
 
 ```python
+from playwright.async_api import Page
 import scrapy
 
 class AwesomeSpiderWithPage(scrapy.Spider):
@@ -539,7 +605,7 @@ class AwesomeSpiderWithPage(scrapy.Spider):
         )
 
     def parse_first(self, response):
-        page = response.meta["playwright_page"]
+        page: Page = response.meta["playwright_page"]
         return scrapy.Request(
             url="https://example.com",
             callback=self.parse_second,
@@ -548,13 +614,13 @@ class AwesomeSpiderWithPage(scrapy.Spider):
         )
 
     async def parse_second(self, response):
-        page = response.meta["playwright_page"]
+        page: Page = response.meta["playwright_page"]
         title = await page.title()  # "Example Domain"
         await page.close()
         return {"title": title}
 
     async def errback_close_page(self, failure):
-        page = failure.request.meta["playwright_page"]
+        page: Page = failure.request.meta["playwright_page"]
         await page.close()
 ```
 
 
@@ -1,5 +1,18 @@
 # scrapy-playwright changelog
 
+### [v0.0.39](https://github.com/scrapy-plugins/scrapy-playwright/releases/tag/v0.0.39) (2024-07-11)
+
+* Return proper status and headers for downloads (#293)
+* Restart on browser crash (#295)
+* Override method and/or body only for the first matching request (#297)
+
+
+### [v0.0.38](https://github.com/scrapy-plugins/scrapy-playwright/releases/tag/v0.0.38) (2024-07-06)
+
+* Fix freezing on responses with status 204 (#292)
+* Connect to remote browser using BrowserType.connect (#283)
+
+
 ### [v0.0.37](https://github.com/scrapy-plugins/scrapy-playwright/releases/tag/v0.0.37) (2024-07-03)
 
 * Improve Windows concurrency (#286)
 
@@ -3,6 +3,7 @@
 from pathlib import Path
 from typing import Generator, Optional
 
+from playwright.async_api import Page
 from scrapy import Spider
 from scrapy.http.response import Response
 
@@ -51,7 +52,7 @@ def parse(self, response: Response, current_page: Optional[int] = None) -> Gener
 
     async def parse_book(self, response: Response) -> dict:
         url_sha256 = hashlib.sha256(response.url.encode("utf-8")).hexdigest()
-        page = response.meta["playwright_page"]
+        page: Page = response.meta["playwright_page"]
         await page.screenshot(
             path=Path(__file__).parent / "books" / f"{url_sha256}.png", full_page=True
         )
 
@@ -1,5 +1,6 @@
 from pathlib import Path
 
+from playwright.async_api import Page
 from scrapy import Spider, Request
 
 
@@ -96,7 +97,7 @@ def start_requests(self):
             )
 
     async def parse(self, response, **kwargs):
-        page = response.meta["playwright_page"]
+        page: Page = response.meta["playwright_page"]
         context_name = response.meta["playwright_context"]
         storage_state = await page.context.storage_state()
         await page.close()
 
@@ -1,3 +1,4 @@
+from playwright.async_api import Page
 from scrapy import Spider, Request
 
 
@@ -45,5 +46,5 @@ def parse(self, response, **kwargs):
         return {"url": response.url}
 
     async def errback(self, failure):
-        page = failure.request.meta["playwright_page"]
+        page: Page = failure.request.meta["playwright_page"]
         await page.close()
@@ -1,3 +1,4 @@
+from playwright.async_api import Page
 from scrapy import Spider, Request
 from scrapy_playwright.page import PageMethod
 
@@ -27,7 +28,7 @@ def start_requests(self):
         )
 
     async def parse(self, response, **kwargs):
-        page = response.meta["playwright_page"]
+        page: Page = response.meta["playwright_page"]
         storage_state = await page.context.storage_state()
         await page.close()
         return {
 
@@ -1 +1 @@
-__version__ = "0.0.37"
+__version__ = "0.0.39"
Original file line number	Diff line number	Diff line change
`@@ -1 +1 @@`
`1`		`-__version__ = "0.0.37"`
	`1`	`+__version__ = "0.0.39"`