@@ -168,14 +168,17 @@ Type `Optional[str]`, default `None`
168168The endpoint of a remote Chromium browser to connect to using the
169169[ Chrome DevTools Protocol] ( https://chromedevtools.github.io/devtools-protocol/ ) ,
170170via [ ` BrowserType.connect_over_cdp ` ] ( https://playwright.dev/python/docs/api/class-browsertype#browser-type-connect-over-cdp ) .
171+
172+ ``` python
173+ PLAYWRIGHT_CDP_URL = " http://localhost:9222"
174+ ```
175+
171176If this setting is used:
172177* all non-persistent contexts will be created on the connected remote browser
173178* the ` PLAYWRIGHT_LAUNCH_OPTIONS ` setting is ignored
174179* the ` PLAYWRIGHT_BROWSER_TYPE ` setting must not be set to a value other than "chromium"
175180
176- ``` python
177- PLAYWRIGHT_CDP_URL = " http://localhost:9222"
178- ```
181+ ** This setting CANNOT be used at the same time as ` PLAYWRIGHT_CONNECT_URL ` **
179182
180183### ` PLAYWRIGHT_CDP_KWARGS `
181184Type ` dict[str, Any] ` , default ` {} `
@@ -192,6 +195,41 @@ PLAYWRIGHT_CDP_KWARGS = {
192195}
193196```
194197
198+ ### ` PLAYWRIGHT_CONNECT_URL `
199+ Type ` Optional[str] ` , default ` None `
200+
201+ URL of a remote Playwright browser instance to connect to using
202+ [ ` BrowserType.connect ` ] ( https://playwright.dev/python/docs/api/class-browsertype#browser-type-connect ) .
203+
204+ From the upstream Playwright docs:
205+ > When connecting to another browser launched via
206+ > [ ` BrowserType.launchServer ` ] ( https://playwright.dev/docs/api/class-browsertype#browser-type-launch-server )
207+ > in Node.js, the major and minor version needs to match the client version (1.2.3 → is compatible with 1.2.x).
208+
209+ ``` python
210+ PLAYWRIGHT_CONNECT_URL = " ws://localhost:35477/ae1fa0bc325adcfd9600d9f712e9c733"
211+ ```
212+
213+ If this setting is used:
214+ * all non-persistent contexts will be created on the connected remote browser
215+ * the ` PLAYWRIGHT_LAUNCH_OPTIONS ` setting is ignored
216+
217+ ** This setting CANNOT be used at the same time as ` PLAYWRIGHT_CDP_URL ` **
218+
219+ ### ` PLAYWRIGHT_CONNECT_KWARGS `
220+ Type ` dict[str, Any] ` , default ` {} `
221+
222+ Additional keyword arguments to be passed to
223+ [ ` BrowserType.connect ` ] ( https://playwright.dev/python/docs/api/class-browsertype#browser-type-connect )
224+ when using ` PLAYWRIGHT_CONNECT_URL ` . The ` ws_endpoint ` key is always ignored;
225+ ` PLAYWRIGHT_CONNECT_URL ` is used instead.
226+
227+ ``` python
228+ PLAYWRIGHT_CONNECT_KWARGS = {
229+ " slow_mo" : 1000 ,
230+ " timeout" : 10 * 1000
231+ }
232+ ```
195233
196234### ` PLAYWRIGHT_CONTEXTS `
197235Type ` dict[str, dict] ` , default ` {} `
@@ -286,6 +324,17 @@ def custom_headers(
286324PLAYWRIGHT_PROCESS_REQUEST_HEADERS = custom_headers
287325```
288326
327+ ### ` PLAYWRIGHT_RESTART_DISCONNECTED_BROWSER `
328+ Type ` bool ` , default ` True `
329+
330+ Whether the browser will be restarted if it gets disconnected, for instance if the local
331+ browser crashes or a remote connection times out.
332+ This is implemented by listening to the
333+ [ ` disconnected ` Browser event] ( https://playwright.dev/python/docs/api/class-browser#browser-event-disconnected ) .
334+ For this reason, it does not apply to persistent contexts, since
335+ [ BrowserType.launch_persistent_context] ( https://playwright.dev/python/docs/api/class-browsertype#browser-type-launch-persistent-context )
336+ returns the context directly.
337+
289338### ` PLAYWRIGHT_MAX_PAGES_PER_CONTEXT `
290339Type ` int ` , defaults to the value of Scrapy's ` CONCURRENT_REQUESTS ` setting
291340
@@ -459,14 +508,16 @@ This key could be used in conjunction with `playwright_include_page` to make a c
459508requests using the same page. For instance:
460509
461510``` python
511+ from playwright.async_api import Page
512+
462513def start_requests (self ):
463514 yield scrapy.Request(
464515 url = " https://httpbin.org/get" ,
465516 meta = {" playwright" : True , " playwright_include_page" : True },
466517 )
467518
468519def parse (self , response , ** kwargs ):
469- page = response.meta[" playwright_page" ]
520+ page: Page = response.meta[" playwright_page" ]
470521 yield scrapy.Request(
471522 url = " https://httpbin.org/headers" ,
472523 callback = self .parse_headers,
@@ -507,6 +558,20 @@ def parse(self, response, **kwargs):
507558 # {'issuer': 'DigiCert TLS RSA SHA256 2020 CA1', 'protocol': 'TLS 1.3', 'subjectName': 'www.example.org', 'validFrom': 1647216000, 'validTo': 1678838399}
508559```
509560
561+ ### ` playwright_suggested_filename `
562+ Type ` Optional[str] ` , read only
563+
564+ The value of the [ ` Download.suggested_filename ` ] ( https://playwright.dev/python/docs/api/class-download#download-suggested-filename )
565+ attribute when the response is the binary contents of a
566+ [ download] ( https://playwright.dev/python/docs/downloads ) (e.g. a PDF file).
567+ Only available for responses that only caused a download. Can be accessed
568+ in the callback via ` response.meta['playwright_suggested_filename'] ` .
569+
570+ ``` python
571+ def parse (self , response , ** kwargs ):
572+ print (response.meta[" playwright_suggested_filename" ])
573+ # 'sample_file.pdf'
574+ ```
510575
511576## Receiving Page objects in callbacks
512577
@@ -525,6 +590,7 @@ necessary the spider job could get stuck because of the limit set by the
525590` PLAYWRIGHT_MAX_PAGES_PER_CONTEXT ` setting.
526591
527592``` python
593+ from playwright.async_api import Page
528594import scrapy
529595
530596class AwesomeSpiderWithPage (scrapy .Spider ):
@@ -539,7 +605,7 @@ class AwesomeSpiderWithPage(scrapy.Spider):
539605 )
540606
541607 def parse_first (self , response ):
542- page = response.meta[" playwright_page" ]
608+ page: Page = response.meta[" playwright_page" ]
543609 return scrapy.Request(
544610 url = " https://example.com" ,
545611 callback = self .parse_second,
@@ -548,13 +614,13 @@ class AwesomeSpiderWithPage(scrapy.Spider):
548614 )
549615
550616 async def parse_second (self , response ):
551- page = response.meta[" playwright_page" ]
617+ page: Page = response.meta[" playwright_page" ]
552618 title = await page.title() # "Example Domain"
553619 await page.close()
554620 return {" title" : title}
555621
556622 async def errback_close_page (self , failure ):
557- page = failure.request.meta[" playwright_page" ]
623+ page: Page = failure.request.meta[" playwright_page" ]
558624 await page.close()
559625```
560626
0 commit comments