@@ -290,7 +290,7 @@ return scrapy.Request(
290290 url = " https://example.org" ,
291291 meta = {
292292 " playwright" : True ,
293- " playwright_context" : " persistent " ,
293+ " playwright_context" : " awesome_context " ,
294294 },
295295)
296296```
@@ -307,7 +307,7 @@ return scrapy.Request(
307307 url = " https://example.org" ,
308308 meta = {
309309 " playwright" : True ,
310- " playwright_context" : " new " ,
310+ " playwright_context" : " awesome_context " ,
311311 " playwright_context_kwargs" : {
312312 " ignore_https_errors" : True ,
313313 },
@@ -319,15 +319,16 @@ return scrapy.Request(
319319Type ` bool ` , default ` False `
320320
321321If ` True ` , the [ Playwright page] ( https://playwright.dev/python/docs/api/class-page )
322- that was used to download the request will be available in the callback via
323- ` response.meta['playwright_page'] ` .
322+ that was used to download the request will be available in the callback at
323+ ` response.meta['playwright_page'] ` . If ` False ` (or unset) the page will be
324+ closed immediately after processing the request.
324325
325326** Important!**
326327
327328This meta key is entirely optional, it's NOT necessary for the page to load or for any
328329asynchronous operation to be performed (specifically, it's NOT necessary for ` PageMethod `
329330objects to be applied). Use it only if you need access to the Page object in the callback
330- that handles the request .
331+ that handles the response .
331332
332333For more information and important notes see
333334[ Receiving Page objects in callbacks] ( #receiving-page-objects-in-callbacks ) .
@@ -371,8 +372,8 @@ class AwesomeSpider(scrapy.Spider):
371372
372373** Important!**
373374
374- ` scrapy-playwright ` uses ` Page.route ` & ` Page.unroute ` internally, please
375- avoid using these methods unless you know exactly what you're doing.
375+ ` scrapy-playwright ` uses ` Page.route ` & ` Page.unroute ` internally, avoid using
376+ these methods unless you know exactly what you're doing.
376377
377378### ` playwright_page_methods `
378379Type ` Iterable[PageMethod] ` , default ` () `
@@ -494,7 +495,7 @@ class AwesomeSpiderWithPage(scrapy.Spider):
494495* When passing ` playwright_include_page=True ` , make sure pages are always closed
495496 when they are no longer used. It's recommended to set a Request errback to make
496497 sure pages are closed even if a request fails (if ` playwright_include_page=False `
497- or unset, pages are automatically closed upon encountering an exception).
498+ pages are automatically closed upon encountering an exception).
498499 This is important, as open pages count towards the limit set by
499500 ` PLAYWRIGHT_MAX_PAGES_PER_CONTEXT ` and crawls could freeze if the limit is reached
500501 and pages remain open indefinitely.
@@ -575,7 +576,11 @@ def parse(self, response):
575576 url = " https://example.org" ,
576577 callback = self .parse_in_new_context,
577578 errback = self .close_context_on_error,
578- meta = {" playwright" : True , " playwright_context" : " new" , " playwright_include_page" : True },
579+ meta = {
580+ " playwright" : True ,
581+ " playwright_context" : " awesome_context" ,
582+ " playwright_include_page" : True ,
583+ },
579584 )
580585
581586async def parse_in_new_context (self , response ):
@@ -585,19 +590,17 @@ async def parse_in_new_context(self, response):
585590 return {" title" : title}
586591
587592async def close_context_on_error (self , failure ):
588- self .logger.warning(" There was an error when processing %s : %s " , failure.request, failure.value)
589593 page = failure.request.meta[" playwright_page" ]
590594 await page.context.close()
591595```
592596
593597### Maximum concurrent context count
594598
595599Specify a value for the ` PLAYWRIGHT_MAX_CONTEXTS ` setting to limit the amount
596- of concurent contexts. This setting should be used with caution: it's possible
597- to block the whole crawl if contexts are not closed after they are no longer
598- used (refer to the above section to dinamically close contexts). Make sure to
599- define an errback to still be able to close the context even if there are
600- errors with a request.
600+ of concurrent contexts. Use with caution: it's possible to block the whole crawl
601+ if contexts are not closed after they are no longer used (refer to the above
602+ section to dynamically close contexts). Make sure to define an errback to still
603+ close contexts even if there are errors.
601604
602605
603606## Proxy support
@@ -627,7 +630,7 @@ class ProxySpider(Spider):
627630 print (response.text)
628631```
629632
630- You can also set proxies per context with the ` PLAYWRIGHT_CONTEXTS ` setting:
633+ Proxies can also be set at the context level with the ` PLAYWRIGHT_CONTEXTS ` setting:
631634
632635``` python
633636PLAYWRIGHT_CONTEXTS = {
@@ -715,7 +718,7 @@ async def parse(self, response):
715718
716719### Supported methods
717720
718- Please refer to the [ upstream docs for the ` Page ` class] ( https://playwright.dev/python/docs/api/class-page )
721+ Refer to the [ upstream docs for the ` Page ` class] ( https://playwright.dev/python/docs/api/class-page )
719722to see available methods.
720723
721724### Impact on Response objects
@@ -761,14 +764,20 @@ class EventSpider(scrapy.Spider):
761764 logging.info(f " Received response with URL { response.url} " )
762765```
763766
764- See the [ upstream ` Page ` docs] ( https://playwright.dev/python/docs/api/class-page ) for a list of
765- the accepted events and the arguments passed to their handlers.
767+ See the [ upstream ` Page ` docs] ( https://playwright.dev/python/docs/api/class-page )
768+ for a list of the accepted events and the arguments passed to their handlers.
769+
770+ ### Notes about page event handlers
766771
767- ** Note** : keep in mind that, unless they are
768- [ removed later] ( https://playwright.dev/python/docs/events#addingremoving-event-listener ) ,
769- these handlers will remain attached to the page and will be called for subsequent
770- downloads using the same page. This is usually not a problem, since by default
771- requests are performed in single-use pages.
772+ * Event handlers will remain attached to the page and will be called for
773+ subsequent downloads using the same page unless they are
774+ [ removed later] ( https://playwright.dev/python/docs/events#addingremoving-event-listener ) .
775+ This is usually not a problem, since by default requests are performed in
776+ single-use pages.
777+ * Event handlers will process Playwright objects, not Scrapy ones. For example,
778+ for each Scrapy request/response there will be a matching Playwright
779+ request/response, but not the other way around: background requests/responses to get
780+ images, scripts, stylesheets, etc. are not seen by Scrapy.
772781
773782
774783## Examples
0 commit comments