@@ -83,6 +83,7 @@ def __init__(self, crawler: Crawler) -> None:
83
83
) or settings .getint ("CONCURRENT_REQUESTS" )
84
84
self .context_launch_lock = asyncio .Lock ()
85
85
self .context_wrappers : Dict [str , BrowserContextWrapper ] = {}
86
+ self .existing_context : bool = settings .getbool ("PLAYWRIGHT_EXISTING_CONTEXT" )
86
87
self .startup_context_kwargs : dict = settings .getdict ("PLAYWRIGHT_CONTEXTS" )
87
88
if settings .getint ("PLAYWRIGHT_MAX_CONTEXTS" ):
88
89
self .context_semaphore = asyncio .Semaphore (
@@ -136,6 +137,16 @@ async def _launch(self) -> None:
136
137
self ._set_max_concurrent_context_count ()
137
138
logger .info ("Startup context(s) launched" )
138
139
self .stats .set_value ("playwright/page_count" , self ._get_total_page_count ())
140
+ if self .existing_context and self .browser_cdp_url :
141
+ logger .info ("Getting existing context(s)" )
142
+ if not hasattr (self , "browser" ):
143
+ await self ._maybe_connect_devtools ()
144
+ await asyncio .gather (
145
+ * [self ._get_existing_browser_context (index = i ) for i in range (len (self .browser .contexts ))]
146
+ )
147
+ self ._set_max_concurrent_context_count ()
148
+ logger .info ("Existing context(s) retrieved" )
149
+ self .stats .set_value ("playwright/page_count" , self ._get_total_page_count ())
139
150
del self .startup_context_kwargs
140
151
141
152
async def _maybe_launch_browser (self ) -> None :
@@ -209,6 +220,48 @@ async def _create_browser_context(
209
220
self ._set_max_concurrent_context_count ()
210
221
return self .context_wrappers [name ]
211
222
223
async def _get_existing_browser_context(
    self,
    index: int = 0,
    spider: Optional[Spider] = None,
) -> Optional[BrowserContextWrapper]:
    """Wrap a context that already exists in the CDP-connected browser.

    Looks up ``self.browser.contexts[index]``, registers the close
    callback built by ``_make_close_browser_context_callback``, updates
    the ``playwright/context_count`` stats, applies the default
    navigation timeout (when one is configured) and stores the resulting
    wrapper in ``self.context_wrappers`` under ``existing_context_<index>``.

    Args:
        index: Position of the context in ``self.browser.contexts``.
        spider: Spider passed to the close callback and attached to the
            log record; may be ``None``.

    Returns:
        The wrapper registered for the context, or ``None`` when no CDP
        URL is configured or ``index`` does not refer to an existing
        context.
    """
    if not self.browser_cdp_url:
        return None

    # Connect *before* reading self.browser: the attribute does not exist
    # until the devtools connection has been made.
    if not hasattr(self, "browser"):
        await self._maybe_connect_devtools()

    contexts = self.browser.contexts
    # Valid positions are 0 .. len(contexts) - 1; anything else has no
    # matching context to wrap.
    if not 0 <= index < len(contexts):
        return None
    # Grab the context before awaiting the semaphore, so a failed lookup
    # can never leave a semaphore permit acquired.
    context = contexts[index]

    # NOTE(review): the permit is presumably released by the close
    # callback registered below -- confirm against
    # _make_close_browser_context_callback.
    if hasattr(self, "context_semaphore"):
        await self.context_semaphore.acquire()

    name = f"existing_context_{index}"
    # Contexts obtained this way are always reported as non-persistent
    # and remote.
    persistent = False
    remote = True

    context.on(
        "close",
        self._make_close_browser_context_callback(name, persistent, remote, spider),
    )
    self.stats.inc_value("playwright/context_count")
    self.stats.inc_value(f"playwright/context_count/persistent/{persistent}")
    self.stats.inc_value(f"playwright/context_count/remote/{remote}")
    logger.debug(
        "Browser context started: '%s' (persistent=%s, remote=%s)",
        name,
        persistent,
        remote,
        extra={
            "spider": spider,
            "context_name": name,
            "persistent": persistent,
            "remote": remote,
        },
    )
    if self.default_navigation_timeout is not None:
        context.set_default_navigation_timeout(self.default_navigation_timeout)
    self.context_wrappers[name] = BrowserContextWrapper(
        context=context,
        # Per-context limit on concurrently open pages.
        semaphore=asyncio.Semaphore(value=self.max_pages_per_context),
        persistent=persistent,
    )
    self._set_max_concurrent_context_count()
    return self.context_wrappers[name]
264
+
212
265
async def _create_page (self , request : Request , spider : Spider ) -> Page :
213
266
"""Create a new page in a context, also creating a new context if necessary."""
214
267
context_name = request .meta .setdefault ("playwright_context" , DEFAULT_CONTEXT_NAME )
0 commit comments