1212# limitations under the License.
1313# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
1414import logging
15+ import subprocess
1516import threading
1617import time
1718from typing import Any , Dict , List , Optional , Type , Union
@@ -94,11 +95,6 @@ def __init__(
9495 )
9596
9697 def _start_server (self ) -> None :
97- from sglang .utils import ( # type: ignore[import-untyped]
98- execute_shell_command ,
99- wait_for_server ,
100- )
101-
10298 try :
10399 if not self ._url :
104100 cmd = (
@@ -108,10 +104,10 @@ def _start_server(self) -> None:
108104 f"--host 0.0.0.0"
109105 )
110106
111- server_process = execute_shell_command (cmd )
112- wait_for_server ("http://localhost:30000" )
107+ server_process = _execute_shell_command (cmd )
108+ _wait_for_server ("http://localhost:30000" )
113109 self ._url = "http://127.0.0.1:30000/v1"
114- self .server_process = server_process
110+ self .server_process = server_process # type: ignore[assignment]
115111 # Start the inactivity monitor in a background thread
116112 self ._inactivity_thread = threading .Thread (
117113 target = self ._monitor_inactivity , daemon = True
@@ -138,8 +134,6 @@ def _monitor_inactivity(self):
138134 r"""Monitor whether the server process has been inactive for over 10
139135 minutes.
140136 """
141- from sglang .utils import terminate_process
142-
143137 while True :
144138 # Check every 10 seconds
145139 time .sleep (10 )
@@ -150,7 +144,7 @@ def _monitor_inactivity(self):
150144 time .time () - self .last_run_time > 600
151145 ):
152146 if self .server_process :
153- terminate_process (self .server_process )
147+ _terminate_process (self .server_process )
154148 self .server_process = None
155149 self ._client = None # Invalidate the client
156150 logging .info (
@@ -270,3 +264,101 @@ def stream(self) -> bool:
270264 bool: Whether the model is in stream mode.
271265 """
272266 return self .model_config_dict .get ('stream' , False )
267+
268+
269+ # Below are helper functions from sglang.utils
def _terminate_process(process):
    r"""Terminate a process together with all of its child processes.

    Args:
        process: Process handle exposing a `pid` attribute; that PID is
            used as the root of the process tree to kill.
    """
    root_pid = process.pid
    _kill_process_tree(root_pid)
272+
273+
def _kill_process_tree(
    parent_pid, include_parent: bool = True, skip_pid: Optional[int] = None
):
    r"""Kill the process and all its child processes.

    Args:
        parent_pid: PID of the root of the process tree. If None, the
            current process is used as the root and is itself spared, i.e.
            only its descendants are killed.
        include_parent (bool): Whether the root process is killed in
            addition to its descendants. (default: :obj:`True`)
        skip_pid (Optional[int]): PID of a descendant that must be left
            running. (default: :obj:`None`)
    """
    import os
    import signal

    import psutil

    if parent_pid is None:
        parent_pid = os.getpid()
        include_parent = False

    try:
        itself = psutil.Process(parent_pid)
    except psutil.NoSuchProcess:
        # Nothing to do: the root process is already gone.
        return

    for child in itself.children(recursive=True):
        if child.pid == skip_pid:
            continue
        try:
            child.kill()
        except psutil.NoSuchProcess:
            # The child exited on its own in the meantime.
            pass

    if include_parent:
        try:
            itself.kill()

            # Sometimes processes cannot be killed with SIGKILL, so we send
            # an additional signal to kill them. SIGQUIT is not defined on
            # every platform (e.g. Windows), so look it up defensively
            # instead of failing with AttributeError.
            sigquit = getattr(signal, "SIGQUIT", None)
            if sigquit is not None:
                itself.send_signal(sigquit)
        except psutil.NoSuchProcess:
            pass
310+
311+
def _execute_shell_command(command: str) -> subprocess.Popen:
    r"""Execute a shell command and return the process handle.

    The command is split into arguments using shell-like quoting rules and
    started directly (no shell is involved).

    Args:
        command (str): Shell command as a string (can include \\ line
            continuations). Every backslash is replaced by a space before
            splitting; quoted substrings stay together as one argument.

    Returns:
        subprocess.Popen: Process handle. The child's stderr is redirected
            to its stdout.
    """
    import shlex

    # Join continued lines first, then blank out any remaining backslashes.
    command = command.replace("\\\n", " ").replace("\\", " ")
    # shlex keeps quoted arguments (e.g. `-c "a b"`) in one piece, which a
    # plain str.split() would tear apart into broken tokens.
    parts = shlex.split(command)

    return subprocess.Popen(parts, text=True, stderr=subprocess.STDOUT)
327+
328+
def _wait_for_server(base_url: str, timeout: Optional[int] = None) -> None:
    r"""Wait for the server to be ready by polling the /v1/models endpoint.

    The endpoint is polled about once per second until it answers with
    HTTP 200. Connection errors and non-200 answers both count as "not
    ready yet".

    Args:
        base_url (str): The base URL of the server.
        timeout (Optional[int]): Maximum time to wait in seconds. None (or
            0) means wait forever. (default: :obj:`None`)

    Raises:
        TimeoutError: If `timeout` is set and the server has not become
            ready within that many seconds.
    """
    import requests

    # Upper bound for a single poll, so that a stalled connection cannot
    # block the loop (and with it the overall deadline) indefinitely.
    request_timeout = 10

    start_time = time.time()
    while True:
        try:
            response = requests.get(
                f"{base_url}/v1/models",
                headers={"Authorization": "Bearer None"},
                timeout=request_timeout,
            )
        except requests.exceptions.RequestException:
            # Server is not accepting requests yet; retry below.
            pass
        else:
            if response.status_code == 200:
                time.sleep(5)
                print(
                    """\n
                    NOTE: Typically, the server runs in a separate terminal.
                    In this notebook, we run the server and notebook code
                    together, so their outputs are combined.
                    To improve clarity, the server logs are displayed in the
                    original black color, while the notebook outputs are
                    highlighted in blue.
                    """
                )
                break

        # Checked on every iteration, including failed connection attempts,
        # so the deadline is honoured while the server is still down.
        if timeout and time.time() - start_time > timeout:
            raise TimeoutError(
                "Server did not become ready within timeout period"
            )
        # Pause between polls in all not-ready cases to avoid a busy loop.
        time.sleep(1)
0 commit comments