@@ -1,6 +1,6 @@
 from __future__ import annotations

-import asyncio, time, typing
+import asyncio, time, typing, os
 import httpx, openai

 from openai.types.chat import ChatCompletionAssistantMessageParam, ChatCompletionUserMessageParam
@@ -19,8 +19,6 @@


 def prep_env_vars(bento: BentoInfo) -> None:
-    import os
-
     env_vars = bento.envs
     for env_var in env_vars:
         if not env_var.get('value'):
@@ -30,23 +28,57 @@ def prep_env_vars(bento: BentoInfo) -> None:
         os.environ[key] = value


-def _get_serve_cmd(bento: BentoInfo, port: int = 3000) -> tuple[list[str], EnvVars]:
+def _get_serve_cmd(
+    bento: BentoInfo, port: int = 3000, cli_args: typing.Optional[list[str]] = None
+) -> tuple[list[str], EnvVars]:
     cmd = ['bentoml', 'serve', bento.bentoml_tag]
     if port != 3000:
         cmd += ['--port', str(port)]
+
+    # Add CLI arguments if provided
+    if cli_args:
+        for arg in cli_args:
+            cmd += ['--arg', arg]
+
     return cmd, EnvVars({'BENTOML_HOME': f'{bento.repo.path}/bentoml'})


-def serve(bento: BentoInfo, port: int = 3000) -> None:
+def serve(
+    bento: BentoInfo,
+    port: int = 3000,
+    cli_envs: typing.Optional[list[str]] = None,
+    cli_args: typing.Optional[list[str]] = None,
+) -> None:
     prep_env_vars(bento)
-    cmd, env = _get_serve_cmd(bento, port=port)
+    cmd, env = _get_serve_cmd(bento, port=port, cli_args=cli_args)
+
+    # Add CLI environment variables if provided
+    if cli_envs:
+        for env_var in cli_envs:
+            if '=' in env_var:
+                key, value = env_var.split('=', 1)
+                env[key] = value
+            else:
+                env[env_var] = os.environ.get(env_var, '')
+
     venv = ensure_venv(bento, runtime_envs=env)
     output(f'Access the Chat UI at http://localhost:{port}/chat (or with your IP)')
     run_command(cmd, env=env, cwd=None, venv=venv)


-async def _run_model(bento: BentoInfo, port: int = 3000, timeout: int = 600) -> None:
-    cmd, env = _get_serve_cmd(bento, port)
+async def _run_model(
+    bento: BentoInfo,
+    port: int = 3000,
+    timeout: int = 600,
+    cli_env: typing.Optional[dict[str, typing.Any]] = None,
+    cli_args: typing.Optional[list[str]] = None,
+) -> None:
+    cmd, env = _get_serve_cmd(bento, port, cli_args=cli_args)
+
+    # Merge CLI environment variables if provided
+    if cli_env:
+        env.update(cli_env)
+
     venv = ensure_venv(bento, runtime_envs=env)
     async with async_run_command(cmd, env=env, cwd=None, venv=venv, silent=False) as server_proc:
         output(f'Model server started {server_proc.pid}')
@@ -109,9 +141,26 @@ async def _run_model(bento: BentoInfo, port: int = 3000, timeout: int = 600) ->
             except KeyboardInterrupt:
                 break
         output('\nStopping model server...', style='green')
-    output('Stopped model server', style='green')
+    output('Stopped model server', style='green')


-def run(bento: BentoInfo, port: int = 3000, timeout: int = 600) -> None:
+def run(
+    bento: BentoInfo,
+    port: int = 3000,
+    timeout: int = 600,
+    cli_envs: typing.Optional[list[str]] = None,
+    cli_args: typing.Optional[list[str]] = None,
+) -> None:
     prep_env_vars(bento)
-    asyncio.run(_run_model(bento, port=port, timeout=timeout))
+
+    # Add CLI environment variables to the process
+    env = {}
+    if cli_envs:
+        for env_var in cli_envs:
+            if '=' in env_var:
+                key, value = env_var.split('=', 1)
+                env[key] = value
+            else:
+                env[env_var] = os.environ.get(env_var, '')
+
+    asyncio.run(_run_model(bento, port=port, timeout=timeout, cli_env=env, cli_args=cli_args))
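
For context, a minimal sketch of how the updated entry points could be called once this lands. The `bento` value and the example env/arg strings below are assumptions for illustration, not taken from the diff:

# Hypothetical call sites for the new parameters (a sketch, not part of this PR).
# `bento` is assumed to be an already-resolved BentoInfo.
# cli_envs entries are either KEY=VALUE pairs or bare names copied from os.environ;
# cli_args entries are forwarded to `bentoml serve` as repeated `--arg <value>` flags.
serve(
    bento,
    port=3001,
    cli_envs=['HF_TOKEN', 'MAX_TOKENS=2048'],  # bare name inherits the current value
    cli_args=['max_model_len=8192'],
)

# The interactive run path accepts the same options.
run(
    bento,
    port=3001,
    timeout=600,
    cli_envs=['HF_TOKEN'],
    cli_args=['max_model_len=8192'],
)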