Description
deploy 脚本:
# Serve the checkpoint through the vLLM backend, listening on 127.0.0.1:8081.
# NOTE(review): --tp and --tensor_parallel_size are both set to 4; confirm whether both flags are needed.
swift deploy \
    --model /v0-20250702-155647/checkpoint-410 \
    --infer_backend vllm \
    --max_new_tokens 8048 \
    --tp 4 \
    --host '127.0.0.1' \
    --port 8081 \
    --tensor_parallel_size 4 \
    --max_batch_size 32 \
    --gpu_memory_utilization 0.9 \
    --served_model_name Qwen3-8B
显示:
time: 2025-07-09 11:23:32; host: dlp2-33-151; message: INFO: Uvicorn running on http://127.0.0.1:8081 (Press CTRL+C to quit)
time: 2025-07-09 11:23:32; host: dlp2-33-151; message: INFO: Waiting for application startup.
time: 2025-07-09 11:23:32; host: dlp2-33-151; message: INFO: Application startup complete.
time: 2025-07-09 11:23:32; host: dlp2-33-151; message: INFO: Started server process [190]
client代码:
from swift.llm import InferRequest, InferClient, RequestConfig
from swift.plugin import InferStats
# Connect to the server started by the `swift deploy` command (same host and port).
# NOTE(review): the traceback reports a connection attempt to 0.0.0.0:8081 even though
# host='127.0.0.1' is passed here; confirm the script that actually ran matches this snippet.
engine = InferClient(host='127.0.0.1', port=8081)
print(f'models: {engine.models}')
metric = InferStats()
# NOTE(review): max_tokens (8512) is larger than the server's --max_new_tokens (8048);
# confirm which limit is intended.
request_config = RequestConfig(max_tokens=8512, temperature=0)

# Using 2 infer_requests to demonstrate batch inference
# Supports local paths, base64, and URLs
infer_requests = [
    InferRequest(messages=[{'role': 'user', 'content': 'Who are you?'}]),
    InferRequest(messages=[{'role': 'user', 'content': '给出冒泡排序的java代码'}]),
]
resp_list = engine.infer(infer_requests, request_config, metrics=[metric])
# Only the first response is printed; the metric object collects stats for the whole batch.
print(f'response0: {resp_list[0].choices[0].message.content}')
print(metric.compute())
metric.reset()
报错:
Traceback (most recent call last):
File "//anaconda3_new/envs/vl/lib/python3.11/site-packages/aiohttp/connector.py", line 1115, in _wrap_create_connection
sock = await aiohappyeyeballs.start_connection(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "//anaconda3_new/envs/vl/lib/python3.11/site-packages/aiohappyeyeballs/impl.py", line 93, in start_connection
raise first_exception
File "//anaconda3_new/envs/vl/lib/python3.11/site-packages/aiohappyeyeballs/impl.py", line 71, in start_connection
sock = await _connect_sock(
^^^^^^^^^^^^^^^^^^^^
File "//anaconda3_new/envs/vl/lib/python3.11/site-packages/aiohappyeyeballs/impl.py", line 163, in _connect_sock
await loop.sock_connect(sock, address)
File "//anaconda3_new/envs/vl/lib/python3.11/asyncio/selector_events.py", line 633, in sock_connect
return await fut
^^^^^^^^^
File "//anaconda3_new/envs/vl/lib/python3.11/asyncio/selector_events.py", line 668, in _sock_connect_cb
raise OSError(err, f'Connect call failed {address}')
ConnectionRefusedError: [Errno 111] Connect call failed ('0.0.0.0', 8081)
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "//RL_chat/server/try.py", line 5, in <module>
print(f'models: {engine.models}')
^^^^^^^^^^^^^
File "//base_model/ms-swift-vl/swift/llm/infer/infer_engine/infer_client.py", line 52, in models
for model in self.get_model_list().data:
^^^^^^^^^^^^^^^^^^^^^
File "//base_model/ms-swift-vl/swift/llm/infer/infer_engine/infer_client.py", line 62, in get_model_list
return self.safe_asyncio_run(coro)
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "//base_model/ms-swift-vl/swift/llm/infer/infer_engine/infer_engine.py", line 276, in safe_asyncio_run
return InferEngine.thread_run(asyncio.run, args=(coro, ))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "//base_model/ms-swift-vl/swift/llm/infer/infer_engine/infer_engine.py", line 271, in thread_run
raise result
File "//base_model/ms-swift-vl/swift/llm/infer/infer_engine/infer_engine.py", line 261, in func
queue.put(target(*args, **kwargs))
^^^^^^^^^^^^^^^^^^^^^^^
File "//anaconda3_new/envs/vl/lib/python3.11/asyncio/runners.py", line 190, in run
return runner.run(main)
^^^^^^^^^^^^^^^^
File "//anaconda3_new/envs/vl/lib/python3.11/asyncio/runners.py", line 118, in run
return self._loop.run_until_complete(task)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "//anaconda3_new/envs/vl/lib/python3.11/asyncio/base_events.py", line 650, in run_until_complete
return future.result()
^^^^^^^^^^^^^^^
File "//base_model/ms-swift-vl/swift/llm/infer/infer_engine/infer_client.py", line 75, in get_model_list_async
async with session.get(url, **self._get_request_kwargs()) as resp:
File "//anaconda3_new/envs/vl/lib/python3.11/site-packages/aiohttp/client.py", line 1425, in __aenter__
self._resp: _RetType = await self._coro
^^^^^^^^^^^^^^^^
File "//anaconda3_new/envs/vl/lib/python3.11/site-packages/aiohttp/client.py", line 703, in _request
conn = await self._connector.connect(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "//anaconda3_new/envs/vl/lib/python3.11/site-packages/aiohttp/connector.py", line 548, in connect
proto = await self._create_connection(req, traces, timeout)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "//anaconda3_new/envs/vl/lib/python3.11/site-packages/aiohttp/connector.py", line 1056, in _create_connection
_, proto = await self._create_direct_connection(req, traces, timeout)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "//anaconda3_new/envs/vl/lib/python3.11/site-packages/aiohttp/connector.py", line 1400, in _create_direct_connection
raise last_exc
File "//anaconda3_new/envs/vl/lib/python3.11/site-packages/aiohttp/connector.py", line 1369, in _create_direct_connection
transp, proto = await self._wrap_create_connection(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "//anaconda3_new/envs/vl/lib/python3.11/site-packages/aiohttp/connector.py", line 1130, in _wrap_create_connection
raise client_error(req.connection_key, exc) from exc
aiohttp.client_exceptions.ClientConnectorError: Cannot connect to host 0.0.0.0:8081 ssl:default [Connect call failed ('0.0.0.0', 8081)]