2 files changed: +8 -2 lines changed
Original file line number Diff line number Diff line change
33 On the server side, run one of the following commands:
44 (vLLM backend)
55 python -m vllm.entrypoints.api_server \
6- --disable-log-requests --model <your_model>
6+ --model <your_model> --swap-space 16 \
7+ --disable-log-requests
78
89 (TGI backend)
910 ./launch_hf_server.sh <your_model>
Original file line number Diff line number Diff line change @@ -409,7 +409,12 @@ def _swap_out(
409409 seq_group : SequenceGroup ,
410410 blocks_to_swap_out : Dict [int , int ],
411411 ) -> None :
412- assert self .block_manager .can_swap_out (seq_group )
412+ if not self .block_manager .can_swap_out (seq_group ):
413+ # FIXME(woosuk): Abort the sequence group instead of aborting the
414+ # entire engine.
415+ raise RuntimeError (
416+ "Aborted due to the lack of CPU swap space. Please increase "
417+ "the swap space to avoid this error." )
413418 mapping = self .block_manager .swap_out (seq_group )
414419 blocks_to_swap_out .update (mapping )
415420 for seq in seq_group .get_seqs (status = SequenceStatus .RUNNING ):
You can’t perform that action at this time.
0 commit comments