diff --git a/vllm/config.py b/vllm/config.py
index ed33030ed589..770fa9f627e2 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -261,7 +261,8 @@ class ModelConfig:
     - "float" is shorthand for FP32 precision.\n
     - "float32" for FP32 precision."""
     seed: Optional[int] = None
-    """Random seed for reproducibility."""
+    """Random seed for reproducibility. Initialized to None in V0, but
+    initialized to 0 in V1."""
     hf_config_path: Optional[str] = None
     """Name or path of the Hugging Face config to use. If unspecified,
     model name or path will be used."""
@@ -441,6 +442,24 @@ def compute_hash(self) -> str:
         return hashlib.sha256(str(factors).encode()).hexdigest()
 
     def __post_init__(self) -> None:
+        # Set the default seed to 0 in V1.
+        # NOTE(woosuk): In V0, we set the default seed to None because the
+        # driver worker shares the same process as the user process, and thus
+        # setting a seed affects the user process as well.
+        # In V1, we use separate processes for workers (unless
+        # VLLM_ENABLE_V1_MULTIPROCESSING=0), so setting a seed here
+        # doesn't affect the user process. However, without a consistent seed,
+        # different tensor parallel workers would sample different tokens,
+        # leading to inconsistent results.
+        if envs.VLLM_USE_V1 and self.seed is None:
+            self.seed = 0
+            if not envs.VLLM_ENABLE_V1_MULTIPROCESSING:
+                logger.warning(
+                    "The global random seed is set to %d. Since "
+                    "VLLM_ENABLE_V1_MULTIPROCESSING is set to False, this may "
+                    "affect the random state of the Python process that "
+                    "launched vLLM.", self.seed)
+
         self.model = maybe_model_redirect(self.model)
         # The tokenizer is consistent with the model by default.
         if self.tokenizer is None:
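
The NOTE in the second hunk argues that tensor parallel workers must share a seed, or each rank's sampler will pick a different token from the same distribution. A minimal standalone sketch of that failure mode (illustrative only, not vLLM code; `worker_sample` is a hypothetical stand-in for one rank's sampling step):

```python
# Illustrative sketch, not vLLM code: each tensor-parallel rank runs its own
# sampler over identical logits, so every rank must draw the same token.
import torch

logits = torch.tensor([2.0, 1.0, 0.5, 0.1])
probs = torch.softmax(logits, dim=-1)

def worker_sample(seed: int) -> int:
    # Hypothetical stand-in for one rank's sampling step with its own RNG.
    gen = torch.Generator().manual_seed(seed)
    return int(torch.multinomial(probs, num_samples=1, generator=gen).item())

# Ranks seeded identically (the new V1 default of 0) agree on the token.
assert worker_sample(0) == worker_sample(0)

# Ranks with divergent RNG state can pick different tokens; this is the
# inconsistency the diff's comment describes.
print(worker_sample(0), worker_sample(1))  # may print two different IDs
```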
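
The `logger.warning` covers the in-process case (`VLLM_ENABLE_V1_MULTIPROCESSING=0`), where the worker shares the launching process and seeding it also seeds the user's globals. A sketch of that side effect, assuming the usual global-RNG seeding pattern (`seed_everything` is a hypothetical stand-in for worker initialization, not a vLLM function):

```python
# Illustrative sketch of the hazard the warning describes: when workers run
# inside the launching process, seeding their global RNGs also pins the
# user's own random draws.
import random

import torch

def seed_everything(seed: int) -> None:
    # Hypothetical stand-in for in-process worker init seeding global state.
    random.seed(seed)
    torch.manual_seed(seed)

seed_everything(0)  # roughly what the engine now triggers by default under V1
print(random.random())       # now deterministic across runs for *user* code
print(torch.rand(1).item())  # likewise for torch's global generator
```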