-
-
Notifications
You must be signed in to change notification settings - Fork 10.6k
[New Model]: support GTE NewModel #17986
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
21 commits
Select commit
Hold shift + click to select a range
4a99827
support GTE NewModel
noooop 6e806b3
fix example_prompts
noooop 05679a2
fix
noooop ba8cec6
fix
noooop 0fab260
fix
noooop 6a7bafc
fix
noooop 4e9a108
fix
noooop d7e7eeb
fix
noooop 2c53d8a
gelu_and_mul -> geglu
noooop 37cc953
Snowflake Arctic-Embed-2.0-M
noooop 777f57b
mGTE-TRM
noooop 60d6822
fix
noooop 67f4e0a
fix
noooop 117d8b3
update
noooop 03dbc77
update
noooop fa92077
Merge branch 'vllm-project:main' into gte
noooop 8e01707
hf_overrides
noooop 2e83797
Merge remote-tracking branch 'origin/gte' into gte
noooop c121f68
Merge branch 'vllm-project:main' into gte
noooop deb6c74
Skipping mteb test.
noooop 52ba2d5
Merge branch 'vllm-project:main' into gte
noooop File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
# SPDX-License-Identifier: Apache-2.0 | ||
from typing import Any | ||
|
||
import pytest | ||
|
||
from ...utils import EmbedModelInfo, run_embedding_correctness_test | ||
|
||
# Registry of GTE-family embedding checkpoints used to parametrize the tests
# in this module. Entries with ``enable_test=False`` stay listed but are
# skipped by ``test_models_correctness``.
MODELS = [
    # ---- BertModel ----
    EmbedModelInfo(
        "thenlper/gte-large",
        architecture="BertModel",
        dtype="float32",
        enable_test=True,
    ),
    # Remaining BERT-based variants share the same settings and are disabled.
    *[
        EmbedModelInfo(
            bert_name,
            architecture="BertModel",
            dtype="float32",
            enable_test=False,
        ) for bert_name in (
            "thenlper/gte-base",
            "thenlper/gte-small",
            "thenlper/gte-large-zh",
            "thenlper/gte-base-zh",
            "thenlper/gte-small-zh",
        )
    ],
    # ---- NewModel ----
    EmbedModelInfo(
        "Alibaba-NLP/gte-multilingual-base",
        architecture="GteNewModel",
        enable_test=True,
    ),
    EmbedModelInfo(
        "Alibaba-NLP/gte-base-en-v1.5",
        architecture="GteNewModel",
        enable_test=True,
    ),
    EmbedModelInfo(
        "Alibaba-NLP/gte-large-en-v1.5",
        architecture="GteNewModel",
        enable_test=True,
    ),
    # ---- Qwen2ForCausalLM ----
    EmbedModelInfo(
        "Alibaba-NLP/gte-Qwen2-1.5B-instruct",
        architecture="Qwen2ForCausalLM",
        enable_test=True,
    ),
    EmbedModelInfo(
        "Alibaba-NLP/gte-Qwen2-7B-instruct",
        architecture="Qwen2ForCausalLM",
        enable_test=False,
    ),
    # ---- ModernBertModel ----
    EmbedModelInfo(
        "Alibaba-NLP/gte-modernbert-base",
        architecture="ModernBertModel",
        enable_test=True,
    ),
]
|
||
|
||
@pytest.mark.parametrize("model_info", MODELS)
def test_models_mteb(hf_runner, vllm_runner,
                     model_info: EmbedModelInfo) -> None:
    """Run the MTEB embedding check for one entry of ``MODELS``.

    Currently disabled: the unconditional ``pytest.skip`` below ends the test
    before any model is loaded, so the remaining statements only document
    how the check is wired up once it is re-enabled.
    """
    pytest.skip("Skipping mteb test.")

    # Imported lazily, after the skip, so collecting this module does not
    # require the MTEB helpers.
    from .mteb_utils import mteb_test_embed_models

    # Pick the HF config override for this model, if any. The checks run in
    # the same order as before, so a later match replaces an earlier one.
    hf_overrides: dict[str, Any] = {}
    if model_info.name == "Alibaba-NLP/gte-Qwen2-1.5B-instruct":
        hf_overrides = {"is_causal": True}
    if model_info.architecture == "GteNewModel":
        hf_overrides = {"architectures": ["GteNewModel"]}

    vllm_extra_kwargs: dict[str, Any] = (
        {"hf_overrides": hf_overrides} if hf_overrides else {})

    mteb_test_embed_models(hf_runner, vllm_runner, model_info,
                           vllm_extra_kwargs)
|
||
|
||
@pytest.mark.parametrize("model_info", MODELS)
def test_models_correctness(hf_runner, vllm_runner, model_info: EmbedModelInfo,
                            example_prompts) -> None:
    """Check vLLM embeddings against the sentence-transformers reference.

    The prompts are encoded with vLLM first. The same prompts and the vLLM
    outputs are then handed to ``run_embedding_correctness_test`` together
    with the HF model loaded as a sentence transformer.
    Entries whose ``enable_test`` flag is false are skipped.
    """
    if not model_info.enable_test:
        pytest.skip("Skipping test.")

    # ST will strip the input texts, see test_embedding.py
    prompts = [str(text).strip() for text in example_prompts]

    # Pick the HF config override for this model, if any. The checks run in
    # the same order as before, so a later match replaces an earlier one.
    hf_overrides: dict[str, Any] = {}
    if model_info.name == "Alibaba-NLP/gte-Qwen2-1.5B-instruct":
        hf_overrides = {"is_causal": True}
    if model_info.architecture == "GteNewModel":
        hf_overrides = {"architectures": ["GteNewModel"]}

    runner_kwargs: dict[str, Any] = {
        "task": "embed",
        "dtype": model_info.dtype,
        "max_model_len": None,
    }
    if hf_overrides:
        runner_kwargs["hf_overrides"] = hf_overrides

    # The vLLM runner is closed before the HF reference model is opened.
    with vllm_runner(model_info.name, **runner_kwargs) as vllm_model:
        vllm_outputs = vllm_model.encode(prompts)

    with hf_runner(model_info.name,
                   dtype=model_info.dtype,
                   is_sentence_transformer=True) as hf_model:
        run_embedding_correctness_test(hf_model, prompts, vllm_outputs)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.