Skip to content

Commit 9a4510d

Browse files
SaedbhatiSaed bhatiJINO-ROHITWendong-Fan
authored
feat: azure embedding (camel-ai#2120)
Co-authored-by: Saed bhati <[email protected]> Co-authored-by: JINO ROHIT <[email protected]> Co-authored-by: Wendong-Fan <[email protected]>
1 parent 5c2815d commit 9a4510d

File tree

4 files changed

+164
-0
lines changed

4 files changed

+164
-0
lines changed

camel/embeddings/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
# See the License for the specific language governing permissions and
1212
# limitations under the License.
1313
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
14+
from .azure_embedding import AzureEmbedding
1415
from .base import BaseEmbedding
1516
from .jina_embedding import JinaEmbedding
1617
from .mistral_embedding import MistralEmbedding
@@ -23,6 +24,7 @@
2324
__all__ = [
2425
"BaseEmbedding",
2526
"OpenAIEmbedding",
27+
"AzureEmbedding",
2628
"SentenceTransformerEncoder",
2729
"VisionLanguageEmbedding",
2830
"MistralEmbedding",
Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
2+
# Licensed under the Apache License, Version 2.0 (the "License");
3+
# you may not use this file except in compliance with the License.
4+
# You may obtain a copy of the License at
5+
#
6+
# http://www.apache.org/licenses/LICENSE-2.0
7+
#
8+
# Unless required by applicable law or agreed to in writing, software
9+
# distributed under the License is distributed on an "AS IS" BASIS,
10+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
# See the License for the specific language governing permissions and
12+
# limitations under the License.
13+
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
14+
15+
16+
from __future__ import annotations
17+
18+
import os
19+
from typing import Any, Union
20+
21+
from openai import AzureOpenAI
22+
23+
from camel.embeddings.base import BaseEmbedding
24+
from camel.types import EmbeddingModelType
25+
from camel.utils import api_keys_required # Add this import
26+
27+
28+
class AzureEmbedding(BaseEmbedding[str]):
    r"""Provides text embedding functionalities using Azure's OpenAI models.

    Args:
        model_type (EmbeddingModelType, optional): The model type to be
            used for text embeddings.
            (default: :obj:`TEXT_EMBEDDING_3_SMALL`)
        url (Optional[str], optional): The url to the Azure OpenAI service.
            When omitted, the `AZURE_OPENAI_BASE_URL` environment variable
            is used. (default: :obj:`None`)
        api_key (Optional[str], optional): The API key for authenticating
            with the Azure OpenAI service. When omitted, the
            `AZURE_OPENAI_API_KEY` environment variable is used.
            (default: :obj:`None`)
        api_version (Optional[str], optional): The API version for Azure
            OpenAI service. When omitted, the `AZURE_API_VERSION`
            environment variable is used. (default: :obj:`None`)
        dimensions (Optional[int], optional): The text embedding output
            dimensions. When omitted, `model_type.output_dim` is used.
            (default: :obj:`None`)

    Raises:
        ValueError: If `dimensions` is not a positive integer, or if no
            endpoint url is available from the argument or environment.
    """

    # NOTE(review): `api_keys_required` presumably rejects a missing key or
    # url before the body runs -- confirm against `camel.utils`.
    @api_keys_required(
        [
            ("api_key", 'AZURE_OPENAI_API_KEY'),
            ("url", 'AZURE_OPENAI_BASE_URL'),
        ]
    )
    def __init__(
        self,
        model_type: EmbeddingModelType = (
            EmbeddingModelType.TEXT_EMBEDDING_3_SMALL
        ),
        url: Union[str, None] = None,
        api_key: Union[str, None] = None,
        api_version: Union[str, None] = None,
        dimensions: Union[int, None] = None,
    ) -> None:
        self.model_type = model_type
        self.api_version = api_version or os.environ.get("AZURE_API_VERSION")

        if dimensions is None:
            self.output_dim = model_type.output_dim
        else:
            # `bool` is a subclass of `int`; reject it explicitly so that
            # `dimensions=True` is not silently treated as 1.
            if isinstance(dimensions, bool) or not isinstance(
                dimensions, int
            ):
                raise ValueError("dimensions must be an integer")
            if dimensions <= 0:
                raise ValueError("dimensions must be a positive integer")
            self.output_dim = dimensions

        self._api_key = api_key or os.environ.get("AZURE_OPENAI_API_KEY")
        self._url = url or os.environ.get("AZURE_OPENAI_BASE_URL")

        # Fail loudly instead of passing the literal string "None" to the
        # client as its endpoint.
        if not self._url:
            raise ValueError(
                "Azure OpenAI endpoint is missing: pass `url` or set the "
                "AZURE_OPENAI_BASE_URL environment variable."
            )

        self.client = AzureOpenAI(
            api_key=self._api_key,
            api_version=self.api_version,
            azure_endpoint=self._url,
        )

    def embed_list(
        self,
        objs: list[str],
        **kwargs: Any,
    ) -> list[list[float]]:
        r"""Embeds a list of texts using the Azure OpenAI model.

        Args:
            objs (list[str]): The list of texts to embed.
            **kwargs (Any): Additional keyword arguments to pass to the API.

        Returns:
            list[list[float]]: The embeddings for the input texts, or an
                empty list when `objs` is empty.
        """
        # Nothing to embed: skip the network round trip entirely.
        if not objs:
            return []

        # `dimensions` is sent for every model except TEXT_EMBEDDING_ADA_2
        # (presumably that model does not accept it -- TODO confirm).
        request_args: dict[str, Any] = {}
        if self.model_type != EmbeddingModelType.TEXT_EMBEDDING_ADA_2:
            request_args["dimensions"] = self.output_dim

        response = self.client.embeddings.create(
            input=objs,
            model=self.model_type.value,
            **request_args,
            **kwargs,
        )
        return [data.embedding for data in response.data]

    def get_output_dim(self) -> int:
        r"""Returns the output dimension of the embeddings.

        Returns:
            int: The dimensionality of the embedding for the current model.
        """
        return self.output_dim

docs/key_modules/embeddings.md

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,9 @@ Utilizes open-source models from the Sentence Transformers library for generatin
3434
### 2.4. `VisionLanguageEmbedding`
3535
Utilizes OpenAI's models for generating image embeddings. This requires an OpenAI API key.
3636

37+
### 2.5. `AzureEmbedding`
38+
Utilizes OpenAI's models served through Azure OpenAI for generating text embeddings. This requires an Azure OpenAI API key and endpoint URL.
39+
3740

3841
## 3. Get Started
3942
To use the embedding functionalities, you need to import the necessary classes.
@@ -85,3 +88,15 @@ test_images = [image, image]
8588

8689
embeddings = vlm_embedding.embed_list(test_images)
8790
```
91+
92+
### 3.5. Using `AzureEmbedding`
93+
```python
94+
from camel.embeddings import AzureEmbedding
95+
from camel.types import EmbeddingModelType
96+
97+
# Initialize the Azure OpenAI embedding with a specific model
98+
azure_openai_embedding = AzureEmbedding(model_type=EmbeddingModelType.TEXT_EMBEDDING_ADA_2)
99+
100+
# Generate embeddings for a list of texts
101+
embeddings = azure_openai_embedding.embed_list(["Hello, world!", "Another example"])
102+
```
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
2+
# Licensed under the Apache License, Version 2.0 (the "License");
3+
# you may not use this file except in compliance with the License.
4+
# You may obtain a copy of the License at
5+
#
6+
# http://www.apache.org/licenses/LICENSE-2.0
7+
#
8+
# Unless required by applicable law or agreed to in writing, software
9+
# distributed under the License is distributed on an "AS IS" BASIS,
10+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
# See the License for the specific language governing permissions and
12+
# limitations under the License.
13+
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
14+
15+
16+
from camel.embeddings import AzureEmbedding
17+
18+
19+
def test_azure_embedding():
    r"""Checks that embedding length matches the reported output dimension.

    Two models are built without explicit credentials: one with the default
    dimensions and one with `dimensions=256`.
    """
    # Default configuration: vector length must equal the reported size.
    default_model = AzureEmbedding()
    default_vector = default_model.embed("test 1.")
    assert len(default_vector) == default_model.get_output_dim()

    # Explicit dimensions: both the vector and the reported size are 256.
    reduced_model = AzureEmbedding(dimensions=256)
    reduced_vector = reduced_model.embed("test 2")
    reported_dim = reduced_model.get_output_dim()
    assert len(reduced_vector) == reported_dim
    assert reported_dim == 256

0 commit comments

Comments
 (0)