
Commit 525ada8

Merge pull request #97 from intelligentnode/update-evaluator
Update evaluator
2 parents: 3f283c6 + 9340bab

4 files changed: +100 -65 lines

IntelliNode/model/input/ChatModelInput.js

Lines changed: 7 additions & 1 deletion
@@ -180,6 +180,8 @@ class GeminiInput extends ChatModelInput {
   constructor(systemMessage, options = {}) {
     super(options);
     this.messages = [];
+    this.maxOutputTokens = options.maxTokens
+    this.temperature = options.temperature
 
     if (systemMessage && typeof systemMessage === 'string') {
       this.addUserMessage(systemMessage);
@@ -207,7 +209,11 @@ class GeminiInput extends ChatModelInput {
 
   getChatInput() {
     return {
-      contents: this.messages
+      contents: this.messages,
+      generationConfig: {
+        ...(this.temperature && { temperature: this.temperature }),
+        ...(this.maxOutputTokens && { maxOutputTokens: this.maxOutputTokens }),
+      }
     };
   }

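Note on the ChatModelInput.js change: the conditional spreads mean generationConfig only carries the keys the caller actually set. A minimal standalone sketch of the same pattern (buildGenerationConfig below is an illustrative helper, not part of IntelliNode):

// Illustrative helper, not part of IntelliNode: mirrors the conditional
// spread used in GeminiInput.getChatInput() to omit unset keys.
function buildGenerationConfig({ temperature, maxOutputTokens } = {}) {
  return {
    ...(temperature && { temperature }),
    ...(maxOutputTokens && { maxOutputTokens }),
  };
}

console.log(buildGenerationConfig({ maxOutputTokens: 50 }));
// -> { maxOutputTokens: 50 }  (temperature omitted because it was never set)
console.log(buildGenerationConfig({ temperature: 0 }));
// -> {}  (a falsy value such as 0 is also dropped by the && guard)
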
IntelliNode/package.json

Lines changed: 3 additions & 2 deletions
@@ -1,6 +1,6 @@
 {
   "name": "intellinode",
-  "version": "1.7.8",
+  "version": "1.7.9",
   "description": "Integrate and evaluate various AI models, such as ChatGPT, Llama, Diffusion, Cohere, Gemini and Hugging Face.",
   "main": "index.js",
   "keywords": [
@@ -17,7 +17,8 @@
     "prompt",
     "automation",
     "mistralai",
-    "gemini"
+    "gemini",
+    "robotics"
   ],
   "author": "IntelliNode",
   "license": "Apache",

IntelliNode/test/integration/ModelEvaluation.test.js

Lines changed: 27 additions & 13 deletions
@@ -5,28 +5,42 @@ const { SupportedChatModels } = require('../../function/Chatbot');
 const { SupportedLangModels } = require('../../controller/RemoteLanguageModel');
 
 // prepare the evaluation settings
-const llamaChat = { apiKey: process.env.REPLICATE_API_KEY, provider: SupportedChatModels.REPLICATE,
-                    type:'chat', model: '13b-chat', maxTokens: 50};
-const openaiChat = { apiKey: process.env.OPENAI_API_KEY, provider: SupportedChatModels.OPENAI,
-                     type: 'chat', model:'gpt-3.5-turbo', maxTokens: 50};
-const cohereCompletion = { apiKey: process.env.COHERE_API_KEY, provider: SupportedLangModels.COHERE,
-                           type:'completion', model: 'command', maxTokens: 50};
+const llamaChat = {
+  apiKey: process.env.REPLICATE_API_KEY, provider: SupportedChatModels.REPLICATE,
+  type: 'chat', model: '13b-chat', maxTokens: 50
+};
+const openaiChat = {
+  apiKey: process.env.OPENAI_API_KEY, provider: SupportedChatModels.OPENAI,
+  type: 'chat', model: 'gpt-3.5-turbo', maxTokens: 50
+};
+const cohereCompletion = {
+  apiKey: process.env.COHERE_API_KEY, provider: SupportedLangModels.COHERE,
+  type: 'completion', model: 'command', maxTokens: 50
+};
+const geminiChat = {
+  apiKey: process.env.GEMINI_API_KEY, provider: SupportedChatModels.GEMINI,
+  type: 'chat', model: 'gemini'
+};
+const mistralChat = {
+  apiKey: process.env.MISTRAL_API_KEY, provider: SupportedChatModels.MISTRAL,
+  type: 'chat', model: 'mistral-medium', maxTokens: 50
+};
 
 // create the evaluation object
 const llmEvaluation = new LLMEvaluation(process.env.OPENAI_API_KEY, 'openai');
 
 async function testLLMEvaluation() {
   const inputString = "Explain the process of photosynthesis in simple terms.";
   const targetAnswers = ["Photosynthesis is the process where green plants use sunlight to turn carbon dioxide and water into glucose and oxygen. The glucose provides food for the plant, and the oxygen gets released back into the air.",
-                         "Photosynthesis is how plants make their own food. They take in water and carbon dioxide, use the energy from sunlight to transform them into glucose (their food) and oxygen, which they release into the air.",
-                         "In simple terms, photosynthesis is like cooking for plants but instead of a stove, they use sunlight. They mix water and carbon dioxide with the sunlight to create glucose, which is their food, and also produce oxygen."];
-  const providerSets = [llamaChat, openaiChat, cohereCompletion];
+    "Photosynthesis is how plants make their own food. They take in water and carbon dioxide, use the energy from sunlight to transform them into glucose (their food) and oxygen, which they release into the air.",
+    "In simple terms, photosynthesis is like cooking for plants but instead of a stove, they use sunlight. They mix water and carbon dioxide with the sunlight to create glucose, which is their food, and also produce oxygen."];
+  const providerSets = [llamaChat, openaiChat, cohereCompletion, geminiChat, mistralChat];
 
   const results = await llmEvaluation.compareModels(inputString, targetAnswers, providerSets);
 
   console.log('OpenAI Chat and Cohere Completion ModelEvaluation Results:', results);
 
-  assert(Object.keys(results).length === providerSets.length+1, 'Test failed');
+  assert(Object.keys(results).length === providerSets.length + 1, 'Test failed');
 }
 
 
@@ -35,15 +49,15 @@ async function testLLMEvaluationJson() {
   const inputString = "Explain the process of photosynthesis in simple terms.";
 
   const targetAnswers = ["Photosynthesis is the process where green plants use sunlight to turn carbon dioxide and water into glucose and oxygen. The glucose provides food for the plant, and the oxygen gets released back into the air.",
-                         "Photosynthesis is how plants make their own food. They take in water and carbon dioxide, use the energy from sunlight to transform them into glucose (their food) and oxygen, which they release into the air.",
-                         "In simple terms, photosynthesis is like cooking for plants but instead of a stove, they use sunlight. They mix water and carbon dioxide with the sunlight to create glucose, which is their food, and also produce oxygen."];
+    "Photosynthesis is how plants make their own food. They take in water and carbon dioxide, use the energy from sunlight to transform them into glucose (their food) and oxygen, which they release into the air.",
+    "In simple terms, photosynthesis is like cooking for plants but instead of a stove, they use sunlight. They mix water and carbon dioxide with the sunlight to create glucose, which is their food, and also produce oxygen."];
 
   const providerSets = [llamaChat, openaiChat, cohereCompletion];
 
   const results = await llmEvaluation.compareModels(inputString, targetAnswers, providerSets, true);
 
   console.log('Json Results:', results);
-
+
 }
 
 (async () => {

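Note on the assert in the test above: compareModels returns one key per entry in providerSets plus a trailing 'lookup' key describing the metrics, hence the + 1. A rough sketch of the expected shape (placeholder values, not real output; keys follow the `${provider.provider}/${provider.model}` pattern built in LLMEvaluation.js below):

// Placeholder shape only:
// results = {
//   '<provider>/<model>': [{ prediction: '...',
//                            score_cosine_similarity: 0.91,
//                            score_euclidean_distance: 0.42,
//                            score_manhattan_distance: 3.10,
//                            stop_reason: 'complete' }],
//   ... one entry per provider in providerSets ...
//   'lookup': { 'cosine_similarity': '...', 'euclidean_distance': '...', 'manhattan_distance': '...' }
// };
assert(Object.keys(results).length === providerSets.length + 1, 'Test failed');
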
IntelliNode/utils/LLMEvaluation.js

Lines changed: 63 additions & 49 deletions
@@ -2,7 +2,7 @@ const { RemoteEmbedModel, SupportedEmbedModels } = require('../controller/Remote
 const LanguageModelInput = require('../model/input/LanguageModelInput');
 const { Chatbot, SupportedChatModels } = require("../function/Chatbot");
 const { RemoteLanguageModel, SupportedLangModels } = require("../controller/RemoteLanguageModel");
-const { ChatGPTInput, LLamaReplicateInput, LLamaSageInput } = require("../model/input/ChatModelInput");
+const { ChatGPTInput, LLamaReplicateInput, LLamaSageInput, GeminiInput, CohereInput, MistralInput } = require("../model/input/ChatModelInput");
 const MatchHelpers = require('../utils/MatchHelpers');
 const EmbedInput = require('../model/input/EmbedInput');
 const { ModelEvaluation } = require('./ModelEvaluation');
@@ -26,36 +26,42 @@ class LLMEvaluation extends ModelEvaluation {
   }
 
   async generateText(apiKey, inputString, provider, modelName, type,
-    maxTokens = 400, custom_url = null) {
+    maxTokens = 500, custom_url = null) {
 
     if (type == 'chat' && Object.values(SupportedChatModels).includes(provider.toLowerCase())) {
 
-        const customProxy = (custom_url != undefined && custom_url != null && custom_url != '') ? {url: custom_url } : null;
-
-        const chatbot = new Chatbot(apiKey, provider, customProxy);
-
-        // define the chat input
-        let input;
-        if (SupportedChatModels.REPLICATE == provider.toLowerCase()) {
-          input = new LLamaReplicateInput("provide direct answer", { model: modelName, maxTokens: maxTokens});
-        } else if (SupportedChatModels.SAGEMAKER == provider.toLowerCase()) {
-          input = new LLamaSageInput("provide direct answer", {maxTokens: maxTokens});
-        } else {
-          input = new ChatGPTInput("provide direct answer", { model: modelName, maxTokens: maxTokens});
-        }
+      const customProxy = (custom_url != undefined && custom_url != null && custom_url != '') ? { url: custom_url } : null;
+
+      const chatbot = new Chatbot(apiKey, provider, customProxy);
+
+      // define the chat input
+      let input;
+      if (SupportedChatModels.REPLICATE == provider.toLowerCase()) {
+        input = new LLamaReplicateInput("provide direct answer", { model: modelName, maxTokens: maxTokens });
+      } else if (SupportedChatModels.SAGEMAKER == provider.toLowerCase()) {
+        input = new LLamaSageInput("provide direct answer", { maxTokens: maxTokens });
+      } else if (SupportedChatModels.GEMINI == provider.toLowerCase()) {
+        input = new GeminiInput("provide direct answer", { maxTokens: maxTokens });
+      } else if (SupportedChatModels.COHERE == provider.toLowerCase()) {
+        input = new CohereInput("provide direct answer", { maxTokens: maxTokens });
+      } else if (SupportedChatModels.MISTRAL == provider.toLowerCase()) {
+        input = new MistralInput("provide direct answer", { maxTokens: maxTokens });
+      } else {
+        input = new ChatGPTInput("provide direct answer", { model: modelName, maxTokens: maxTokens });
+      }
 
-        input.addUserMessage(inputString);
-        const responses = await chatbot.chat(input);
+      input.addUserMessage(inputString);
+      const responses = await chatbot.chat(input);
 
-        return responses[0].trim();
+      return responses[0].trim();
     } else if (type == 'completion' && Object.values(SupportedLangModels).includes(provider.toLowerCase())) {
 
-        const languageModel = new RemoteLanguageModel(apiKey, provider);
-        const langInput = new LanguageModelInput({ prompt: inputString, model: modelName, maxTokens: maxTokens });
-        langInput.setDefaultValues(provider, maxTokens);
+      const languageModel = new RemoteLanguageModel(apiKey, provider);
+      const langInput = new LanguageModelInput({ prompt: inputString, model: modelName, maxTokens: maxTokens });
+      langInput.setDefaultValues(provider, maxTokens);
 
-        const responses = await languageModel.generateText(langInput);
-        return responses[0].trim();
+      const responses = await languageModel.generateText(langInput);
+      return responses[0].trim();
     } else {
       throw new Error('Provider not supported');
     }
@@ -86,45 +92,53 @@ class LLMEvaluation extends ModelEvaluation {
     let targetEmbeddings = [];
 
     // Initiate Embedding for targets
-    for(let target of targetAnswers) {
+    for (let target of targetAnswers) {
      const embedding = await this.generateEmbedding(target);
      targetEmbeddings.push(embedding);
     }
 
-    for(let provider of providerSets) {
+    for (let provider of providerSets) {
      console.log(`- start ${provider.model} evaluation`)
 
      let predictions = [];
-      let prediction = await this.generateText(provider.apiKey, inputString, provider.provider,
-        provider.model, provider.type,
-        provider.maxTokens, provider.url);
-      const predictionEmbedding = await this.generateEmbedding(prediction);
-
-      let cosineSum = 0, euclideanSum = 0, manhattanSum = 0;
-      for(let targetEmbedding of targetEmbeddings) {
-        cosineSum += MatchHelpers.cosineSimilarity(predictionEmbedding, targetEmbedding);
-        euclideanSum += MatchHelpers.euclideanDistance(predictionEmbedding, targetEmbedding);
-        manhattanSum += MatchHelpers.manhattanDistance(predictionEmbedding, targetEmbedding);
-      }
-
-      const avgCosine = cosineSum / targetEmbeddings.length;
-      const avgEuclidean = euclideanSum / targetEmbeddings.length;
-      const avgManhattan = manhattanSum / targetEmbeddings.length;
+      try {
+        let prediction = await this.generateText(provider.apiKey, inputString, provider.provider,
+          provider.model, provider.type,
+          provider.maxTokens, provider.url);
+        const predictionEmbedding = await this.generateEmbedding(prediction);
+
+        let cosineSum = 0, euclideanSum = 0, manhattanSum = 0;
+        for (let targetEmbedding of targetEmbeddings) {
+          cosineSum += MatchHelpers.cosineSimilarity(predictionEmbedding, targetEmbedding);
+          euclideanSum += MatchHelpers.euclideanDistance(predictionEmbedding, targetEmbedding);
+          manhattanSum += MatchHelpers.manhattanDistance(predictionEmbedding, targetEmbedding);
+        }
 
-      predictions.push({
-        prediction: prediction,
-        score_cosine_similarity: avgCosine,
-        score_euclidean_distance: avgEuclidean,
-        score_manhattan_distance: avgManhattan
-      });
+        const avgCosine = cosineSum / targetEmbeddings.length;
+        const avgEuclidean = euclideanSum / targetEmbeddings.length;
+        const avgManhattan = manhattanSum / targetEmbeddings.length;
+
+        predictions.push({
+          prediction: prediction,
+          score_cosine_similarity: avgCosine,
+          score_euclidean_distance: avgEuclidean,
+          score_manhattan_distance: avgManhattan,
+          stop_reason: "complete"
+        });
+      } catch (error) {
+        console.error(error);
+        predictions.push({
+          stop_reason: "error"
+        });
+      }
 
      results[`${provider.provider}/${provider.model}`] = predictions;
     }
 
     results['lookup'] = {
-     'cosine_similarity': 'a value closer to 1 indicates a higher degree of similarity between two vectors.',
-     'euclidean_distance': 'the lower the value, the closer the two points.',
-     'manhattan_distance': 'the lower the value, the closer the two vectors.'
+      'cosine_similarity': 'a value closer to 1 indicates a higher degree of similarity between two vectors.',
+      'euclidean_distance': 'the lower the value, the closer the two points.',
+      'manhattan_distance': 'the lower the value, the closer the two vectors.'
     }
 
     if (isJson) {

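Note on the try/catch in compareModels: a provider that throws no longer aborts the whole comparison; its entry simply reports stop_reason: "error". A minimal consumer sketch under that assumption (llmEvaluation, inputString, targetAnswers and providerSets as defined in the test file above; run inside an async context such as the test's (async () => { ... })() wrapper):

// Illustrative only: inspect the compareModels results and flag providers
// whose evaluation errored, using the stop_reason field added in this commit.
const results = await llmEvaluation.compareModels(inputString, targetAnswers, providerSets);

for (const [providerKey, predictions] of Object.entries(results)) {
  if (providerKey === 'lookup') continue; // skip the metric descriptions entry
  const completed = predictions.filter((p) => p.stop_reason === 'complete');
  if (completed.length === 0) {
    console.warn(`${providerKey}: evaluation failed, no prediction recorded`);
  } else {
    console.log(`${providerKey}: cosine similarity ${completed[0].score_cosine_similarity}`);
  }
}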