Skip to content

Fix evals in CI #143

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
May 2, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/evals.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ jobs:
run: |
echo "OPENAI_API_KEY=${{ secrets.OPENAI_API_KEY }}" > ./apps/sandbox-container/.dev.vars
echo "OPENAI_API_KEY=${{ secrets.OPENAI_API_KEY }}" > ./apps/workers-bindings/.dev.vars
echo "DEV_CLOUDFLARE_API_TOKEN=${{ secrets.DEV_CLOUDFLARE_API_TOKEN }}" >> ./apps/sandbox-container/.dev.vars
echo "DEV_CLOUDFLARE_API_TOKEN=${{ secrets.DEV_CLOUDFLARE_API_TOKEN }}" >> ./apps/workers-bindings/.dev.vars
- name: Verify .dev.vars file
run: |
Expand Down
12 changes: 7 additions & 5 deletions apps/sandbox-container/evals/utils.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
import { jsonSchemaToZod } from '@n8n/json-schema-to-zod'
import { MCPClientManager } from 'agents/mcp/client'
import { streamText, tool } from 'ai'
import { jsonSchema, streamText, tool } from 'ai'
import { z } from 'zod'

import type { JsonSchemaObject } from '@n8n/json-schema-to-zod'
import type { LanguageModelV1, StreamTextResult, ToolCallPart, ToolSet } from 'ai'

export async function initializeClient(): Promise<MCPClientManager> {
Expand All @@ -23,10 +21,14 @@ export async function runTask(
}> {
const tools = clientManager.listTools()
const toolSet: ToolSet = tools.reduce((acc, v) => {
if (!v.inputSchema.properties) {
v.inputSchema.properties = {}
}

acc[v.name] = tool({
parameters: jsonSchemaToZod(v.inputSchema as JsonSchemaObject),
parameters: jsonSchema(v.inputSchema as any),
description: v.description,
execute: async (args, opts) => {
execute: async (args: any, opts) => {
try {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just curious: why is this set to `: any`?

Copy link
Collaborator Author

@cmsparks cmsparks May 2, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was just copy-pasted from the workers-bindings evals. IIRC, it was there to prevent ESLint from complaining. We should really move this into a common function, though (I'll do that in a follow-up PR).

const res = await clientManager.callTool(
{
Expand Down
2 changes: 1 addition & 1 deletion apps/sandbox-container/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
"test": "vitest",
"types": "wrangler types --include-env=false",
"eval:dev": "start-server-and-test --expect 404 eval:server http://localhost:8976 'vitest --testTimeout=60000 --config vitest.config.evals.ts'",
"eval:server": "concurrently \"tsx container/index.ts\" \"wrangler dev --var \"ENVIRONMENT:test\"\"",
"eval:server": "concurrently \"tsx container/index.ts\" \"wrangler dev --var ENVIRONMENT:test --var DEV_DISABLE_OAUTH:true --var DEV_CLOUDFLARE_EMAIL:[email protected]\"",
"eval:ci": "start-server-and-test --expect 404 eval:server http://localhost:8976 'vitest run --testTimeout=60000 --config vitest.config.evals.ts'"
},
"dependencies": {
Expand Down
2 changes: 1 addition & 1 deletion apps/sandbox-container/server/context.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ export interface Env {
MCP_SERVER_NAME: string
MCP_SERVER_VERSION: string
OPENAI_API_KEY: string
CONTAINER_MCP_AGENT: DurableObjectNamespace<ContainerMcpAgent>
MCP_OBJECT: DurableObjectNamespace<ContainerMcpAgent>
CONTAINER_MANAGER: DurableObjectNamespace<ContainerManager>
USER_CONTAINER: DurableObjectNamespace<UserContainer>
USER_BLOCKLIST: KVNamespace
Expand Down
25 changes: 5 additions & 20 deletions apps/sandbox-container/server/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ import { ContainerManager } from './containerManager'
import { ContainerMcpAgent } from './containerMcp'
import { UserContainer } from './userContainer'

import type { McpAgent } from 'agents/mcp'
import type { AuthProps } from '@repo/mcp-common/src/cloudflare-oauth-handler'
import type { Env } from './context'

Expand All @@ -40,30 +39,16 @@ const ContainerScopes = {

export default {
fetch: async (req: Request, env: Env, ctx: ExecutionContext) => {
// @ts-ignore
if (env.ENVIRONMENT === 'test') {
ctx.props = {
accessToken: 'foobar',
user: {
id: '123def',
email: '[email protected]',
},
accounts: [],
} as Props
return ContainerMcpAgent.mount('/sse', { binding: 'CONTAINER_MCP_AGENT' }).fetch(
req,
env as Record<string, DurableObjectNamespace<McpAgent> | any>,
ctx
)
}

if (env.ENVIRONMENT === 'dev' && env.DEV_DISABLE_OAUTH === 'true') {
if (
(env.ENVIRONMENT === 'dev' || env.ENVIRONMENT === 'test') &&
env.DEV_DISABLE_OAUTH === 'true'
) {
return await handleDevMode(ContainerMcpAgent, req, env, ctx)
}

return new OAuthProvider({
apiRoute: ['/mcp', '/sse'],
apiHandler: createApiHandler(ContainerMcpAgent, { binding: 'CONTAINER_MCP_AGENT' }),
apiHandler: createApiHandler(ContainerMcpAgent),
// @ts-ignore
defaultHandler: createAuthHandlers({ scopes: ContainerScopes, metrics }),
authorizeEndpoint: '/oauth/authorize',
Expand Down
6 changes: 3 additions & 3 deletions apps/sandbox-container/wrangler.jsonc
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
"bindings": [
{
"class_name": "ContainerMcpAgent",
"name": "CONTAINER_MCP_AGENT"
"name": "MCP_OBJECT"
},
{
"class_name": "ContainerManager",
Expand Down Expand Up @@ -91,7 +91,7 @@
"bindings": [
{
"class_name": "ContainerMcpAgent",
"name": "CONTAINER_MCP_AGENT"
"name": "MCP_OBJECT"
},
{
"class_name": "ContainerManager",
Expand Down Expand Up @@ -144,7 +144,7 @@
"bindings": [
{
"class_name": "ContainerMcpAgent",
"name": "CONTAINER_MCP_AGENT"
"name": "MCP_OBJECT"
},
{
"class_name": "ContainerManager",
Expand Down
2 changes: 1 addition & 1 deletion apps/workers-bindings/evals/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ export async function runTask(
})

// we need to consume the full stream, so this is empty
/* eslint-ignore no-empty */
// eslint-disable-next-line no-empty
for await (const _ of res.fullStream) {
}

Expand Down
Loading