Skip to content

Commit d48a1e0

Browse files
authored
feat: jmanus cookies and interactive dialogue support (alibaba#1064)
* feat: 更新版本号至0.8.0 * Refactor tools to use Jackson for JSON processing and enhance error handling * Enhanced ChromeDriverService to save and load cookies using Jackson. * feat(MoveToAndClickAction): 添加点击事件监听和元素信息日志记录 * feat(FormInputTool): 添加用户输入状态管理和超时处理逻辑 * feat(ManusController): 添加用户输入等待状态通知接口 * feat: 添加聊天输入处理模块,重构用户输入逻辑并实现事件驱动交互 * feat: 重构聊天区域的整个js实现,接入class for js * feat: Implement Plan Execution Manager and UI Handlers * feat: 添加用户输入超时配置,合并用户输入等待状态到计划执行记录
1 parent 1ab2fc7 commit d48a1e0

File tree

67 files changed

+3582
-3196
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

67 files changed

+3582
-3196
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ spring-ai-alibaba-examples/playground-flight-booking/src/main/bundles/
4242
**/spring-ai-alibaba-jmanus/extensions/*
4343
**/venv/*
4444
**/spring-ai-alibaba-jmanus/h2-data/*
45+
**/playwright/*
4546

4647
# windows os
4748
/extensions/

spring-ai-alibaba-jmanus/README-zh.md

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ Spring AI Alibaba Java Manus 提供以下关键功能:
2828
npm install -g npx
2929
```
3030

31-
3. 设置 [dashscope api key](https://help.aliyun.com/zh/model-studio/getting-started/first-api-call-to-qwen)
31+
3. 设置 [dashscope api key](https://help.aliyun.com/zh/model-studio/getting-started/first-api-call-to-qwen)。DashScope 是阿里云百炼的 API。
3232

3333
```shell
3434
export AI_DASHSCOPE_API_KEY=xxxx
@@ -61,3 +61,8 @@ mvn spring-boot:run
6161
## 架构
6262

6363
![架构图](https://github.com/user-attachments/assets/4ad14a72-667b-456e-85c1-b05eef8fd414)
64+
65+
66+
## 之前的版本
67+
如果你想要之前的稳定版本,可以在这里找到:
68+
[之前的稳定版](https://github.com/rainerWJY/Java-Open-Manus/releases)

spring-ai-alibaba-jmanus/README.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,3 +61,7 @@ mvn spring-boot:run
6161
## Architecture
6262

6363
![Architecture diagram](https://github.com/user-attachments/assets/4ad14a72-667b-456e-85c1-b05eef8fd414)
64+
65+
## Previous Versions
66+
If you want a previous stable version, you can find it here:
67+
[Previous Stable Versions](https://github.com/rainerWJY/Java-Open-Manus/releases)

spring-ai-alibaba-jmanus/pom.xml

Lines changed: 2 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
<groupId>com.alibaba.cloud.ai</groupId>
88
<name>Spring AI Alibaba JManus</name>
99
<artifactId>spring-ai-alibaba-jmanus</artifactId>
10-
<version>0.1.1</version>
10+
<version>0.8.0</version>
1111

1212
<!-- The required dependencies are maintained separately by the JManus module -->
1313
<properties>
@@ -90,18 +90,7 @@
9090
<!-- <version>${spring-ai-alibaba}</version>-->
9191
<!-- </dependency>-->
9292

93-
<dependency>
94-
<groupId>com.alibaba</groupId>
95-
<artifactId>fastjson</artifactId>
96-
<version>1.2.83</version>
97-
</dependency>
98-
99-
<dependency>
100-
<groupId>com.alibaba.fastjson2</groupId>
101-
<artifactId>fastjson2</artifactId>
102-
<version>2.0.56</version>
103-
</dependency>
104-
93+
10594
<dependency>
10695
<groupId>com.google.code.gson</groupId>
10796
<artifactId>gson</artifactId>

spring-ai-alibaba-jmanus/src/main/java/com/alibaba/cloud/ai/example/manus/agent/BaseAgent.java

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,21 @@ protected Message addThinkPrompt(List<Message> messages) {
122122

123123
// 获取当前日期时间,格式为yyyy-MM-dd
124124
String currentDateTime = java.time.LocalDate.now().toString(); // 格式为yyyy-MM-dd
125+
boolean isDebugModel = manusProperties.getBrowserDebug();
126+
String detailOutput = "";
127+
if (isDebugModel) {
128+
detailOutput = """
129+
1. 使用工具调用时,必须给出解释说明,说明使用这个工具的理由和背后的思考
130+
2. 简述过去的所有步骤已经都做了什么事
131+
""";
132+
}
133+
else {
134+
detailOutput = """
135+
1. 使用工具调用时,不需要额外的任何解释说明!
136+
2. 不要在工具调用前提供推理或描述!
137+
""";
138+
139+
}
125140

126141
String stepPrompt = """
127142
- SYSTEM INFORMATION:
@@ -139,13 +154,12 @@ protected Message addThinkPrompt(List<Message> messages) {
139154
{extraParams}
140155
141156
重要说明:
142-
1. 使用工具调用时,不需要额外的任何解释说明!
143-
2. 不要在工具调用前提供推理或描述!
157+
%s
144158
3. 做且只做当前要做的步骤要求中的内容
145159
4. 如果当前要做的步骤要求已经做完,则调用terminate工具来完成当前步骤。
146160
5. 全局目标 是用来有个全局认识的,不要在当前步骤中去完成这个全局目标。
147161
148-
""".formatted(osName, osVersion, osArch, currentDateTime);
162+
""".formatted(osName, osVersion, osArch, currentDateTime, detailOutput);
149163

150164
SystemPromptTemplate promptTemplate = new SystemPromptTemplate(stepPrompt);
151165

spring-ai-alibaba-jmanus/src/main/java/com/alibaba/cloud/ai/example/manus/config/ManusProperties.java

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,4 +156,46 @@ public void setResetAgents(Boolean resetAgents) {
156156
this.resetAgents = resetAgents;
157157
}
158158

159+
@ConfigProperty(group = "manus", subGroup = "general", key = "baseDir", path = "manus.baseDir",
160+
description = "manus根目录", defaultValue = "", inputType = ConfigInputType.TEXT)
161+
private volatile String baseDir = "";
162+
163+
public String getBaseDir() {
164+
String configPath = "manus.baseDir";
165+
String value = configService.getConfigValue(configPath);
166+
if (value != null) {
167+
baseDir = value;
168+
}
169+
return baseDir;
170+
}
171+
172+
public void setBaseDir(String baseDir) {
173+
this.baseDir = baseDir;
174+
}
175+
176+
@ConfigProperty(group = "manus", subGroup = "agent", key = "userInputTimeout",
177+
path = "manus.agent.userInputTimeout", description = "用户输入表单等待超时时间(秒)", defaultValue = "300",
178+
inputType = ConfigInputType.NUMBER)
179+
private volatile Integer userInputTimeout;
180+
181+
public Integer getUserInputTimeout() {
182+
String configPath = "manus.agent.userInputTimeout";
183+
String value = configService.getConfigValue(configPath);
184+
if (value != null) {
185+
userInputTimeout = Integer.valueOf(value);
186+
}
187+
// Ensure a default value if not configured and not set
188+
if (userInputTimeout == null) {
189+
// Attempt to parse the default value specified in the annotation,
190+
// or use a hardcoded default if parsing fails or is complex to retrieve here.
191+
// For simplicity, directly using the intended default.
192+
userInputTimeout = 300;
193+
}
194+
return userInputTimeout;
195+
}
196+
197+
public void setUserInputTimeout(Integer userInputTimeout) {
198+
this.userInputTimeout = userInputTimeout;
199+
}
200+
159201
}

spring-ai-alibaba-jmanus/src/main/java/com/alibaba/cloud/ai/example/manus/dynamic/agent/DynamicAgent.java

Lines changed: 105 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,10 @@
2020
import java.util.HashMap;
2121
import java.util.List;
2222
import java.util.Map;
23+
import java.util.concurrent.TimeUnit;
2324
import java.util.stream.Collectors;
2425

26+
import com.alibaba.cloud.ai.example.manus.planning.service.UserInputService;
2527
import io.micrometer.common.util.StringUtils;
2628
import org.slf4j.Logger;
2729
import org.slf4j.LoggerFactory;
@@ -50,6 +52,9 @@
5052
import com.alibaba.cloud.ai.example.manus.recorder.entity.AgentExecutionRecord;
5153
import com.alibaba.cloud.ai.example.manus.recorder.entity.ThinkActRecord;
5254
import com.alibaba.cloud.ai.example.manus.tool.TerminateTool;
55+
import com.alibaba.cloud.ai.example.manus.tool.ToolCallBiFunctionDef;
56+
import com.alibaba.cloud.ai.example.manus.tool.FormInputTool;
57+
import org.springframework.beans.factory.annotation.Autowired;
5358

5459
import static org.springframework.ai.chat.memory.ChatMemory.CONVERSATION_ID;
5560

@@ -75,6 +80,8 @@ public class DynamicAgent extends ReActAgent {
7580

7681
private final ToolCallingManager toolCallingManager;
7782

83+
private final UserInputService userInputService;
84+
7885
public void clearUp(String planId) {
7986
Map<String, ToolCallBackContext> toolCallBackContext = toolCallbackProvider.getToolCallBackContext();
8087
for (ToolCallBackContext toolCallBack : toolCallBackContext.values()) {
@@ -85,18 +92,23 @@ public void clearUp(String planId) {
8592
log.error("Error cleaning up tool callback context: {}", e.getMessage(), e);
8693
}
8794
}
95+
// Also remove any pending form input tool for this planId
96+
if (userInputService != null) {
97+
userInputService.removeFormInputTool(planId);
98+
}
8899
}
89100

90101
public DynamicAgent(LlmService llmService, PlanExecutionRecorder planExecutionRecorder,
91102
ManusProperties manusProperties, String name, String description, String nextStepPrompt,
92103
List<String> availableToolKeys, ToolCallingManager toolCallingManager,
93-
Map<String, Object> initialAgentSetting) {
104+
Map<String, Object> initialAgentSetting, UserInputService userInputService) {
94105
super(llmService, planExecutionRecorder, manusProperties, initialAgentSetting);
95106
this.agentName = name;
96107
this.agentDescription = description;
97108
this.nextStepPrompt = nextStepPrompt;
98109
this.availableToolKeys = availableToolKeys;
99110
this.toolCallingManager = toolCallingManager;
111+
this.userInputService = userInputService;
100112
}
101113

102114
@Override
@@ -176,7 +188,6 @@ protected AgentExecResult act() {
176188
thinkActRecord.startAction("Executing tool: " + toolCall.name(), toolCall.name(), toolCall.arguments());
177189
ToolExecutionResult toolExecutionResult = toolCallingManager.executeToolCalls(userPrompt, response);
178190

179-
// setData(getData());
180191
ToolResponseMessage toolResponseMessage = (ToolResponseMessage) toolExecutionResult.conversationHistory()
181192
.get(toolExecutionResult.conversationHistory().size() - 1);
182193

@@ -187,16 +198,53 @@ protected AgentExecResult act() {
187198

188199
thinkActRecord.finishAction(llmCallResponse, "SUCCESS");
189200
String toolcallName = toolCall.name();
190-
AgentExecResult agentExecResult = null;
191-
// 如果是终止工具,则返回完成状态
192-
// 否则返回运行状态
193-
if (TerminateTool.name.equals(toolcallName)) {
194-
agentExecResult = new AgentExecResult(llmCallResponse, AgentState.COMPLETED);
201+
202+
// Handle FormInputTool logic
203+
if (FormInputTool.name.equals(toolcallName)) {
204+
ToolCallBiFunctionDef formInputToolDef = getToolCallBackContext(toolcallName).getFunctionInstance();
205+
if (formInputToolDef instanceof FormInputTool) {
206+
FormInputTool formInputTool = (FormInputTool) formInputToolDef;
207+
// Check if the tool is waiting for user input
208+
if (formInputTool.getInputState() == FormInputTool.InputState.AWAITING_USER_INPUT) {
209+
log.info("FormInputTool is awaiting user input for planId: {}", getPlanId());
210+
userInputService.storeFormInputTool(getPlanId(), formInputTool);
211+
// Wait for user input or timeout
212+
waitForUserInputOrTimeout(formInputTool);
213+
214+
// After waiting, check the state again
215+
if (formInputTool.getInputState() == FormInputTool.InputState.INPUT_RECEIVED) {
216+
log.info("User input received for planId: {}", getPlanId());
217+
// The UserInputService.submitUserInputs would have updated
218+
// the tool's internal state.
219+
// We can now get the updated state string for the LLM.
220+
llmCallResponse = formInputTool.getCurrentToolStateString();
221+
// Update the toolResponseMessage in memory for the next LLM
222+
// call
223+
ToolResponseMessage.ToolResponse updatedToolResponse = new ToolResponseMessage.ToolResponse(
224+
toolCall.id(), toolCall.name(), llmCallResponse);
225+
ToolResponseMessage updatedToolResponseMessage = new ToolResponseMessage(
226+
List.of(updatedToolResponse), Map.of());
227+
llmService.getAgentMemory().add(getPlanId(), updatedToolResponseMessage);
228+
229+
}
230+
else if (formInputTool.getInputState() == FormInputTool.InputState.INPUT_TIMEOUT) {
231+
log.warn("Input timeout occurred for FormInputTool for planId: {}", getPlanId());
232+
userInputService.removeFormInputTool(getPlanId()); // Clean up
233+
return new AgentExecResult("Input timeout occurred.", AgentState.IN_PROGRESS); // Or
234+
// FAILED
235+
}
236+
}
237+
}
195238
}
196-
else {
197-
agentExecResult = new AgentExecResult(llmCallResponse, AgentState.IN_PROGRESS);
239+
240+
// If the tool is TerminateTool, return completed state
241+
if (TerminateTool.name.equals(toolcallName)) {
242+
userInputService.removeFormInputTool(getPlanId()); // Clean up any pending
243+
// form
244+
return new AgentExecResult(llmCallResponse, AgentState.COMPLETED);
198245
}
199-
return agentExecResult;
246+
247+
return new AgentExecResult(llmCallResponse, AgentState.IN_PROGRESS);
200248
}
201249
catch (Exception e) {
202250
ToolCall toolCall = response.getResult().getOutput().getToolCalls().get(0);
@@ -207,6 +255,7 @@ protected AgentExecResult act() {
207255
log.error(e.getMessage());
208256

209257
thinkActRecord.recordError(e.getMessage());
258+
userInputService.removeFormInputTool(getPlanId()); // Clean up on error
210259

211260
return new AgentExecResult(e.getMessage(), AgentState.FAILED);
212261
}
@@ -255,6 +304,17 @@ protected Message addThinkPrompt(List<Message> messages) {
255304
return systemMessage;
256305
}
257306

307+
private ToolCallBackContext getToolCallBackContext(String toolKey) {
308+
Map<String, ToolCallBackContext> toolCallBackContext = toolCallbackProvider.getToolCallBackContext();
309+
if (toolCallBackContext.containsKey(toolKey)) {
310+
return toolCallBackContext.get(toolKey);
311+
}
312+
else {
313+
log.warn("Tool callback for {} not found in the map.", toolKey);
314+
return null;
315+
}
316+
}
317+
258318
@Override
259319
public List<ToolCallback> getToolCallList() {
260320
List<ToolCallback> toolCallbacks = new ArrayList<>();
@@ -326,4 +386,39 @@ public String convertEnvDataToString() {
326386
return envDataStringBuilder.toString();
327387
}
328388

389+
// Add a method to wait for user input or handle timeout.
390+
private void waitForUserInputOrTimeout(FormInputTool formInputTool) {
391+
log.info("Waiting for user input for planId: {}...", getPlanId());
392+
long startTime = System.currentTimeMillis();
393+
// Get timeout from ManusProperties and convert to milliseconds
394+
long userInputTimeoutMs = getManusProperties().getUserInputTimeout() * 1000L;
395+
396+
while (formInputTool.getInputState() == FormInputTool.InputState.AWAITING_USER_INPUT) {
397+
if (System.currentTimeMillis() - startTime > userInputTimeoutMs) {
398+
log.warn("Timeout waiting for user input for planId: {}", getPlanId());
399+
formInputTool.handleInputTimeout(); // This will change its state to
400+
// INPUT_TIMEOUT
401+
break;
402+
}
403+
try {
404+
// Poll for input state change. In a real scenario, this might involve
405+
// a more sophisticated mechanism like a Future or a callback from the UI.
406+
TimeUnit.MILLISECONDS.sleep(500); // Check every 500ms
407+
}
408+
catch (InterruptedException e) {
409+
log.warn("Interrupted while waiting for user input for planId: {}", getPlanId());
410+
Thread.currentThread().interrupt();
411+
formInputTool.handleInputTimeout(); // Treat interruption as timeout for
412+
// simplicity
413+
break;
414+
}
415+
}
416+
if (formInputTool.getInputState() == FormInputTool.InputState.INPUT_RECEIVED) {
417+
log.info("User input received for planId: {}", getPlanId());
418+
}
419+
else if (formInputTool.getInputState() == FormInputTool.InputState.INPUT_TIMEOUT) {
420+
log.warn("User input timed out for planId: {}", getPlanId());
421+
}
422+
}
423+
329424
}

spring-ai-alibaba-jmanus/src/main/java/com/alibaba/cloud/ai/example/manus/dynamic/agent/service/DynamicAgentLoader.java

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
import com.alibaba.cloud.ai.example.manus.dynamic.agent.entity.DynamicAgentEntity;
2828
import com.alibaba.cloud.ai.example.manus.dynamic.agent.repository.DynamicAgentRepository;
2929
import com.alibaba.cloud.ai.example.manus.llm.LlmService;
30+
import com.alibaba.cloud.ai.example.manus.planning.service.UserInputService;
3031
import com.alibaba.cloud.ai.example.manus.recorder.PlanExecutionRecorder;
3132

3233
@Service
@@ -42,13 +43,17 @@ public class DynamicAgentLoader {
4243

4344
private final ToolCallingManager toolCallingManager;
4445

46+
private final UserInputService userInputService;
47+
4548
public DynamicAgentLoader(DynamicAgentRepository repository, @Lazy LlmService llmService,
46-
PlanExecutionRecorder recorder, ManusProperties properties, @Lazy ToolCallingManager toolCallingManager) {
49+
PlanExecutionRecorder recorder, ManusProperties properties, @Lazy ToolCallingManager toolCallingManager,
50+
UserInputService userInputService) {
4751
this.repository = repository;
4852
this.llmService = llmService;
4953
this.recorder = recorder;
5054
this.properties = properties;
5155
this.toolCallingManager = toolCallingManager;
56+
this.userInputService = userInputService;
5257
}
5358

5459
public DynamicAgent loadAgent(String agentName, Map<String, Object> initialAgentSetting) {
@@ -58,7 +63,8 @@ public DynamicAgent loadAgent(String agentName, Map<String, Object> initialAgent
5863
}
5964

6065
return new DynamicAgent(llmService, recorder, properties, entity.getAgentName(), entity.getAgentDescription(),
61-
entity.getNextStepPrompt(), entity.getAvailableToolKeys(), toolCallingManager, initialAgentSetting);
66+
entity.getNextStepPrompt(), entity.getAvailableToolKeys(), toolCallingManager, initialAgentSetting,
67+
userInputService);
6268
}
6369

6470
public List<DynamicAgentEntity> getAllAgents() {

0 commit comments

Comments
 (0)