Skip to content

Commit 43e7f0d

Browse files
authored
feat: Add WebToolkit from owl (camel-ai#1751)
1 parent 67a0608 commit 43e7f0d

File tree

6 files changed

+1808
-0
lines changed

6 files changed

+1808
-0
lines changed

camel/toolkits/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@
5454
from .excel_toolkit import ExcelToolkit
5555
from .video_analysis_toolkit import VideoAnalysisToolkit
5656
from .image_analysis_toolkit import ImageAnalysisToolkit
57+
from .web_toolkit import WebToolkit
5758

5859

5960
__all__ = [
@@ -96,4 +97,5 @@
9697
'ExcelToolkit',
9798
'VideoAnalysisToolkit',
9899
'ImageAnalysisToolkit',
100+
'WebToolkit',
99101
]

camel/toolkits/page_script.js

Lines changed: 376 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,376 @@
1+
/**
 * Page-side helper that is evaluated inside a browser document.
 *
 * It discovers elements that look interactive, tags them with a numeric
 * `__elementId` attribute, and exposes four functions:
 *   - getInteractiveRects(): tagged elements with their role, name and rects
 *   - getVisualViewport():   visual viewport and document size numbers
 *   - getFocusedElementId(): `__elementId` of the focused element (or null)
 *   - getPageMetadata():     JSON-LD, microdata and <meta> tags of the page
 *
 * The `X = X || ...` form keeps an already-defined instance (and its label
 * counter) when the script is evaluated more than once in the same page.
 */
var MultimodalWebSurfer = MultimodalWebSurfer || (function() {
    "use strict";

    // Next value handed out by labelElements(); the first label is "10".
    let nextLabel = 10;

    // Implicit role for a tag (or "input, type=<type>") without a role attribute.
    let roleMapping = {
        "a": "link",
        "area": "link",
        "button": "button",
        "input, type=button": "button",
        "input, type=checkbox": "checkbox",
        "input, type=email": "textbox",
        "input, type=number": "spinbutton",
        "input, type=radio": "radio",
        "input, type=range": "slider",
        "input, type=reset": "button",
        "input, type=search": "searchbox",
        "input, type=submit": "button",
        "input, type=tel": "textbox",
        "input, type=text": "textbox",
        "input, type=url": "textbox",
        "search": "search",
        "select": "combobox",
        "option": "option",
        "textarea": "textbox"
    };

    /**
     * @param {Element} elm
     * @returns {string} The computed CSS `cursor` value of `elm`.
     */
    let getCursor = function(elm) {
        return window.getComputedStyle(elm)["cursor"];
    };

    /**
     * Collects elements that look interactive, in discovery order and without
     * duplicates: form controls / links / click handlers / focusable elements,
     * then elements with an interactive `role`, then elements whose computed
     * cursor suggests interactivity.
     *
     * @returns {Element[]}
     */
    let getInteractiveElements = function() {
        let results = [];
        // Membership set so de-duplication does not rescan `results` for every node.
        let seen = new Set();
        let addUnique = function(node) {
            if (!seen.has(node)) {
                seen.add(node);
                results.push(node);
            }
        };

        let roles = ["scrollbar", "searchbox", "slider", "spinbutton", "switch", "tab", "treeitem", "button", "checkbox", "gridcell", "link", "menuitem", "menuitemcheckbox", "menuitemradio", "option", "progressbar", "radio", "textbox", "combobox", "menu", "tree", "treegrid", "grid", "listbox", "radiogroup", "widget"];
        let inertCursors = ["auto", "default", "none", "text", "vertical-text", "not-allowed", "no-drop"];

        // Get the main interactive elements
        let nodeList = document.querySelectorAll("input, select, textarea, button, [href], [onclick], [contenteditable], [tabindex]:not([tabindex='-1'])");
        for (let i = 0; i < nodeList.length; i++) {
            addUnique(nodeList[i]);
        }

        // Anything not already included that has a suitable role
        nodeList = document.querySelectorAll("[role]");
        for (let i = 0; i < nodeList.length; i++) {
            if (roles.indexOf(nodeList[i].getAttribute("role")) > -1) {
                addUnique(nodeList[i]);
            }
        }

        // Any element that changes the cursor to something implying interactivity
        nodeList = document.querySelectorAll("*");
        for (let i = 0; i < nodeList.length; i++) {
            let node = nodeList[i];

            // Cursor is default, or does not suggest interactivity
            let cursor = getCursor(node);
            if (inertCursors.indexOf(cursor) >= 0) {
                continue;
            }

            // Move up to the first instance of this cursor change. The walk uses
            // parentElement (not parentNode) so it stops at the root element:
            // the Document above it is not an Element and cannot be passed to
            // getComputedStyle. `parent` is a declared local so the global
            // `window.parent` is never overwritten.
            let parent = node.parentElement;
            while (parent && getCursor(parent) === cursor) {
                node = parent;
                parent = node.parentElement;
            }

            // Add the node if it is new
            addUnique(node);
        }

        return results;
    };

    /**
     * Gives every element that has no `__elementId` attribute yet the next
     * numeric label (stored as a string attribute).
     *
     * @param {Element[]} elements
     */
    let labelElements = function(elements) {
        for (let i = 0; i < elements.length; i++) {
            if (!elements[i].hasAttribute("__elementId")) {
                elements[i].setAttribute("__elementId", "" + (nextLabel++));
            }
        }
    };

    /**
     * @param {Element} element
     * @param {number} x Viewport x coordinate.
     * @param {number} y Viewport y coordinate.
     * @returns {boolean} True when the element hit at (x, y) is `element` or one
     *     of its descendants, or when nothing is hit at all.
     */
    let isTopmost = function(element, x, y) {
        let hit = document.elementFromPoint(x, y);

        // Hack to handle elements outside the viewport
        if (hit === null) {
            return true;
        }

        while (hit) {
            if (hit === element) {
                return true;
            }
            hit = hit.parentNode;
        }
        return false;
    };

    /**
     * @returns {?string} The `__elementId` of document.activeElement or of its
     *     nearest labelled ancestor; null when there is none.
     */
    let getFocusedElementId = function() {
        let elm = document.activeElement;
        while (elm) {
            // The Document at the top of the chain has no hasAttribute().
            if (elm.hasAttribute && elm.hasAttribute("__elementId")) {
                return elm.getAttribute("__elementId");
            }
            elm = elm.parentNode;
        }
        return null;
    };

    /**
     * @param {?Element} element
     * @returns {string} Trimmed innerText, or "" for a missing element / text.
     */
    let trimmedInnerText = function(element) {
        if (!element) {
            return "";
        }
        let text = element.innerText;
        if (!text) {
            return "";
        }
        return text.trim();
    };

    /**
     * Approximates an accessible name. Sources are tried in this order and the
     * first usable one wins: aria-labelledby, aria-label, <label for=id>,
     * a <label> parent, alt, title, and finally the element's own text.
     *
     * @param {Element} element
     * @returns {string}
     */
    let getApproximateAriaName = function(element) {
        // Check for aria labels
        if (element.hasAttribute("aria-labelledby")) {
            let buffer = "";
            let ids = element.getAttribute("aria-labelledby").split(/\s+/);
            for (let i = 0; i < ids.length; i++) {
                let label = document.getElementById(ids[i]);
                if (label) {
                    buffer = buffer + " " + trimmedInnerText(label);
                }
            }
            buffer = buffer.trim();
            // Dangling or empty references fall through to the other sources
            // instead of producing an empty name.
            if (buffer !== "") {
                return buffer;
            }
        }

        if (element.hasAttribute("aria-label")) {
            return element.getAttribute("aria-label");
        }

        // Check for labels
        if (element.hasAttribute("id")) {
            let labelId = element.getAttribute("id");
            let label = "";
            // Compare attribute values directly rather than splicing the id into
            // a selector string: an id containing a quote would otherwise make
            // querySelectorAll throw.
            let labels = document.querySelectorAll("label[for]");
            for (let j = 0; j < labels.length; j++) {
                if (labels[j].getAttribute("for") === labelId) {
                    label += labels[j].innerText + " ";
                }
            }
            label = label.trim();
            if (label !== "") {
                return label;
            }
        }

        if (element.parentElement && element.parentElement.tagName === "LABEL") {
            return trimmedInnerText(element.parentElement);
        }

        // Check for alt text or titles
        if (element.hasAttribute("alt")) {
            return element.getAttribute("alt");
        }

        if (element.hasAttribute("title")) {
            return element.getAttribute("title");
        }

        return trimmedInnerText(element);
    };

    /**
     * @param {Element} element
     * @returns {string[]} `[role, tag]`. `tag` is the lower-case tag name, with
     *     ", type=<type>" appended for typed inputs. `role` is the explicit role
     *     attribute, else the roleMapping entry for `tag`, else "".
     */
    let getApproximateAriaRole = function(element) {
        let tag = element.tagName.toLowerCase();
        if (tag === "input" && element.hasAttribute("type")) {
            // Lower-cased so that e.g. type="TEXT" still matches roleMapping.
            tag = tag + ", type=" + element.getAttribute("type").toLowerCase();
        }

        if (element.hasAttribute("role")) {
            return [element.getAttribute("role"), tag];
        }
        if (Object.prototype.hasOwnProperty.call(roleMapping, tag)) {
            return [roleMapping[tag], tag];
        }
        return ["", tag];
    };

    /**
     * Labels the interactive elements, then describes every labelled element
     * that has at least one client rect whose centre is topmost.
     *
     * @returns {Object<string, Object>} Map from `__elementId` to a record with
     *     the keys "tag_name", "role", "aria-name", "v-scrollable" and "rects".
     */
    let getInteractiveRects = function() {
        labelElements(getInteractiveElements());
        let elements = document.querySelectorAll("[__elementId]");
        let results = {};
        for (let i = 0; i < elements.length; i++) {
            let key = elements[i].getAttribute("__elementId");
            let rects = elements[i].getClientRects();
            let ariaRole = getApproximateAriaRole(elements[i]);
            let ariaName = getApproximateAriaName(elements[i]);
            let vScrollable = elements[i].scrollHeight - elements[i].clientHeight >= 1;

            let record = {
                "tag_name": ariaRole[1],
                "role": ariaRole[0],
                "aria-name": ariaName,
                "v-scrollable": vScrollable,
                "rects": []
            };

            for (const rect of rects) {
                let x = rect.left + rect.width / 2;
                let y = rect.top + rect.height / 2;
                if (isTopmost(elements[i], x, y)) {
                    // JSON round trip turns the rect into a plain data object.
                    record["rects"].push(JSON.parse(JSON.stringify(rect)));
                }
            }

            if (record["rects"].length > 0) {
                results[key] = record;
            }
        }
        return results;
    };

    /**
     * @returns {Object} Numbers read from window.visualViewport and
     *     document.documentElement; each is 0 when its source object is missing.
     */
    let getVisualViewport = function() {
        let vv = window.visualViewport;
        let de = document.documentElement;
        return {
            "height": vv ? vv.height : 0,
            "width": vv ? vv.width : 0,
            "offsetLeft": vv ? vv.offsetLeft : 0,
            "offsetTop": vv ? vv.offsetTop : 0,
            "pageLeft": vv ? vv.pageLeft : 0,
            "pageTop": vv ? vv.pageTop : 0,
            "scale": vv ? vv.scale : 0,
            "clientWidth": de ? de.clientWidth : 0,
            "clientHeight": de ? de.clientHeight : 0,
            "scrollWidth": de ? de.scrollWidth : 0,
            "scrollHeight": de ? de.scrollHeight : 0
        };
    };

    /**
     * @returns {Object<string, string>} `content` of every <meta> that has one,
     *     keyed by its `name` attribute, or by `property` when `name` is absent.
     */
    let _getMetaTags = function() {
        let meta = document.querySelectorAll("meta");
        let results = {};
        for (let i = 0; i < meta.length; i++) {
            let key = null;
            if (meta[i].hasAttribute("name")) {
                key = meta[i].getAttribute("name");
            }
            else if (meta[i].hasAttribute("property")) {
                key = meta[i].getAttribute("property");
            }
            else {
                continue;
            }
            if (meta[i].hasAttribute("content")) {
                results[key] = meta[i].getAttribute("content");
            }
        }
        return results;
    };

    /**
     * @returns {string[]} Trimmed source text of every
     *     <script type="application/ld+json"> element, in document order.
     */
    let _getJsonLd = function() {
        let jsonld = [];
        let scripts = document.querySelectorAll('script[type="application/ld+json"]');
        for (let i = 0; i < scripts.length; i++) {
            jsonld.push(scripts[i].innerHTML.trim());
        }
        return jsonld;
    };

    /**
     * Parses the JSON-LD blocks of a page.
     *
     * One block: returns the parsed value, or `blocks` unchanged when it is not
     * valid JSON. Several blocks: returns an array in which every block is
     * parsed on its own and an unparsable block is kept as its raw string, so a
     * single bad block does not discard the others.
     *
     * @param {string[]} blocks Non-empty list of raw JSON-LD strings.
     * @returns {*}
     */
    let _parseJsonLd = function(blocks) {
        if (blocks.length === 1) {
            try {
                return JSON.parse(blocks[0]);
            }
            catch (e) {
                // Best effort: hand back the raw text rather than failing.
                return blocks;
            }
        }
        return blocks.map(function(raw) {
            try {
                return JSON.parse(raw);
            }
            catch (e) {
                // Best effort: keep the raw text of the block that failed.
                return raw;
            }
        });
    };

    // From: https://www.stevefenton.co.uk/blog/2022/12/parse-microdata-with-javascript/
    /**
     * @returns {Object[]} One object per [itemscope] element, holding its
     *     `itemType` and the itemprop values found beneath it.
     */
    let _getMicrodata = function() {
        function sanitize(input) {
            return input.replace(/\s/g, ' ').trim();
        }

        // Text of a property: content attribute/property, then text, then src.
        function propertyText(child) {
            return sanitize(child.getAttribute("content") || child.content || child.textContent || child.src || "");
        }

        // Stores `value` under `name`; repeated names are collected in one flat array.
        function addValue(information, name, value) {
            // Own-property lookup so inherited members (e.g. "constructor") are
            // not mistaken for a previously stored value.
            const existing = Object.prototype.hasOwnProperty.call(information, name)
                ? information[name]
                : undefined;

            if (!existing) {
                information[name] = value;
            } else if (Array.isArray(existing)) {
                existing.push(value);
            } else {
                information[name] = [existing, value];
            }
        }

        function traverseItem(item, information) {
            const children = item.children;

            for (let i = 0; i < children.length; i++) {
                const child = children[i];

                if (child.hasAttribute('itemscope')) {
                    // A nested item is recorded only when it is a property of
                    // its parent; its own properties go into `childInfo`.
                    if (child.hasAttribute('itemprop')) {
                        const itemProp = child.getAttribute('itemprop');
                        const childInfo = {
                            itemType: child.getAttribute('itemtype')
                        };

                        traverseItem(child, childInfo);

                        itemProp.split(' ').forEach(propName => {
                            addValue(information, propName, childInfo);
                        });
                    }
                } else if (child.hasAttribute('itemprop')) {
                    const itemProp = child.getAttribute('itemprop');
                    itemProp.split(' ').forEach(propName => {
                        if (propName === 'url') {
                            // Elements without an href (e.g. <meta content=...>)
                            // fall back to the generic property text.
                            addValue(information, propName, child.href || propertyText(child));
                        } else {
                            addValue(information, propName, propertyText(child));
                        }
                    });
                    traverseItem(child, information);
                } else {
                    traverseItem(child, information);
                }
            }
        }

        const microdata = [];

        document.querySelectorAll("[itemscope]").forEach(function(elem) {
            const information = {
                itemType: elem.getAttribute('itemtype')
            };
            traverseItem(elem, information);
            microdata.push(information);
        });

        return microdata;
    };

    /**
     * @returns {Object} An object with the optional keys "jsonld", "microdata"
     *     and "meta_tags"; a key is present only when the page has such data.
     */
    let getPageMetadata = function() {
        let jsonld = _getJsonLd();
        let metaTags = _getMetaTags();
        let microdata = _getMicrodata();
        let results = {};
        if (jsonld.length > 0) {
            results["jsonld"] = _parseJsonLd(jsonld);
        }
        if (microdata.length > 0) {
            results["microdata"] = microdata;
        }
        if (Object.keys(metaTags).length > 0) {
            results["meta_tags"] = metaTags;
        }
        return results;
    };

    return {
        getInteractiveRects: getInteractiveRects,
        getVisualViewport: getVisualViewport,
        getFocusedElementId: getFocusedElementId,
        getPageMetadata: getPageMetadata,
    };
})();

0 commit comments

Comments
 (0)