fix: line chart verification bug && unknown status (microsoft#15)

cxxxxxn · ultmaster · web-flow · commit 9a4d670c3f7f · 2023-11-06T15:37:42.000+08:00
* fix: line chart verification bug && unknown status

* Minor improvements

* Reformat

* Minor improvements

---------

Co-authored-by: Yuge Zhang <scottyugochang@gmail.com>
diff --git a/coml/core.py b/coml/core.py
@@ -195,7 +195,7 @@ def explain(self, code: str) -> str:
 
     def static_check(
         self, code: str, context: GenerateContext | FixContext
-    ) -> tuple[bool, str]:
+    ) -> tuple[bool | None, str]:
         # Check the quality of code by looking at it (i.e., rubberduck)
         messages = [
             SystemMessage(content=CHECK_INSTRUCTION),
@@ -209,15 +209,15 @@ def static_check(
             return False, reason
         if "CORRECT" in last_line.upper():
             return True, reason
-        raise ValueError("Unable to parse the response.")
+        return None, response.content
 
     def output_sanity_check(
         self,
         code: str,
         context: GenerateContext | FixContext,
         error: str | None,
         output: str | None,
-    ) -> tuple[bool, str]:
+    ) -> tuple[bool | None, str]:
         # Run a sanity check of the output of the code
         messages = [
             SystemMessage(content=SANITY_CHECK_INSTRUCTION),
@@ -233,7 +233,7 @@ def output_sanity_check(
             return False, reason
         if "CORRECT" in last_line.upper():
             return True, reason
-        raise ValueError("Unable to parse the response.")
+        return None, response.content
 
     def visualization_check(
         self,
@@ -242,12 +242,20 @@ def visualization_check(
         svg_string: str,
         variable_descriptions: dict[str, str],
         source,
-    ) -> tuple[bool, list[tuple[bool, str]]]:
+    ) -> tuple[bool | None, list[tuple[bool | None, str]]]:
         vis_verifier = VisVerifier(self.llm, self)
         verifications = vis_verifier.verify(
             request, previous_code, svg_string, variable_descriptions, source
         )
-        pass_verify = all([verification["answer"] for verification in verifications])
+
+        answers = [verification["answer"] for verification in verifications]
+        if False in answers:
+            pass_verify = False
+        elif None in answers:
+            pass_verify = None
+        else:
+            pass_verify = True
+
         reason = []
         for verification in verifications:
             answer = verification["answer"]
diff --git a/coml/magics.py b/coml/magics.py
@@ -66,6 +66,16 @@
 </style>
 """
 
+VERIFY_STATUS_ICON = {
+    "error": "❌",
+    "warning": "⚠️",
+    "info": "ℹ️",
+    "ok": "✅",
+    None: "❔",
+    True: "✅",
+    False: "❌",
+}
+
 
 @magics_class
 class CoMLMagics(Magics):
@@ -265,14 +275,6 @@ def display_statuses(statuses):
             elif error or output:
                 display_names["sanity"] = "Output sanity check"
 
-            status_icon = {
-                "error": "❌",
-                "warning": "⚠️",
-                "info": "ℹ️",
-                "ok": "✅",
-                True: "✅",
-                False: "❌",
-            }
             loading = "<span class='loader'></span>"
             message_template = "<details><summary><b>{}:</b> {}</summary>\n{}</details>"
             for name in display_names:
@@ -285,7 +287,7 @@ def display_statuses(statuses):
                     display_names[name],
                     loading
                     if name not in statuses
-                    else status_icon[statuses[name]["result"]],
+                    else VERIFY_STATUS_ICON[statuses[name]["result"]],
                     detail_message,
                 )
 
@@ -318,14 +320,14 @@ def display_statuses(statuses):
                     visualization_check_details,
                 ) = self.agent.visualization_check(
                     context["request"],
-                    "\n".join(self._get_code_context()),
+                    "\n".join(context["codes"]),
                     output.replace("<image/svg+xml>", ""),
                     context["variables"],
                     vis_framework,
                 )
                 details = ""
                 for detail in visualization_check_details:
-                    details += ("✅" if detail[0] else "❌") + " " + detail[1] + "\n"
+                    details += VERIFY_STATUS_ICON[detail[0]] + " " + detail[1] + "\n"
                 result["vis"] = {
                     "result": visualization_check_result,
                     "details": details,
diff --git a/coml/vis_utils/deconstruct.py b/coml/vis_utils/deconstruct.py
@@ -329,7 +329,10 @@ def process_legend_matplotlib(spec):
     spec["type"] = "legend"
     labels = []
     examples = []
-    for i in range(1, len(spec["children"])):
+    # The first child (index 0) might be the background rather than a label.
+    # TODO: find a more accurate way to recognize the background.
+    first = 0 if spec["children"][0]["tag"] == "text" else 1
+    for i in range(first, len(spec["children"])):
         child = spec["children"][i]
         if child["tag"] == "text":
             labels.append(child)
@@ -1054,7 +1057,7 @@ def analysis_mark(nodes, spec):
         lines = []
         if "path" in nodes:
             paths = nodes["path"]
-            lines += identify_mark_lines(paths, spec)
+            lines += identify_mark_lines(paths)
         if "line" in nodes:
             lines += nodes["line"]
             # line chart that only has two points
@@ -1213,26 +1216,51 @@ def deconstruct(svg, source="matplotlib"):
     # matplotlib parser
     defss = {}
     spec = parser_node(svg, None, defss, [0, 0], {}, source)
-
-    for child in spec["children"][0]["children"]:
-        if "type" in child and child["type"] == "subplot":
-            child["encoding"] = {}
-            others = {}
-            for child2 in child["children"]:
-                if "type" in child2:
-                    if child2["type"] == "xaxis" or child2["type"] == "yaxis":
-                        analysis_axis(child2, child["encoding"])
-                    elif child2["type"] == "legend":
-                        analysis_legend(child2, child["encoding"])
+    subplots = [
+        child
+        for child in spec["children"][0]["children"]
+        if ("type" in child and child["type"] == "subplot")
+    ]
+    if len(subplots) != 1:
+        return None
+    subplot = subplots[0]
+
+    # find legend
+    legends = [
+        child
+        for child in subplot["children"]
+        if ("type" in child and child["type"] == "legend")
+    ]
+    legend = None
+    if len(legends) > 1:
+        return None
+    elif len(legends) == 1:
+        legend = legends[0]
+    else:
+        # The legend may be located outside of the subplot.
+        legends = [
+            child
+            for child in spec["children"][0]["children"]
+            if ("type" in child and child["type"] == "legend")
+        ]
+        if len(legends) == 1:
+            legend = legends[0]
+
+    subplot["encoding"] = {}
+    if legend is not None:
+        analysis_legend(legend, subplot["encoding"])
+    others = {}
+    for child in subplot["children"]:
+        if "type" in child:
+            if child["type"] == "xaxis" or child["type"] == "yaxis":
+                analysis_axis(child, subplot["encoding"])
+        else:
+            nodes = get_leaf_nodes(child)
+            for node in nodes:
+                if node["tag"] not in others:
+                    others[node["tag"]] = [node]
                 else:
-                    nodes = get_leaf_nodes(child2)
-                    for node in nodes:
-                        if node["tag"] not in others:
-                            others[node["tag"]] = [node]
-                        else:
-                            others[node["tag"]].append(node)
-            analysis_scale(child)
-            analysis_mark(others, child)
-            return child
-
-    return None
+                    others[node["tag"]].append(node)
+    analysis_scale(subplot)
+    analysis_mark(others, subplot)
+    return subplot
diff --git a/coml/vis_utils/verifier.py b/coml/vis_utils/verifier.py
@@ -158,8 +158,9 @@ def answer_question(
         [previous_code],
     )
     code = generating_context["answer"]
-
     final_code = previous_code + "\n" + code
+    # do not show figure
+    final_code = final_code.replace("plt.show()", "")
     global_env = {"finding": None}
     try:
         exec(final_code, global_env)
@@ -607,9 +608,9 @@ def check_order(order: dict, chart_info: dict):
             if not is_sorted:
                 result["answer"] = False
 
-    result["rationale"] = f"Sort {order['channel']} in {order['order']} order."
+    result["rationale"] = f"{order['channel']} is sorted in {order['order']} order."
     if result["answer"] is False:
-        result["rationale"] = result["rationale"].replace("Sort", "Doesn't sort")
+        result["rationale"] = result["rationale"].replace("is sorted", "is not sorted")
 
     return result
 
@@ -627,7 +628,13 @@ def __init__(self, llm: BaseChatModel, agent):
     def _add_verification(self, verification):
         self.verifications.append(verification)
         # display
-        answer = "✅" if verification["answer"] else "❌"
+        answer = ""
+        if verification["answer"] is True:
+            answer = "✅"
+        elif verification["answer"] is False:
+            answer = "❌"
+        elif verification["answer"] is None:
+            answer = "❔"
         aspect = verification["aspect"].capitalize()
         rationale = verification["rationale"]
         print(answer + " " + aspect + ": " + rationale)
@@ -642,7 +649,7 @@ def verify(
     ):
         self.verifications = []
         understand_fail_result = {
-            "answer": False,
+            "answer": None,
             "aspect": "Visualization understanding",
             "rationale": "Cannot understand the visualization.",
         }
@@ -658,7 +665,10 @@ def verify(
                 # STEP2: check chart type, data encoding and title
                 self.verify_chart_info(request, chart_info, variable_descriptions)
                 pass_verify = all(
-                    [verification["answer"] for verification in self.verifications]
+                    [
+                        verification["answer"] is True
+                        for verification in self.verifications
+                    ]
                 )
                 if pass_verify:
                     # STEP3: check visualization data
@@ -707,6 +717,22 @@ def verify_data(
         try:
             # STEP 1: Spot-Check
             data = chart_info["data"]
+            encoding = chart_info["encoding"]
+            # check label
+            for channel in encoding.keys():
+                if "title" not in encoding[channel]:
+                    verification = {
+                        "aspect": channel + " label",
+                        "answer": None,
+                        "rationale": "Channel "
+                        + channel
+                        + " is not labeled, so accurate understanding of the data on the graph is difficult.",
+                    }
+                    self._add_verification(verification)
+                    verifications.append(verification)
+            if len(verifications) > 0:
+                return verifications
+
             # random pick NUM_SAMPLE data points
             indexes = range(len(data))
             sampled_indexes = random.sample(indexes, NUM_SAMPLE)
@@ -737,11 +763,11 @@ def verify_data(
                     if verification:
                         self._add_verification(verification)
                         verifications.append(verification)
-                        if verification["answer"] is False:
+                        if verification["answer"] is not True:
                             break
 
             pass_verify = all(
-                [verification["answer"] for verification in verifications]
+                [verification["answer"] is True for verification in verifications]
             )
             if not pass_verify:
                 return verifications