refac

2026-02-24 04:00:31 +01:00 · 2026-02-06 22:25:18 +04:00
parent 26286625f4
commit b1737040a7
3 changed files with 43 additions and 6 deletions
--- a/backend/open_webui/tools/builtin.py
+++ b/backend/open_webui/tools/builtin.py
@@ -36,7 +36,7 @@ from open_webui.models.chats import Chats
 from open_webui.models.channels import Channels, ChannelMember, Channel
 from open_webui.models.messages import Messages, Message
 from open_webui.models.groups import Groups
-from open_webui.utils.sanitize import strip_markdown_code_fences
+from open_webui.utils.sanitize import sanitize_code

 log = logging.getLogger(__name__)

@@ -371,8 +371,8 @@ async def execute_code(
        return json.dumps({"error": "Request context not available"})

    try:
-        # Strip markdown fences if model included them
-        code = strip_markdown_code_fences(code)
+        # Sanitize code (strips ANSI codes and markdown fences)
+        code = sanitize_code(code)

        # Import blocked modules from config (same as middleware)
        from open_webui.config import CODE_INTERPRETER_BLOCKED_MODULES
--- a/backend/open_webui/utils/middleware.py
+++ b/backend/open_webui/utils/middleware.py
@@ -73,7 +73,7 @@ from open_webui.models.models import Models
 from open_webui.retrieval.utils import get_sources_from_items


-from open_webui.utils.sanitize import strip_markdown_code_fences
+from open_webui.utils.sanitize import sanitize_code
 from open_webui.utils.chat import generate_chat_completion
 from open_webui.utils.task import (
    get_task_model_id,
@@ -4175,8 +4175,8 @@ async def streaming_chat_response_handler(response, ctx):
                        try:
                            if content_blocks[-1]["attributes"].get("type") == "code":
                                code = content_blocks[-1]["content"]
-                                # Strip markdown fences if model included them
-                                code = strip_markdown_code_fences(code)
+                                # Sanitize code (strips ANSI codes and markdown fences)
+                                code = sanitize_code(code)

                                if CODE_INTERPRETER_BLOCKED_MODULES:
                                    blocking_code = textwrap.dedent(
--- a/backend/open_webui/utils/sanitize.py
+++ b/backend/open_webui/utils/sanitize.py
@@ -1,5 +1,25 @@
 import re

+# ANSI escape sequence pattern: CSI sequences (colors, cursor movement, and private modes
+# such as ESC[?25l), G0 charset selection, DCS/SOS/PM/APC strings, and OSC strings.
+ANSI_ESCAPE_PATTERN = re.compile(r'\x1b\[[0-?]*[A-Za-z]|\x1b\([AB]|\x1b[PX^_].*?\x1b\\|\x1b\].*?(?:\x07|\x1b\\)')
+
+
+def strip_ansi_codes(text: str) -> str:
+    r"""
+    Remove ANSI escape sequences from ``text`` and return the result.
+
+    LLMs sometimes include terminal control codes in their output; left in
+    place, they cause syntax errors when the code is sent to Jupyter.
+    Every match of ANSI_ESCAPE_PATTERN is deleted; other text is untouched.
+
+    Examples of sequences that are removed:
+    - Color codes: \x1b[31m (red), \x1b[32m (green), etc.
+    - Reset codes: \x1b[0m, \x1b[39m
+    - Cursor movement: \x1b[1A, \x1b[2J, etc.
+    """
+    return ANSI_ESCAPE_PATTERN.sub('', text)
+

 def strip_markdown_code_fences(code: str) -> str:
    """
@@ -19,3 +39,20 @@ def strip_markdown_code_fences(code: str) -> str:
    # Remove closing fence
    code = re.sub(r"\n?```\s*$", "", code)
    return code.strip()
+
+
+def sanitize_code(code: str) -> str:
+    """
+    Clean up model-generated code so it can be handed to an interpreter.
+
+    Call this before sending code to interpreters such as Jupyter or
+    Pyodide. The cleanup passes run in a fixed order:
+
+    1. ANSI escape codes are stripped (strip_ansi_codes).
+    2. Markdown code fences are stripped (strip_markdown_code_fences).
+
+    Returns the cleaned code string.
+    """
+    without_ansi = strip_ansi_codes(code)
+    return strip_markdown_code_fences(without_ansi)
+