diff --git a/backend/open_webui/tools/builtin.py b/backend/open_webui/tools/builtin.py index 78d64faafc..6aca621c97 100644 --- a/backend/open_webui/tools/builtin.py +++ b/backend/open_webui/tools/builtin.py @@ -36,7 +36,7 @@ from open_webui.models.chats import Chats from open_webui.models.channels import Channels, ChannelMember, Channel from open_webui.models.messages import Messages, Message from open_webui.models.groups import Groups -from open_webui.utils.sanitize import strip_markdown_code_fences +from open_webui.utils.sanitize import sanitize_code log = logging.getLogger(__name__) @@ -371,8 +371,8 @@ async def execute_code( return json.dumps({"error": "Request context not available"}) try: - # Strip markdown fences if model included them - code = strip_markdown_code_fences(code) + # Sanitize code (strips ANSI codes and markdown fences) + code = sanitize_code(code) # Import blocked modules from config (same as middleware) from open_webui.config import CODE_INTERPRETER_BLOCKED_MODULES diff --git a/backend/open_webui/utils/middleware.py b/backend/open_webui/utils/middleware.py index 5db562725d..402898ad7c 100644 --- a/backend/open_webui/utils/middleware.py +++ b/backend/open_webui/utils/middleware.py @@ -73,7 +73,7 @@ from open_webui.models.models import Models from open_webui.retrieval.utils import get_sources_from_items -from open_webui.utils.sanitize import strip_markdown_code_fences +from open_webui.utils.sanitize import sanitize_code from open_webui.utils.chat import generate_chat_completion from open_webui.utils.task import ( get_task_model_id, @@ -4175,8 +4175,8 @@ async def streaming_chat_response_handler(response, ctx): try: if content_blocks[-1]["attributes"].get("type") == "code": code = content_blocks[-1]["content"] - # Strip markdown fences if model included them - code = strip_markdown_code_fences(code) + # Sanitize code (strips ANSI codes and markdown fences) + code = sanitize_code(code) if CODE_INTERPRETER_BLOCKED_MODULES: blocking_code = 
textwrap.dedent(
diff --git a/backend/open_webui/utils/sanitize.py b/backend/open_webui/utils/sanitize.py
index 5755a9be48..344a7e08e2 100644
--- a/backend/open_webui/utils/sanitize.py
+++ b/backend/open_webui/utils/sanitize.py
@@ -1,5 +1,25 @@
 import re
 
+# ANSI escape code pattern - CSI sequences (parameter bytes 0-?, intermediate bytes SP-/, final byte @-~),
+# charset selection, DCS/SOS/PM/APC strings and OSC strings; covers colors, cursor movement and private modes
+ANSI_ESCAPE_PATTERN = re.compile(r'\x1b\[[0-?]*[ -/]*[@-~]|\x1b\([AB]|\x1b[PX^_].*?\x1b\\|\x1b\].*?(?:\x07|\x1b\\)')
+
+
+def strip_ansi_codes(text: str) -> str:
+    r"""
+    Strip ANSI escape codes from text.
+
+    ANSI escape codes can be introduced by LLMs that include terminal
+    color codes in their output. These codes cause syntax errors when
+    the code is sent to Jupyter for execution.
+
+    Common ANSI codes include (raw docstring, so the examples stay literal text):
+    - Color codes: \x1b[31m (red), \x1b[32m (green), etc.
+    - Reset codes: \x1b[0m, \x1b[39m
+    - Cursor movement: \x1b[1A, \x1b[2J, etc.
+    """
+    return ANSI_ESCAPE_PATTERN.sub('', text)
+
 
 def strip_markdown_code_fences(code: str) -> str:
     """
@@ -19,3 +39,20 @@ def strip_markdown_code_fences(code: str) -> str:
     # Remove closing fence
     code = re.sub(r"\n?```\s*$", "", code)
     return code.strip()
+
+
+def sanitize_code(code: str) -> str:
+    """
+    Sanitize code for execution by applying all necessary cleanup steps.
+
+    This is the recommended function to use before sending code to
+    interpreters like Jupyter or Pyodide.
+
+    Steps applied:
+    1. Strip ANSI escape codes (from LLM output)
+    2. Strip markdown code fences (if model included them)
+    """
+    code = strip_ansi_codes(code)
+    code = strip_markdown_code_fences(code)
+    return code
+