import re

# ANSI escape sequence pattern. Alternatives, in order:
#   1. CSI sequences: ESC [ <parameter bytes 0x30-0x3F> <intermediate bytes
#      0x20-0x2F> <final byte 0x40-0x7E>. This covers colors (ESC[31m),
#      cursor movement (ESC[1A, ESC[2J) and private-mode sequences whose
#      parameters start with "?", "<", "=" or ">" (e.g. ESC[?25l).
#   2. ESC ( A / ESC ( B character-set selection.
#   3. ESC P / ESC X / ESC ^ / ESC _ strings terminated by ESC \.
#   4. ESC ] (OSC) strings terminated by BEL or ESC \.
# "." does not match newlines here, so the string forms (3 and 4) are only
# recognized when the terminator is on the same line as the introducer.
ANSI_ESCAPE_PATTERN = re.compile(
    r"\x1b\[[0-?]*[ -/]*[@-~]"
    r"|\x1b\([AB]"
    r"|\x1b[PX^_].*?\x1b\\"
    r"|\x1b\].*?(?:\x07|\x1b\\)"
)

# Opening fence at the very start of the text: three backticks, an optional
# word-character language tag, and at most one following newline.
_OPENING_FENCE_PATTERN = re.compile(r"^```\w*\n?")

# Closing fence at the very end of the text: an optional preceding newline,
# three backticks, and any trailing whitespace.
_CLOSING_FENCE_PATTERN = re.compile(r"\n?```\s*$")


def strip_ansi_codes(text: str) -> str:
    r"""
    Strip ANSI escape codes from text.

    ANSI escape codes can be introduced by LLMs that include terminal color
    codes in their output. These codes cause syntax errors when the code is
    sent to Jupyter for execution.

    Common ANSI codes include:
    - Color codes: \x1b[31m (red), \x1b[32m (green), etc.
    - Reset codes: \x1b[0m, \x1b[39m
    - Cursor movement: \x1b[1A, \x1b[2J, etc.
    - Private-mode toggles: \x1b[?25l, \x1b[?25h, etc.

    This docstring is a raw string on purpose, so the examples above are
    stored as visible text rather than as real escape characters.

    Args:
        text: The text to clean.

    Returns:
        ``text`` with every match of ``ANSI_ESCAPE_PATTERN`` removed; text
        without escape sequences is returned unchanged.
    """
    return ANSI_ESCAPE_PATTERN.sub("", text)


def strip_markdown_code_fences(code: str) -> str:
    """
    Strip markdown code fences if present.

    This is a defensive, non-breaking change: if the code doesn't contain
    fences, it passes through unchanged (apart from surrounding whitespace
    being trimmed).

    Handles opening patterns like:
    - ```python
    - ```py
    - ```

    Only a fence at the very start and a fence at the very end of the
    trimmed text are removed; fences elsewhere in the text are left alone.

    Args:
        code: The code, possibly wrapped in a markdown code fence.

    Returns:
        The code without the leading/trailing fence, with surrounding
        whitespace stripped.
    """
    code = code.strip()
    # Remove opening fence (```python, ```py, ``` etc.)
    code = _OPENING_FENCE_PATTERN.sub("", code)
    # Remove closing fence
    code = _CLOSING_FENCE_PATTERN.sub("", code)
    return code.strip()


def sanitize_code(code: str) -> str:
    """
    Sanitize code for execution by applying all necessary cleanup steps.

    This is the recommended function to use before sending code to
    interpreters like Jupyter or Pyodide.

    Steps applied, in order:
    1. Strip ANSI escape codes (from LLM output)
    2. Strip markdown code fences (if model included them)

    ANSI codes are removed first so that a fence wrapped in color codes is
    still found at the start/end of the text by the second step.

    Args:
        code: The raw code text.

    Returns:
        The cleaned code.
    """
    code = strip_ansi_codes(code)
    code = strip_markdown_code_fences(code)
    return code