2026-01-26 18:04:58 +04:00
|
|
|
import re
|
|
|
|
|
|
2026-02-06 22:25:18 +04:00
|
|
|
# ANSI escape code pattern - matches all common ANSI sequences
|
|
|
|
|
# This includes color codes, cursor movement, and other terminal control sequences
|
2026-02-11 16:24:11 -06:00
|
|
|
# Compiled once at import time and reused by every call.
# Each alternative below covers one family of escape sequences.
ANSI_ESCAPE_PATTERN = re.compile(
    # CSI: ESC [ <parameter bytes 0x30-0x3F> <intermediate bytes 0x20-0x2F>
    # <final byte 0x40-0x7E>. Accepting the full parameter range (not just
    # digits and ';') also strips private-mode sequences such as ESC[?25l
    # and colon-separated SGR parameters such as ESC[38:2::255:0:0m.
    r"\x1b\[[0-?]*[ -/]*[@-~]"
    # Character-set designation: ESC ( A  or  ESC ( B.
    r"|\x1b\([AB]"
    # DCS / SOS / PM / APC strings: ESC P|X|^|_ ... terminated by ESC \ (ST).
    r"|\x1b[PX^_].*?\x1b\\"
    # OSC strings: ESC ] ... terminated by BEL or ESC \ (ST).
    r"|\x1b\].*?(?:\x07|\x1b\\)"
)
|
2026-02-06 22:25:18 +04:00
|
|
|
|
|
|
|
|
|
|
|
|
|
def strip_ansi_codes(text: str) -> str:
    """
    Return ``text`` with every ANSI escape sequence removed.

    LLM output sometimes carries terminal control sequences (written here
    with ESC standing for the escape byte 0x1B), for example:
    - colours: ESC[31m (red), ESC[32m (green)
    - resets: ESC[0m, ESC[39m
    - cursor movement / clearing: ESC[1A, ESC[2J

    Left in place, these bytes cause syntax errors when the code is sent
    to Jupyter for execution, so each match of ANSI_ESCAPE_PATTERN is
    replaced with the empty string.
    """
    cleaned = ANSI_ESCAPE_PATTERN.sub("", text)
    return cleaned
|
2026-02-06 22:25:18 +04:00
|
|
|
|
2026-01-26 18:04:58 +04:00
|
|
|
|
|
|
|
|
def strip_markdown_code_fences(code: str) -> str:
    """
    Remove a leading and/or trailing markdown code fence from ``code``.

    Surrounding whitespace is trimmed first. An opening fence at the very
    start (three backticks, optionally followed by a word-character
    language tag such as ``python`` or ``py``) and a closing fence at the
    very end are then dropped independently of each other. Input without
    fences comes back unchanged apart from the whitespace trim, so the
    function is safe to apply defensively.
    """
    fence_patterns = (
        r"^```\w*\n?",  # opening fence plus optional language tag and newline
        r"\n?```\s*$",  # closing fence plus any trailing whitespace
    )
    unfenced = code.strip()
    for fence in fence_patterns:
        unfenced = re.sub(fence, "", unfenced)
    return unfenced.strip()
|
2026-02-06 22:25:18 +04:00
|
|
|
|
|
|
|
|
|
|
|
|
|
def sanitize_code(code: str) -> str:
    """
    Clean up model-produced code so it is ready to execute.

    This is the recommended entry point before handing code to an
    interpreter such as Jupyter or Pyodide. The cleanup runs in this order:
    1. ANSI escape codes are stripped (strip_ansi_codes).
    2. Markdown code fences are stripped (strip_markdown_code_fences).
    """
    # Escape codes are removed first, then the fence stripper runs on the
    # cleaned text.
    return strip_markdown_code_fences(strip_ansi_codes(code))
|