fix gpt-4 censorship triggered by system message (#74)

2025-12-16 11:47:48 +01:00 · 2024-02-01 12:15:30 +02:00
parent 4ba635497b
commit eb251d6e37
14 changed files with 125 additions and 32 deletions
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "poetry.masonry.api"

 [tool.poetry]
 name = "talemate"
-version = "0.18.1"
+version = "0.18.2"
 description = "AI-backed roleplay and narrative tools"
 authors = ["FinalWombat"]
 license = "GNU Affero General Public License v3.0"
--- a/src/talemate/init.py
+++ b/src/talemate/init.py
@@ -2,4 +2,4 @@ from .agents import Agent
 from .client import TextGeneratorWebuiClient
 from .tale_mate import *

-VERSION = "0.18.1"
+VERSION = "0.18.2"
--- a/src/talemate/client/base.py
+++ b/src/talemate/client/base.py
@@ -22,6 +22,8 @@ from talemate.emit import emit
 # Set up logging level for httpx to WARNING to suppress debug logs.
 logging.getLogger("httpx").setLevel(logging.WARNING)

+log = structlog.get_logger("client.base")
+
 REMOTE_SERVICES = [
    # TODO: runpod.py should add this to the list
    ".runpod.net"
@@ -54,7 +56,7 @@ class ClientBase:
    connected: bool = False
    conversation_retries: int = 2
    auto_break_repetition_enabled: bool = True
-
+    decensor_enabled: bool = True
    client_type = "base"

    class Meta(pydantic.BaseModel):
@@ -151,33 +153,60 @@ class ClientBase:
        - kind: the kind of generation
        """

-        # TODO: make extensible
-
-        if "narrate" in kind:
-            return system_prompts.NARRATOR
-        if "story" in kind:
-            return system_prompts.NARRATOR
-        if "director" in kind:
-            return system_prompts.DIRECTOR
-        if "create" in kind:
-            return system_prompts.CREATOR
-        if "roleplay" in kind:
-            return system_prompts.ROLEPLAY
-        if "conversation" in kind:
-            return system_prompts.ROLEPLAY
-        if "editor" in kind:
-            return system_prompts.EDITOR
-        if "world_state" in kind:
-            return system_prompts.WORLD_STATE
-        if "analyze_freeform" in kind:
-            return system_prompts.ANALYST_FREEFORM
-        if "analyst" in kind:
-            return system_prompts.ANALYST
-        if "analyze" in kind:
-            return system_prompts.ANALYST
-        if "summarize" in kind:
-            return system_prompts.SUMMARIZE
-
+        if self.decensor_enabled:
+            
+            if "narrate" in kind:
+                return system_prompts.NARRATOR
+            if "story" in kind:
+                return system_prompts.NARRATOR
+            if "director" in kind:
+                return system_prompts.DIRECTOR
+            if "create" in kind:
+                return system_prompts.CREATOR
+            if "roleplay" in kind:
+                return system_prompts.ROLEPLAY
+            if "conversation" in kind:
+                return system_prompts.ROLEPLAY
+            if "editor" in kind:
+                return system_prompts.EDITOR
+            if "world_state" in kind:
+                return system_prompts.WORLD_STATE
+            if "analyze_freeform" in kind:
+                return system_prompts.ANALYST_FREEFORM
+            if "analyst" in kind:
+                return system_prompts.ANALYST
+            if "analyze" in kind:
+                return system_prompts.ANALYST
+            if "summarize" in kind:
+                return system_prompts.SUMMARIZE
+            
+        else:
+            
+            if "narrate" in kind:
+                return system_prompts.NARRATOR_NO_DECENSOR
+            if "story" in kind:
+                return system_prompts.NARRATOR_NO_DECENSOR
+            if "director" in kind:
+                return system_prompts.DIRECTOR_NO_DECENSOR
+            if "create" in kind:
+                return system_prompts.CREATOR_NO_DECENSOR
+            if "roleplay" in kind:
+                return system_prompts.ROLEPLAY_NO_DECENSOR
+            if "conversation" in kind:
+                return system_prompts.ROLEPLAY_NO_DECENSOR
+            if "editor" in kind:
+                return system_prompts.EDITOR_NO_DECENSOR
+            if "world_state" in kind:
+                return system_prompts.WORLD_STATE_NO_DECENSOR
+            if "analyze_freeform" in kind:
+                return system_prompts.ANALYST_FREEFORM_NO_DECENSOR
+            if "analyst" in kind:
+                return system_prompts.ANALYST_NO_DECENSOR
+            if "analyze" in kind:
+                return system_prompts.ANALYST_NO_DECENSOR
+            if "summarize" in kind:
+                return system_prompts.SUMMARIZE_NO_DECENSOR
+            
        return system_prompts.BASIC

    def emit_status(self, processing: bool = None):
--- a/src/talemate/client/openai.py
+++ b/src/talemate/client/openai.py
@@ -83,6 +83,8 @@ class OpenAIClient(ClientBase):
    client_type = "openai"
    conversation_retries = 0
    auto_break_repetition_enabled = False
+    # TODO: make this configurable?
+    decensor_enabled = False

    class Meta(ClientBase.Meta):
        name_prefix: str = "OpenAI"
@@ -253,8 +255,8 @@ class OpenAIClient(ClientBase):
        human_message = {"role": "user", "content": prompt.strip()}
        system_message = {"role": "system", "content": self.get_system_message(kind)}

-        self.log.debug("generate", prompt=prompt[:128] + " ...", parameters=parameters)
-
+        self.log.debug("generate", prompt=prompt[:128] + " ...", parameters=parameters, system_message=system_message)
+        
        try:
            response = await self.client.chat.completions.create(
                model=self.model_name,
--- a/src/talemate/client/system_prompts.py
+++ b/src/talemate/client/system_prompts.py
@@ -19,3 +19,23 @@ EDITOR = str(Prompt.get("editor.system"))
 WORLD_STATE = str(Prompt.get("world_state.system-analyst"))

 SUMMARIZE = str(Prompt.get("summarizer.system"))
+
+# CAREBEAR PROMPTS: system prompts without the decensor instructions (returned when decensor_enabled is False)
+
+ROLEPLAY_NO_DECENSOR = str(Prompt.get("conversation.system-no-decensor"))
+
+NARRATOR_NO_DECENSOR = str(Prompt.get("narrator.system-no-decensor"))
+
+CREATOR_NO_DECENSOR = str(Prompt.get("creator.system-no-decensor"))
+
+DIRECTOR_NO_DECENSOR = str(Prompt.get("director.system-no-decensor"))
+
+ANALYST_NO_DECENSOR = str(Prompt.get("world_state.system-analyst-no-decensor"))
+
+ANALYST_FREEFORM_NO_DECENSOR = str(Prompt.get("world_state.system-analyst-freeform-no-decensor"))
+
+EDITOR_NO_DECENSOR = str(Prompt.get("editor.system-no-decensor"))
+
+WORLD_STATE_NO_DECENSOR = str(Prompt.get("world_state.system-analyst-no-decensor"))
+
+SUMMARIZE_NO_DECENSOR = str(Prompt.get("summarizer.system-no-decensor"))
--- a/src/talemate/prompts/templates/conversation/system-no-decensor.jinja2
+++ b/src/talemate/prompts/templates/conversation/system-no-decensor.jinja2
@@ -0,0 +1 @@
+A roleplaying session between a user and a talented actor. The actor will follow the instructions for the scene and dialogue and will improvise as needed. The actor will only respond as one character.
--- a/src/talemate/prompts/templates/creator/system-no-decensor.jinja2
+++ b/src/talemate/prompts/templates/creator/system-no-decensor.jinja2
@@ -0,0 +1 @@
+A chat between a user and a talented fiction narrator. The narrator will describe scenes and characters based on stories provided to him in easy-to-read and easy-to-understand yet exciting detail. The narrator will never remind us that what he writes is fictional.
--- a/src/talemate/prompts/templates/director/system-no-decensor.jinja2
+++ b/src/talemate/prompts/templates/director/system-no-decensor.jinja2
@@ -0,0 +1 @@
+A chat between a user and a talented fiction director. The director will give instructions to a specific character to help them guide the story towards a specific goal.
--- a/src/talemate/prompts/templates/editor/system-no-decensor.jinja2
+++ b/src/talemate/prompts/templates/editor/system-no-decensor.jinja2
@@ -0,0 +1 @@
+A chat between an author and a talented fiction editor. The editor will do his best to improve the given dialogue or narrative, while staying true to the author's vision.
--- a/src/talemate/prompts/templates/narrator/system-no-decensor.jinja2
+++ b/src/talemate/prompts/templates/narrator/system-no-decensor.jinja2
@@ -0,0 +1 @@
+A chat between a user and a talented fiction narrator. The narrator will describe scenes and characters based on stories provided to him in easy-to-read and easy-to-understand yet exciting detail. The narrator will never remind us that what he writes is fictional.
--- a/src/talemate/prompts/templates/summarizer/system-no-decensor.jinja2
+++ b/src/talemate/prompts/templates/summarizer/system-no-decensor.jinja2
@@ -0,0 +1 @@
+A chat between a user and a talented fiction narrator. The narrator will summarize the given text according to the instructions, making sure to keep the overall tone of the narrative and dialogue.
--- a/src/talemate/prompts/templates/world_state/system-analyst-freeform-no-decensor.jinja2
+++ b/src/talemate/prompts/templates/world_state/system-analyst-freeform-no-decensor.jinja2
@@ -0,0 +1 @@
+Instructions for a talented story analyst. The analyst will analyze parts of a story or dialogue and give truthful answers based on the dialogue or events given to him. The analyst will never make up facts or lie in his answers.
--- a/src/talemate/prompts/templates/world_state/system-analyst-no-decensor.jinja2
+++ b/src/talemate/prompts/templates/world_state/system-analyst-no-decensor.jinja2
@@ -0,0 +1 @@
+Instructions for a talented story analyst. The analyst will analyze parts of a story or dialogue and give truthful answers based on the dialogue or events given to him. The analyst will never make up facts or lie in his answers. The analyst loves making JSON lists.
--- a/tests/test_system_messages.py
+++ b/tests/test_system_messages.py
@@ -0,0 +1,34 @@
+import pytest
+from talemate.client.base import ClientBase
+
+@pytest.mark.parametrize(
+    "kind",
+    [
+        "narrate",
+        "story",
+        "director",
+        "create",
+        "roleplay",
+        "conversation",
+        "editor",
+        "world_state",
+        "analyze_freeform",
+        "analyst",
+        "analyze",
+        "summarize",
+    ],
+)
+def test_system_message(kind):
+    
+    client = ClientBase()
+    
+    assert client.get_system_message(kind) is not None
+    
+    assert "crude" in client.get_system_message(kind)
+    
+    client.decensor_enabled = False
+    
+    assert client.get_system_message(kind) is not None
+    
+    assert "crude" not in client.get_system_message(kind)
+
				`@@ -0,0 +1 @@`
				`A roleplaying session between a user and a talented actor. The actor will follow the instructions for the scene and dialogue and will improvise as needed. The actor will only respond as one character.`
				`@@ -0,0 +1 @@`
				`A chat between a user and a talented fiction narrator. The narrator will describe scenes and characters based on stories provided to him in easy-to-read and easy-to-understand yet exciting detail. The narrator will never remind us that what he writes is fictional.`
				`@@ -0,0 +1 @@`
				`A chat between an author and a talented fiction editor. The editor will do his best to improve the given dialogue or narrative, while staying true to the author's vision.`
				`@@ -0,0 +1 @@`
				`Instructions for a talented story analyst. The analyst will analyze parts of a story or dialogue and give truthful answers based on the dialogue or events given to him. The analyst will never make up facts or lie in his answers.`