mirror of
https://github.com/vegu-ai/talemate.git
synced 2025-12-15 19:27:47 +01:00
0.25.3 (#113)
* add gpt-4o add gpt-4o-2024-05-13 * fix koboldcpp client jiggle arguments * kcpp api url default port 5001 * fix repetition breaking issues with kcpp client * use tokencount endpoint if available * auto configure visual agent with koboldcpp * env var config for frontend serve * its not clear that gpt-4o is better than turbo, dont default to it yet * 0.25.3 * handle kcpp being down during a1111 setup check * only check a1111 setup if client is connected * fix kcpp a1111 setup check * fix issue where saving a new scene could cause recent config changes to revert
This commit is contained in:
15
README.md
15
README.md
@@ -57,6 +57,7 @@ Please read the documents in the `docs` folder for more advanced configuration a
|
||||
- [Ready to go](#ready-to-go)
|
||||
- [Load the introductory scenario "Infinity Quest"](#load-the-introductory-scenario-infinity-quest)
|
||||
- [Loading character cards](#loading-character-cards)
|
||||
- [Configure for hosting](#configure-for-hosting)
|
||||
- [Text-to-Speech (TTS)](docs/tts.md)
|
||||
- [Visual Generation](docs/visual.md)
|
||||
- [ChromaDB (long term memory) configuration](docs/chromadb.md)
|
||||
@@ -252,3 +253,17 @@ Expand the "Load" menu in the top left corner and either click on "Upload a char
|
||||
Once a character is uploaded, talemate may actually take a moment because it needs to convert it to a talemate format and will also run additional LLM prompts to generate character attributes and world state.
|
||||
|
||||
Make sure you save the scene after the character is loaded, as it can then be loaded as a normal talemate scenario in the future.
|
||||
|
||||
## Configure for hosting
|
||||
|
||||
By default talemate is configured to run locally. If you want to host it behind a reverse proxy or on a server, you will need to create some environment variables in the `talemate_frontend/.env.development.local` file.
|
||||
|
||||
Start by copying `talemate_frontend/example.env.development.local` to `talemate_frontend/.env.development.local`.
|
||||
|
||||
Then open the file and edit the `ALLOWED_HOSTS` and `VUE_APP_TALEMATE_BACKEND_WEBSOCKET_URL` variables.
|
||||
|
||||
```sh
|
||||
ALLOWED_HOSTS=example.com
|
||||
# wss if behind ssl, ws if not
|
||||
VUE_APP_TALEMATE_BACKEND_WEBSOCKET_URL=wss://example.com:5050
|
||||
```
|
||||
@@ -4,7 +4,7 @@ build-backend = "poetry.masonry.api"
|
||||
|
||||
[tool.poetry]
|
||||
name = "talemate"
|
||||
version = "0.25.2"
|
||||
version = "0.25.3"
|
||||
description = "AI-backed roleplay and narrative tools"
|
||||
authors = ["FinalWombat"]
|
||||
license = "GNU Affero General Public License v3.0"
|
||||
|
||||
@@ -2,4 +2,4 @@ from .agents import Agent
|
||||
from .client import TextGeneratorWebuiClient
|
||||
from .tale_mate import *
|
||||
|
||||
VERSION = "0.25.2"
|
||||
VERSION = "0.25.3"
|
||||
|
||||
@@ -221,6 +221,9 @@ class Agent(ABC):
|
||||
if callback:
|
||||
await callback()
|
||||
|
||||
async def setup_check(self):
    """
    Base setup check; subclasses may override it.

    This default implementation performs no setup and always returns False.
    """
    return False
|
||||
|
||||
async def ready_check(self, task: asyncio.Task = None):
|
||||
self.ready_check_error = None
|
||||
if task:
|
||||
|
||||
@@ -80,6 +80,11 @@ class VisualBase(Agent):
|
||||
),
|
||||
},
|
||||
),
|
||||
"automatic_setup": AgentAction(
|
||||
enabled=True,
|
||||
label="Automatic Setup",
|
||||
description="Automatically setup the visual agent if the selected client has an implementation of the selected backend. (Like the KoboldCpp Automatic1111 api)",
|
||||
),
|
||||
"automatic_generation": AgentAction(
|
||||
enabled=False,
|
||||
label="Automatic Generation",
|
||||
@@ -187,8 +192,10 @@ class VisualBase(Agent):
|
||||
prev_ready = self.backend_ready
|
||||
self.backend_ready = False
|
||||
self.ready_check_error = str(error)
|
||||
await self.setup_check()
|
||||
if prev_ready:
|
||||
await self.emit_status()
|
||||
|
||||
|
||||
async def ready_check(self):
|
||||
if not self.enabled:
|
||||
@@ -198,6 +205,15 @@ class VisualBase(Agent):
|
||||
task = asyncio.create_task(fn())
|
||||
await super().ready_check(task)
|
||||
|
||||
async def setup_check(self):
    """
    Let the selected client configure this visual agent automatically.

    Nothing happens when the "automatic_setup" action is disabled or when
    no client is set. Otherwise the client is checked for a method named
    ``visual_<backend>_setup`` (backend name lowercased); if it exists it
    is awaited with this agent as its only argument.
    """
    if not self.actions["automatic_setup"].enabled:
        return

    backend = self.backend
    client = self.client

    if not client:
        return

    # e.g. backend "AUTOMATIC1111" -> "visual_automatic1111_setup"
    hook_name = f"visual_{backend.lower()}_setup"

    if hasattr(client, hook_name):
        await getattr(client, hook_name)(self)
|
||||
|
||||
async def apply_config(self, *args, **kwargs):
|
||||
|
||||
try:
|
||||
|
||||
@@ -122,6 +122,10 @@ class ClientBase:
|
||||
"""
|
||||
return self.Meta().requires_prompt_template
|
||||
|
||||
@property
def max_tokens_param_name(self):
    """
    Key under which the prompt parameters store the max tokens value.

    Used when padding the token budget of a prompt that is sent again.
    """
    return "max_tokens"
|
||||
|
||||
def set_client(self, **kwargs):
|
||||
self.client = AsyncOpenAI(base_url=self.api_url, api_key="sk-1111")
|
||||
|
||||
@@ -625,7 +629,7 @@ class ClientBase:
|
||||
is_repetition, similarity_score, matched_line = util.similarity_score(
|
||||
response, finalized_prompt.split("\n"), similarity_threshold=80
|
||||
)
|
||||
|
||||
|
||||
if not is_repetition:
|
||||
# not a repetition, return the response
|
||||
|
||||
@@ -659,7 +663,7 @@ class ClientBase:
|
||||
|
||||
# then we pad the max_tokens by the pad_max_tokens amount
|
||||
|
||||
prompt_param["max_tokens"] += pad_max_tokens
|
||||
prompt_param[self.max_tokens_param_name] += pad_max_tokens
|
||||
|
||||
# send the prompt again
|
||||
# we use the repetition_adjustment method to further encourage
|
||||
@@ -681,7 +685,7 @@ class ClientBase:
|
||||
|
||||
# a lot of the times the response will now contain the repetition + something new
|
||||
# so we dedupe the response to remove the repetition on sentences level
|
||||
|
||||
|
||||
response = util.dedupe_sentences(
|
||||
response, matched_line, similarity_threshold=85, debug=True
|
||||
)
|
||||
|
||||
@@ -1,18 +1,24 @@
|
||||
import random
|
||||
import re
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
# import urljoin
|
||||
from urllib.parse import urljoin
|
||||
from urllib.parse import urljoin, urlparse
|
||||
import httpx
|
||||
import structlog
|
||||
|
||||
from talemate.client.base import STOPPING_STRINGS, ClientBase, Defaults, ExtraField
|
||||
from talemate.client.registry import register
|
||||
import talemate.util as util
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from talemate.agents.visual import VisualBase
|
||||
|
||||
log = structlog.get_logger("talemate.client.koboldcpp")
|
||||
|
||||
|
||||
class KoboldCppClientDefaults(Defaults):
|
||||
api_url: str = "http://localhost:5001"
|
||||
api_key: str = ""
|
||||
|
||||
|
||||
@@ -35,6 +41,11 @@ class KoboldCppClient(ClientBase):
|
||||
headers["Authorization"] = f"Bearer {self.api_key}"
|
||||
return headers
|
||||
|
||||
@property
def url(self) -> str:
    """
    Scheme and network location of ``api_url``, with any path dropped
    (``http://localhost:5001/api/v1/`` -> ``http://localhost:5001``).
    """
    parsed = urlparse(self.api_url)
    return "://".join((parsed.scheme, parsed.netloc))
|
||||
|
||||
@property
|
||||
def is_openai(self) -> bool:
|
||||
"""
|
||||
@@ -63,13 +74,20 @@ class KoboldCppClient(ClientBase):
|
||||
# join /api/v1/generate
|
||||
return urljoin(self.api_url, "generate")
|
||||
|
||||
@property
def max_tokens_param_name(self):
    """
    Key under which the prompt parameters store the max tokens value:
    "max_tokens" when ``is_openai`` is true, "max_length" otherwise.
    """
    return "max_tokens" if self.is_openai else "max_length"
|
||||
|
||||
def api_endpoint_specified(self, url: str) -> bool:
    """
    Return True when ``url`` already contains an API version path ("/v1").

    The check is applied to the ``url`` argument itself rather than to
    ``self.api_url``, so any candidate url can be tested. An empty or
    missing url counts as not specifying an endpoint.
    """
    return bool(url) and "/v1" in url
|
||||
|
||||
def ensure_api_endpoint_specified(self):
|
||||
if not self.api_endpoint_specified(self.api_url):
|
||||
# url doesn't specify the api endpoint
|
||||
# use the koboldcpp openai api
|
||||
# use the koboldcpp united api
|
||||
self.api_url = urljoin(self.api_url.rstrip("/") + "/", "/api/v1/")
|
||||
if not self.api_url.endswith("/"):
|
||||
self.api_url += "/"
|
||||
@@ -126,6 +144,9 @@ class KoboldCppClient(ClientBase):
|
||||
def set_client(self, **kwargs):
    """
    Store the ``api_key`` keyword argument (keeping the current key when it
    is not given) and make sure ``api_url`` names an API endpoint.
    """
    current_key = self.api_key
    self.api_key = kwargs.get("api_key", current_key)

    self.ensure_api_endpoint_specified()
|
||||
|
||||
|
||||
|
||||
|
||||
async def get_model_name(self):
|
||||
self.ensure_api_endpoint_specified()
|
||||
@@ -153,6 +174,35 @@ class KoboldCppClient(ClientBase):
|
||||
|
||||
return model_name
|
||||
|
||||
async def tokencount(self, content: str) -> int:
    """
    KoboldCpp has a tokencount endpoint we can use to count tokens
    for the prompt and response.

    The endpoint is requested at ``/api/extra/tokencount`` on the scheme and
    host of ``api_url``. If it cannot provide a usable count, the default
    ``util.count_tokens`` estimate is returned instead. That covers any
    non-200 status (for example the 404 from Kobold United, which has no
    tokencount endpoint) and a body that is not JSON with an ``ids`` list.
    """

    # extract scheme and host from api url; the endpoint is not located
    # under the configured API path
    parts = urlparse(self.api_url)
    url_tokencount = f"{parts.scheme}://{parts.netloc}/api/extra/tokencount"

    async with httpx.AsyncClient() as client:
        response = await client.post(
            url_tokencount,
            json={"prompt": content},
            timeout=None,
            headers=self.request_headers,
        )

    if response.status_code != 200:
        # endpoint missing or the server reported an error
        return util.count_tokens(content)

    try:
        return len(response.json()["ids"])
    except (ValueError, KeyError, TypeError):
        # body was not JSON, or not the expected {"ids": [...]} object
        return util.count_tokens(content)
|
||||
|
||||
async def generate(self, prompt: str, parameters: dict, kind: str):
|
||||
"""
|
||||
Generates text from the given prompt and parameters.
|
||||
@@ -160,6 +210,8 @@ class KoboldCppClient(ClientBase):
|
||||
|
||||
parameters["prompt"] = prompt.strip(" ")
|
||||
|
||||
self._returned_prompt_tokens = await self.tokencount(parameters["prompt"] )
|
||||
|
||||
async with httpx.AsyncClient() as client:
|
||||
response = await client.post(
|
||||
self.api_url_for_generation,
|
||||
@@ -168,15 +220,18 @@ class KoboldCppClient(ClientBase):
|
||||
headers=self.request_headers,
|
||||
)
|
||||
response_data = response.json()
|
||||
|
||||
try:
|
||||
if self.is_openai:
|
||||
return response_data["choices"][0]["text"]
|
||||
response_text = response_data["choices"][0]["text"]
|
||||
else:
|
||||
return response_data["results"][0]["text"]
|
||||
response_text = response_data["results"][0]["text"]
|
||||
except (TypeError, KeyError) as exc:
|
||||
log.error("Failed to generate text", exc=exc, response_data=response_data, response_status=response.status_code)
|
||||
return ""
|
||||
response_text = ""
|
||||
|
||||
self._returned_response_tokens = await self.tokencount(response_text)
|
||||
return response_text
|
||||
|
||||
|
||||
def jiggle_randomness(self, prompt_config: dict, offset: float = 0.3) -> dict:
|
||||
"""
|
||||
@@ -185,17 +240,64 @@ class KoboldCppClient(ClientBase):
|
||||
"""
|
||||
|
||||
temp = prompt_config["temperature"]
|
||||
rep_pen = prompt_config["rep_pen"]
|
||||
|
||||
if "rep_pen" in prompt_config:
|
||||
rep_pen_key = "rep_pen"
|
||||
elif "frequency_penalty" in prompt_config:
|
||||
rep_pen_key = "frequency_penalty"
|
||||
else:
|
||||
rep_pen_key = "repetition_penalty"
|
||||
|
||||
rep_pen = prompt_config[rep_pen_key]
|
||||
|
||||
min_offset = offset * 0.3
|
||||
|
||||
prompt_config["temperature"] = random.uniform(temp + min_offset, temp + offset)
|
||||
prompt_config["rep_pen"] = random.uniform(
|
||||
prompt_config[rep_pen_key] = random.uniform(
|
||||
rep_pen + min_offset * 0.3, rep_pen + offset * 0.3
|
||||
)
|
||||
|
||||
|
||||
def reconfigure(self, **kwargs):
    """
    Apply new settings to the client.

    An ``api_key`` keyword is stored on the client and removed from
    ``kwargs``; the remaining keywords go to the parent ``reconfigure``.
    """
    try:
        new_key = kwargs.pop("api_key")
    except KeyError:
        # no api key given, keep the current one
        pass
    else:
        self.api_key = new_key

    super().reconfigure(**kwargs)
|
||||
|
||||
|
||||
async def visual_automatic1111_setup(self, visual_agent: "VisualBase") -> bool:
    """
    Automatically configure the visual agent for automatic1111
    if the koboldcpp server has a SD model available.

    Requests ``/sdapi/v1/sd-models`` on this client's server. When the first
    listed model has a ``model_name``, the agent's automatic1111 ``api_url``
    is set to this server's url and ``is_enabled`` is set to True.

    Returns True when the agent was configured. Returns False when the
    client is not connected, the request fails, the status is not 200, the
    body is not a JSON list of model objects, or no model name is reported.
    """

    if not self.connected:
        return False

    sd_models_url = urljoin(self.url, "/sdapi/v1/sd-models")

    async with httpx.AsyncClient() as client:
        try:
            response = await client.get(url=sd_models_url, timeout=2)
        except Exception as exc:
            # best effort: the server may be down or not serve the sd api
            log.error(f"Failed to fetch sd models from {sd_models_url}", exc=exc)
            return False

    if response.status_code != 200:
        return False

    try:
        response_data = response.json()
    except ValueError as exc:
        # a 200 response whose body is not JSON is treated like any other
        # failed check instead of raising out of the setup check
        log.error(f"Failed to parse sd models from {sd_models_url}", exc=exc)
        return False

    # only the first listed model is inspected
    first_model = None
    if isinstance(response_data, list) and response_data:
        first_model = response_data[0]

    sd_model = first_model.get("model_name") if isinstance(first_model, dict) else None

    log.info("automatic1111_setup", sd_model=sd_model)
    if not sd_model:
        return False

    visual_agent.actions["automatic1111"].config["api_url"].value = self.url
    visual_agent.is_enabled = True
    return True
|
||||
|
||||
@@ -28,12 +28,14 @@ SUPPORTED_MODELS = [
|
||||
"gpt-4-turbo-preview",
|
||||
"gpt-4-turbo-2024-04-09",
|
||||
"gpt-4-turbo",
|
||||
"gpt-4o-2024-05-13",
|
||||
"gpt-4o",
|
||||
]
|
||||
|
||||
# any model starting with gpt-4- is assumed to support 'json_object'
|
||||
# for others we need to explicitly state the model name
|
||||
JSON_OBJECT_RESPONSE_MODELS = [
|
||||
"gpt-4-1106-preview",
|
||||
"gpt-4-0125-preview",
|
||||
"gpt-4-turbo-preview",
|
||||
"gpt-4o",
|
||||
"gpt-3.5-turbo-0125",
|
||||
]
|
||||
|
||||
|
||||
@@ -187,3 +187,5 @@ async def agent_ready_checks():
|
||||
for agent in AGENTS.values():
|
||||
if agent and agent.enabled:
|
||||
await agent.ready_check()
|
||||
elif agent and not agent.enabled:
|
||||
await agent.setup_check()
|
||||
|
||||
@@ -2123,7 +2123,7 @@ class Scene(Emitter):
|
||||
|
||||
async def add_to_recent_scenes(self):
|
||||
log.debug("add_to_recent_scenes", filename=self.filename)
|
||||
config = Config(**self.config)
|
||||
config = load_config(as_model=True)
|
||||
config.recent_scenes.push(self)
|
||||
config.save()
|
||||
|
||||
|
||||
3
talemate_frontend/example.env.development.local
Normal file
3
talemate_frontend/example.env.development.local
Normal file
@@ -0,0 +1,3 @@
|
||||
ALLOWED_HOSTS=example.com
|
||||
# wss if behind ssl, ws if not
|
||||
VUE_APP_TALEMATE_BACKEND_WEBSOCKET_URL=wss://example.com:5050
|
||||
4
talemate_frontend/package-lock.json
generated
4
talemate_frontend/package-lock.json
generated
@@ -1,12 +1,12 @@
|
||||
{
|
||||
"name": "talemate_frontend",
|
||||
"version": "0.25.2",
|
||||
"version": "0.25.3",
|
||||
"lockfileVersion": 2,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "talemate_frontend",
|
||||
"version": "0.25.2",
|
||||
"version": "0.25.3",
|
||||
"dependencies": {
|
||||
"@codemirror/lang-markdown": "^6.2.5",
|
||||
"@codemirror/theme-one-dark": "^6.1.2",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "talemate_frontend",
|
||||
"version": "0.25.2",
|
||||
"version": "0.25.3",
|
||||
"private": true,
|
||||
"scripts": {
|
||||
"serve": "vue-cli-service serve",
|
||||
|
||||
@@ -303,9 +303,11 @@ export default {
|
||||
|
||||
this.connecting = true;
|
||||
let currentUrl = new URL(window.location.href);
|
||||
console.log(currentUrl);
|
||||
let websocketUrl = process.env.VUE_APP_TALEMATE_BACKEND_WEBSOCKET_URL || `ws://${currentUrl.hostname}:5050/ws`;
|
||||
|
||||
this.websocket = new WebSocket(`ws://${currentUrl.hostname}:5050/ws`);
|
||||
console.log("urls", { websocketUrl, currentUrl }, {env : process.env});
|
||||
|
||||
this.websocket = new WebSocket(websocketUrl);
|
||||
console.log("Websocket connecting ...")
|
||||
this.websocket.onmessage = this.handleMessage;
|
||||
this.websocket.onopen = () => {
|
||||
|
||||
@@ -1,4 +1,16 @@
|
||||
const { defineConfig } = require('@vue/cli-service')
|
||||
|
||||
/**
 * Convert the raw ALLOWED_HOSTS environment value into the value passed to
 * devServer.allowedHosts.
 *
 * An unset or empty value, or the literal "all", yields "all". Anything else
 * is treated as a comma separated list and returned as an array of host
 * names, with surrounding whitespace and empty entries removed.
 *
 * The parsing lives in a function so that ALLOWED_HOSTS is assigned exactly
 * once; reassigning a `const` binding throws a TypeError at load time.
 */
function parseAllowedHosts(raw) {
  if (!raw || raw === "all") {
    return "all"
  }
  return raw
    .split(",")
    .map((host) => host.trim())
    .filter((host) => host.length > 0)
}

const ALLOWED_HOSTS = parseAllowedHosts(process.env.ALLOWED_HOSTS)
const VUE_APP_TALEMATE_BACKEND_WEBSOCKET_URL = process.env.VUE_APP_TALEMATE_BACKEND_WEBSOCKET_URL || null
|
||||
|
||||
console.log("ALLOWED_HOSTS", ALLOWED_HOSTS)
|
||||
console.log("VUE_APP_TALEMATE_BACKEND_WEBSOCKET_URL", VUE_APP_TALEMATE_BACKEND_WEBSOCKET_URL)
|
||||
|
||||
module.exports = defineConfig({
|
||||
transpileDependencies: true,
|
||||
|
||||
@@ -9,7 +21,7 @@ module.exports = defineConfig({
|
||||
},
|
||||
|
||||
devServer: {
|
||||
allowedHosts: "all",
|
||||
allowedHosts: ALLOWED_HOSTS,
|
||||
client: {
|
||||
overlay: {
|
||||
warnings: false,
|
||||
|
||||
Reference in New Issue
Block a user