* add gpt-4o

* add gpt-4o-2024-05-13

* fix koboldcpp client jiggle arguments

* kcpp api url default port 5001

* fix repetition breaking issues with kcpp client

* use tokencount endpoint if available

* auto configure visual agent with koboldcpp

* env var config for frontend serve

* it's not clear that gpt-4o is better than turbo, don't default to it yet

* 0.25.3

* handle kcpp being down during a1111 setup check

* only check a1111 setup if client is connected

* fix kcpp a1111 setup check

* fix issue where saving a new scene could cause recent config changes to revert
veguAI
2024-05-15 00:31:36 +03:00
committed by GitHub
parent 419371e0fb
commit 02c88f75a1
15 changed files with 185 additions and 24 deletions

View File

@@ -57,6 +57,7 @@ Please read the documents in the `docs` folder for more advanced configuration a
 - [Ready to go](#ready-to-go)
 - [Load the introductory scenario "Infinity Quest"](#load-the-introductory-scenario-infinity-quest)
 - [Loading character cards](#loading-character-cards)
+- [Configure for hosting](#configure-for-hosting)
 - [Text-to-Speech (TTS)](docs/tts.md)
 - [Visual Generation](docs/visual.md)
 - [ChromaDB (long term memory) configuration](docs/chromadb.md)
@@ -252,3 +253,17 @@ Expand the "Load" menu in the top left corner and either click on "Upload a char
 Once a character is uploaded, talemate may actually take a moment because it needs to convert it to a talemate format and will also run additional LLM prompts to generate character attributes and world state.
 Make sure you save the scene after the character is loaded as it can then be loaded as a normal talemate scenario in the future.
+## Configure for hosting
+
+By default talemate is configured to run locally. If you want to host it behind a reverse proxy or on a server, you will need to create some environment variables in the `talemate_frontend/.env.development.local` file.
+
+Start by copying `talemate_frontend/example.env.development.local` to `talemate_frontend/.env.development.local`.
+
+Then open the file and edit the `ALLOWED_HOSTS` and `VUE_APP_TALEMATE_BACKEND_WEBSOCKET_URL` variables.
+
+```sh
+ALLOWED_HOSTS=example.com
+# wss if behind ssl, ws if not
+VUE_APP_TALEMATE_BACKEND_WEBSOCKET_URL=wss://example.com:5050
+```

View File

@@ -4,7 +4,7 @@ build-backend = "poetry.masonry.api"
 [tool.poetry]
 name = "talemate"
-version = "0.25.2"
+version = "0.25.3"
 description = "AI-backed roleplay and narrative tools"
 authors = ["FinalWombat"]
 license = "GNU Affero General Public License v3.0"

View File

@@ -2,4 +2,4 @@ from .agents import Agent
 from .client import TextGeneratorWebuiClient
 from .tale_mate import *
-VERSION = "0.25.2"
+VERSION = "0.25.3"

View File

@@ -221,6 +221,9 @@ class Agent(ABC):
         if callback:
             await callback()
 
+    async def setup_check(self):
+        return False
+
     async def ready_check(self, task: asyncio.Task = None):
         self.ready_check_error = None
         if task:

View File

@@ -80,6 +80,11 @@ class VisualBase(Agent):
                 ),
             },
         ),
+        "automatic_setup": AgentAction(
+            enabled=True,
+            label="Automatic Setup",
+            description="Automatically setup the visual agent if the selected client has an implementation of the selected backend. (Like the KoboldCpp Automatic1111 api)",
+        ),
        "automatic_generation": AgentAction(
            enabled=False,
            label="Automatic Generation",
@@ -187,9 +192,11 @@ class VisualBase(Agent):
         prev_ready = self.backend_ready
         self.backend_ready = False
         self.ready_check_error = str(error)
+        await self.setup_check()
+
         if prev_ready:
             await self.emit_status()
 
     async def ready_check(self):
         if not self.enabled:
             return
@@ -198,6 +205,15 @@ class VisualBase(Agent):
         task = asyncio.create_task(fn())
         await super().ready_check(task)
 
+    async def setup_check(self):
+        if not self.actions["automatic_setup"].enabled:
+            return
+
+        backend = self.backend
+
+        if self.client and hasattr(self.client, f"visual_{backend.lower()}_setup"):
+            await getattr(self.client, f"visual_{backend.lower()}_setup")(self)
+
     async def apply_config(self, *args, **kwargs):
         try:
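
The new hook resolves a client-side implementation purely by naming convention: a backend named `AUTOMATIC1111` maps to a `visual_automatic1111_setup` method on the client. A minimal sketch of that getattr dispatch (the class names here are stand-ins, not Talemate's real classes):

```python
import asyncio

class StubClient:
    """Stand-in client exposing a backend-specific setup hook."""

    async def visual_automatic1111_setup(self, agent) -> bool:
        print(f"auto-configuring visual agent for {agent.backend}")
        return True

class StubVisualAgent:
    backend = "AUTOMATIC1111"

    def __init__(self, client):
        self.client = client

    async def setup_check(self):
        # look up visual_<backend>_setup on the client, if it implements it
        hook = getattr(self.client, f"visual_{self.backend.lower()}_setup", None)
        if hook is not None:
            await hook(self)

asyncio.run(StubVisualAgent(StubClient()).setup_check())
```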

View File

@@ -122,6 +122,10 @@ class ClientBase:
         """
         return self.Meta().requires_prompt_template
 
+    @property
+    def max_tokens_param_name(self):
+        return "max_tokens"
+
     def set_client(self, **kwargs):
         self.client = AsyncOpenAI(base_url=self.api_url, api_key="sk-1111")
@@ -659,7 +663,7 @@ class ClientBase:
         # then we pad the max_tokens by the pad_max_tokens amount
-        prompt_param["max_tokens"] += pad_max_tokens
+        prompt_param[self.max_tokens_param_name] += pad_max_tokens
 
         # send the prompt again
         # we use the repetition_adjustment method to further encourage
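
The property gives subclasses a single place to rename the max-token parameter, so the padding logic above stays generic; KoboldCpp's united API expects `max_length` where the OpenAI-compatible endpoint keeps `max_tokens`. A toy version of the override (simplified; not the actual ClientBase):

```python
class BaseClientSketch:
    @property
    def max_tokens_param_name(self) -> str:
        return "max_tokens"

    def pad_max_tokens(self, prompt_param: dict, pad: int) -> dict:
        # same padding logic as above, now parameter-name agnostic
        prompt_param[self.max_tokens_param_name] += pad
        return prompt_param

class KoboldSketch(BaseClientSketch):
    is_openai = False  # united api

    @property
    def max_tokens_param_name(self) -> str:
        return "max_tokens" if self.is_openai else "max_length"

print(KoboldSketch().pad_max_tokens({"max_length": 512}, 64))  # {'max_length': 576}
```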

View File

@@ -1,18 +1,24 @@
 import random
 import re
+from typing import TYPE_CHECKING
 
 # import urljoin
-from urllib.parse import urljoin
+from urllib.parse import urljoin, urlparse
 
 import httpx
 import structlog
 
 from talemate.client.base import STOPPING_STRINGS, ClientBase, Defaults, ExtraField
 from talemate.client.registry import register
+import talemate.util as util
+
+if TYPE_CHECKING:
+    from talemate.agents.visual import VisualBase
 
 log = structlog.get_logger("talemate.client.koboldcpp")
 
 class KoboldCppClientDefaults(Defaults):
+    api_url: str = "http://localhost:5001"
     api_key: str = ""
@@ -35,6 +41,11 @@ class KoboldCppClient(ClientBase):
             headers["Authorization"] = f"Bearer {self.api_key}"
         return headers
 
+    @property
+    def url(self) -> str:
+        parts = urlparse(self.api_url)
+        return f"{parts.scheme}://{parts.netloc}"
+
     @property
     def is_openai(self) -> bool:
         """
@@ -63,13 +74,20 @@ class KoboldCppClient(ClientBase):
             # join /api/v1/generate
             return urljoin(self.api_url, "generate")
 
+    @property
+    def max_tokens_param_name(self):
+        if self.is_openai:
+            return "max_tokens"
+        else:
+            return "max_length"
+
     def api_endpoint_specified(self, url: str) -> bool:
         return "/v1" in self.api_url
 
     def ensure_api_endpoint_specified(self):
         if not self.api_endpoint_specified(self.api_url):
             # url doesn't specify the api endpoint
-            # use the koboldcpp openai api
+            # use the koboldcpp united api
             self.api_url = urljoin(self.api_url.rstrip("/") + "/", "/api/v1/")
         if not self.api_url.endswith("/"):
             self.api_url += "/"
@@ -127,6 +145,9 @@ class KoboldCppClient(ClientBase):
         self.api_key = kwargs.get("api_key", self.api_key)
         self.ensure_api_endpoint_specified()
 
     async def get_model_name(self):
         self.ensure_api_endpoint_specified()
         async with httpx.AsyncClient() as client:
@@ -153,6 +174,35 @@ class KoboldCppClient(ClientBase):
         return model_name
 
+    async def tokencount(self, content: str) -> int:
+        """
+        KoboldCpp has a tokencount endpoint we can use to count tokens
+        for the prompt and response
+
+        If the endpoint is not available, we will use the default token count estimate
+        """
+
+        # extract scheme and host from api url
+        parts = urlparse(self.api_url)
+        url_tokencount = f"{parts.scheme}://{parts.netloc}/api/extra/tokencount"
+
+        async with httpx.AsyncClient() as client:
+            response = await client.post(
+                url_tokencount,
+                json={"prompt": content},
+                timeout=None,
+                headers=self.request_headers,
+            )
+
+            if response.status_code == 404:
+                # kobold united doesn't have tokencount endpoint
+                return util.count_tokens(content)
+
+            tokencount = len(response.json().get("ids", []))
+            return tokencount
+
     async def generate(self, prompt: str, parameters: dict, kind: str):
         """
         Generates text from the given prompt and parameters.
@@ -160,6 +210,8 @@ class KoboldCppClient(ClientBase):
         parameters["prompt"] = prompt.strip(" ")
 
+        self._returned_prompt_tokens = await self.tokencount(parameters["prompt"])
+
         async with httpx.AsyncClient() as client:
             response = await client.post(
                 self.api_url_for_generation,
@@ -168,15 +220,18 @@ class KoboldCppClient(ClientBase):
                 headers=self.request_headers,
             )
             response_data = response.json()
+
             try:
                 if self.is_openai:
-                    return response_data["choices"][0]["text"]
+                    response_text = response_data["choices"][0]["text"]
                 else:
-                    return response_data["results"][0]["text"]
+                    response_text = response_data["results"][0]["text"]
             except (TypeError, KeyError) as exc:
                 log.error("Failed to generate text", exc=exc, response_data=response_data, response_status=response.status_code)
-                return ""
+                response_text = ""
+
+            self._returned_response_tokens = await self.tokencount(response_text)
+
+            return response_text
 
     def jiggle_randomness(self, prompt_config: dict, offset: float = 0.3) -> dict:
         """
@@ -185,12 +240,20 @@ class KoboldCppClient(ClientBase):
         """
         temp = prompt_config["temperature"]
-        rep_pen = prompt_config["rep_pen"]
+
+        if "rep_pen" in prompt_config:
+            rep_pen_key = "rep_pen"
+        elif "frequency_penalty" in prompt_config:
+            rep_pen_key = "frequency_penalty"
+        else:
+            rep_pen_key = "repetition_penalty"
+
+        rep_pen = prompt_config[rep_pen_key]
 
         min_offset = offset * 0.3
 
         prompt_config["temperature"] = random.uniform(temp + min_offset, temp + offset)
-        prompt_config["rep_pen"] = random.uniform(
+        prompt_config[rep_pen_key] = random.uniform(
             rep_pen + min_offset * 0.3, rep_pen + offset * 0.3
         )
@@ -199,3 +262,42 @@ class KoboldCppClient(ClientBase):
             self.api_key = kwargs.pop("api_key")
 
         super().reconfigure(**kwargs)
+
+    async def visual_automatic1111_setup(self, visual_agent: "VisualBase") -> bool:
+        """
+        Automatically configure the visual agent for automatic1111
+        if the koboldcpp server has a SD model available
+        """
+
+        if not self.connected:
+            return False
+
+        sd_models_url = urljoin(self.url, "/sdapi/v1/sd-models")
+
+        async with httpx.AsyncClient() as client:
+            try:
+                response = await client.get(url=sd_models_url, timeout=2)
+            except Exception as exc:
+                log.error(f"Failed to fetch sd models from {sd_models_url}", exc=exc)
+                return False
+
+            if response.status_code != 200:
+                return False
+
+            response_data = response.json()
+            sd_model = response_data[0].get("model_name") if response_data else None
+
+            log.info("automatic1111_setup", sd_model=sd_model)
+
+            if not sd_model:
+                return False
+
+            visual_agent.actions["automatic1111"].config["api_url"].value = self.url
+            visual_agent.is_enabled = True
+
+            return True
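
For context, a standalone sketch of the new tokencount round-trip: POST the text to `/api/extra/tokencount` and count the returned token ids, falling back to an estimate on 404 (endpoint and `ids` field as used in the diff above; the fallback estimator is a stand-in for `util.count_tokens`):

```python
import httpx

async def kcpp_token_count(base_url: str, content: str) -> int:
    """Count tokens via KoboldCpp's /api/extra/tokencount, with a crude fallback."""
    async with httpx.AsyncClient() as client:
        response = await client.post(
            f"{base_url}/api/extra/tokencount",
            json={"prompt": content},
            timeout=None,
        )
    if response.status_code == 404:
        # kobold united builds lack the endpoint; fall back to a rough
        # estimate (a stand-in for talemate's util.count_tokens)
        return len(content) // 4
    return len(response.json().get("ids", []))

# usage, assuming a local KoboldCpp on the new default port:
# import asyncio
# print(asyncio.run(kcpp_token_count("http://localhost:5001", "Hello world")))
```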

View File

@@ -28,12 +28,14 @@ SUPPORTED_MODELS = [
     "gpt-4-turbo-preview",
     "gpt-4-turbo-2024-04-09",
     "gpt-4-turbo",
+    "gpt-4o-2024-05-13",
+    "gpt-4o",
 ]
 
+# any model starting with gpt-4- is assumed to support 'json_object'
+# for others we need to explicitly state the model name
 JSON_OBJECT_RESPONSE_MODELS = [
-    "gpt-4-1106-preview",
-    "gpt-4-0125-preview",
-    "gpt-4-turbo-preview",
+    "gpt-4o",
     "gpt-3.5-turbo-0125",
 ]
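
The new comment implies the capability check pairs a `gpt-4-` prefix test with the explicit list; the actual check isn't shown in this hunk, so the following is a hedged sketch of that logic:

```python
JSON_OBJECT_RESPONSE_MODELS = [
    "gpt-4o",
    "gpt-3.5-turbo-0125",
]

def supports_json_object(model: str) -> bool:
    # gpt-4-* models are assumed to support response_format "json_object";
    # anything else must be listed explicitly (hence gpt-4o above).
    return model.startswith("gpt-4-") or model in JSON_OBJECT_RESPONSE_MODELS

assert supports_json_object("gpt-4-turbo-preview")
assert supports_json_object("gpt-4o")
assert not supports_json_object("gpt-3.5-turbo")
```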

View File

@@ -187,3 +187,5 @@ async def agent_ready_checks():
     for agent in AGENTS.values():
         if agent and agent.enabled:
             await agent.ready_check()
+        elif agent and not agent.enabled:
+            await agent.setup_check()
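
Combined with the base-class `setup_check` stub added earlier, the periodic loop now polls disabled agents too, giving them a chance to auto-configure and enable themselves (a compressed, self-contained sketch of the pattern, not the real scheduler):

```python
import asyncio

class AgentStub:
    """Stand-in agent mirroring the enabled/ready/setup contract."""
    enabled = False

    async def ready_check(self):
        print("ready check (runs for enabled agents)")

    async def setup_check(self):
        print("setup check (disabled agents may auto-configure here)")

async def agent_ready_checks(agents):
    for agent in agents:
        if agent and agent.enabled:
            await agent.ready_check()
        elif agent and not agent.enabled:
            await agent.setup_check()

asyncio.run(agent_ready_checks([AgentStub()]))
```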

View File

@@ -2123,7 +2123,7 @@ class Scene(Emitter):
     async def add_to_recent_scenes(self):
         log.debug("add_to_recent_scenes", filename=self.filename)
-        config = Config(**self.config)
+        config = load_config(as_model=True)
         config.recent_scenes.push(self)
         config.save()
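
The revert bug came from rebuilding the config from the scene's in-memory snapshot, which could be stale by save time; re-reading from disk keeps interim edits. A self-contained toy of the stale-snapshot failure (plain dicts stand in for the real Config model):

```python
# pretend on-disk config
disk = {"theme": "dark", "recent_scenes": []}

snapshot = dict(disk)             # taken when the scene was loaded
disk["theme"] = "light"           # user changes config while the scene is open

# old behaviour: Config(**self.config) saved the stale snapshot back
disk = dict(snapshot)
assert disk["theme"] == "dark"    # recent change reverted

# new behaviour: load_config(as_model=True) re-reads before saving
disk["theme"] = "light"
fresh = dict(disk)                # re-read current state
fresh["recent_scenes"].append("new_scene.json")
disk = fresh
assert disk["theme"] == "light"   # recent change preserved
```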

View File

@@ -0,0 +1,3 @@
+ALLOWED_HOSTS=example.com
+# wss if behind ssl, ws if not
+VUE_APP_TALEMATE_BACKEND_WEBSOCKET_URL=wss://example.com:5050

View File

@@ -1,12 +1,12 @@
 {
   "name": "talemate_frontend",
-  "version": "0.25.2",
+  "version": "0.25.3",
   "lockfileVersion": 2,
   "requires": true,
   "packages": {
     "": {
       "name": "talemate_frontend",
-      "version": "0.25.2",
+      "version": "0.25.3",
       "dependencies": {
         "@codemirror/lang-markdown": "^6.2.5",
         "@codemirror/theme-one-dark": "^6.1.2",

View File

@@ -1,6 +1,6 @@
 {
   "name": "talemate_frontend",
-  "version": "0.25.2",
+  "version": "0.25.3",
   "private": true,
   "scripts": {
     "serve": "vue-cli-service serve",

View File

@@ -303,9 +303,11 @@ export default {
             this.connecting = true;
 
             let currentUrl = new URL(window.location.href);
-            console.log(currentUrl);
-            this.websocket = new WebSocket(`ws://${currentUrl.hostname}:5050/ws`);
+            let websocketUrl = process.env.VUE_APP_TALEMATE_BACKEND_WEBSOCKET_URL || `ws://${currentUrl.hostname}:5050/ws`;
+            console.log("urls", { websocketUrl, currentUrl }, {env : process.env});
+            this.websocket = new WebSocket(websocketUrl);
             console.log("Websocket connecting ...")
             this.websocket.onmessage = this.handleMessage;
             this.websocket.onopen = () => {

View File

@@ -1,4 +1,16 @@
const { defineConfig } = require('@vue/cli-service') const { defineConfig } = require('@vue/cli-service')
const ALLOWED_HOSTS = process.env.ALLOWED_HOSTS || "all"
const VUE_APP_TALEMATE_BACKEND_WEBSOCKET_URL = process.env.VUE_APP_TALEMATE_BACKEND_WEBSOCKET_URL || null
// if ALLOWED_HOSTS is set and has , then split it
if (ALLOWED_HOSTS !== "all") {
ALLOWED_HOSTS = ALLOWED_HOSTS.split(",")
}
console.log("ALLOWED_HOSTS", ALLOWED_HOSTS)
console.log("VUE_APP_TALEMATE_BACKEND_WEBSOCKET_URL", VUE_APP_TALEMATE_BACKEND_WEBSOCKET_URL)
module.exports = defineConfig({ module.exports = defineConfig({
transpileDependencies: true, transpileDependencies: true,
@@ -9,7 +21,7 @@ module.exports = defineConfig({
}, },
devServer: { devServer: {
allowedHosts: "all", allowedHosts: ALLOWED_HOSTS,
client: { client: {
overlay: { overlay: {
warnings: false, warnings: false,