diff --git a/README.md b/README.md
index f29771b6..8e6dd782 100644
--- a/README.md
+++ b/README.md
@@ -57,6 +57,7 @@ Please read the documents in the `docs` folder for more advanced configuration a
 - [Ready to go](#ready-to-go)
 - [Load the introductory scenario "Infinity Quest"](#load-the-introductory-scenario-infinity-quest)
 - [Loading character cards](#loading-character-cards)
+- [Configure for hosting](#configure-for-hosting)
 - [Text-to-Speech (TTS)](docs/tts.md)
 - [Visual Generation](docs/visual.md)
 - [ChromaDB (long term memory) configuration](docs/chromadb.md)
@@ -252,3 +253,17 @@ Expand the "Load" menu in the top left corner and either click on "Upload a char
 Once a character is uploaded, talemate may actually take a moment because it needs to convert it to a talemate format and will also run additional LLM prompts to generate character attributes and world state.
 
 Make sure you save the scene after the character is loaded as it can then be loaded as normal talemate scenario in the future.
+
+## Configure for hosting
+
+By default talemate is configured to run locally. If you want to host it behind a reverse proxy or on a server, you will need to create some environment variables in the `talemate_frontend/.env.development.local` file.
+
+Start by copying `talemate_frontend/example.env.development.local` to `talemate_frontend/.env.development.local`.
+
+Then open the file and edit the `ALLOWED_HOSTS` and `VUE_APP_TALEMATE_BACKEND_WEBSOCKET_URL` variables.
+
+```sh
+ALLOWED_HOSTS=example.com
+# wss if behind ssl, ws if not
+VUE_APP_TALEMATE_BACKEND_WEBSOCKET_URL=wss://example.com:5050
+```
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index 60e38161..146098c4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "poetry.masonry.api"
 
 [tool.poetry]
 name = "talemate"
-version = "0.25.2"
+version = "0.25.3"
 description = "AI-backed roleplay and narrative tools"
 authors = ["FinalWombat"]
 license = "GNU Affero General Public License v3.0"
diff --git a/src/talemate/__init__.py b/src/talemate/__init__.py
index 60d9ae28..050e5050 100644
--- a/src/talemate/__init__.py
+++ b/src/talemate/__init__.py
@@ -2,4 +2,4 @@ from .agents import Agent
 from .client import TextGeneratorWebuiClient
 from .tale_mate import *
 
-VERSION = "0.25.2"
+VERSION = "0.25.3"
diff --git a/src/talemate/agents/base.py b/src/talemate/agents/base.py
index ca148f9c..e3af4866 100644
--- a/src/talemate/agents/base.py
+++ b/src/talemate/agents/base.py
@@ -221,6 +221,9 @@ class Agent(ABC):
         if callback:
             await callback()
 
+    async def setup_check(self):
+        return False
+
     async def ready_check(self, task: asyncio.Task = None):
         self.ready_check_error = None
         if task:
diff --git a/src/talemate/agents/visual/__init__.py b/src/talemate/agents/visual/__init__.py
index fe16ab6f..411ea1b7 100644
--- a/src/talemate/agents/visual/__init__.py
+++ b/src/talemate/agents/visual/__init__.py
@@ -80,6 +80,11 @@ class VisualBase(Agent):
                 ),
             },
         ),
+        "automatic_setup": AgentAction(
+            enabled=True,
+            label="Automatic Setup",
+            description="Automatically set up the visual agent if the selected client has an implementation of the selected backend. (Like the KoboldCpp Automatic1111 api)",
+        ),
         "automatic_generation": AgentAction(
             enabled=False,
             label="Automatic Generation",
@@ -187,8 +192,10 @@ class VisualBase(Agent):
         prev_ready = self.backend_ready
         self.backend_ready = False
         self.ready_check_error = str(error)
+        await self.setup_check()
         if prev_ready:
             await self.emit_status()
+
 
     async def ready_check(self):
         if not self.enabled:
@@ -198,6 +205,15 @@
         task = asyncio.create_task(fn())
         await super().ready_check(task)
 
+    async def setup_check(self):
+
+        if not self.actions["automatic_setup"].enabled:
+            return
+
+        backend = self.backend
+        if self.client and hasattr(self.client, f"visual_{backend.lower()}_setup"):
+            await getattr(self.client, f"visual_{backend.lower()}_setup")(self)
+
     async def apply_config(self, *args, **kwargs):
 
         try:
diff --git a/src/talemate/client/base.py b/src/talemate/client/base.py
index a72436d6..448d9d9c 100644
--- a/src/talemate/client/base.py
+++ b/src/talemate/client/base.py
@@ -122,6 +122,10 @@ class ClientBase:
         """
         return self.Meta().requires_prompt_template
 
+    @property
+    def max_tokens_param_name(self):
+        return "max_tokens"
+
     def set_client(self, **kwargs):
         self.client = AsyncOpenAI(base_url=self.api_url, api_key="sk-1111")
 
@@ -625,7 +629,7 @@ class ClientBase:
         is_repetition, similarity_score, matched_line = util.similarity_score(
             response, finalized_prompt.split("\n"), similarity_threshold=80
         )
-        
+
         if not is_repetition:
             # not a repetition, return the response
 
@@ -659,7 +663,7 @@ class ClientBase:
 
             # then we pad the max_tokens by the pad_max_tokens amount
 
-            prompt_param["max_tokens"] += pad_max_tokens
+            prompt_param[self.max_tokens_param_name] += pad_max_tokens
 
             # send the prompt again
             # we use the repetition_adjustment method to further encourage
@@ -681,7 +685,7 @@ class ClientBase:
 
             # a lot of the times the response will now contain the repetition + something new
             # so we dedupe the response to remove the repetition on sentences level
-            
+
             response = util.dedupe_sentences(
                 response, matched_line, similarity_threshold=85, debug=True
             )
diff --git a/src/talemate/client/koboldccp.py b/src/talemate/client/koboldccp.py
index cf31289c..3cc471d9 100644
--- a/src/talemate/client/koboldccp.py
+++ b/src/talemate/client/koboldccp.py
@@ -1,18 +1,24 @@
 import random
 import re
+from typing import TYPE_CHECKING
 
 # import urljoin
-from urllib.parse import urljoin
+from urllib.parse import urljoin, urlparse
 
 import httpx
 import structlog
 
 from talemate.client.base import STOPPING_STRINGS, ClientBase, Defaults, ExtraField
 from talemate.client.registry import register
+import talemate.util as util
+
+if TYPE_CHECKING:
+    from talemate.agents.visual import VisualBase
 
 log = structlog.get_logger("talemate.client.koboldcpp")
 
 
 class KoboldCppClientDefaults(Defaults):
+    api_url: str = "http://localhost:5001"
     api_key: str = ""
 
@@ -35,6 +41,11 @@
             headers["Authorization"] = f"Bearer {self.api_key}"
         return headers
 
+    @property
+    def url(self) -> str:
+        parts = urlparse(self.api_url)
+        return f"{parts.scheme}://{parts.netloc}"
+
     @property
     def is_openai(self) -> bool:
         """
@@ -63,13 +74,20 @@
         # join /api/v1/generate
         return urljoin(self.api_url, "generate")
 
+    @property
+    def max_tokens_param_name(self):
+        if self.is_openai:
+            return "max_tokens"
+        else:
+            return "max_length"
+
     def api_endpoint_specified(self, url: str) -> bool:
         return "/v1" in self.api_url
 
     def ensure_api_endpoint_specified(self):
         if not self.api_endpoint_specified(self.api_url):
             # url doesn't specify the api endpoint
-            # use the koboldcpp openai api
+            # use the koboldcpp united api
             self.api_url = urljoin(self.api_url.rstrip("/") + "/", "/api/v1/")
         if not self.api_url.endswith("/"):
             self.api_url += "/"
@@ -126,6 +144,9 @@
     def set_client(self, **kwargs):
         self.api_key = kwargs.get("api_key", self.api_key)
         self.ensure_api_endpoint_specified()
+
+
+
 
     async def get_model_name(self):
         self.ensure_api_endpoint_specified()
@@ -153,6 +174,35 @@
 
         return model_name
 
+    async def tokencount(self, content:str) -> int:
+        """
+        KoboldCpp has a tokencount endpoint we can use to count tokens
+        for the prompt and response
+
+        If the endpoint is not available, we will use the default token count estimate
+        """
+
+        # extract scheme and host from api url
+
+        parts = urlparse(self.api_url)
+
+        url_tokencount = f"{parts.scheme}://{parts.netloc}/api/extra/tokencount"
+
+        async with httpx.AsyncClient() as client:
+            response = await client.post(
+                url_tokencount,
+                json={"prompt":content},
+                timeout=None,
+                headers=self.request_headers,
+            )
+
+            if response.status_code == 404:
+                # kobold united doesn't have tokencount endpoint
+                return util.count_tokens(content)
+
+            tokencount = len(response.json().get("ids",[]))
+            return tokencount
+
     async def generate(self, prompt: str, parameters: dict, kind: str):
         """
         Generates text from the given prompt and parameters.
@@ -160,6 +210,8 @@
 
         parameters["prompt"] = prompt.strip(" ")
 
+        self._returned_prompt_tokens = await self.tokencount(parameters["prompt"] )
+
         async with httpx.AsyncClient() as client:
             response = await client.post(
                 self.api_url_for_generation,
@@ -168,15 +220,18 @@
                 headers=self.request_headers,
             )
             response_data = response.json()
-
         try:
             if self.is_openai:
-                return response_data["choices"][0]["text"]
+                response_text = response_data["choices"][0]["text"]
             else:
-                return response_data["results"][0]["text"]
+                response_text = response_data["results"][0]["text"]
         except (TypeError, KeyError) as exc:
             log.error("Failed to generate text", exc=exc, response_data=response_data, response_status=response.status_code)
-            return ""
+            response_text = ""
+
+        self._returned_response_tokens = await self.tokencount(response_text)
+        return response_text
+
 
     def jiggle_randomness(self, prompt_config: dict, offset: float = 0.3) -> dict:
         """
@@ -185,17 +240,64 @@
         """
 
         temp = prompt_config["temperature"]
-        rep_pen = prompt_config["rep_pen"]
+
+        if "rep_pen" in prompt_config:
+            rep_pen_key = "rep_pen"
+        elif "frequency_penalty" in prompt_config:
+            rep_pen_key = "frequency_penalty"
+        else:
+            rep_pen_key = "repetition_penalty"
+
+        rep_pen = prompt_config[rep_pen_key]
 
         min_offset = offset * 0.3
         prompt_config["temperature"] = random.uniform(temp + min_offset, temp + offset)
-        prompt_config["rep_pen"] = random.uniform(
+        prompt_config[rep_pen_key] = random.uniform(
             rep_pen + min_offset * 0.3, rep_pen + offset * 0.3
         )
-        
+
 
     def reconfigure(self, **kwargs):
         if "api_key" in kwargs:
             self.api_key = kwargs.pop("api_key")
 
         super().reconfigure(**kwargs)
+
+
+    async def visual_automatic1111_setup(self, visual_agent:"VisualBase") -> bool:
+
+        """
+        Automatically configure the visual agent for automatic1111
+        if the koboldcpp server has an SD model available
+        """
+
+        if not self.connected:
+            return False
+
+        sd_models_url = urljoin(self.url, "/sdapi/v1/sd-models")
+
+        async with httpx.AsyncClient() as client:
+
+            try:
+                response = await client.get(
+                    url=sd_models_url, timeout=2
+                )
+            except Exception as exc:
+                log.error(f"Failed to fetch sd models from {sd_models_url}", exc=exc)
+                return False
+
+            if response.status_code != 200:
+                return False
+
+            response_data = response.json()
+
+            sd_model = response_data[0].get("model_name") if response_data else None
+
+            log.info("automatic1111_setup", sd_model=sd_model)
+            if not sd_model:
+                return False
+
+            visual_agent.actions["automatic1111"].config["api_url"].value = self.url
+            visual_agent.is_enabled = True
+            return True
+        
\ No newline at end of file
diff --git a/src/talemate/client/openai.py b/src/talemate/client/openai.py
index 44ddc70b..b3a84f1a 100644
--- a/src/talemate/client/openai.py
+++ b/src/talemate/client/openai.py
@@ -28,12 +28,14 @@ SUPPORTED_MODELS = [
     "gpt-4-turbo-preview",
     "gpt-4-turbo-2024-04-09",
     "gpt-4-turbo",
+    "gpt-4o-2024-05-13",
+    "gpt-4o",
 ]
 
+# any model starting with gpt-4- is assumed to support 'json_object'
+# for others we need to explicitly state the model name
 JSON_OBJECT_RESPONSE_MODELS = [
-    "gpt-4-1106-preview",
-    "gpt-4-0125-preview",
-    "gpt-4-turbo-preview",
+    "gpt-4o",
     "gpt-3.5-turbo-0125",
 ]
diff --git a/src/talemate/instance.py b/src/talemate/instance.py
index 7d06b41d..f279e334 100644
--- a/src/talemate/instance.py
+++ b/src/talemate/instance.py
@@ -187,3 +187,5 @@ async def agent_ready_checks():
     for agent in AGENTS.values():
         if agent and agent.enabled:
             await agent.ready_check()
+        elif agent and not agent.enabled:
+            await agent.setup_check()
diff --git a/src/talemate/tale_mate.py b/src/talemate/tale_mate.py
index 72b56652..d6340577 100644
--- a/src/talemate/tale_mate.py
+++ b/src/talemate/tale_mate.py
@@ -2123,7 +2123,7 @@ class Scene(Emitter):
 
     async def add_to_recent_scenes(self):
         log.debug("add_to_recent_scenes", filename=self.filename)
-        config = Config(**self.config)
+        config = load_config(as_model=True)
         config.recent_scenes.push(self)
         config.save()
diff --git a/talemate_frontend/example.env.development.local b/talemate_frontend/example.env.development.local
new file mode 100644
index 00000000..5cae18be
--- /dev/null
+++ b/talemate_frontend/example.env.development.local
@@ -0,0 +1,3 @@
+ALLOWED_HOSTS=example.com
+# wss if behind ssl, ws if not
+VUE_APP_TALEMATE_BACKEND_WEBSOCKET_URL=wss://example.com:5050
\ No newline at end of file
diff --git a/talemate_frontend/package-lock.json b/talemate_frontend/package-lock.json
index 7e37ac22..e0aefa4c 100644
--- a/talemate_frontend/package-lock.json
+++ b/talemate_frontend/package-lock.json
@@ -1,12 +1,12 @@
 {
   "name": "talemate_frontend",
-  "version": "0.25.2",
+  "version": "0.25.3",
   "lockfileVersion": 2,
   "requires": true,
   "packages": {
     "": {
       "name": "talemate_frontend",
-      "version": "0.25.2",
+      "version": "0.25.3",
       "dependencies": {
         "@codemirror/lang-markdown": "^6.2.5",
         "@codemirror/theme-one-dark": "^6.1.2",
diff --git a/talemate_frontend/package.json b/talemate_frontend/package.json
index 12c092ab..4b88c730 100644
--- a/talemate_frontend/package.json
+++ b/talemate_frontend/package.json
@@ -1,6 +1,6 @@
 {
   "name": "talemate_frontend",
-  "version": "0.25.2",
+  "version": "0.25.3",
   "private": true,
   "scripts": {
     "serve": "vue-cli-service serve",
diff --git a/talemate_frontend/src/components/TalemateApp.vue b/talemate_frontend/src/components/TalemateApp.vue
index b52d58a8..253cded7 100644
--- a/talemate_frontend/src/components/TalemateApp.vue
+++ b/talemate_frontend/src/components/TalemateApp.vue
@@ -303,9 +303,11 @@ export default {
 
       this.connecting = true;
       let currentUrl = new URL(window.location.href);
-      console.log(currentUrl);
+      let websocketUrl = process.env.VUE_APP_TALEMATE_BACKEND_WEBSOCKET_URL || `ws://${currentUrl.hostname}:5050/ws`;
 
-      this.websocket = new WebSocket(`ws://${currentUrl.hostname}:5050/ws`);
+      console.log("urls", { websocketUrl, currentUrl }, {env : process.env});
+
+      this.websocket = new WebSocket(websocketUrl);
       console.log("Websocket connecting ...")
       this.websocket.onmessage = this.handleMessage;
       this.websocket.onopen = () => {
diff --git a/talemate_frontend/vue.config.js b/talemate_frontend/vue.config.js
index 09892f06..9073118a 100644
--- a/talemate_frontend/vue.config.js
+++ b/talemate_frontend/vue.config.js
@@ -1,4 +1,16 @@
 const { defineConfig } = require('@vue/cli-service')
+
+let ALLOWED_HOSTS = process.env.ALLOWED_HOSTS || "all"
+const VUE_APP_TALEMATE_BACKEND_WEBSOCKET_URL = process.env.VUE_APP_TALEMATE_BACKEND_WEBSOCKET_URL || null
+
+// if ALLOWED_HOSTS is set, split the comma separated list
+if (ALLOWED_HOSTS !== "all") {
+  ALLOWED_HOSTS = ALLOWED_HOSTS.split(",")
+}
+
+console.log("ALLOWED_HOSTS", ALLOWED_HOSTS)
+console.log("VUE_APP_TALEMATE_BACKEND_WEBSOCKET_URL", VUE_APP_TALEMATE_BACKEND_WEBSOCKET_URL)
+
 
 module.exports = defineConfig({
   transpileDependencies: true,
 
@@ -9,7 +21,7 @@ module.exports = defineConfig({
   },
 
   devServer: {
-    allowedHosts: "all",
+    allowedHosts: ALLOWED_HOSTS,
     client: {
       overlay: {
         warnings: false,