Files
talemate/tests/test_util_init.py
veguAI f5d41c04c8 0.37.0 (#267)
0.37.0

- **Director Planning** — Multi-step todo lists in director chat plus a Generate long progress action for multi-beat scene arcs.
- **Auto Narration** — Unified auto-narration replacing the old Narrate after Dialogue toggle, with a chance slider and weighted action mix.
- **LLM Prompt Templates Manager** — Dedicated UI tab for viewing, creating, editing, and deleting prompt templates.
- **Character Folders** — Collapsible folders in the World Editor character list, synced across linked scenes.
- **OpenAI Compatible TTS** — Connect any number of OpenAI-compatible TTS servers in parallel.
- **KoboldCpp TTS Auto-Setup** — KoboldCpp clients with a TTS model loaded register themselves as a TTS backend.
- **Model Testing Harness** — Bundled scene that runs basic capability tests against any connected LLM.

Plus 27 improvements and 28 bug fixes
2026-05-12 21:01:51 +03:00

352 lines
12 KiB
Python

"""Tests for talemate.util.__init__ pure helpers.
Covers count_tokens, limit_tokens, chunk_items_by_tokens, remove_substring_names,
select_best_texts_by_keyword, clean_id, and slugify.
"""
from talemate.scene_message import SceneMessage
from talemate.util import (
chunk_items_by_tokens,
clean_id,
count_tokens,
limit_tokens,
remove_substring_names,
select_best_texts_by_keyword,
slugify,
)
# ---------------------------------------------------------------------------
# count_tokens
# ---------------------------------------------------------------------------
def test_count_tokens_empty_string_is_zero():
    """An empty string must count as zero tokens."""
    token_count = count_tokens("")
    assert token_count == 0
def test_count_tokens_string_returns_positive_count():
    """Non-empty text counts at least one token, and longer text counts more."""
    short_count = count_tokens("hello world")
    # A short ASCII string is expected to encode to at least one token.
    assert short_count >= 1

    # A longer string must yield strictly more tokens than the short one.
    long_count = count_tokens("hello world goodbye world hello again")
    assert long_count > short_count
def test_count_tokens_list_sums_components():
    """Counting a list of strings equals the total of the per-string counts."""
    pieces = ["alpha", "beta gamma", "delta"]
    per_piece_total = sum(map(count_tokens, pieces))
    assert count_tokens(pieces) == per_piece_total
def test_count_tokens_empty_list_is_zero():
    """An empty list must count as zero tokens."""
    token_count = count_tokens([])
    assert token_count == 0
def test_count_tokens_scene_message_uses_str():
    """A SceneMessage must count the same as its plain message text."""
    scene_message = SceneMessage(message="hello world")
    message_count = count_tokens(scene_message)
    assert message_count == count_tokens("hello world")
def test_count_tokens_unknown_type_returns_zero():
    """Unsupported input types (int, dict, None) must count as zero tokens."""
    unsupported_inputs = (12345, {"a": 1}, None)
    for unsupported in unsupported_inputs:
        assert count_tokens(unsupported) == 0
def test_count_tokens_nested_list():
    """A nested list counts as the sum of all of its leaf strings."""
    nested = [["alpha", "beta"], "gamma"]
    alpha_count = count_tokens("alpha")
    beta_count = count_tokens("beta")
    gamma_count = count_tokens("gamma")
    assert count_tokens(nested) == alpha_count + beta_count + gamma_count
# ---------------------------------------------------------------------------
# limit_tokens
# ---------------------------------------------------------------------------
def test_limit_tokens_under_limit_returns_input_unchanged():
    """Text that already fits the budget must come back untouched."""
    original = "line one\nline two\nline three"
    # 1000 tokens leaves ample headroom for three short lines.
    limited = limit_tokens(original, 1000)
    assert limited == original
def test_limit_tokens_drops_trailing_lines_until_under_limit():
    """Over-budget text is shortened by removing lines from the end."""
    source_lines = [f"sentence number {i} in this paragraph" for i in range(20)]
    full_text = "\n".join(source_lines)

    # Budget is half of the list-based token count of all lines, so some
    # lines must be dropped.
    budget = count_tokens(source_lines) // 2
    trimmed = limit_tokens(full_text, budget)
    kept_lines = trimmed.split("\n")

    # The surviving lines, counted as a list, must fit the budget.
    assert count_tokens(kept_lines) <= budget
    # Lines are removed from the end only, so the result is a prefix.
    assert full_text.startswith(trimmed)
    # The first line must survive.
    assert kept_lines[0] == source_lines[0]
    # At least one line must have been removed.
    assert len(kept_lines) < len(source_lines)
def test_limit_tokens_returns_empty_when_first_line_exceeds_limit():
    """With a budget no line can satisfy, the result is the empty string."""
    two_lines = "this is a fairly long line of words\nanother line"
    # A zero-token budget cannot accommodate even the first line.
    trimmed = limit_tokens(two_lines, 0)
    assert trimmed == ""
# ---------------------------------------------------------------------------
# chunk_items_by_tokens
# ---------------------------------------------------------------------------
def test_chunk_items_by_tokens_groups_items_under_limit():
    """Small items that fit the budget together end up in a single chunk."""
    small_items = ["one", "two", "three", "four"]
    # The items are tiny, so a 100-token budget holds all of them at once.
    produced = list(chunk_items_by_tokens(small_items, max_tokens=100))
    assert produced == [small_items]
def test_chunk_items_by_tokens_starts_new_chunk_when_full():
    """A new chunk is opened once the next item would overflow the budget."""
    items = ["alpha beta", "gamma delta", "epsilon zeta"]
    # Budget equals the largest single item, so items cannot all share a chunk.
    budget = max(map(count_tokens, items))
    chunks = list(chunk_items_by_tokens(items, max_tokens=budget))

    # Every item must appear exactly once and in the original order.
    flattened = []
    for chunk in chunks:
        flattened.extend(chunk)
    assert flattened == items

    # No chunk may exceed the budget.
    for chunk in chunks:
        assert count_tokens(chunk) <= budget

    # The items must have been spread over more than one chunk.
    assert len(chunks) >= 2
def test_chunk_items_by_tokens_oversized_item_yielded_alone():
    """An item bigger than the budget is emitted in a chunk of its own."""
    long_text = " ".join(["word"] * 200)  # far larger than the 5-token budget
    items = ["small one", long_text, "small two"]
    chunks = list(chunk_items_by_tokens(items, max_tokens=5))

    # Exactly one chunk consists solely of the oversized item.
    solo_chunk_count = sum(1 for chunk in chunks if chunk == [long_text])
    assert solo_chunk_count == 1

    # Nothing is lost or reordered.
    flattened = []
    for chunk in chunks:
        flattened.extend(chunk)
    assert flattened == items
def test_chunk_items_by_tokens_filters_empty_when_filter_empty_true():
    """By default, empty, whitespace-only and None items are discarded."""
    mixed_items = ["one", "", " ", "two", None]
    produced = list(chunk_items_by_tokens(mixed_items, max_tokens=100))
    # Only the two real strings remain, packed into one chunk.
    expected = [["one", "two"]]
    assert produced == expected
def test_chunk_items_by_tokens_keeps_empty_when_filter_empty_false():
    """With filter_empty=False, empty strings are passed through."""
    items = ["one", "", "two"]
    chunks = list(
        chunk_items_by_tokens(items, max_tokens=100, filter_empty=False)
    )
    flattened = []
    for chunk in chunks:
        flattened.extend(chunk)
    assert flattened == items
def test_chunk_items_by_tokens_empty_input_yields_nothing():
    """No chunks are produced for empty input or input that filters to nothing."""
    nothing_at_all = []
    only_blanks = ["", " ", None]  # everything here is removed by the filter
    for candidate in (nothing_at_all, only_blanks):
        assert list(chunk_items_by_tokens(candidate, max_tokens=100)) == []
def test_chunk_items_by_tokens_custom_count_fn():
    """A caller-supplied count_fn replaces the default token counter."""

    def count_as_one(_):
        # Every item costs exactly one token, whatever its content.
        return 1

    items = ["a", "bb", "ccc", "dddd"]
    chunks = list(
        chunk_items_by_tokens(items, max_tokens=2, count_fn=count_as_one)
    )
    # With a budget of 2 and unit cost, items pair up in order.
    expected = [["a", "bb"], ["ccc", "dddd"]]
    assert chunks == expected
# ---------------------------------------------------------------------------
# remove_substring_names
# ---------------------------------------------------------------------------
def test_remove_substring_names_empty_input():
    """An empty list of names yields an empty list."""
    result = remove_substring_names([])
    assert result == []
def test_remove_substring_names_drops_substring_of_longer_name():
    """A short name that is a whole word of a longer name is removed."""
    candidates = ["julia", "julia smith"]
    survivors = remove_substring_names(candidates)
    assert survivors == ["julia smith"]
def test_remove_substring_names_preserves_original_order():
    """Surviving names keep the relative order they had in the input."""
    candidates = ["julia smith", "bob", "julia"]
    survivors = remove_substring_names(candidates)
    # "julia" is removed; the other two stay in their input order.
    expected = ["julia smith", "bob"]
    assert survivors == expected
def test_remove_substring_names_does_not_match_partial_words():
    """Only whole-word containment counts: 'jul' is not a word of 'julia'."""
    candidates = ["jul", "julia"]
    survivors = remove_substring_names(candidates)
    # Neither name is a whole word inside the other, so both are kept.
    for name in ("jul", "julia"):
        assert name in survivors
def test_remove_substring_names_case_insensitive():
    """Whole-word containment is checked without regard to letter case."""
    candidates = ["JULIA", "Julia Smith"]
    # "JULIA" matches the word "Julia" in "Julia Smith" when case is ignored.
    survivors = remove_substring_names(candidates)
    assert survivors == ["Julia Smith"]
def test_remove_substring_names_skips_blank_entries():
    """Whitespace-only entries never appear in the result."""
    candidates = [" ", "alice", "alice cooper"]
    survivors = remove_substring_names(candidates)
    # The shorter name is absorbed by the longer one.
    assert "alice" not in survivors
    assert "alice cooper" in survivors
    # The blank entry is dropped as well.
    assert " " not in survivors
# ---------------------------------------------------------------------------
# select_best_texts_by_keyword
# ---------------------------------------------------------------------------
def test_select_best_texts_by_keyword_empty_returns_empty():
    """No input texts means no selected texts."""
    selected = select_best_texts_by_keyword([], "anything", 100)
    assert selected == []
def test_select_best_texts_by_keyword_no_keyword_returns_input():
    """An empty keyword yields a result equal to the input list."""
    candidates = ["one", "two"]
    selected = select_best_texts_by_keyword(candidates, "", 100)
    assert selected == candidates
def test_select_best_texts_by_keyword_filters_texts_without_keyword():
    """Texts that never mention the keyword are excluded from the result."""
    candidates = [
        "Alice walked into the room.",
        "The weather is nice today.",
        "Alice loves to read books.",
    ]
    selected = select_best_texts_by_keyword(
        candidates, "alice", max_token_length=200
    )
    # Every surviving text must mention the keyword (case-insensitively).
    for text in selected:
        lowered = text.lower()
        assert "alice" in lowered
    # Exactly the two "Alice" sentences remain.
    assert len(selected) == 2
def test_select_best_texts_by_keyword_orders_by_score_desc():
    """Results are ordered from most to fewest keyword occurrences."""
    once = "Alice mentioned once."
    thrice = "Alice and Alice and Alice all spoke."  # 3 occurrences
    twice = "Alice went to Alice's house."  # 2 occurrences
    candidates = [once, thrice, twice]

    selected = select_best_texts_by_keyword(
        candidates, "alice", max_token_length=1000
    )

    # Highest occurrence count first, lowest last.
    expected_order = (thrice, twice, once)
    for position, expected_text in enumerate(expected_order):
        assert selected[position] == expected_text
def test_select_best_texts_by_keyword_respects_token_budget():
    """A tight token budget limits how many matching texts are selected."""
    candidates = [f"alice {i} alice" for i in range(50)]
    # A 20-token budget cannot hold all 50 matching texts.
    selected = select_best_texts_by_keyword(
        candidates,
        "alice",
        max_token_length=20,
        chunk_size_ratio=1.0,
    )
    selected_count = len(selected)
    assert 0 < selected_count < 50
def test_select_best_texts_by_keyword_whole_word_match_only():
    """The keyword embedded inside a longer word is not an occurrence."""
    with_keyword = "alicewonder is not alice"  # one whole-word "alice"
    without_keyword = "the cat is here"  # no occurrence at all
    selected = select_best_texts_by_keyword(
        [with_keyword, without_keyword], "alice", max_token_length=200
    )
    assert selected == ["alicewonder is not alice"]
def test_select_best_texts_by_keyword_skips_blank_texts():
    """Empty and whitespace-only texts are not selected."""
    # Only non-None blanks are exercised here: per the original author's
    # note, a None entry would error in .strip().
    candidates = ["", " ", "alice spoke"]
    selected = select_best_texts_by_keyword(
        candidates, "alice", max_token_length=200
    )
    assert selected == ["alice spoke"]
# ---------------------------------------------------------------------------
# clean_id
# ---------------------------------------------------------------------------
def test_clean_id_removes_special_characters():
    """Punctuation and symbols are stripped out of the identifier."""
    cleaned = clean_id("hello!@#$%^&*()world")
    assert cleaned == "helloworld"
def test_clean_id_preserves_allowed_characters():
    """Letters, digits, underscore, hyphen and space pass through unchanged."""
    already_clean = "Foo_Bar-Baz 123"
    assert clean_id(already_clean) == "Foo_Bar-Baz 123"
def test_clean_id_empty_input():
    """An empty identifier stays empty."""
    cleaned = clean_id("")
    assert cleaned == ""
def test_clean_id_only_special_characters():
    """Input made up solely of disallowed characters cleans to empty."""
    cleaned = clean_id("@#$%")
    assert cleaned == ""
def test_clean_id_unicode_dropped():
    """Non-ASCII letters such as 'é' are removed from the identifier."""
    # Only a-z, A-Z, 0-9, underscore, hyphen and space are expected to survive.
    cleaned = clean_id("café 42")
    assert cleaned == "caf 42"
# ---------------------------------------------------------------------------
# slugify
# ---------------------------------------------------------------------------
def test_slugify_basic_label():
    """A mixed-case label becomes a lowercase, hyphen-separated slug."""
    slug = slugify("My vLLM Local")
    assert slug == "my-vllm-local"
def test_slugify_only_separators_returns_empty():
    """Input consisting only of spaces and underscores slugs to empty."""
    slug = slugify(" ___ ")
    assert slug == ""
def test_slugify_strips_punctuation_runs():
    """Trailing punctuation does not leave a dangling hyphen."""
    slug = slugify("Voice 1!")
    assert slug == "voice-1"
def test_slugify_collapses_consecutive_specials():
    """A run of special characters collapses into a single hyphen."""
    slug = slugify("a b!!!c")
    assert slug == "a-b-c"
def test_slugify_empty_input():
    """An empty label slugs to an empty string."""
    slug = slugify("")
    assert slug == ""
def test_slugify_none_safe():
    """Passing None must not raise; the slug is an empty string."""
    slug = slugify(None)
    assert slug == ""
def test_slugify_already_slug():
    """A string that is already a valid slug is returned unchanged."""
    existing_slug = "already-a-slug"
    assert slugify(existing_slug) == "already-a-slug"
def test_slugify_strips_leading_trailing_dashes():
    """Hyphens at either end of the input are trimmed from the slug."""
    slug = slugify("-foo-bar-")
    assert slug == "foo-bar"