2023-12-02 00:40:14 +02:00
|
|
|
import pytest
|
2025-02-01 17:44:06 +02:00
|
|
|
from talemate.util import ensure_dialog_format, clean_dialogue, remove_trailing_markers
|
2023-12-02 00:40:14 +02:00
|
|
|
|
2025-06-03 12:26:12 +03:00
|
|
|
# Multi-line fixture pair used as the last case of test_dialogue_cleanup: one
# quoted utterance that spans several paragraphs and contains a dash list.
# The expected text is character-for-character the same as the input.
# NOTE(review): the blank-line layout was reconstructed from a blame-view
# extraction -- confirm against the upstream file.
MULTILINE_TEST_A_INPUT = """
\"The first line.

The second line.

- list item
- list item

The third line.\"
"""
MULTILINE_TEST_A_EXPECTED = """
\"The first line.

The second line.

- list item
- list item

The third line.\"
"""
|
|
|
|
|
|
2025-06-29 19:51:08 +03:00
|
|
|
|
|
|
|
|
# (raw text, expected text) pairs for ensure_dialog_format. In the expected
# column spoken text is wrapped in double quotes and narration in asterisks.
_ENSURE_DIALOG_FORMAT_CASES = [
    ("Hello how are you?", "Hello how are you?"),
    ('"Hello how are you?"', '"Hello how are you?"'),
    (
        '"Hello how are you?" he asks "I am fine"',
        '"Hello how are you?" *he asks* "I am fine"',
    ),
    (
        "Hello how are you? *he asks* I am fine",
        '"Hello how are you?" *he asks* "I am fine"',
    ),
    (
        'Hello how are you?" *he asks* I am fine',
        '"Hello how are you?" *he asks* "I am fine"',
    ),
    (
        'Hello how are you?" *he asks I am fine',
        '"Hello how are you?" *he asks I am fine*',
    ),
    (
        'Hello how are you?" *he asks* "I am fine" *',
        '"Hello how are you?" *he asks* "I am fine"',
    ),
    (
        '"Hello how are you *he asks* I am fine"',
        '"Hello how are you" *he asks* "I am fine"',
    ),
    (
        "This is a string without any markers",
        "This is a string without any markers",
    ),
    (
        'This is a string with an ending quote"',
        '"This is a string with an ending quote"',
    ),
    (
        "This is a string with an ending asterisk*",
        "*This is a string with an ending asterisk*",
    ),
    ('"Mixed markers*', "*Mixed markers*"),
    (
        '*narrative.* dialogue" *more narrative.*',
        '*narrative.* "dialogue" *more narrative.*',
    ),
    (
        '"*messed up dialogue formatting.*" *some narration.*',
        '"messed up dialogue formatting." *some narration.*',
    ),
    (
        '*"messed up narration formatting."* "some dialogue."',
        '"messed up narration formatting." "some dialogue."',
    ),
    (
        "Some dialogue and two line-breaks right after, followed by narration.\n\n*Narration*",
        '"Some dialogue and two line-breaks right after, followed by narration."\n\n*Narration*',
    ),
    (
        '*Some narration with a "quoted" string in it.* Then some unquoted dialogue.\n\n*More narration.*',
        '*Some narration with a* "quoted" *string in it.* "Then some unquoted dialogue."\n\n*More narration.*',
    ),
    (
        "*Some narration* Some dialogue but not in quotes. *",
        '*Some narration* "Some dialogue but not in quotes."',
    ),
    (
        "*First line\nSecond line\nThird line*",
        "*First line\nSecond line\nThird line*",
    ),
    (MULTILINE_TEST_A_INPUT, MULTILINE_TEST_A_EXPECTED),
]


@pytest.mark.parametrize(("input", "expected"), _ENSURE_DIALOG_FORMAT_CASES)
def test_dialogue_cleanup(input, expected):
    """Check that ``ensure_dialog_format`` yields the expected text for each case.

    The parameter is named ``input`` because the parametrize argument names
    bind to it by name.
    """
    formatted = ensure_dialog_format(input)
    assert formatted == expected
|
2025-06-29 19:51:08 +03:00
|
|
|
|
|
|
|
|
|
|
|
|
|
# (raw text, expected text, main speaker name) triples for clean_dialogue.
_CLEAN_DIALOGUE_CASES = [
    ("bob: says a sentence", "bob: says a sentence", "bob"),
    (
        "bob: says a sentence\nbob: says another sentence",
        "bob: says a sentence\nsays another sentence",
        "bob",
    ),
    (
        "bob: says a sentence with a colon: to explain something",
        "bob: says a sentence with a colon: to explain something",
        "bob",
    ),
    (
        "bob: i have a riddle for you, alice: the riddle",
        "bob: i have a riddle for you, alice: the riddle",
        "bob",
    ),
    (
        "bob: says something\nalice: says something else",
        "bob: says something",
        "bob",
    ),
    ("bob: says a sentence. then a", "bob: says a sentence.", "bob"),
    (
        "bob: first paragraph\n\nsecond paragraph",
        "bob: first paragraph\n\nsecond paragraph",
        "bob",
    ),
    # movie script new speaker cutoff
    (
        "bob: says a sentence\n\nALICE\nsays something else",
        "bob: says a sentence",
        "bob",
    ),
]


@pytest.mark.parametrize(("input", "expected", "main_name"), _CLEAN_DIALOGUE_CASES)
def test_clean_dialogue(input, expected, main_name):
    """Check that ``clean_dialogue(input, main_name)`` returns ``expected``."""
    cleaned = clean_dialogue(input, main_name)
    assert cleaned == expected
|
2025-06-29 19:51:08 +03:00
|
|
|
|
|
|
|
|
|
|
|
|
|
# (raw text, expected text) pairs for remove_trailing_markers. Each expected
# value is the input without its dangling final marker and trailing whitespace.
_TRAILING_MARKER_CASES = [
    ('Hello how are you? "', "Hello how are you?"),
    ("Hello how are you? *", "Hello how are you?"),
    ("Hello how are you? {", "Hello how are you?"),
    ("Hello how are you? [", "Hello how are you?"),
    ("Hello how are you? (", "Hello how are you?"),
    ('"Hello how are you?"', '"Hello how are you?"'),
    ('"Hello how are you?" "', '"Hello how are you?"'),
    ('"Hello how are you?" *', '"Hello how are you?"'),
    ('"Hello how are you?" *"', '"Hello how are you?"'),
    ('*He says* "Hello how are you?"', '*He says* "Hello how are you?"'),
    ('*He says* "Hello how are you?" *', '*He says* "Hello how are you?"'),
    ('*He says* "Hello how are you?" *"', '*He says* "Hello how are you?"'),
    ("(Some thoughts)", "(Some thoughts)"),
    ("(Some thoughts) ", "(Some thoughts)"),
    ("(Some thoughts) (", "(Some thoughts)"),
    ("(Some thoughts) [", "(Some thoughts)"),
]


@pytest.mark.parametrize(("input", "expected"), _TRAILING_MARKER_CASES)
def test_remove_trailing_markers(input, expected):
    """Check that ``remove_trailing_markers`` returns ``expected`` for each case."""
    stripped = remove_trailing_markers(input)
    assert stripped == expected
|
|
|
|
|
|
|
|
|
|
|
2025-06-29 19:51:08 +03:00
|
|
|
# (text, anchor_length, expected non-anchor part, expected anchor part) rows
# for split_anchor_text.
_SPLIT_ANCHOR_CASES = [
    ("", 10, "", ""),
    ("Hello", 10, "", "Hello"),
    ("This is a short example", 10, "This is", "a short example"),
    ("One two three four", 4, "One two", "three four"),
    (
        "This is a longer example with more than ten words to test the anchor functionality",
        10,
        "This is a longer example",
        "with more than ten words to test the anchor functionality",
    ),
    (
        "One two three four five six seven eight nine ten",
        10,
        "One two three four five",
        "six seven eight nine ten",
    ),
    ("Two words", 10, "Two", "words"),
    ("One Two Three", 3, "One", "Two Three"),
]


@pytest.mark.parametrize(
    ("input", "anchor_length", "expected_non_anchor", "expected_anchor"),
    _SPLIT_ANCHOR_CASES,
)
def test_split_anchor_text(input, anchor_length, expected_non_anchor, expected_anchor):
    """Check both halves of the pair returned by ``split_anchor_text``."""
    from talemate.util.dialogue import split_anchor_text

    leading_part, anchor_part = split_anchor_text(input, anchor_length)

    assert leading_part == expected_non_anchor
    assert anchor_part == expected_anchor
|
0.32.0 (#208)
* separate other tts apis and improve chunking
* move old tts config to voice agent config and implement config widget ux elements for table editing
* elevenlabs updated to use their client and expose model selection
* linting
* separate character class into character.pt and start on voice routing
* linting
* tts hot swapping and chunking improvements
* linting
* add support for piper-tts
* update gitignore
* linting
* support google tts
fix issue where quick_toggle agent config didnt work on standard config items
* linting
* only show agent quick toggles if the agent is enabled
* change elevenlabs to use a locally maintained voice list
* tts generate before / after events
* voice library refactor
* linting
* update openai model and voices
* tweak configs
* voice library ux
* linting
* add support for kokoro tts
* fix add / remove voice
* voice library tags
* linting
* linting
* tts api status
* api infos and add more kokoro voices
* allow voice testing before saving a new voice
* tweaks to voice library ux and some api info text
* linting
* voice mixer
* polish
* voice files go into /tts instead of templates/voice
* change default narrator voice
* xtts confirmation note
* character voice select
* koboldai format template
* polish
* skip empty chunks
* change default voice
* replace em-dash with normal dash
* adjust limit
* replace linebreaks
* chunk cleanup for whitespace
* info updated
* remove invalid endif tag
* sort voices by ready api
* Character hashable type
* clarify set_simulated_environment use to avoid unwanted character deactivated
* allow manual generation of tts and fix assorted issues with tts
* tts websocket handler router renamed
* voice mixer: when there are only 2 voices auto adjust the other weight as needed
* separate persist character functions into own mixin
* auto assign voices
* fix chara load and auto assign voice during chara load
* smart speaker separation
* tts speaker separation config
* generate tts for intro text
* fix prompting issues with anthropic, google and openrouter clients
* decensor flag off again
* only to ai assisted voice markup on narrator messages
* openrouter provider configuration
* linting
* improved sound controls
* add support for chatterbox
* fix info
* chatterbox dependencies
* remove piper and xtts2
* linting
* voice params
* linting
* tts model overrides and move tts info to tab
* reorg toolbar
* allow overriding of test text
* more tts fixes, apply intensity, chatterbox voices
* confirm voice delete
* linting
* groq updates
* reorg decorators
* tts fixes
* cancelable audio queue
* voice library uploads
* scene voice library
* Config refactor (#13)
* config refactor progress
* config nuke continues
* fix system prompts
* linting
* client fun
* client config refactor
* fix kcpp auto embedding selection
* linting
* fix proxy config
* remove cruft
* fix remaining client bugs from config refactor
always use get_config(), dont keep an instance reference
* support for reasoning models
* more reasoning tweaks
* only allow one frontend to connect at a time
* fix tests
* relock
* relock
* more client adjustments
* pattern prefill
* some tts agent fixes
* fix ai assist cond
* tts nodes
* fix config retrieval
* assign voice node and fixes
* sim suite char gen assign voice
* fix voice assign template to consider used voices
* get rid of auto break repetition which wasn't working right for a while anyhow
* linting
* generate tts node
as string node
* linting
* voice change on character event
* tweak chatterbox max length
* koboldai default template
* linting
* fix saving of existing voice
* relock
* adjust params of eva default voice
* f5tts support
* f5tts samples
* f5tts support
* f5tts tweaks
* chunk size per tts api and reorg default f5tts voices
* chatterbox default voice reorg to match f5-tts default voices
* voice library ux polish pass
* cleanup
* f5-tts tweaks
* missing samples
* get rid of old save cmd
* add chatterbox and f5tts
* housekeeping
* fix some issues with world entry editing
* remove cruft
* replace exclamation marks
* fix save immutable check
* fix replace_exclamation_marks
* better error handling in websocket plugins and fix issue with saves
* agent config save on dialog close
* ctrl click to disable / enable agents
* fix quick config
* allow modifying response size of focal requests
* sim suite set goal always sets story intent, encourage calling of set goal during simulation start
* allow setting of model
* voice param tweaks
* tts tweaks
* fix character card load
* fix note_on_value
* add mixed speaker_separation mode
* indicate which message the audio is for and provide way to stop audio from the message
* fix issue with some tts generation failing
* linting
* fix speaker separate modes
* bad idea
* linting
* refactor speaker separation prompt
* add kimi think pattern
* fix issue with unwanted cover image replacement
* no scene analysis for visual prompt generation (for now)
* linting
* tts for context investigation messages
* prompt tweaks
* tweak intro
* fix intro text tts not auto playing sometimes
* consider narrator voice when assigning voice to a character
* allow director log messages to go only into the director console
* linting
* startup performance fixes
* init time
* linting
* only show audio control for messages that can have it
* always create story intent and dont override existing saves during character card load
* fix history check in dynamic story line node
add HasHistory node
* linting
* fix intro message not having speaker separation
* voice library character manager
* sequential and cancelable auto assign all
* linting
* fix generation cancel handling
* tooltips
* fix auto assign voice from scene voices
* polish
* kokoro does not like lazy import
* update info text
* complete scene export / import
* linting
* wording
* remove cruft
* fix story intent generation during character card import
* fix generation cancelled emit status inf loop
* prompt tweak
* reasoning quick toggle, reasoning token slider, tooltips
* improved reasoning pattern handling
* fix indirect coercion response parsing
* fix streaming issue
* response length instructions
* more robust streaming
* adjust default
* adjust formatting
* linting
* remove debug output
* director console log function calls
* install cuda script updated
* linting
* add another step
* adjust default
* update dialogue examples
* fix voice selection issues
* what's happening here
* third time's the charm?
* Vite migration (#207)
* add vite config
* replace babel, webpack, vue-cli deps with vite, switch to esm modules, separate eslint config
* change process.env to import.meta.env
* update index.html for vite and move to root
* update docs for vite
* remove vue cli config
* update example env with vite
* bump frontend deps after rebase to 32.0
---------
Co-authored-by: pax-co <Pax_801@proton.me>
* properly reference data type
* what's new
* better indication of dialogue example supporting multiple lines, improve dialogue example display
* fix potential issue with cached scene analysis being reused when it shouldn't
* fix character creation issues with player character toggle
* fix issue where editing a message would sometimes lose parts of the message
* fix slider ux thumb labels (vuetify update)
* relock
* narrative conversation format
* remove planning step
* linting
* tweaks
* don't overthink
* update dialogue examples and intro
* dont dictate response length instructions when data structures are expected
* prompt tweaks
* prompt tweaks
* linting
* fix edit message not handling : well
* prompt tweaks
* fix tests
* fix manual revision when character message was generated in new narrative mode
* fix issue with message editing
* Docker packages release (#204)
* add CI workflow for Docker image build and MkDocs deployment
* rename CI workflow from 'ci' to 'package'
* refactor CI workflow: consolidate container build and documentation deployment into a single file
* fix: correct indentation for permissions in CI workflow
* fix: correct indentation for steps in deploy-docs job in CI workflow
* build both cpu and cuda image
* docs
* docs
* expose writing style during state reinforcement
* prompt tweaks
* test container build
* test container image
* update docker compose
* docs
* test-container-build
* test container build
* test container build
* update docker build workflows
* fix guidance prompt prefix not being dropped
* mount tts dir
* add gpt-5
* remove debug output
* docs
* openai auto toggle reasoning based on model selection
* linting
---------
Co-authored-by: pax-co <123330830+pax-co@users.noreply.github.com>
Co-authored-by: pax-co <Pax_801@proton.me>
Co-authored-by: Luis Alexandre Deschamps Brandão <brandao_luis@yahoo.com>
2025-08-08 13:56:29 +03:00
|
|
|
|
|
|
|
|
|
|
|
|
|
# (text, expected chunks) pairs for separate_dialogue_from_exposition. Each
# expected chunk is a dict with the chunk's text and its type.
_DIALOGUE_EXPOSITION_CASES = [
    # Empty text
    ("", []),
    # Only dialogue
    ('"Hello world"', [{"text": '"Hello world"', "type": "dialogue"}]),
    # Only exposition
    ("This is exposition", [{"text": "This is exposition", "type": "exposition"}]),
    # Simple mixed case
    (
        'He said "Hello" to her',
        [
            {"text": "He said ", "type": "exposition"},
            {"text": '"Hello"', "type": "dialogue"},
            {"text": " to her", "type": "exposition"},
        ],
    ),
    # Multiple dialogues
    (
        '"Hi" she said "Bye"',
        [
            {"text": '"Hi"', "type": "dialogue"},
            {"text": " she said ", "type": "exposition"},
            {"text": '"Bye"', "type": "dialogue"},
        ],
    ),
    # Exposition with asterisks (should be treated as exposition)
    (
        '*He walks* "Hello" *He smiles*',
        [
            {"text": "*He walks* ", "type": "exposition"},
            {"text": '"Hello"', "type": "dialogue"},
            {"text": " *He smiles*", "type": "exposition"},
        ],
    ),
    # Dialogue spanning multiple lines
    (
        'He said "Hello\nHow are you?" nicely',
        [
            {"text": "He said ", "type": "exposition"},
            {"text": '"Hello\nHow are you?"', "type": "dialogue"},
            {"text": " nicely", "type": "exposition"},
        ],
    ),
    # Complex mixed content
    (
        'The man said "I am fine" and *walked away* before saying "Goodbye"',
        [
            {"text": "The man said ", "type": "exposition"},
            {"text": '"I am fine"', "type": "dialogue"},
            {"text": " and *walked away* before saying ", "type": "exposition"},
            {"text": '"Goodbye"', "type": "dialogue"},
        ],
    ),
    # Unmatched quotes (last quote doesn't close)
    (
        'He said "Hello',
        [
            {"text": "He said ", "type": "exposition"},
            {"text": '"Hello', "type": "dialogue"},
        ],
    ),
    # Empty dialogue
    (
        'Before "" after',
        [
            {"text": "Before ", "type": "exposition"},
            {"text": '""', "type": "dialogue"},
            {"text": " after", "type": "exposition"},
        ],
    ),
    # Multiple quotes in exposition (edge case)
    (
        'She thought about the word "love" and "hate" often',
        [
            {"text": "She thought about the word ", "type": "exposition"},
            {"text": '"love"', "type": "dialogue"},
            {"text": " and ", "type": "exposition"},
            {"text": '"hate"', "type": "dialogue"},
            {"text": " often", "type": "exposition"},
        ],
    ),
    # Nested quotes scenario (treating inner quotes as part of dialogue)
    (
        "He said \"She told me 'hi' yesterday\"",
        [
            {"text": "He said ", "type": "exposition"},
            {"text": "\"She told me 'hi' yesterday\"", "type": "dialogue"},
        ],
    ),
    # Just quotes
    ('""', [{"text": '""', "type": "dialogue"}]),
    # Quotes at start and end
    (
        '"Start" middle "End"',
        [
            {"text": '"Start"', "type": "dialogue"},
            {"text": " middle ", "type": "exposition"},
            {"text": '"End"', "type": "dialogue"},
        ],
    ),
    # Single quote (unmatched)
    ('"', [{"text": '"', "type": "dialogue"}]),
    # Text ending with quote start
    (
        'Hello "',
        [
            {"text": "Hello ", "type": "exposition"},
            {"text": '"', "type": "dialogue"},
        ],
    ),
]


@pytest.mark.parametrize(("input", "expected"), _DIALOGUE_EXPOSITION_CASES)
def test_separate_dialogue_from_exposition(input, expected):
    """Check the text and type of every chunk returned for ``input``."""
    from talemate.util.dialogue import separate_dialogue_from_exposition

    # Project the returned chunk objects onto plain dicts so they can be
    # compared directly against the expectation table.
    observed = [
        dict(text=piece.text, type=piece.type)
        for piece in separate_dialogue_from_exposition(input)
    ]

    assert observed == expected
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Speaker identification within dialogue chunks: (text, expected chunks) pairs
# where each expected chunk also carries the speaker (None for exposition).
_DIALOGUE_SPEAKER_CASES = [
    # Single dialogue with speaker
    (
        '"{John}I am leaving now."',
        [
            {"text": '"I am leaving now."', "type": "dialogue", "speaker": "John"},
        ],
    ),
    # Dialogue embedded within exposition, with speaker
    (
        'She whispered "{Alice}Be careful" before disappearing.',
        [
            {"text": "She whispered ", "type": "exposition", "speaker": None},
            {"text": '"Be careful"', "type": "dialogue", "speaker": "Alice"},
            {
                "text": " before disappearing.",
                "type": "exposition",
                "speaker": None,
            },
        ],
    ),
    # Multiple dialogues with different speakers
    (
        '"{Bob}Hi" she replied "{Carol}Hello"',
        [
            {"text": '"Hi"', "type": "dialogue", "speaker": "Bob"},
            {"text": " she replied ", "type": "exposition", "speaker": None},
            {"text": '"Hello"', "type": "dialogue", "speaker": "Carol"},
        ],
    ),
    # Prev speaker
    (
        '"{Bob}First dialog" some exposition "Second dialog" some more expostition "{Sarah}Third dialog"',
        [
            {"text": '"First dialog"', "type": "dialogue", "speaker": "Bob"},
            {"text": " some exposition ", "type": "exposition", "speaker": None},
            {"text": '"Second dialog"', "type": "dialogue", "speaker": "Bob"},
            {
                "text": " some more expostition ",
                "type": "exposition",
                "speaker": None,
            },
            {"text": '"Third dialog"', "type": "dialogue", "speaker": "Sarah"},
        ],
    ),
]


@pytest.mark.parametrize(("input", "expected"), _DIALOGUE_SPEAKER_CASES)
def test_separate_dialogue_from_exposition_speaker(input, expected):
    """Verify that a curly-braced speaker name at the start of a dialogue
    segment ends up in the chunk's `speaker` field and is absent from its
    `text`."""
    from talemate.util.dialogue import separate_dialogue_from_exposition

    # Flatten each chunk to a dict that includes the speaker for comparison.
    observed = [
        dict(text=piece.text, type=piece.type, speaker=piece.speaker)
        for piece in separate_dialogue_from_exposition(input)
    ]

    assert observed == expected
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# TTS markup parsing: (markup text, expected chunks) pairs using the
# "[Speaker] text" line format.
_TTS_MARKUP_CASES = [
    # Empty input
    ("", []),
    # Simple narrator line
    (
        "[Narrator] He walked into the room.",
        [
            {
                "text": "He walked into the room.",
                "type": "exposition",
                "speaker": None,
            }
        ],
    ),
    # Simple dialogue line
    (
        "[John] Hello world.",
        [{"text": "Hello world.", "type": "dialogue", "speaker": "John"}],
    ),
    # Mixed dialogue and narration
    (
        "[Narrator] He said\n[John] Hello world\n[Narrator] and walked away.",
        [
            {"text": "He said", "type": "exposition", "speaker": None},
            {"text": "Hello world", "type": "dialogue", "speaker": "John"},
            {"text": "and walked away.", "type": "exposition", "speaker": None},
        ],
    ),
    # Multiple speakers
    (
        "[John] Hi there\n[Mary] Hello back\n[John] How are you?",
        [
            {"text": "Hi there", "type": "dialogue", "speaker": "John"},
            {"text": "Hello back", "type": "dialogue", "speaker": "Mary"},
            {"text": "How are you?", "type": "dialogue", "speaker": "John"},
        ],
    ),
    # Line without proper format (fallback to exposition)
    (
        "Some random text without brackets",
        [
            {
                "text": "Some random text without brackets",
                "type": "exposition",
                "speaker": None,
            }
        ],
    ),
    # Empty lines should be ignored
    (
        "[John] Hello\n\n[Mary] Hi\n",
        [
            {"text": "Hello", "type": "dialogue", "speaker": "John"},
            {"text": "Hi", "type": "dialogue", "speaker": "Mary"},
        ],
    ),
    # Different narrator casings
    (
        "[NARRATOR] Some narration\n[narrator] More narration",
        [
            {"text": "Some narration", "type": "exposition", "speaker": None},
            {"text": "More narration", "type": "exposition", "speaker": None},
        ],
    ),
]


@pytest.mark.parametrize(("input", "expected"), _TTS_MARKUP_CASES)
def test_parse_tts_markup(input, expected):
    """Exercise ``parse_tts_markup`` on the bracketed ``[Speaker]`` line format."""
    from talemate.util.dialogue import parse_tts_markup

    # Reduce each parsed chunk to a plain dict so it compares against the table.
    observed = [
        dict(text=piece.text, type=piece.type, speaker=piece.speaker)
        for piece in parse_tts_markup(input)
    ]

    assert observed == expected
|