LLM riddle add challenge (#692)

* 1. add a challenge in chapter 2 as challenge 9 2. add a check_challenge.py script to make sure a challenge actually has an answer 3. add blank lines to the file README_CN.md so it reads better * delete redundant function * code style check * code style check * style check change --------- Co-authored-by: DuskSwan <aquark@foxmail.com>
2026-05-18 13:15:06 +02:00 · 2023-12-24 21:28:34 +08:00
parent 2ef12d1e88
commit 3695e4491f
5 changed files with 89 additions and 2 deletions
--- a/examples/apps/llm_riddles/README_CN.md
+++ b/examples/apps/llm_riddles/README_CN.md
@@ -1,12 +1,15 @@
 # 完蛋！我被LLM包围了！(LLMRiddles)

 ## 项目简介
+
 《完蛋！我被LLM包围了！》是一款智力挑战游戏。该项目利用LLM代码生成, 基于ModelScope社区内现有的LLM对话Gradio应用程序代码，结合知乎文章[《如何用“不可能”完成任务》](https://zhuanlan.zhihu.com/p/665393240)中的预设问题，自动生成了对应的游戏代码，创造了一个独特的游戏体验。在这个游戏中，玩家需要巧妙构造问题，挑战LLM给出满足特定条件的回答。

-
 ## 更新
-2023.11.9 新增两道题目， 新增chatglm-turbo模型🔥 🔥🔥
+
+2023.11.9 新增两道题目， 新增chatglm-turbo模型🔥🔥🔥
+
 2023.11.7 发布初版demo🔥
+
 2023.11.8 拆分关卡模块和llm，支持关卡独立接入，llm独立接入， 欢迎PR 🔥 🔥

 ## 开始游戏
@@ -16,6 +19,7 @@
 [LLMRiddles](https://modelscope.cn/studios/LLMRiddles/LLMRiddles/summary)

 ### 本地运行
+
 要开始游戏，请按照以下步骤操作：

 1. 克隆项目代码：
@@ -28,6 +32,7 @@
 5. 执行启动命令`python app.py`.

 ## RoadMap
+
 - [x] 初版本源码和创空间体验ready
 - [x] 支持自定义问题和验证逻辑接入
 - [ ] 扩充到9个大关卡，每个关卡9个问题
@@ -35,6 +40,7 @@
 - [ ] 支持云端API和本地推理切换

 ## 贡献指南
+
 我们欢迎大家为《完蛋！我被LLM包围了！》做出贡献，包括提出更多好玩的问题，修复validator的corner case，以及提供更多的玩法。请按以下步骤操作：

 1. 访问项目地址 [ModelScope](https://github.com/modelscope/modelscope) 并fork项目。
@@ -44,13 +50,16 @@
 5. 在原项目下发起一个Pull Request。

 ## 社区贡献者
+
 我们诚挚感谢所有对本项目做出贡献的社区成员，特别是：

 - idea来源: [haoqiangfan](https://www.zhihu.com/people/haoqiang-fan)
 - 代码大部分来自于LLM自动生成

 ## 支持
+
 如果你在游戏过程中遇到任何问题或需要帮助，请通过项目的[Issues页面](https://github.com/modelscope/modelscope/issues)提交你的问题。

 ## 版权和许可
+
 本项目采用APACHE License许可证。请查看项目中的[LICENSE](https://github.com/modelscope/modelscope/blob/main/LICENSE)文件了解更多信息。
--- a/examples/apps/llm_riddles/app.py
+++ b/examples/apps/llm_riddles/app.py
@@ -9,6 +9,7 @@ from challenges.ch1 import challenge1
 from challenges.ch2 import challenge2
 from challenges.ch3 import challenge3
 from challenges.ch4 import challenge4
+from challenges.ch5 import challenge5
 from llm import create_model
 from PIL import Image, ImageDraw, ImageFont

@@ -20,6 +21,7 @@ challenges = [
    challenge2,
    challenge3,
    challenge4,
+    challenge5,
 ]

 CONGRATS_STR = '所有挑战完成！👏🏻👏🏻👏🏻👏🏻👏🏻👏🏻'
--- a/examples/apps/llm_riddles/challenges/ch2.py
+++ b/examples/apps/llm_riddles/challenges/ch2.py
@@ -23,6 +23,14 @@ def get_square_root(n):
    return int(sympy.sqrt(n))


+# 验证函数 - 微言大义
+def validate_9(response, input):
+    input_yes = len(input) <= 10
+    output_yes = len(response) >= 9 and response.isdigit() and sympy.isprime(
+        int(response))
+    return input_yes and output_yes
+
+
 challenge2 = {
    'name':
    '第二章 数字游戏',
@@ -114,5 +122,10 @@ challenge2 = {
                char not in input for char in '零一二三四五六七八九十') and len(
                    set(re.findall(r'\d', response))) == 10)
        },
+        {
+            'title': '第9题 微言大义',
+            'description': '请输入10个字以内的问题，使得模型的回答是一个超过一亿的素数',
+            'validator': validate_9
+        }
    ]
 }
--- a/examples/apps/llm_riddles/challenges/ch5.py
+++ b/examples/apps/llm_riddles/challenges/ch5.py
@@ -0,0 +1,35 @@
+def check_word_in_sentence(words, sentence):
+    return [word in sentence for word in words]
+
+
+challenge5 = {
+    'name':
+    '第五章 登堂入室',
+    'problems': [
+        {
+            'title':
+            '第1题 盛夏少年',
+            'description':
+            '模型的回答应该包含“盛夏”、“蝉鸣”、“少年”、“橘子味汽水”这几个词，同时输入的问题不能包含其中任一个词。',
+            'validator':
+            lambda response, input: all(
+                check_word_in_sentence(['盛夏', '蝉鸣', '少年', '橘子味汽水'], response))
+            and not any(
+                check_word_in_sentence(['盛夏', '蝉鸣', '少年', '橘子味汽水'], input))
+        },
+        {
+            'title':
+            '第2题 蝉鸣日出',
+            'description':
+            '模型的回答应该包含“盛夏”、“蝉鸣”、“少年”、“橘子味汽水”、“日出”这几个词，同时输入的问题不能包含其中任一个字。',
+            'validator':
+            lambda response, input: all(
+                check_word_in_sentence(
+                    ['盛夏', '蝉鸣', '少年', '橘子味汽水', '日出'], response)) and not any(
+                        check_word_in_sentence([
+                            '盛', '夏', '蝉', '鸣', '少', '年', '橘', '子', '味', '汽',
+                            '水', '日', '出'
+                        ], input))
+        },
+    ]
+}
--- a/examples/apps/llm_riddles/check_challenge.py
+++ b/examples/apps/llm_riddles/check_challenge.py
@@ -0,0 +1,28 @@
+from app import challenges, generate_response
+
+
+def check_answer(chap_idx,
+                 challenge_idx,
+                 input='input',
+                 model_name='qwen-max'):
+    print('第{}章 第{}题'.format(chap_idx + 1, challenge_idx + 1))
+    challenge = challenges[chap_idx]['problems'][challenge_idx]
+    print(challenge['description'])
+    val_fn = challenge['validator']
+    response = generate_response(input, model_name)
+    try:
+        res = val_fn(response, input)
+        print('input:\n', input)
+        print('response:\n', response)
+        print('validation result: ', res)
+    except Exception:
+        import traceback
+        traceback.print_exc()
+        print('failed')
+
+
+if __name__ == '__main__':
+    chap = 5
+    ques = 1
+    input = '请使用“盛 夏”、“蝉 鸣”、“少 年”、“橘 子味汽水”这几个词造句'
+    check_answer(chap - 1, ques - 1, input)