# tests/pipelines/test_table_question_answering.py

# Copyright (c) Alibaba, Inc. and its affiliates.
import os
import unittest
from threading import Thread
from typing import List

import json
from transformers import BertTokenizer

from modelscope.hub.snapshot_download import snapshot_download
from modelscope.models import Model
from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.pipelines.nlp import TableQuestionAnsweringPipeline
from modelscope.preprocessors import TableQuestionAnsweringPreprocessor
from modelscope.preprocessors.nlp.space_T_cn.fields.database import Database
from modelscope.utils.constant import ModelFile, Tasks
from modelscope.utils.test_utils import test_level


def tableqa_tracking_and_print_results_with_history(
        pipelines: List[TableQuestionAnsweringPipeline]):
    """Run a multi-turn conversation through each pipeline and print results.

    Every question is sent together with the history returned by the
    previous turn (``None`` for the first turn), so follow-up questions are
    answered in the context of the earlier ones. No ``table_id`` is passed.

    Args:
        pipelines: Pipelines to exercise; each one is called with a dict
            holding ``question`` and ``history_sql``.
    """
    questions = (
        '有哪些风险类型？',
        '风险类型有多少种？',
        '珠江流域的小型水库的库容总量是多少？',
        '那平均值是多少？',
        '那水库的名称呢？',
        '换成中型的呢？',
    )
    for tableqa in pipelines:
        # Each pipeline starts its own conversation from scratch.
        previous_sql = None
        for utterance in questions:
            request = {'question': utterance, 'history_sql': previous_sql}
            answer = tableqa(request)[OutputKeys.OUTPUT]
            print('question', utterance)
            print('sql text:', answer[OutputKeys.SQL_STRING])
            print('sql query:', answer[OutputKeys.SQL_QUERY])
            print('query result:', answer[OutputKeys.QUERY_RESULT])
            print('json dumps', json.dumps(answer, ensure_ascii=False))
            print()
            # Feed this turn's history into the next turn.
            previous_sql = answer[OutputKeys.HISTORY]


def tableqa_tracking_and_print_results_without_history(
        pipelines: List[TableQuestionAnsweringPipeline]):
    """Run independent single-turn questions through each pipeline.

    Each question is paired with the id of the table it targets and is sent
    without any ``history_sql`` entry, so every call stands alone.

    Args:
        pipelines: Pipelines to exercise; each one is called with a dict
            holding ``question`` and ``table_id``.
    """
    cases = (
        ('列出油耗大于8但是功率低于200的名称和价格', 'car'),
        ('油耗低于5的suv有哪些？', 'car'),
        ('上个月收益超过3的有几个基金？', 'fund'),
        ('净值不等于1的基金平均月收益率和年收益率是多少？', 'fund'),
        ('计算机或者成绩优秀的同学有哪些？学号是多少？', 'student'),
        ('本部博士生中平均身高是多少？', 'student'),
        ('长江流域和珠江流域的水库库容总量是多少？', 'reservoir'),
        ('今天星期几？', 'reservoir'),
    )
    for tableqa in pipelines:
        for utterance, target_table in cases:
            request = {'question': utterance, 'table_id': target_table}
            answer = tableqa(request)[OutputKeys.OUTPUT]
            print('question', utterance)
            print('sql text:', answer[OutputKeys.SQL_STRING])
            print('sql query:', answer[OutputKeys.SQL_QUERY])
            print('query result:', answer[OutputKeys.QUERY_RESULT])
            print('json dumps', json.dumps(answer, ensure_ascii=False))
            print()


def tableqa_tracking_and_print_results_with_tableid(
        pipelines: List[TableQuestionAnsweringPipeline]):
    """Run table-scoped questions, optionally chaining history between turns.

    Each case names the question, the table it targets and whether the
    history returned by the previous turn should be forwarded. Cases flagged
    ``False`` are sent with ``history_sql=None``.

    Args:
        pipelines: Pipelines to exercise; each one is called with a dict
            holding ``question``, ``table_id`` and ``history_sql``.
    """
    cases = (
        ('有哪些风险类型？', 'fund', False),
        ('风险类型有多少种？', 'fund', True),
        ('珠江流域的小型水库的库容总量是多少？', 'reservoir', False),
        ('那平均值是多少？', 'reservoir', True),
        ('那水库的名称呢？', 'reservoir', True),
        ('换成中型的呢？', 'reservoir', True),
        ('近7年来车辆的销量趋势？', 'car_sales', False),
        ('近7年来车辆的销量月环比是多少呢？', 'car_sales', True),
    )
    for tableqa in pipelines:
        previous_sql = None
        for utterance, target_table, use_history in cases:
            # Only follow-up questions carry the previous turn's history.
            history_arg = None
            if use_history:
                history_arg = previous_sql
            request = {
                'question': utterance,
                'table_id': target_table,
                'history_sql': history_arg,
            }
            answer = tableqa(request)[OutputKeys.OUTPUT]
            print('question', utterance)
            print('sql text:', answer[OutputKeys.SQL_STRING])
            print('sql query:', answer[OutputKeys.SQL_QUERY])
            print('query result:', answer[OutputKeys.QUERY_RESULT])
            print('json dumps', json.dumps(answer, ensure_ascii=False))
            print()
            # Always remember the latest history, even if the next case
            # chooses not to use it.
            previous_sql = answer[OutputKeys.HISTORY]


class TableQuestionAnswering(unittest.TestCase):
    """Smoke tests for the table question answering pipeline.

    The tests build the pipeline in several ways (from a downloaded snapshot
    directory, from a ``Model`` instance with an explicitly constructed
    ``Database``) and print the generated SQL and query results. Each test
    is gated on ``test_level()``.
    """

    def setUp(self) -> None:
        self.task = Tasks.table_question_answering
        self.model_id = 'damo/nlp_convai_text2sql_pretrain_cn'

    def _build_pipelines_with_db(self):
        """Build a one-element pipeline list backed by an explicit Database.

        Loads the model with ``Model.from_pretrained``, creates a
        ``BertTokenizer`` from the vocab file in the model directory (also
        stored on ``self.tokenizer``), and builds a ``Database`` from every
        file in the model directory's ``databases`` folder plus
        ``synonym.txt``, with ``is_use_sqlite=True``.

        Returns:
            A list containing a single pipeline that shares the database
            with its preprocessor.
        """
        model = Model.from_pretrained(self.model_id)
        self.tokenizer = BertTokenizer(
            os.path.join(model.model_dir, ModelFile.VOCAB_FILE))
        database_dir = os.path.join(model.model_dir, 'databases')
        db = Database(
            tokenizer=self.tokenizer,
            table_file_path=[
                os.path.join(database_dir, fname)
                for fname in os.listdir(database_dir)
            ],
            syn_dict_file_path=os.path.join(model.model_dir, 'synonym.txt'),
            is_use_sqlite=True)
        preprocessor = TableQuestionAnsweringPreprocessor(
            model_dir=model.model_dir, db=db)
        return [
            pipeline(
                self.task, model=model, preprocessor=preprocessor, db=db)
        ]

    @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
    def test_run_by_direct_model_download(self):
        """Build the pipeline from a snapshot directory; run with history."""
        cache_path = snapshot_download(self.model_id)
        preprocessor = TableQuestionAnsweringPreprocessor(model_dir=cache_path)
        pipelines = [
            pipeline(
                self.task, model=cache_path, preprocessor=preprocessor)
        ]
        tableqa_tracking_and_print_results_with_history(pipelines)

    @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
    def test_run_by_direct_model_download_with_multithreads(self):
        """Call one shared pipeline from five threads at the same time."""
        cache_path = snapshot_download(self.model_id)
        pl = pipeline(self.task, model=cache_path)
        # Exceptions raised inside a Thread are not propagated to the thread
        # that joins it, so they are collected here and checked afterwards;
        # otherwise this test would pass even if every worker crashed.
        errors = []

        def print_func(pl, i):
            try:
                result = pl({
                    'question': '上个月收益从低到高排前七的基金的名称和风险等级是什么',
                    'table_id': 'fund',
                    'history_sql': None
                })
                print(i, result[OutputKeys.OUTPUT][OutputKeys.SQL_QUERY],
                      result[OutputKeys.OUTPUT][OutputKeys.QUERY_RESULT],
                      json.dumps(result, ensure_ascii=False))
            except Exception as e:  # recorded and reported after join()
                errors.append((i, e))

        procs = []
        for i in range(5):
            proc = Thread(target=print_func, args=(pl, i))
            procs.append(proc)
            proc.start()
        for proc in procs:
            proc.join()
        self.assertFalse(errors,
                         'worker threads raised: {!r}'.format(errors))

    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
    def test_run_with_model_from_modelhub(self):
        """Run table-scoped questions with optional history chaining."""
        pipelines = self._build_pipelines_with_db()
        tableqa_tracking_and_print_results_with_tableid(pipelines)

    @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
    def test_run_with_model_from_modelhub_with_other_classes(self):
        """Run independent single-turn questions across several tables."""
        pipelines = self._build_pipelines_with_db()
        tableqa_tracking_and_print_results_without_history(pipelines)


# Allow running this test module directly as a script.
if __name__ == '__main__':
    unittest.main()
-												[to #42322933] commit nlp_convai_text2sql_pretrain_cn inference process to modelscope 

commit nlp_convai_text2sql_pretrain_cn inference process to modelscope
        Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10025155

  
											
										
										
											2022-09-14 19:04:56 +08:00
+								# Copyright (c) Alibaba, Inc. and its affiliates.
 								import os
 								import unittest
-												[to #42322933] add ut for multi threads 

1. 修复multi thread引起的问题
2. 增加multi thread的unittest
        Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10502008


											
										
										
											2022-10-25 09:49:02 +08:00
+								from threading import Thread
-												[to #42322933] reivse model problem and remove history sql for demo 

相比于master上的tableqa，做出了如下修复：
1. 修复了schema linking中的问题。
2. 同时设置了有history sql和没有history sql的两种输入
3. 增加了sqlite执行逻辑，可以返回sql执行结果
        Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10365114

											
										
										
											2022-10-12 15:18:35 +08:00
+								from typing import List
-												[to #42322933] commit nlp_convai_text2sql_pretrain_cn inference process to modelscope 

commit nlp_convai_text2sql_pretrain_cn inference process to modelscope
        Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10025155

  
											
										
										
											2022-09-14 19:04:56 +08:00
-												[to #42322933] change tableqa output 

修改output的结构，直接返回可转化成json format的结构
        Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10415403

											
										
										
											2022-10-14 23:11:19 +08:00
+								import json
-												[to #42322933] commit nlp_convai_text2sql_pretrain_cn inference process to modelscope 

commit nlp_convai_text2sql_pretrain_cn inference process to modelscope
        Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10025155

  
											
										
										
											2022-09-14 19:04:56 +08:00
+								from transformers import BertTokenizer
 								from modelscope.hub.snapshot_download import snapshot_download
 								from modelscope.models import Model
-												[to #42322933] change tableqa output 

修改output的结构，直接返回可转化成json format的结构
        Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10415403

											
										
										
											2022-10-14 23:11:19 +08:00
+								from modelscope.outputs import OutputKeys
-												[to #42322933] commit nlp_convai_text2sql_pretrain_cn inference process to modelscope 

commit nlp_convai_text2sql_pretrain_cn inference process to modelscope
        Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10025155

  
											
										
										
											2022-09-14 19:04:56 +08:00
+								from modelscope.pipelines import pipeline
 								from modelscope.pipelines.nlp import TableQuestionAnsweringPipeline
 								from modelscope.preprocessors import TableQuestionAnsweringPreprocessor
-												[to #42322933] NLP 1030 Refactor 

Features:
1. Refactor the directory structure of nlp models. All model files are placed into either the model folder or the task_model folder
2. Refactor all the comments to google style
3. Add detailed comments to important tasks and nlp models, to list the description of the model, and its preprocessor&trainer
4. Model Exporting now supports a direct call to TorchModelExporter (no need to derive from it)
5. Refactor model save_pretrained method to support direct running(independent from trainer)
6. Remove the judgement of Model in the pipeline base class, to support outer register models running in our pipelines
7. Nlp trainer now has a NLPTrainingArguments class , user can pass arguments into the dataclass, and use it as a normal cfg_modify_fn, to simplify the operation of modify cfg.
8. Merge the BACKBONES and the MODELS, so user can get a backbone with the Model.from_pretrained call
9. Model.from_pretrained now support a task argument, so user can use a backbone and load it with a specific task class.
10. Support Preprocessor.from_pretrained method
11. Add standard return classes to important nlp tasks, so some of the pipelines and the models are independent now, the return values of the models will always be tensors, and the pipelines will take care of the conversion to numpy and the following stuffs.
12. Split the file of the nlp preprocessors, to make the dir structure more clear.

Bugs Fixing:
1. Fix a bug that lr_scheduler can be called earlier than the optimizer's step
2. Fix a bug that the direct call of Pipelines (not from pipeline(xxx)) throws error
3. Fix a bug that the trainer will not call the correct TaskDataset class
4. Fix a bug that the internal loading of dataset throws an error in the trainer class
        Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10490585

											
										
										
											2022-10-25 12:26:25 +08:00
+								from modelscope.preprocessors.nlp.space_T_cn.fields.database import Database
-												[to #42322933] commit nlp_convai_text2sql_pretrain_cn inference process to modelscope 

commit nlp_convai_text2sql_pretrain_cn inference process to modelscope
        Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10025155

  
											
										
										
											2022-09-14 19:04:56 +08:00
+								from modelscope.utils.constant import ModelFile, Tasks
 								from modelscope.utils.test_utils import test_level
-												[to #42322933] reivse model problem and remove history sql for demo 

相比于master上的tableqa，做出了如下修复：
1. 修复了schema linking中的问题。
2. 同时设置了有history sql和没有history sql的两种输入
3. 增加了sqlite执行逻辑，可以返回sql执行结果
        Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10365114

											
										
										
											2022-10-12 15:18:35 +08:00
+								def tableqa_tracking_and_print_results_with_history(
 								        pipelines: List[TableQuestionAnsweringPipeline]):
 								    test_case = {
 								        'utterance': [
 								            '有哪些风险类型？',
 								            '风险类型有多少种？',
-												[to #42322933] add synonym 

主要做了如下修改：
1. 加入了同义词词典
2. 对SQL进行后处理，如果包含排序，则将空列转化成Primary列
        Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10670121


											
										
										
											2022-11-08 22:20:03 +08:00
+								            '珠江流域的小型水库的库容总量是多少？',
-												[to #42322933] reivse model problem and remove history sql for demo 

相比于master上的tableqa，做出了如下修复：
1. 修复了schema linking中的问题。
2. 同时设置了有history sql和没有history sql的两种输入
3. 增加了sqlite执行逻辑，可以返回sql执行结果
        Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10365114

											
										
										
											2022-10-12 15:18:35 +08:00
+								            '那平均值是多少？',
 								            '那水库的名称呢？',
 								            '换成中型的呢？',
 								        ]
 								    }
 								    for p in pipelines:
 								        historical_queries = None
 								        for question in test_case['utterance']:
 								            output_dict = p({
 								                'question': question,
 								                'history_sql': historical_queries
-												[to #42322933] change star3 to space_T_cn 

1. 合并star和star3框架
2. 修改star和star3的model type
        Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10492793

											
										
										
											2022-10-23 20:25:24 +08:00
+								            })[OutputKeys.OUTPUT]
-												[to #42322933] reivse model problem and remove history sql for demo 

相比于master上的tableqa，做出了如下修复：
1. 修复了schema linking中的问题。
2. 同时设置了有history sql和没有history sql的两种输入
3. 增加了sqlite执行逻辑，可以返回sql执行结果
        Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10365114

											
										
										
											2022-10-12 15:18:35 +08:00
+								            print('question', question)
-												[to #42322933] change tableqa output 

修改output的结构，直接返回可转化成json format的结构
        Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10415403

											
										
										
											2022-10-14 23:11:19 +08:00
+								            print('sql text:', output_dict[OutputKeys.SQL_STRING])
 								            print('sql query:', output_dict[OutputKeys.SQL_QUERY])
-												add 1.6

											
										
										
											2023-05-22 10:53:18 +08:00
+								            print('query result:', output_dict[OutputKeys.QUERY_RESULT])
-												[to #42322933] update tableqa params 

1. 增加传入table_id
2. 将result和table的结构统一
3. 默认开启is_use_sqlite
        Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10492027


											
										
										
											2022-10-22 20:33:49 +08:00
+								            print('json dumps', json.dumps(output_dict, ensure_ascii=False))
-												[to #42322933] reivse model problem and remove history sql for demo 

相比于master上的tableqa，做出了如下修复：
1. 修复了schema linking中的问题。
2. 同时设置了有history sql和没有history sql的两种输入
3. 增加了sqlite执行逻辑，可以返回sql执行结果
        Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10365114

											
										
										
											2022-10-12 15:18:35 +08:00
+								            print()
-												[to #42322933] change tableqa output 

修改output的结构，直接返回可转化成json format的结构
        Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10415403

											
										
										
											2022-10-14 23:11:19 +08:00
+								            historical_queries = output_dict[OutputKeys.HISTORY]
-												[to #42322933] reivse model problem and remove history sql for demo 

相比于master上的tableqa，做出了如下修复：
1. 修复了schema linking中的问题。
2. 同时设置了有history sql和没有history sql的两种输入
3. 增加了sqlite执行逻辑，可以返回sql执行结果
        Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10365114

											
										
										
											2022-10-12 15:18:35 +08:00
 								def tableqa_tracking_and_print_results_without_history(
 								        pipelines: List[TableQuestionAnsweringPipeline]):
 								    test_case = {
-												add bi model 

1. 更新模型文件，支持BI能力
2. 更新代码，支持BI能力
        Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/11115454

   
											
										
										
											2022-12-20 01:51:00 +08:00
+								        'utterance': [['列出油耗大于8但是功率低于200的名称和价格', 'car'],
 								                      ['油耗低于5的suv有哪些？', 'car'], ['上个月收益超过3的有几个基金？', 'fund'],
 								                      ['净值不等于1的基金平均月收益率和年收益率是多少？', 'fund'],
 								                      ['计算机或者成绩优秀的同学有哪些？学号是多少？', 'student'],
 								                      ['本部博士生中平均身高是多少？', 'student'],
 								                      ['长江流域和珠江流域的水库库容总量是多少？', 'reservoir'],
 								                      ['今天星期几？', 'reservoir']]
-												[to #42322933] reivse model problem and remove history sql for demo 

相比于master上的tableqa，做出了如下修复：
1. 修复了schema linking中的问题。
2. 同时设置了有history sql和没有history sql的两种输入
3. 增加了sqlite执行逻辑，可以返回sql执行结果
        Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10365114

											
										
										
											2022-10-12 15:18:35 +08:00
+								    }
 								    for p in pipelines:
-												add bi model 

1. 更新模型文件，支持BI能力
2. 更新代码，支持BI能力
        Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/11115454

   
											
										
										
											2022-12-20 01:51:00 +08:00
+								        for question, table_id in test_case['utterance']:
 								            output_dict = p({
 								                'question': question,
 								                'table_id': table_id
 								            })[OutputKeys.OUTPUT]
-												[to #42322933] reivse model problem and remove history sql for demo 

相比于master上的tableqa，做出了如下修复：
1. 修复了schema linking中的问题。
2. 同时设置了有history sql和没有history sql的两种输入
3. 增加了sqlite执行逻辑，可以返回sql执行结果
        Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10365114

											
										
										
											2022-10-12 15:18:35 +08:00
+								            print('question', question)
-												[to #42322933] change tableqa output 

修改output的结构，直接返回可转化成json format的结构
        Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10415403

											
										
										
											2022-10-14 23:11:19 +08:00
+								            print('sql text:', output_dict[OutputKeys.SQL_STRING])
 								            print('sql query:', output_dict[OutputKeys.SQL_QUERY])
-												add 1.6

											
										
										
											2023-05-22 10:53:18 +08:00
+								            print('query result:', output_dict[OutputKeys.QUERY_RESULT])
-												[to #42322933] update tableqa params 

1. 增加传入table_id
2. 将result和table的结构统一
3. 默认开启is_use_sqlite
        Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10492027


											
										
										
											2022-10-22 20:33:49 +08:00
+								            print('json dumps', json.dumps(output_dict, ensure_ascii=False))
-												[to #42322933] reivse model problem and remove history sql for demo 

相比于master上的tableqa，做出了如下修复：
1. 修复了schema linking中的问题。
2. 同时设置了有history sql和没有history sql的两种输入
3. 增加了sqlite执行逻辑，可以返回sql执行结果
        Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10365114

											
										
										
											2022-10-12 15:18:35 +08:00
+								            print()
-												[to #42322933] update tableqa params 

1. 增加传入table_id
2. 将result和table的结构统一
3. 默认开启is_use_sqlite
        Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10492027


											
										
										
											2022-10-22 20:33:49 +08:00
+								def tableqa_tracking_and_print_results_with_tableid(
 								        pipelines: List[TableQuestionAnsweringPipeline]):
 								    test_case = {
 								        'utterance': [
-												add bi model 

1. 更新模型文件，支持BI能力
2. 更新代码，支持BI能力
        Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/11115454

   
											
										
										
											2022-12-20 01:51:00 +08:00
+								            ['有哪些风险类型？', 'fund', False],
 								            ['风险类型有多少种？', 'fund', True],
 								            ['珠江流域的小型水库的库容总量是多少？', 'reservoir', False],
 								            ['那平均值是多少？', 'reservoir', True],
 								            ['那水库的名称呢？', 'reservoir', True],
 								            ['换成中型的呢？', 'reservoir', True],
 								            ['近7年来车辆的销量趋势？', 'car_sales', False],
 								            ['近7年来车辆的销量月环比是多少呢？', 'car_sales', True],
-												[to #42322933] update tableqa params 

1. 增加传入table_id
2. 将result和table的结构统一
3. 默认开启is_use_sqlite
        Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10492027


											
										
										
											2022-10-22 20:33:49 +08:00
+								        ],
 								    }
 								    for p in pipelines:
 								        historical_queries = None
-												add bi model 

1. 更新模型文件，支持BI能力
2. 更新代码，支持BI能力
        Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/11115454

   
											
										
										
											2022-12-20 01:51:00 +08:00
+								        for question, table_id, use_history in test_case['utterance']:
-												[to #42322933] update tableqa params 

1. 增加传入table_id
2. 将result和table的结构统一
3. 默认开启is_use_sqlite
        Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10492027


											
										
										
											2022-10-22 20:33:49 +08:00
+								            output_dict = p({
-												add bi model 

1. 更新模型文件，支持BI能力
2. 更新代码，支持BI能力
        Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/11115454

   
											
										
										
											2022-12-20 01:51:00 +08:00
+								                'question':
 								                question,
 								                'table_id':
 								                table_id,
 								                'history_sql':
 								                historical_queries if use_history else None
-												[to #42322933] change star3 to space_T_cn 

1. 合并star和star3框架
2. 修改star和star3的model type
        Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10492793

											
										
										
											2022-10-23 20:25:24 +08:00
+								            })[OutputKeys.OUTPUT]
-												[to #42322933] update tableqa params 

1. 增加传入table_id
2. 将result和table的结构统一
3. 默认开启is_use_sqlite
        Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10492027


											
										
										
											2022-10-22 20:33:49 +08:00
+								            print('question', question)
 								            print('sql text:', output_dict[OutputKeys.SQL_STRING])
 								            print('sql query:', output_dict[OutputKeys.SQL_QUERY])
-												add 1.6

											
										
										
											2023-05-22 10:53:18 +08:00
+								            print('query result:', output_dict[OutputKeys.QUERY_RESULT])
-												[to #42322933] update tableqa params 

1. 增加传入table_id
2. 将result和table的结构统一
3. 默认开启is_use_sqlite
        Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10492027


											
										
										
											2022-10-22 20:33:49 +08:00
+								            print('json dumps', json.dumps(output_dict, ensure_ascii=False))
 								            print()
 								            historical_queries = output_dict[OutputKeys.HISTORY]
-												[to #42322933] commit nlp_convai_text2sql_pretrain_cn inference process to modelscope 

commit nlp_convai_text2sql_pretrain_cn inference process to modelscope
        Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10025155

  
											
										
										
											2022-09-14 19:04:56 +08:00
+								class TableQuestionAnswering(unittest.TestCase):
 								    def setUp(self) -> None:
 								        self.task = Tasks.table_question_answering
 								        self.model_id = 'damo/nlp_convai_text2sql_pretrain_cn'
 								    @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
 								    def test_run_by_direct_model_download(self):
 								        cache_path = snapshot_download(self.model_id)
 								        preprocessor = TableQuestionAnsweringPreprocessor(model_dir=cache_path)
 								        pipelines = [
-												[to #42322933] reivse model problem and remove history sql for demo 

相比于master上的tableqa，做出了如下修复：
1. 修复了schema linking中的问题。
2. 同时设置了有history sql和没有history sql的两种输入
3. 增加了sqlite执行逻辑，可以返回sql执行结果
        Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10365114

											
										
										
											2022-10-12 15:18:35 +08:00
+								            pipeline(
 								                Tasks.table_question_answering,
 								                model=cache_path,
 								                preprocessor=preprocessor)
-												[to #42322933] commit nlp_convai_text2sql_pretrain_cn inference process to modelscope 

commit nlp_convai_text2sql_pretrain_cn inference process to modelscope
        Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10025155

  
											
										
										
											2022-09-14 19:04:56 +08:00
+								        ]
-												[to #42322933] reivse model problem and remove history sql for demo 

相比于master上的tableqa，做出了如下修复：
1. 修复了schema linking中的问题。
2. 同时设置了有history sql和没有history sql的两种输入
3. 增加了sqlite执行逻辑，可以返回sql执行结果
        Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10365114

											
										
										
											2022-10-12 15:18:35 +08:00
+								        tableqa_tracking_and_print_results_with_history(pipelines)
-												[to #42322933] commit nlp_convai_text2sql_pretrain_cn inference process to modelscope 

commit nlp_convai_text2sql_pretrain_cn inference process to modelscope
        Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10025155

  
											
										
										
											2022-09-14 19:04:56 +08:00
-												[to #42322933] add ut for multi threads 

1. 修复multi thread引起的问题
2. 增加multi thread的unittest
        Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10502008


											
										
										
											2022-10-25 09:49:02 +08:00
+								    @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
 								    def test_run_by_direct_model_download_with_multithreads(self):
 								        cache_path = snapshot_download(self.model_id)
 								        pl = pipeline(Tasks.table_question_answering, model=cache_path)
 								        def print_func(pl, i):
 								            result = pl({
-												[to #42322933] debug header ids and header names 

修复header_ids和header_names命名反了的问题
        Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10516557

    
											
										
										
											2022-10-26 16:04:14 +08:00
+								                'question': '上个月收益从低到高排前七的基金的名称和风险等级是什么',
 								                'table_id': 'fund',
-												[to #42322933] add ut for multi threads 

1. 修复multi thread引起的问题
2. 增加multi thread的unittest
        Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10502008


											
										
										
											2022-10-25 09:49:02 +08:00
+								                'history_sql': None
 								            })
-												[to #42322933] debug header ids and header names 

修复header_ids和header_names命名反了的问题
        Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10516557

    
											
										
										
											2022-10-26 16:04:14 +08:00
+								            print(i, result[OutputKeys.OUTPUT][OutputKeys.SQL_QUERY],
-												add 1.6

											
										
										
											2023-05-22 10:53:18 +08:00
+								                  result[OutputKeys.OUTPUT][OutputKeys.QUERY_RESULT],
-												[to #42322933] debug header ids and header names 

修复header_ids和header_names命名反了的问题
        Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10516557

    
											
										
										
											2022-10-26 16:04:14 +08:00
+								                  json.dumps(result))
-												[to #42322933] add ut for multi threads 

1. 修复multi thread引起的问题
2. 增加multi thread的unittest
        Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10502008


											
										
										
											2022-10-25 09:49:02 +08:00
 								        procs = []
 								        for i in range(5):
 								            proc = Thread(target=print_func, args=(pl, i))
 								            procs.append(proc)
 								            proc.start()
 								        for proc in procs:
 								            proc.join()
-												[to #42322933] commit nlp_convai_text2sql_pretrain_cn inference process to modelscope 

commit nlp_convai_text2sql_pretrain_cn inference process to modelscope
        Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10025155

  
											
										
										
											2022-09-14 19:04:56 +08:00
+								    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
 								    def test_run_with_model_from_modelhub(self):
 								        model = Model.from_pretrained(self.model_id)
-												[to #42322933] update tableqa params 

1. 增加传入table_id
2. 将result和table的结构统一
3. 默认开启is_use_sqlite
        Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10492027


											
										
										
											2022-10-22 20:33:49 +08:00
+								        self.tokenizer = BertTokenizer(
 								            os.path.join(model.model_dir, ModelFile.VOCAB_FILE))
 								        db = Database(
 								            tokenizer=self.tokenizer,
 								            table_file_path=[
 								                os.path.join(model.model_dir, 'databases', fname)
 								                for fname in os.listdir(
 								                    os.path.join(model.model_dir, 'databases'))
 								            ],
 								            syn_dict_file_path=os.path.join(model.model_dir, 'synonym.txt'),
-												[to #42322933] add synonym 

主要做了如下修改：
1. 加入了同义词词典
2. 对SQL进行后处理，如果包含排序，则将空列转化成Primary列
        Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10670121


											
										
										
											2022-11-08 22:20:03 +08:00
+								            is_use_sqlite=True)
-												[to #42322933] commit nlp_convai_text2sql_pretrain_cn inference process to modelscope 

commit nlp_convai_text2sql_pretrain_cn inference process to modelscope
        Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10025155

  
											
										
										
											2022-09-14 19:04:56 +08:00
+								        preprocessor = TableQuestionAnsweringPreprocessor(
-												[to #42322933] update tableqa params 

1. 增加传入table_id
2. 将result和table的结构统一
3. 默认开启is_use_sqlite
        Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10492027


											
										
										
											2022-10-22 20:33:49 +08:00
+								            model_dir=model.model_dir, db=db)
-												[to #42322933] commit nlp_convai_text2sql_pretrain_cn inference process to modelscope 

commit nlp_convai_text2sql_pretrain_cn inference process to modelscope
        Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10025155

  
											
										
										
											2022-09-14 19:04:56 +08:00
+								        pipelines = [
-												[to #42322933] reivse model problem and remove history sql for demo 

相比于master上的tableqa，做出了如下修复：
1. 修复了schema linking中的问题。
2. 同时设置了有history sql和没有history sql的两种输入
3. 增加了sqlite执行逻辑，可以返回sql执行结果
        Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10365114

											
										
										
											2022-10-12 15:18:35 +08:00
+								            pipeline(
 								                Tasks.table_question_answering,
 								                model=model,
-												[to #42322933] update tableqa params 

1. 增加传入table_id
2. 将result和table的结构统一
3. 默认开启is_use_sqlite
        Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10492027


											
										
										
											2022-10-22 20:33:49 +08:00
+								                preprocessor=preprocessor,
 								                db=db)
-												[to #42322933] commit nlp_convai_text2sql_pretrain_cn inference process to modelscope 

commit nlp_convai_text2sql_pretrain_cn inference process to modelscope
        Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10025155

  
											
										
										
											2022-09-14 19:04:56 +08:00
+								        ]
-												[to #42322933] update tableqa params 

1. 增加传入table_id
2. 将result和table的结构统一
3. 默认开启is_use_sqlite
        Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10492027


											
										
										
											2022-10-22 20:33:49 +08:00
+								        tableqa_tracking_and_print_results_with_tableid(pipelines)
-												[to #42322933] commit nlp_convai_text2sql_pretrain_cn inference process to modelscope 

commit nlp_convai_text2sql_pretrain_cn inference process to modelscope
        Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10025155

  
											
										
										
											2022-09-14 19:04:56 +08:00
 								    @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
 								    def test_run_with_model_from_modelhub_with_other_classes(self):
 								        model = Model.from_pretrained(self.model_id)
 								        self.tokenizer = BertTokenizer(
 								            os.path.join(model.model_dir, ModelFile.VOCAB_FILE))
 								        db = Database(
 								            tokenizer=self.tokenizer,
-												[to #42322933] reivse model problem and remove history sql for demo 

相比于master上的tableqa，做出了如下修复：
1. 修复了schema linking中的问题。
2. 同时设置了有history sql和没有history sql的两种输入
3. 增加了sqlite执行逻辑，可以返回sql执行结果
        Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10365114

											
										
										
											2022-10-12 15:18:35 +08:00
+								            table_file_path=[
 								                os.path.join(model.model_dir, 'databases', fname)
 								                for fname in os.listdir(
 								                    os.path.join(model.model_dir, 'databases'))
 								            ],
 								            syn_dict_file_path=os.path.join(model.model_dir, 'synonym.txt'),
 								            is_use_sqlite=True)
-												[to #42322933] commit nlp_convai_text2sql_pretrain_cn inference process to modelscope 

commit nlp_convai_text2sql_pretrain_cn inference process to modelscope
        Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10025155

  
											
										
										
											2022-09-14 19:04:56 +08:00
+								        preprocessor = TableQuestionAnsweringPreprocessor(
 								            model_dir=model.model_dir, db=db)
 								        pipelines = [
-												[to #42322933] reivse model problem and remove history sql for demo 

相比于master上的tableqa，做出了如下修复：
1. 修复了schema linking中的问题。
2. 同时设置了有history sql和没有history sql的两种输入
3. 增加了sqlite执行逻辑，可以返回sql执行结果
        Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10365114

											
										
										
											2022-10-12 15:18:35 +08:00
+								            pipeline(
 								                Tasks.table_question_answering,
 								                model=model,
 								                preprocessor=preprocessor,
 								                db=db)
-												[to #42322933] commit nlp_convai_text2sql_pretrain_cn inference process to modelscope 

commit nlp_convai_text2sql_pretrain_cn inference process to modelscope
        Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10025155

  
											
										
										
											2022-09-14 19:04:56 +08:00
+								        ]
-												[to #42322933] reivse model problem and remove history sql for demo 

相比于master上的tableqa，做出了如下修复：
1. 修复了schema linking中的问题。
2. 同时设置了有history sql和没有history sql的两种输入
3. 增加了sqlite执行逻辑，可以返回sql执行结果
        Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10365114

											
										
										
											2022-10-12 15:18:35 +08:00
+								        tableqa_tracking_and_print_results_without_history(pipelines)
-												[to #42322933] commit nlp_convai_text2sql_pretrain_cn inference process to modelscope 

commit nlp_convai_text2sql_pretrain_cn inference process to modelscope
        Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10025155

  
											
										
										
											2022-09-14 19:04:56 +08:00
 								if __name__ == '__main__':
 								    unittest.main()