From 7dc02a79ef51821d755d78df8c97a5db6e75d7d8 Mon Sep 17 00:00:00 2001
From: "lingcai.wl" <lingcai.wl@alibaba-inc.com>
Date: Fri, 28 Oct 2022 16:10:50 +0800
Subject: [PATCH] [to #44834022] add service utils for model deploy

Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10529621
---
 modelscope/utils/demo_utils.py         |  17 +--
 modelscope/utils/regress_test_utils.py |  15 +--
 modelscope/utils/service_utils.py      | 179 +++++++++++++++++++++++++
 3 files changed, 182 insertions(+), 29 deletions(-)
 create mode 100644 modelscope/utils/service_utils.py

diff --git a/modelscope/utils/demo_utils.py b/modelscope/utils/demo_utils.py
index 363ae950..e57b3348 100644
--- a/modelscope/utils/demo_utils.py
+++ b/modelscope/utils/demo_utils.py
@@ -4,11 +4,11 @@ import io
 
 import cv2
 import json
-import numpy as np
 
 from modelscope.outputs import OutputKeys
 from modelscope.pipelines import pipeline
 from modelscope.utils.constant import Tasks, TasksIODescriptions
+from modelscope.utils.service_utils import NumpyEncoder
 
 TASKS_INPUT_TEMPLATES = {
     # vision tasks
@@ -234,21 +234,6 @@ class DemoCompatibilityCheck(object):
         return True
 
 
-class NumpyEncoder(json.JSONEncoder):
-
-    def default(self, obj):
-        if isinstance(obj, np.ndarray):
-            return obj.tolist()
-
-        if isinstance(obj, np.floating):
-            return float(obj)
-
-        if isinstance(obj, np.integer):
-            return int(obj)
-
-        return json.JSONEncoder.default(self, obj)
-
-
 def preprocess(req):
     in_urls = req.get('urlPaths').get('inUrls')
     if len(req['inputs']) == 1:
diff --git a/modelscope/utils/regress_test_utils.py b/modelscope/utils/regress_test_utils.py
index 8045d3e9..be983c6c 100644
--- a/modelscope/utils/regress_test_utils.py
+++ b/modelscope/utils/regress_test_utils.py
@@ -19,6 +19,8 @@ import torch
 import torch.optim
 from torch import nn
 
+from modelscope.utils.service_utils import NumpyEncoder
+
 
 class RegressTool:
     """This class is used to stop inference/training results from changing by some unaware affections by unittests.
@@ -117,19 +119,6 @@ class RegressTool:
             with open(baseline, 'rb') as f:
                 base = pickle.load(f)
 
-            class NumpyEncoder(json.JSONEncoder):
-                """Special json encoder for numpy types
-                """
-
-                def default(self, obj):
-                    if isinstance(obj, np.integer):
-                        return int(obj)
-                    elif isinstance(obj, np.floating):
-                        return float(obj)
-                    elif isinstance(obj, np.ndarray):
-                        return obj.tolist()
-                    return json.JSONEncoder.default(self, obj)
-
             print(f'baseline: {json.dumps(base, cls=NumpyEncoder)}')
             print(f'latest  : {json.dumps(io_json, cls=NumpyEncoder)}')
             if not compare_io_and_print(base, io_json, compare_fn, **kwargs):
diff --git a/modelscope/utils/service_utils.py b/modelscope/utils/service_utils.py
new file mode 100644
index 00000000..29c111f8
--- /dev/null
+++ b/modelscope/utils/service_utils.py
@@ -0,0 +1,179 @@
+import base64
+import mimetypes
+from io import BytesIO
+
+import json
+import numpy as np
+import requests
+from PIL import Image
+
+from modelscope.outputs import TASK_OUTPUTS, OutputKeys
+from modelscope.pipeline_inputs import TASK_INPUTS, InputType
+from modelscope.pipelines import pipeline
+from modelscope.utils.constant import Tasks, TasksIODescriptions
+
+
+# A service data decoder func decodes data received from the network and converts it to the pipeline's input,
+# for example:
+def ExampleDecoder(data):
+    # Assuming the pipeline input is a dict containing an image and a text,
+    # the image is sent over the network base64-encoded and is decoded here.
+    data_json = json.loads(data)
+    # data: {"image": "xxxxxxxx=="(base64 str), "text": "a question"}
+    # The pipeline is then called with these inputs as follows:
+    # pipeline({'image': image, 'text': text})
+    inputs = {
+        'image': decode_base64_to_image(data_json.get('image')),
+        'text': data_json.get('text')
+    }
+    return inputs
+
+
+# A service data encoder func encodes pipeline outputs into a network response (such as json),
+# for example:
+def ExampleEncoder(data):
+    # Assuming the pipeline output is a dict containing an image and a text that
+    # must be sent over the network, this func encodes the image as base64 and dumps the dict to json.
+    # data (e.g. a python dict):
+    # {"image": a numpy array representing an image, "text": "output"}
+    image = data['image']
+    text = data['text']
+    data = {'image': encode_array_to_img_base64(image), 'text': text}
+    return json.dumps(data, cls=NumpyEncoder)
+
+
+CustomEncoder = {  # task -> encoder func; when set, service_data_encoder returns its result instead
+    # Tasks.visual_question_answering: ExampleEncoder
+}
+
+CustomDecoder = {  # task -> decoder func; when set, service_data_decoder returns its result instead
+    # Tasks.visual_question_answering: ExampleDecoder
+}
+
+
+class NumpyEncoder(json.JSONEncoder):
+    """JSON encoder that additionally serializes numpy arrays and numpy scalars."""
+
+    def default(self, obj):
+        # Arrays become (nested) lists; numpy floats/ints become native Python numbers.
+        if isinstance(obj, np.ndarray):
+            return obj.tolist()
+        if isinstance(obj, np.floating):
+            return float(obj)
+        if isinstance(obj, np.integer):
+            return int(obj)
+        # Anything else is left to the base class, which raises TypeError.
+        return super().default(obj)
+
+
+def get_extension(encoding):
+    """Return the file extension (no leading dot) for a data URL's MIME type, or None when it is unknown."""
+    tp = mimetypes.guess_type(encoding.replace('audio/wav', 'audio/x-wav'))[0]
+    if tp == 'audio/flac':  # flac is not supported by mimetypes
+        return 'flac'
+    extension = mimetypes.guess_extension(tp) if tp else None  # guess_extension(None) would raise
+    if extension is not None and extension.startswith('.'):
+        extension = extension[1:]
+    return extension
+
+
+def get_mimetype(filename):
+    """Guess the MIME type of `filename`, reporting x-wav/x-flac subtypes as wav/flac; None if unknown."""
+    guessed = mimetypes.guess_type(filename)[0]
+    # Only rewrite the subtype when a type was actually guessed.
+    return None if guessed is None else guessed.replace('x-wav', 'wav').replace('x-flac', 'flac')
+
+
+def decode_base64_to_binary(encoding):
+    """Decode a base64 data URL into a (decoded bytes, file extension) tuple."""
+    extension, payload = get_extension(encoding), encoding.split(',')[1]
+    return base64.b64decode(payload), extension
+
+
+def decode_base64_to_image(encoding):
+    """Open the image carried by a base64 data URL of the form '<type>;base64,<payload>' with PIL."""
+    image_encoded = encoding.split(';')[1].split(',')[1]
+    return Image.open(BytesIO(base64.b64decode(image_encoded)))
+
+
+def encode_array_to_img_base64(image_array):
+    """Encode an image array as a PNG base64 data URL (values are cast to uint8 first)."""
+    with BytesIO() as png_buffer:
+        Image.fromarray(image_array.astype(np.uint8)).save(png_buffer, 'PNG')
+        png_bytes = png_buffer.getvalue()
+    encoded = base64.b64encode(png_bytes).decode('utf-8')
+    return 'data:image/png;base64,' + encoded
+
+
+def encode_pcm_to_base64(bytes_data):
+    """Write `bytes_data` as a 16000 Hz WAV via scipy and return the file as a base64 data URL."""
+    from scipy.io.wavfile import write
+    with BytesIO() as wav_buffer:
+        write(wav_buffer, 16000, bytes_data)
+        return 'data:audio/pcm;base64,' + base64.b64encode(wav_buffer.getvalue()).decode('utf-8')
+
+
+def encode_url_to_base64(url):
+    """Download `url` and return its content as a base64 data URL (MIME type left empty if unknown)."""
+    payload = base64.b64encode(requests.get(url).content).decode('utf-8')
+    guessed = get_mimetype(url)
+    prefix = 'data:' + ('' if guessed is None else guessed) + ';base64,'
+    return prefix + payload
+
+
+def encode_file_to_base64(f):
+    """Read the file at path `f` and return its content as a base64 data URL (MIME type left empty if unknown)."""
+    with open(f, 'rb') as file:
+        payload = base64.b64encode(file.read()).decode('utf-8')
+    guessed = get_mimetype(f)
+    prefix = 'data:' + ('' if guessed is None else guessed) + ';base64,'
+    return prefix + payload
+
+
+def encode_url_or_file_to_base64(path):
+    """Encode `path` as a base64 data URL: fetched once as a URL, else read as a local file if it has no URL schema."""
+    try:
+        return encode_url_to_base64(path)
+    except (requests.exceptions.MissingSchema,
+            requests.exceptions.InvalidSchema):
+        return encode_file_to_base64(path)
+
+
+def service_data_decoder(task, data):
+    """Decode a UTF-8 request body (bytes) into the task's pipeline input; dict-typed inputs are read as JSON."""
+    if CustomDecoder.get(task) is not None:
+        return CustomDecoder[task](data)
+    input_type = TASK_INPUTS[task]
+    input_data = data.decode('utf-8')
+    if input_type == InputType.IMAGE:
+        return decode_base64_to_image(input_data)
+    elif input_type == InputType.AUDIO:
+        return decode_base64_to_binary(input_data)[0]
+    elif input_type == InputType.TEXT:
+        return input_data
+    elif isinstance(input_type, dict):
+        fields, input_data = json.loads(input_data), {}  # raw bytes cannot be indexed by key, parse first
+        for key, val in input_type.items():
+            if val == InputType.IMAGE:
+                input_data[key] = decode_base64_to_image(fields[key])
+            elif val == InputType.AUDIO:
+                input_data[key] = decode_base64_to_binary(fields[key])[0]
+            elif val == InputType.TEXT:
+                input_data[key] = fields[key]
+    return input_data
+
+
+def service_data_encoder(task, data):
+    """Serialize pipeline outputs to UTF-8 JSON bytes; image and pcm outputs become base64 data URLs."""
+    if CustomEncoder.get(task) is not None:
+        return CustomEncoder[task](data)
+    result = dict(data)  # shallow copy so the caller's output dict is not overwritten
+    for output_key in TASK_OUTPUTS[task]:
+        if output_key == OutputKeys.OUTPUT_IMG:
+            # reverse the last axis before PNG encoding (presumably BGR -> RGB; confirm with pipeline output)
+            result[OutputKeys.OUTPUT_IMG] = encode_array_to_img_base64(
+                data[OutputKeys.OUTPUT_IMG][..., ::-1])
+        elif output_key == OutputKeys.OUTPUT_PCM:
+            result[OutputKeys.OUTPUT_PCM] = encode_pcm_to_base64(
+                data[OutputKeys.OUTPUT_PCM])
+    return bytes(json.dumps(result, cls=NumpyEncoder), encoding='utf8')