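"""Caption evaluation for audio captioning.

Computes FENSE plus the COCO caption metrics (BLEU-1..4, ROUGE-L, CIDEr,
METEOR, SPICE), and reports SPIDEr as the mean of CIDEr and SPICE.

Expected input formats (as read by the code below):
  annotation JSON: {"audios": [{"audio_id": ..., "captions": [{"caption": ...}, ...]}, ...]}
  prediction JSON: {"predictions": [{"filename": ..., "tokens": ...}, ...]}
"""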
import copy
import json

import numpy as np
import fire
def evaluate_annotation(key2refs, scorer):
    """Round-robin human agreement: score each reference caption against the others.

    Mutates `key2refs`, so callers pass a deep copy.
    """
    if scorer.method() == "Bleu":
        scores = np.array([0.0 for n in range(4)])
    else:
        scores = 0
    num_cap_per_audio = len(next(iter(key2refs.values())))

    for i in range(num_cap_per_audio):
        # Put the caption held out in the previous round back at the front,
        # so every caption is held out exactly once over all rounds.
        if i > 0:
            for key in key2refs:
                key2refs[key].insert(0, res[key][0])
        # Hold out the last remaining caption of each audio as the "candidate".
        res = {key: [refs.pop()] for key, refs in key2refs.items()}
        score, _ = scorer.compute_score(key2refs, res)

        if scorer.method() == "Bleu":
            scores += np.array(score)  # Bleu returns a list of Bleu-1..4 scores
        else:
            scores += score

    score = scores / num_cap_per_audio
    return score
def evaluate_prediction(key2pred, key2refs, scorer):
    """Score predictions against references, leaving one reference out per round."""
    if scorer.method() == "Bleu":
        scores = np.array([0.0 for n in range(4)])
    else:
        scores = 0
    num_cap_per_audio = len(next(iter(key2refs.values())))

    for i in range(num_cap_per_audio):
        # Drop the i-th reference of every audio so prediction scores are
        # computed against the same number of references as the round-robin
        # annotation scores above.
        key2refs_i = {}
        for key, refs in key2refs.items():
            key2refs_i[key] = refs[:i] + refs[i + 1:]
        score, _ = scorer.compute_score(key2refs_i, key2pred)

        if scorer.method() == "Bleu":
            scores += np.array(score)
        else:
            scores += score

    score = scores / num_cap_per_audio
    return score
class Evaluator(object):

    def eval_annotation(self, annotation, output):
        captions = json.load(open(annotation, "r"))["audios"]

        # Collect reference captions per audio id.
        key2refs = {}
        for audio_idx in range(len(captions)):
            audio_id = captions[audio_idx]["audio_id"]
            key2refs[audio_id] = []
            for caption in captions[audio_idx]["captions"]:
                key2refs[audio_id].append(caption["caption"])

        # FENSE works on raw (untokenized) captions, so score it first.
        from fense.fense import Fense
        scores = {}
        scorer = Fense()
        scores[scorer.method()] = evaluate_annotation(copy.deepcopy(key2refs), scorer)

        # The COCO scorers expect {key: [{"caption": ...}, ...]} for tokenization.
        refs4eval = {}
        for key, refs in key2refs.items():
            refs4eval[key] = []
            for idx, ref in enumerate(refs):
                refs4eval[key].append({
                    "audio_id": key,
                    "id": idx,
                    "caption": ref
                })

        from pycocoevalcap.tokenizer.ptbtokenizer import PTBTokenizer

        tokenizer = PTBTokenizer()
        key2refs = tokenizer.tokenize(refs4eval)

        from pycocoevalcap.bleu.bleu import Bleu
        from pycocoevalcap.cider.cider import Cider
        from pycocoevalcap.rouge.rouge import Rouge
        from pycocoevalcap.meteor.meteor import Meteor
        from pycocoevalcap.spice.spice import Spice

        scorers = [Bleu(), Rouge(), Cider(), Meteor(), Spice()]
        for scorer in scorers:
            scores[scorer.method()] = evaluate_annotation(copy.deepcopy(key2refs), scorer)

        # SPIDEr is the mean of CIDEr and SPICE.
        spider = 0
        with open(output, "w") as f:
            for name, score in scores.items():
                if name == "Bleu":
                    for n in range(4):
                        f.write("Bleu-{}: {:6.3f}\n".format(n + 1, score[n]))
                else:
                    f.write("{}: {:6.3f}\n".format(name, score))
                    if name in ["CIDEr", "SPICE"]:
                        spider += score
            f.write("SPIDEr: {:6.3f}\n".format(spider / 2))
    def eval_prediction(self, prediction, annotation, output):
        ref_captions = json.load(open(annotation, "r"))["audios"]

        # Collect reference captions per audio id.
        key2refs = {}
        for audio_idx in range(len(ref_captions)):
            audio_id = ref_captions[audio_idx]["audio_id"]
            key2refs[audio_id] = []
            for caption in ref_captions[audio_idx]["captions"]:
                key2refs[audio_id].append(caption["caption"])

        pred_captions = json.load(open(prediction, "r"))["predictions"]

        # Each prediction item maps a filename to a single candidate caption.
        key2pred = {}
        for audio_idx in range(len(pred_captions)):
            item = pred_captions[audio_idx]
            audio_id = item["filename"]
            key2pred[audio_id] = [item["tokens"]]

        # FENSE works on raw (untokenized) captions, so score it first.
        from fense.fense import Fense
        scores = {}
        scorer = Fense()
        scores[scorer.method()] = evaluate_prediction(key2pred, key2refs, scorer)

        # The COCO scorers expect {key: [{"caption": ...}, ...]} for tokenization.
        refs4eval = {}
        for key, refs in key2refs.items():
            refs4eval[key] = []
            for idx, ref in enumerate(refs):
                refs4eval[key].append({
                    "audio_id": key,
                    "id": idx,
                    "caption": ref
                })

        preds4eval = {}
        for key, preds in key2pred.items():
            preds4eval[key] = []
            for idx, pred in enumerate(preds):
                preds4eval[key].append({
                    "audio_id": key,
                    "id": idx,
                    "caption": pred
                })

        from pycocoevalcap.tokenizer.ptbtokenizer import PTBTokenizer

        tokenizer = PTBTokenizer()
        key2refs = tokenizer.tokenize(refs4eval)
        key2pred = tokenizer.tokenize(preds4eval)

        from pycocoevalcap.bleu.bleu import Bleu
        from pycocoevalcap.cider.cider import Cider
        from pycocoevalcap.rouge.rouge import Rouge
        from pycocoevalcap.meteor.meteor import Meteor
        from pycocoevalcap.spice.spice import Spice

        scorers = [Bleu(), Rouge(), Cider(), Meteor(), Spice()]
        for scorer in scorers:
            scores[scorer.method()] = evaluate_prediction(key2pred, key2refs, scorer)

        # SPIDEr is the mean of CIDEr and SPICE.
        spider = 0
        with open(output, "w") as f:
            for name, score in scores.items():
                if name == "Bleu":
                    for n in range(4):
                        f.write("Bleu-{}: {:6.3f}\n".format(n + 1, score[n]))
                else:
                    f.write("{}: {:6.3f}\n".format(name, score))
                    if name in ["CIDEr", "SPICE"]:
                        spider += score
            f.write("SPIDEr: {:6.3f}\n".format(spider / 2))
if __name__ == "__main__":
    fire.Fire(Evaluator)
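# Usage sketch: fire.Fire(Evaluator) exposes each Evaluator method as a
# subcommand. The script name below is hypothetical, e.g.
#   python eval_caption.py eval_annotation --annotation annotation.json --output ann_scores.txt
#   python eval_caption.py eval_prediction --prediction pred.json --annotation annotation.json --output scores.txt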