update finetune
1	modelscope/metrics/ciderD/__init__.py	Executable file
@@ -0,0 +1 @@
__author__ = 'tylin'
57	modelscope/metrics/ciderD/ciderD.py	Executable file
@@ -0,0 +1,57 @@
# Filename: ciderD.py
#
# Description: Describes the class to compute the CIDEr-D (Consensus-Based Image Description Evaluation) Metric
# by Vedantam, Zitnick, and Parikh (http://arxiv.org/abs/1411.5726)
#
# Creation Date: Sun Feb 8 14:16:54 2015
#
# Authors: Ramakrishna Vedantam <vrama91@vt.edu> and Tsung-Yi Lin <tl483@cornell.edu>
from __future__ import absolute_import, division, print_function

from .ciderD_scorer import CiderScorer


class CiderD:
    """
    Main class to compute the CIDEr-D metric.
    """

    def __init__(self, n=4, sigma=6.0, df='corpus'):
        # set cider to sum over 1- to 4-grams
        self._n = n
        # set the standard deviation parameter for the gaussian length penalty
        self._sigma = sigma
        # set where to compute document frequencies from ('corpus' or a pickle file path)
        self._df = df
        self.cider_scorer = CiderScorer(n=self._n, df_mode=self._df)

    def compute_score(self, gts, res):
        """
        Main function to compute the CIDEr-D score.
        :param gts (dict): maps <image_id> to a list of tokenized reference sentences
        :param res (list of dict): each entry holds an 'image_id' and a 'caption' that is a
            single-element list with the tokenized hypothesis / candidate sentence
        :return: (score, scores): mean CIDEr-D score for the corpus and the per-image scores
        """
        # clear all the previous hypos and refs
        tmp_cider_scorer = self.cider_scorer.copy_empty()
        tmp_cider_scorer.clear()
        for res_id in res:
            hypo = res_id['caption']
            ref = gts[res_id['image_id']]

            # Sanity check.
            assert (type(hypo) is list)
            assert (len(hypo) == 1)
            assert (type(ref) is list)
            assert (len(ref) > 0)
            tmp_cider_scorer += (hypo[0], ref)

        (score, scores) = tmp_cider_scorer.compute_score()

        return score, scores

    def method(self):
        return 'CIDEr-D'
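For context, a minimal usage sketch (not part of this commit; the image ids and captions are made up, and with df='corpus' the idf statistics come from the references passed in, so scores are only meaningful over a reasonably large batch of images):

from modelscope.metrics.ciderD.ciderD import CiderD

# gts: image_id -> list of tokenized reference sentences
gts = {
    'img1': ['a dog runs on the grass', 'a dog is running outside'],
    'img2': ['a cat sleeps on a sofa'],
}
# res: one dict per image, each with a single-element tokenized candidate list
res = [
    {'image_id': 'img1', 'caption': ['a dog running on grass']},
    {'image_id': 'img2', 'caption': ['a cat sleeping on the sofa']},
]

scorer = CiderD(n=4, sigma=6.0, df='corpus')
corpus_score, per_image_scores = scorer.compute_score(gts, res)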
233	modelscope/metrics/ciderD/ciderD_scorer.py	Executable file
@@ -0,0 +1,233 @@
#!/usr/bin/env python
# Tsung-Yi Lin <tl483@cornell.edu>
# Ramakrishna Vedantam <vrama91@vt.edu>
from __future__ import absolute_import, division, print_function
import copy
import math
from collections import defaultdict

import numpy as np
import six
from six.moves import cPickle

def precook(s, n=4, out=False):
    """
    Takes a string as input and returns an object that can be given to
    either cook_refs or cook_test. This is optional: cook_refs and cook_test
    can take string arguments as well.
    :param s: string : sentence to be converted into ngrams
    :param n: int : number of ngrams for which representation is calculated
    :return: term frequency vector for occurring ngrams
    """
    words = s.split()
    counts = defaultdict(int)
    for k in range(1, n + 1):
        for i in range(len(words) - k + 1):
            ngram = tuple(words[i:i + k])
            counts[ngram] += 1
    return counts

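# Worked example (illustrative): precook('a b a b', n=2) counts every 1-gram
# and 2-gram occurrence, giving
#   {('a',): 2, ('b',): 2, ('a', 'b'): 2, ('b', 'a'): 1}
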
def cook_refs(refs, n=4):  # lhuang: oracle will call with "average"
    '''Takes a list of reference sentences for a single segment
    and returns an object that encapsulates everything that BLEU
    needs to know about them.
    :param refs: list of string : reference sentences for some image
    :param n: int : number of ngrams for which (ngram) representation is calculated
    :return: result (list of dict)
    '''
    return [precook(ref, n) for ref in refs]


def cook_test(test, n=4):
    '''Takes a test sentence and returns an object that
    encapsulates everything that BLEU needs to know about it.
    :param test: string : hypothesis sentence for some image
    :param n: int : number of ngrams for which (ngram) representation is calculated
    :return: result (dict)
    '''
    return precook(test, n, True)

class CiderScorer(object):
    """CIDEr-D scorer."""

    def copy(self):
        ''' copy the refs.'''
        new = CiderScorer(n=self.n)
        new.ctest = copy.copy(self.ctest)
        new.crefs = copy.copy(self.crefs)
        return new

    def copy_empty(self):
        # construct with df_mode='corpus' to skip re-loading the pickle,
        # then copy the already-loaded statistics over
        new = CiderScorer(df_mode='corpus', n=self.n, sigma=self.sigma)
        new.df_mode = self.df_mode
        new.ref_len = self.ref_len
        new.document_frequency = self.document_frequency
        return new

    def __init__(self, df_mode='corpus', test=None, refs=None, n=4, sigma=6.0):
        ''' singular instance '''
        self.n = n
        self.sigma = sigma
        self.crefs = []
        self.ctest = []
        self.df_mode = df_mode
        self.ref_len = None
        if self.df_mode != 'corpus':
            # load precomputed document frequencies from a pickle file
            with open(df_mode, 'rb') as f:
                pkl_file = cPickle.load(
                    f, **(dict(encoding='latin1') if six.PY3 else {}))
            self.ref_len = np.log(float(pkl_file['ref_len']))
            self.document_frequency = pkl_file['document_frequency']
        else:
            self.document_frequency = None
        self.cook_append(test, refs)

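    # Note: a df_mode pickle is expected to provide 'ref_len' (the reference
    # corpus size) and 'document_frequency' (a dict mapping ngram tuples to
    # counts), matching the keys read in __init__ above.
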
    def clear(self):
        self.crefs = []
        self.ctest = []

    def cook_append(self, test, refs):
        '''called by constructor and __iadd__ to avoid creating new instances.'''

        if refs is not None:
            self.crefs.append(cook_refs(refs))
            if test is not None:
                self.ctest.append(cook_test(test))  # N.B.: -1
            else:
                self.ctest.append(None)  # lens of crefs and ctest have to match

    def size(self):
        assert len(self.crefs) == len(self.ctest), \
            'refs/test mismatch! %d<>%d' % (len(self.crefs), len(self.ctest))
        return len(self.crefs)

    def __iadd__(self, other):
        '''add an instance (e.g., from another sentence).'''

        if type(other) is tuple:
            # avoid creating new CiderScorer instances
            self.cook_append(other[0], other[1])
        else:
            self.ctest.extend(other.ctest)
            self.crefs.extend(other.crefs)

        return self

    def compute_doc_freq(self):
        """
        Compute document frequency over the reference data.
        This will be used to compute idf (inverse document frequency) later.
        The document frequency is stored on the object.
        :return: None
        """
        for refs in self.crefs:
            # refs, k ref captions of one image
            for ngram in set(ngram for ref in refs
                             for (ngram, count) in ref.items()):
                self.document_frequency[ngram] += 1

    def compute_cider(self):

        def counts2vec(cnts):
            """
            Maps counts of ngrams to a vector of tf-idf weights.
            Returns vec, an array of dictionaries mapping each n-gram to its
            tf-idf weight; the n-th entry of the array covers (n+1)-grams.
            :param cnts: dict mapping ngram tuples to term frequencies
            :return: vec (array of dict), norm (array of float), length (int)
            """
            vec = [defaultdict(float) for _ in range(self.n)]
            length = 0
            norm = [0.0 for _ in range(self.n)]
            for (ngram, term_freq) in cnts.items():
                # clamp the document frequency to 1 if the ngram doesn't appear in the reference corpus
                df = np.log(max(1.0, self.document_frequency[ngram]))
                # ngram index
                n = len(ngram) - 1
                # tf (term_freq) * idf (precomputed idf) for n-grams
                vec[n][ngram] = float(term_freq) * (self.ref_len - df)
                # compute norm for the vector. the norm will be used for computing similarity
                norm[n] += pow(vec[n][ngram], 2)

                if n == 1:
                    length += term_freq
            norm = [np.sqrt(n) for n in norm]
            return vec, norm, length

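        # Note: ref_len is log(N) and df is log(document frequency), so the
        # weight ref_len - df in counts2vec equals log(N / doc_freq), the
        # standard idf.
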
        def sim(vec_hyp, vec_ref, norm_hyp, norm_ref, length_hyp, length_ref):
            '''
            Compute the cosine similarity of two vectors.
            :param vec_hyp: array of dictionary for vector corresponding to hypothesis
            :param vec_ref: array of dictionary for vector corresponding to reference
            :param norm_hyp: array of float for vector corresponding to hypothesis
            :param norm_ref: array of float for vector corresponding to reference
            :param length_hyp: int containing length of hypothesis
            :param length_ref: int containing length of reference
            :return: array of scores, one cosine similarity per n-gram order
            '''
            delta = float(length_hyp - length_ref)
            # measure cosine similarity
            val = np.array([0.0 for _ in range(self.n)])
            for n in range(self.n):
                # ngram
                for (ngram, count) in vec_hyp[n].items():
                    # vrama91 : added clipping
                    val[n] += min(vec_hyp[n][ngram],
                                  vec_ref[n][ngram]) * vec_ref[n][ngram]

                if (norm_hyp[n] != 0) and (norm_ref[n] != 0):
                    val[n] /= (norm_hyp[n] * norm_ref[n])

                assert (not math.isnan(val[n]))
                # vrama91: added a length based gaussian penalty
                val[n] *= np.e**(-(delta**2) / (2 * self.sigma**2))
            return val

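        # Note: the penalty in sim implements exp(-(l_h - l_r)^2 / (2 * sigma^2)),
        # which damps the score as hypothesis and reference lengths diverge;
        # sigma defaults to 6.0.
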
        # compute log reference length
        if self.df_mode == 'corpus':
            self.ref_len = np.log(float(len(self.crefs)))
        # elif self.df_mode == "coco-val-df":
        #     if coco option selected, use length of coco-val set
        #     self.ref_len = np.log(float(40504))

        scores = []
        for test, refs in zip(self.ctest, self.crefs):
            # compute vector for test captions
            vec, norm, length = counts2vec(test)
            # compute vector for ref captions
            score = np.array([0.0 for _ in range(self.n)])
            for ref in refs:
                vec_ref, norm_ref, length_ref = counts2vec(ref)
                score += sim(vec, vec_ref, norm, norm_ref, length, length_ref)
            # change by vrama91 - mean of ngram scores, instead of sum
            score_avg = np.mean(score)
            # divide by number of references
            score_avg /= len(refs)
            # multiply score by 10
            score_avg *= 10.0
            # append score of an image to the score list
            scores.append(score_avg)
        return scores

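    # Note: per image, compute_cider thus returns
    # 10 / |refs| * sum_over_refs(mean_over_n(clipped cosine sim * length penalty)).
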
    def compute_score(self, option=None, verbose=0):
        # compute idf
        if self.df_mode == 'corpus':
            self.document_frequency = defaultdict(float)
            self.compute_doc_freq()
            # sanity check on the document frequencies
            assert (len(self.ctest) >= max(self.document_frequency.values()))
        # compute cider score
        score = self.compute_cider()
        return np.mean(np.array(score)), np.array(score)

@@ -83,8 +83,8 @@ def label_smoothed_nll_loss(lprobs,
     lprobs = lprobs[indices]

     ntokens = loss.numel()
-    nll_loss = nll_loss.sum()
-    loss = loss.sum()
+    nll_loss = nll_loss.sum() / ntokens  # handled later in the grads
+    loss = loss.sum() / ntokens  # handled later in the grads
     if use_rdrop:
         true_batch_size = lprobs.size(0) // 2
         p = lprobs[:true_batch_size]
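For context, a minimal sketch of how the R-Drop branch above typically continues (assumed, not shown in this truncated hunk; `q` and the symmetric KL term are illustrative):

import torch.nn.functional as F

# the doubled batch is assumed to hold two dropout passes of the same examples
q = lprobs[true_batch_size:]
# symmetric KL between the two passes; both tensors are log-probabilities
rdrop_kl = 0.5 * (
    F.kl_div(p, q, reduction='sum', log_target=True)
    + F.kl_div(q, p, reduction='sum', log_target=True))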