Source code for bigdl.nn.criterion

#
# Copyright 2016 The BigDL Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#


import sys

from bigdl.util.common import JavaValue
from bigdl.util.common import callBigDlFunc
from bigdl.util.common import JTensor
from bigdl.nn.layer import Layer
import numpy as np

if sys.version >= '3':
    long = int
    unicode = str


class Criterion(JavaValue):
    """
    Criterion is helpful to train a neural network. Given an input and a
    target, it computes a gradient according to a given loss function.
    """
    def __init__(self, jvalue, bigdl_type, *args):
        self.value = jvalue if jvalue else callBigDlFunc(
            bigdl_type, JavaValue.jvm_class_constructor(self), *args)
        self.bigdl_type = bigdl_type

    def __str__(self):
        return self.value.toString()

    def forward(self, input, target):
        """
        NB: It's for debug only, please use optimizer.optimize() in production.
        Takes an input object, and computes the corresponding loss of the
        criterion, compared with `target`.

        :param input: ndarray or list of ndarray
        :param target: ndarray or list of ndarray
        :return: value of loss
        """
        jinput, input_is_table = Layer.check_input(input)
        jtarget, target_is_table = Layer.check_input(target)
        output = callBigDlFunc(self.bigdl_type,
                               "criterionForward",
                               self.value,
                               jinput,
                               input_is_table,
                               jtarget,
                               target_is_table)
        return output

    def backward(self, input, target):
        """
        NB: It's for debug only, please use optimizer.optimize() in production.
        Performs a back-propagation step through the criterion, with respect to
        the given input.

        :param input: ndarray or list of ndarray
        :param target: ndarray or list of ndarray
        :return: ndarray
        """
        jinput, input_is_table = Layer.check_input(input)
        jtarget, target_is_table = Layer.check_input(target)
        output = callBigDlFunc(self.bigdl_type,
                               "criterionBackward",
                               self.value,
                               jinput,
                               input_is_table,
                               jtarget,
                               target_is_table)
        return Layer.convert_output(output)

    @classmethod
    def of(cls, jcriterion, bigdl_type="float"):
        """
        Create a Python Criterion from a Java criterion object.

        :param jcriterion: A Java criterion object which is created by Py4j
        :return: a criterion.
        """
        criterion = Criterion(bigdl_type, jcriterion)
        criterion.value = jcriterion
        criterion.bigdl_type = bigdl_type
        return criterion

class ClassNLLCriterion(Criterion):
    '''
    The negative log likelihood criterion. It is useful to train a
    classification problem with n classes. If provided, the optional argument
    weights should be a 1D Tensor assigning weight to each of the classes.
    This is particularly useful when you have an unbalanced training set.

    The input given through a forward() is expected to contain
    log-probabilities/probabilities of each class: input has to be a 1D Tensor
    of size n. Obtaining log-probabilities/probabilities in a neural network is
    easily achieved by adding a LogSoftMax/SoftMax layer as the last layer of
    your neural network. You may use CrossEntropyCriterion instead, if you
    prefer not to add an extra layer to your network. This criterion expects a
    class index (1 to the number of classes) as target when calling
    forward(input, target) and backward(input, target).

    In the log-probabilities case, the loss can be described as:
```
    loss(x, class) = -x[class]
```
    or, in the case of the weights argument, it is specified as follows:
```
    loss(x, class) = -weights[class] * x[class]
```
    Due to the behaviour of the backend code, it is necessary to set
    sizeAverage to false when calculating losses in non-batch mode.

    Note that if the target is `-1`, the training process will skip this
    sample. In other words, the forward process will return zero output and
    the backward process will also return zero `gradInput`.

    By default, the losses are averaged over observations for each minibatch.
    However, if the field sizeAverage is set to false, the losses are instead
    summed for each minibatch.

    In particular, when weights=None, size_average=True and
    logProbAsInput=False, this is the same as `sparse_categorical_crossentropy`
    loss in keras.

    :param weights: weights of each class
    :param size_average: whether to average or not
    :param logProbAsInput: indicating whether to accept log-probabilities or
                           probabilities as input.

    >>> np.random.seed(123)
    >>> weights = np.random.uniform(0, 1, (2,)).astype("float32")
    >>> classNLLCriterion = ClassNLLCriterion(weights, True, True)
    creating: createClassNLLCriterion
    >>> classNLLCriterion = ClassNLLCriterion()
    creating: createClassNLLCriterion
    '''
    def __init__(self, weights=None, size_average=True, logProbAsInput=True,
                 bigdl_type="float"):
        super(ClassNLLCriterion, self).__init__(None, bigdl_type,
                                                JTensor.from_ndarray(weights),
                                                size_average, logProbAsInput)

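
# Illustrative usage sketch (not part of the original module): targets hold
# 1-based class indices, and with logProbAsInput=False the input holds
# probabilities rather than log-probabilities. Assumes a SparkContext and
# init_engine() have been set up, as in _test() below.
def _example_class_nll_criterion():
    probs = np.array([[0.6, 0.3, 0.1],
                      [0.2, 0.5, 0.3]], dtype="float32")
    target = np.array([1.0, 2.0], dtype="float32")  # classes 1 and 2
    criterion = ClassNLLCriterion(logProbAsInput=False)
    loss = criterion.forward(probs, target)
    grad = criterion.backward(probs, target)
    return loss, grad
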
class MSECriterion(Criterion):
    '''
    Creates a criterion that measures the mean squared error between n elements
    in the input x and output y:
```
    loss(x, y) = 1/n \sum |x_i - y_i|^2
```
    If x and y are d-dimensional Tensors with a total of n elements,
    the sum operation still operates over all the elements, and divides by n.
    The two Tensors must have the same number of elements (but their sizes
    might be different).
    The division by n can be avoided if one sets the internal variable
    sizeAverage to false.
    By default, the losses are averaged over observations for each minibatch.
    However, if the field sizeAverage is set to false, the losses are instead
    summed.

    >>> mSECriterion = MSECriterion()
    creating: createMSECriterion
    '''
    def __init__(self, bigdl_type="float"):
        super(MSECriterion, self).__init__(None, bigdl_type)

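
# Illustrative usage sketch (not part of the original module): forward()
# returns the loss value and backward() the gradient with respect to the input.
# Assumes init_engine() has been called, as in _test() below.
def _example_mse_criterion():
    x = np.array([1.0, 2.0, 3.0], dtype="float32")
    y = np.array([1.5, 2.0, 2.0], dtype="float32")
    criterion = MSECriterion()
    loss = criterion.forward(x, y)   # mean of squared differences
    grad = criterion.backward(x, y)  # gradient w.r.t. x
    return loss, grad
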
class AbsCriterion(Criterion):
    '''
    Measures the mean absolute value of the element-wise difference between
    input and target.

    >>> absCriterion = AbsCriterion(True)
    creating: createAbsCriterion
    '''
    def __init__(self, size_average=True, bigdl_type="float"):
        super(AbsCriterion, self).__init__(None, bigdl_type, size_average)

class ClassSimplexCriterion(Criterion):
    '''
    ClassSimplexCriterion implements a criterion for classification.
    It learns an embedding per class, where each class' embedding is a point
    on an (N-1)-dimensional simplex, where N is the number of classes.

    :param nClasses: the number of classes.

    >>> classSimplexCriterion = ClassSimplexCriterion(2)
    creating: createClassSimplexCriterion
    '''
    def __init__(self, n_classes, bigdl_type="float"):
        super(ClassSimplexCriterion, self).__init__(None, bigdl_type, n_classes)

class CosineDistanceCriterion(Criterion):
    """
    Creates a criterion that measures the loss given an input and target,
    Loss = 1 - cos(x, y)

    >>> cosineDistanceCriterion = CosineDistanceCriterion(True)
    creating: createCosineDistanceCriterion
    >>> cosineDistanceCriterion.forward(np.array([1.0, 2.0, 3.0, 4.0, 5.0]),
    ...                                 np.array([5.0, 4.0, 3.0, 2.0, 1.0]))
    0.07272728
    """
    def __init__(self, size_average=True, bigdl_type="float"):
        super(CosineDistanceCriterion, self).__init__(None, bigdl_type,
                                                      size_average)

class CosineEmbeddingCriterion(Criterion):
    """
    Creates a criterion that measures the loss given an input x = {x1, x2},
    a table of two Tensors, and a Tensor label y with values 1 or -1.

    :param margin: a number from -1 to 1, 0 to 0.5 is suggested

    >>> cosineEmbeddingCriterion = CosineEmbeddingCriterion(1e-5, True)
    creating: createCosineEmbeddingCriterion
    >>> cosineEmbeddingCriterion.forward([np.array([1.0, 2.0, 3.0, 4.0, 5.0]),
    ...                                   np.array([5.0, 4.0, 3.0, 2.0, 1.0])],
    ...                                  [np.ones(5)])
    0.0
    """
    def __init__(self, margin=0.0, size_average=True, bigdl_type="float"):
        super(CosineEmbeddingCriterion, self).__init__(None, bigdl_type,
                                                       margin, size_average)

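
# Illustrative usage sketch (not part of the original module): the input is a
# table (Python list) of two tensors and the target mirrors the doctest above.
# Assumes init_engine() has been called.
def _example_cosine_embedding_criterion():
    x1 = np.array([1.0, 2.0, 3.0], dtype="float32")
    x2 = np.array([3.0, 2.0, 1.0], dtype="float32")
    y = np.ones(3).astype("float32")  # label 1: the two vectors should be similar
    criterion = CosineEmbeddingCriterion(margin=0.0)
    return criterion.forward([x1, x2], [y])
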
class DistKLDivCriterion(Criterion):
    '''
    The Kullback-Leibler divergence criterion.

    :param size_average: whether to average the loss over observations

    >>> distKLDivCriterion = DistKLDivCriterion(True)
    creating: createDistKLDivCriterion
    '''
    def __init__(self, size_average=True, bigdl_type="float"):
        super(DistKLDivCriterion, self).__init__(None, bigdl_type,
                                                 size_average)

class CategoricalCrossEntropy(Criterion):
    """
    This criterion is the same as the cross entropy criterion, except that it
    takes a one-hot format target tensor.

    >>> cce = CategoricalCrossEntropy()
    creating: createCategoricalCrossEntropy
    """
    def __init__(self, bigdl_type="float"):
        super(CategoricalCrossEntropy, self).__init__(None, bigdl_type)

class HingeEmbeddingCriterion(Criterion):
    '''
    Creates a criterion that measures the loss given an input x which is a
    1-dimensional vector and a label y (1 or -1). This is usually used for
    measuring whether two inputs are similar or dissimilar, e.g. using the L1
    pairwise distance, and is typically used for learning nonlinear embeddings
    or semi-supervised learning.

    If x and y are n-dimensional Tensors, the sum operation still operates over
    all the elements, and divides by n (this can be avoided if one sets the
    internal variable sizeAverage to false). The margin has a default value of
    1, or can be set in the constructor.

    >>> hingeEmbeddingCriterion = HingeEmbeddingCriterion(1e-5, True)
    creating: createHingeEmbeddingCriterion
    '''
    def __init__(self, margin=1.0, size_average=True, bigdl_type="float"):
        super(HingeEmbeddingCriterion, self).__init__(None, bigdl_type,
                                                      margin, size_average)

class L1HingeEmbeddingCriterion(Criterion):
    '''
    Creates a criterion that measures the loss given an input x = {x1, x2},
    a table of two Tensors, and a label y (1 or -1).

    :param margin: the margin, default is 1

    >>> l1HingeEmbeddingCriterion = L1HingeEmbeddingCriterion(1e-5)
    creating: createL1HingeEmbeddingCriterion
    >>> l1HingeEmbeddingCriterion = L1HingeEmbeddingCriterion()
    creating: createL1HingeEmbeddingCriterion
    >>> input1 = np.array([2.1, -2.2])
    >>> input2 = np.array([-0.55, 0.298])
    >>> input = [input1, input2]
    >>> target = np.array([1.0])
    >>> result = l1HingeEmbeddingCriterion.forward(input, target)
    >>> (result == 5.148)
    True
    '''
    def __init__(self, margin=1.0, bigdl_type="float"):
        super(L1HingeEmbeddingCriterion, self).__init__(None, bigdl_type, margin)

class MarginCriterion(Criterion):
    '''
    Creates a criterion that optimizes a two-class classification hinge loss
    (margin-based loss) between input x (a Tensor of dimension 1) and output y.
    When margin = 1, size_average = True and squared = False, this is the same
    as hinge loss in keras;
    when margin = 1, size_average = False and squared = True, this is the same
    as squared_hinge loss in keras.

    :param margin: if unspecified, is by default 1.
    :param size_average: size average in a mini-batch
    :param squared: whether to calculate the squared hinge loss

    >>> marginCriterion = MarginCriterion(1e-5, True, False)
    creating: createMarginCriterion
    '''
    def __init__(self, margin=1.0, size_average=True, squared=False,
                 bigdl_type="float"):
        super(MarginCriterion, self).__init__(None, bigdl_type,
                                              margin, size_average, squared)

class MarginRankingCriterion(Criterion):
    '''
    Creates a criterion that measures the loss given an input x = {x1, x2},
    a table of two Tensors of size 1 (they contain only scalars), and a label y
    (1 or -1). In batch mode, x is a table of two Tensors of size batchsize,
    and y is a Tensor of size batchsize containing 1 or -1 for each
    corresponding pair of elements in the input Tensor.
    If y == 1, then it is assumed the first input should be ranked higher
    (have a larger value) than the second input, and vice-versa for y == -1.

    :param margin: the margin, default is 1

    >>> marginRankingCriterion = MarginRankingCriterion(1e-5, True)
    creating: createMarginRankingCriterion
    '''
    def __init__(self, margin=1.0, size_average=True, bigdl_type="float"):
        super(MarginRankingCriterion, self).__init__(None, bigdl_type,
                                                     margin, size_average)

class MultiCriterion(Criterion):
    '''
    A weighted sum of other criterions, each applied to the same input and target.

    >>> multiCriterion = MultiCriterion()
    creating: createMultiCriterion
    >>> mSECriterion = MSECriterion()
    creating: createMSECriterion
    >>> multiCriterion = multiCriterion.add(mSECriterion)
    >>> multiCriterion = multiCriterion.add(mSECriterion)
    '''
    def __init__(self, bigdl_type="float"):
        super(MultiCriterion, self).__init__(None, bigdl_type)

    def add(self, criterion, weight=1.0):
        self.value.add(criterion.value, weight)
        return self

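
# Illustrative usage sketch (not part of the original module): combine two
# criteria on the same input/target with different weights. Assumes
# init_engine() has been called.
def _example_multi_criterion():
    compound = MultiCriterion()
    compound = compound.add(MSECriterion(), weight=1.0)
    compound = compound.add(AbsCriterion(), weight=0.5)
    x = np.array([1.0, 2.0, 3.0], dtype="float32")
    y = np.array([1.5, 2.0, 2.5], dtype="float32")
    return compound.forward(x, y)
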
class MultiLabelMarginCriterion(Criterion):
    '''
    Creates a criterion that optimizes a multi-class multi-classification hinge
    loss (margin-based loss) between input x and output y (which is a Tensor of
    target class indices).

    :param size_average: size average in a mini-batch

    >>> multiLabelMarginCriterion = MultiLabelMarginCriterion(True)
    creating: createMultiLabelMarginCriterion
    '''
    def __init__(self, size_average=True, bigdl_type="float"):
        super(MultiLabelMarginCriterion, self).__init__(None, bigdl_type,
                                                        size_average)

class ParallelCriterion(Criterion):
    '''
    ParallelCriterion is a weighted sum of other criterions, each applied to a
    different input and target. Set repeatTarget = true to share the target for
    all criterions.

    Use the add(criterion[, weight]) method to add a criterion, where weight is
    a scalar (default 1).

    :param repeat_target: Whether to share the target for all criterions.

    >>> parallelCriterion = ParallelCriterion(True)
    creating: createParallelCriterion
    >>> mSECriterion = MSECriterion()
    creating: createMSECriterion
    >>> parallelCriterion = parallelCriterion.add(mSECriterion)
    >>> parallelCriterion = parallelCriterion.add(mSECriterion)
    '''
    def __init__(self, repeat_target=False, bigdl_type="float"):
        super(ParallelCriterion, self).__init__(None, bigdl_type, repeat_target)

    def add(self, criterion, weight=1.0):
        self.value.add(criterion.value, weight)
        return self

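
# Illustrative usage sketch (not part of the original module): each criterion
# gets its own input/target pair, passed as tables (Python lists). Assumes
# init_engine() has been called.
def _example_parallel_criterion():
    parallel = ParallelCriterion()
    parallel = parallel.add(MSECriterion(), weight=1.0)
    parallel = parallel.add(AbsCriterion(), weight=2.0)
    inputs = [np.array([1.0, 2.0], dtype="float32"),
              np.array([3.0, 4.0], dtype="float32")]
    targets = [np.array([1.0, 2.0], dtype="float32"),
               np.array([4.0, 3.0], dtype="float32")]
    return parallel.forward(inputs, targets)
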
class KLDCriterion(Criterion):
    '''
    Computes the KL-divergence of the input normal distribution to a standard
    normal distribution. The input has to be a table: the first element of the
    input is the mean of the distribution, the second element is the
    log_variance of the distribution. The input distribution is assumed to be
    diagonal.

    >>> KLDCriterion = KLDCriterion(True)
    creating: createKLDCriterion
    '''
    def __init__(self, size_average=True, bigdl_type="float"):
        super(KLDCriterion, self).__init__(None, bigdl_type, size_average)

class GaussianCriterion(Criterion):
    '''
    Computes the log-likelihood of a sample x given a Gaussian distribution p.

    >>> GaussianCriterion = GaussianCriterion()
    creating: createGaussianCriterion
    '''
    def __init__(self, bigdl_type="float"):
        super(GaussianCriterion, self).__init__(None, bigdl_type)

class SmoothL1Criterion(Criterion):
    '''
    Creates a criterion that can be thought of as a smooth version of the
    AbsCriterion. It uses a squared term if the absolute element-wise error
    falls below 1. It is less sensitive to outliers than the MSECriterion and
    in some cases prevents exploding gradients
    (e.g. see "Fast R-CNN" paper by Ross Girshick).
```
                          | 0.5 * (x_i - y_i)^2,   if |x_i - y_i| < 1
    loss(x, y) = 1/n \sum |
                          | |x_i - y_i| - 0.5,     otherwise
```
    If x and y are d-dimensional Tensors with a total of n elements,
    the sum operation still operates over all the elements, and divides by n.
    The division by n can be avoided if one sets the internal variable
    sizeAverage to false.

    :param size_average: whether to average the loss

    >>> smoothL1Criterion = SmoothL1Criterion(True)
    creating: createSmoothL1Criterion
    '''
    def __init__(self, size_average=True, bigdl_type="float"):
        super(SmoothL1Criterion, self).__init__(None, bigdl_type, size_average)

class SmoothL1CriterionWithWeights(Criterion):
    '''
    A smooth version of the AbsCriterion.
    It uses a squared term if the absolute element-wise error falls below 1.
    It is less sensitive to outliers than the MSECriterion and in some cases
    prevents exploding gradients (e.g. see "Fast R-CNN" paper by Ross Girshick).
```
    d = (x - y) * w_in
    loss(x, y, w_in, w_out)
                   | 0.5 * (sigma * d_i)^2 * w_out,         if |d_i| < 1 / sigma / sigma
        = 1/n \sum |
                   | (|d_i| - 0.5 / sigma / sigma) * w_out, otherwise
```
    >>> smoothL1CriterionWithWeights = SmoothL1CriterionWithWeights(1e-5, 1)
    creating: createSmoothL1CriterionWithWeights
    '''
    def __init__(self, sigma, num=0, bigdl_type="float"):
        super(SmoothL1CriterionWithWeights, self).__init__(None, bigdl_type,
                                                           sigma, num)

class SoftmaxWithCriterion(Criterion):
    '''
    Computes the multinomial logistic loss for a one-of-many classification
    task, passing real-valued predictions through a softmax to get a
    probability distribution over classes. It should be preferred over separate
    SoftmaxLayer + MultinomialLogisticLossLayer as its gradient computation is
    more numerically stable.

    :param ignoreLabel: (optional) Specify a label value that should be ignored
                        when computing the loss.
    :param normalizeMode: How to normalize the output loss.

    >>> softmaxWithCriterion = SoftmaxWithCriterion()
    creating: createSoftmaxWithCriterion
    >>> softmaxWithCriterion = SoftmaxWithCriterion(1, "FULL")
    creating: createSoftmaxWithCriterion
    '''
    def __init__(self, ignore_label=None, normalize_mode="VALID",
                 bigdl_type="float"):
        super(SoftmaxWithCriterion, self).__init__(None, bigdl_type,
                                                   ignore_label, normalize_mode)

class TimeDistributedMaskCriterion(Criterion):
    '''
    This class is intended to support inputs with 3 or more dimensions.
    It applies the provided criterion to every temporal slice of an input.
    In addition, it supports a padding mask.

    E.g. if the target is
    [ [-1, 1, 2, 3, -1],
      [5, 4, 3, -1, -1] ],
    and the paddingValue property is set to -1, then the loss at the -1
    positions is not accumulated and the total loss is only divided by 6
    (not counting the -1 entries; in this case, we are only interested in
    1, 2, 3, 5, 4, 3).

    :param criterion: embedded criterion
    :param padding_value: padding value

    >>> td = TimeDistributedMaskCriterion(ClassNLLCriterion())
    creating: createClassNLLCriterion
    creating: createTimeDistributedMaskCriterion
    '''
    def __init__(self, criterion, padding_value=0, bigdl_type="float"):
        super(TimeDistributedMaskCriterion, self).__init__(
            None, bigdl_type, criterion, padding_value)

class TimeDistributedCriterion(Criterion):
    '''
    This class is intended to support inputs with 3 or more dimensions.
    It applies the provided criterion to every temporal slice of an input.

    :param criterion: embedded criterion
    :param size_average: whether to divide the loss by the sequence length

    >>> td = TimeDistributedCriterion(ClassNLLCriterion())
    creating: createClassNLLCriterion
    creating: createTimeDistributedCriterion
    '''
    def __init__(self, criterion, size_average=False, dimension=2,
                 bigdl_type="float"):
        super(TimeDistributedCriterion, self).__init__(
            None, bigdl_type, criterion, size_average, dimension)

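
# Illustrative usage sketch (not part of the original module): wrap
# ClassNLLCriterion so the loss is applied to every time step of a
# [batch, time, nClasses] input against [batch, time] class indices.
# Assumes init_engine() has been called.
def _example_time_distributed_criterion():
    log_probs = np.log(np.array([[[0.6, 0.4], [0.3, 0.7]],
                                 [[0.8, 0.2], [0.5, 0.5]]], dtype="float32"))
    target = np.array([[1.0, 2.0],
                       [1.0, 1.0]], dtype="float32")
    criterion = TimeDistributedCriterion(ClassNLLCriterion())
    return criterion.forward(log_probs, target)
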
class CrossEntropyCriterion(Criterion):
    """
    This criterion combines LogSoftMax and ClassNLLCriterion in one single class.

    :param weights: A tensor assigning weight to each of the classes

    >>> np.random.seed(123)
    >>> weights = np.random.uniform(0, 1, (2,)).astype("float32")
    >>> cec = CrossEntropyCriterion(weights)
    creating: createCrossEntropyCriterion
    >>> cec = CrossEntropyCriterion()
    creating: createCrossEntropyCriterion
    """
    def __init__(self, weights=None, size_average=True, bigdl_type="float"):
        super(CrossEntropyCriterion, self).__init__(None, bigdl_type,
                                                    JTensor.from_ndarray(weights),
                                                    size_average)

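
# Illustrative usage sketch (not part of the original module): raw scores go
# in, LogSoftMax is applied internally, and the target holds 1-based class
# indices. Assumes init_engine() has been called.
def _example_cross_entropy_criterion():
    scores = np.array([[2.0, 0.5, -1.0],
                       [0.1, 1.5, 0.3]], dtype="float32")
    target = np.array([1.0, 2.0], dtype="float32")
    criterion = CrossEntropyCriterion()
    return criterion.forward(scores, target)
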
class BCECriterion(Criterion):
    '''
    Creates a criterion that measures the Binary Cross Entropy
    between the target and the output.

    :param weights: weights for each class
    :param sizeAverage: whether to average the loss or not

    >>> np.random.seed(123)
    >>> weights = np.random.uniform(0, 1, (2,)).astype("float32")
    >>> bCECriterion = BCECriterion(weights)
    creating: createBCECriterion
    >>> bCECriterion = BCECriterion()
    creating: createBCECriterion
    '''
    def __init__(self, weights=None, size_average=True, bigdl_type="float"):
        super(BCECriterion, self).__init__(None, bigdl_type,
                                           JTensor.from_ndarray(weights),
                                           size_average)

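
# Illustrative usage sketch (not part of the original module): the input holds
# probabilities in (0, 1) (e.g. Sigmoid output) and the target holds 0/1
# labels. Assumes init_engine() has been called.
def _example_bce_criterion():
    probs = np.array([0.9, 0.2, 0.7], dtype="float32")
    labels = np.array([1.0, 0.0, 1.0], dtype="float32")
    criterion = BCECriterion()
    return criterion.forward(probs, labels)
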
class MultiLabelSoftMarginCriterion(Criterion):
    '''
    A MultiLabel multiclass criterion based on sigmoid. The loss is:
```
    l(x, y) = - sum_i y[i] * log(p[i]) + (1 - y[i]) * log (1 - p[i])
```
    where p[i] = exp(x[i]) / (1 + exp(x[i])),
    and with weights:
```
    l(x, y) = - sum_i weights[i] * (y[i] * log(p[i]) + (1 - y[i]) * log (1 - p[i]))
```
    >>> np.random.seed(123)
    >>> weights = np.random.uniform(0, 1, (2,)).astype("float32")
    >>> multiLabelSoftMarginCriterion = MultiLabelSoftMarginCriterion(weights)
    creating: createMultiLabelSoftMarginCriterion
    >>> multiLabelSoftMarginCriterion = MultiLabelSoftMarginCriterion()
    creating: createMultiLabelSoftMarginCriterion
    '''
    def __init__(self, weights=None, size_average=True, bigdl_type="float"):
        super(MultiLabelSoftMarginCriterion, self).__init__(None, bigdl_type,
                                                            JTensor.from_ndarray(weights),
                                                            size_average)

class MultiMarginCriterion(Criterion):
    '''
    Creates a criterion that optimizes a multi-class classification hinge loss
    (margin-based loss) between input x and output y (which is a target class
    index).

    :param p: the norm of the loss (1 or 2), default is 1
    :param weights: weights of each class
    :param margin: the margin, default is 1
    :param size_average: whether to average the loss

    >>> np.random.seed(123)
    >>> weights = np.random.uniform(0, 1, (2,)).astype("float32")
    >>> multiMarginCriterion = MultiMarginCriterion(1,weights)
    creating: createMultiMarginCriterion
    >>> multiMarginCriterion = MultiMarginCriterion()
    creating: createMultiMarginCriterion
    '''
    def __init__(self, p=1, weights=None, margin=1.0, size_average=True,
                 bigdl_type="float"):
        super(MultiMarginCriterion, self).__init__(None, bigdl_type,
                                                   p,
                                                   JTensor.from_ndarray(weights),
                                                   margin, size_average)

class SoftMarginCriterion(Criterion):
    """
    Creates a criterion that optimizes a two-class classification logistic loss
    between input x (a Tensor of dimension 1) and output y (which is a tensor
    containing either 1s or -1s).
```
    loss(x, y) = sum_i (log(1 + exp(-y[i]*x[i]))) / x:nElement()
```
    :param size_average: The normalization by the number of elements in the
                         input can be disabled by setting this to False.

    >>> softMarginCriterion = SoftMarginCriterion(False)
    creating: createSoftMarginCriterion
    >>> softMarginCriterion = SoftMarginCriterion()
    creating: createSoftMarginCriterion
    """
    def __init__(self, size_average=True, bigdl_type="float"):
        super(SoftMarginCriterion, self).__init__(None, bigdl_type, size_average)

class DiceCoefficientCriterion(Criterion):
    '''
    The Dice-Coefficient criterion.
    input: Tensor, target: Tensor
```
    return:      2 * (input intersection target)
            1 - ----------------------------------
                      input union target
```
    >>> diceCoefficientCriterion = DiceCoefficientCriterion(size_average = True, epsilon = 1.0)
    creating: createDiceCoefficientCriterion
    >>> diceCoefficientCriterion = DiceCoefficientCriterion()
    creating: createDiceCoefficientCriterion
    '''
    def __init__(self, size_average=True, epsilon=1.0, bigdl_type="float"):
        super(DiceCoefficientCriterion, self).__init__(None, bigdl_type,
                                                       size_average, epsilon)

class L1Cost(Criterion):
    '''
    Computes the L1 norm of the input; the gradient is the sign of the input.

    >>> l1Cost = L1Cost()
    creating: createL1Cost
    '''
    def __init__(self, bigdl_type="float"):
        super(L1Cost, self).__init__(None, bigdl_type)

class CosineProximityCriterion(Criterion):
    '''
    Computes the negative of the mean cosine proximity between predictions and
    targets.
```
    x'(i) = x(i) / sqrt(max(sum(x(i)^2), 1e-12))
    y'(i) = y(i) / sqrt(max(sum(y(i)^2), 1e-12))
    cosine_proximity(x, y) = sum_i(-1 * x'(i) * y'(i))
```
    >>> cosineProximityCriterion = CosineProximityCriterion()
    creating: createCosineProximityCriterion
    '''
    def __init__(self, bigdl_type="float"):
        super(CosineProximityCriterion, self).__init__(None, bigdl_type)

class MeanAbsolutePercentageCriterion(Criterion):
    '''
    This method is the same as the `mean_absolute_percentage_error` loss in
    keras. It calculates
    diff = K.abs((y - x) / K.clip(K.abs(y), K.epsilon(), Double.MaxValue))
    and returns 100 * K.mean(diff) as output. Here, x and y may or may not
    have a batch dimension.

    >>> error = MeanAbsolutePercentageCriterion()
    creating: createMeanAbsolutePercentageCriterion
    '''
    def __init__(self, bigdl_type="float"):
        super(MeanAbsolutePercentageCriterion, self).__init__(None, bigdl_type)

class MeanSquaredLogarithmicCriterion(Criterion):
    '''
    This method is the same as the `mean_squared_logarithmic_error` loss in
    keras. It calculates:
    first_log = K.log(K.clip(y, K.epsilon(), Double.MaxValue) + 1.)
    second_log = K.log(K.clip(x, K.epsilon(), Double.MaxValue) + 1.)
    and outputs K.mean(K.square(first_log - second_log)). Here, x and y may or
    may not have a batch dimension.

    >>> error = MeanSquaredLogarithmicCriterion()
    creating: createMeanSquaredLogarithmicCriterion
    '''
    def __init__(self, bigdl_type="float"):
        super(MeanSquaredLogarithmicCriterion, self).__init__(None, bigdl_type)

class KullbackLeiblerDivergenceCriterion(Criterion):
    '''
    Computes the Kullback-Leibler divergence error for input and target.
    This method is the same as the `kullback_leibler_divergence` loss in keras.
    The loss is calculated as:
    y_true = K.clip(input, K.epsilon(), 1)
    y_pred = K.clip(target, K.epsilon(), 1)
    and the output is K.sum(y_true * K.log(y_true / y_pred), axis=-1).

    >>> error = KullbackLeiblerDivergenceCriterion()
    creating: createKullbackLeiblerDivergenceCriterion
    '''
    def __init__(self, bigdl_type="float"):
        super(KullbackLeiblerDivergenceCriterion, self).__init__(None, bigdl_type)

class PoissonCriterion(Criterion):
    '''
    Computes the Poisson error for input and target. The loss is calculated as:
    mean(input - target * K.log(input + K.epsilon()), axis=-1)

    >>> error = PoissonCriterion()
    creating: createPoissonCriterion
    '''
    def __init__(self, bigdl_type="float"):
        super(PoissonCriterion, self).__init__(None, bigdl_type)

class TransformerCriterion(Criterion):
    '''
    A criterion that takes two modules to transform the input and target, and
    one criterion to compute the loss with the transformed input and target.

    This criterion can be used to construct complex criteria. For example, the
    `inputTransformer` and `targetTransformer` can be pre-trained CNN networks,
    and we can use the networks' output to compute the high-level feature
    reconstruction loss, which is commonly used in areas like neural style
    transfer (https://arxiv.org/abs/1508.06576), texture synthesis
    (https://arxiv.org/abs/1505.07376), etc.

    >>> trans = TransformerCriterion(MSECriterion())
    creating: createMSECriterion
    creating: createTransformerCriterion
    '''
    def __init__(self, criterion, input_transformer=None,
                 target_transformer=None, bigdl_type="float"):
        super(TransformerCriterion, self).__init__(None, bigdl_type,
                                                   criterion,
                                                   input_transformer,
                                                   target_transformer)

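
# Illustrative usage sketch (not part of the original module): pass both input
# and target through (here trivial) transformer modules before computing MSE.
# It assumes bigdl.nn.layer provides an Identity layer and that init_engine()
# has been called.
def _example_transformer_criterion():
    from bigdl.nn.layer import Identity
    criterion = TransformerCriterion(MSECriterion(),
                                     input_transformer=Identity(),
                                     target_transformer=Identity())
    x = np.array([1.0, 2.0, 3.0], dtype="float32")
    y = np.array([1.0, 2.5, 2.0], dtype="float32")
    return criterion.forward(x, y)
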
class DotProductCriterion(Criterion):
    '''
    Computes the dot product of the input and target tensor.
    Input and target are required to have the same size.

    :param size_average: whether to average over each observation in the same batch

    >>> dp = DotProductCriterion(False)
    creating: createDotProductCriterion
    '''
    def __init__(self, size_average=False, bigdl_type="float"):
        super(DotProductCriterion, self).__init__(None, bigdl_type, size_average)

class PGCriterion(Criterion):
    '''
    The criterion to compute the negative policy gradient given a multinomial
    distribution and the sampled action and reward.

    The input to this criterion should be a 2-D tensor representing a batch of
    multinomial distributions; the target should also be a 2-D tensor with the
    same size as the input, representing the sampled action and
    reward/advantage: the index of the non-zero element in the vector
    represents the sampled action, and the non-zero element itself represents
    the reward. If the action space is large, you should consider using a
    SparseTensor for the target.

    The loss computed is simply the standard policy gradient,
    loss = - 1/n * sum(R_{n} dot_product log(P_{n})),
    where R_{n} is the reward vector and P_{n} is the input distribution.

    :param sizeAverage: whether to average over each observation in the same batch

    >>> pg = PGCriterion()
    creating: createPGCriterion
    '''
    def __init__(self, sizeAverage=False, bigdl_type="float"):
        super(PGCriterion, self).__init__(None, bigdl_type, sizeAverage)

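
# Illustrative usage sketch (not part of the original module): each input row
# is a distribution over actions; in the target, the position of the non-zero
# entry is the sampled action and its value is the reward/advantage.
# Assumes init_engine() has been called.
def _example_pg_criterion():
    dist = np.array([[0.7, 0.2, 0.1],
                     [0.1, 0.6, 0.3]], dtype="float32")
    reward = np.array([[1.0, 0.0, 0.0],
                       [0.0, -0.5, 0.0]], dtype="float32")
    criterion = PGCriterion()
    return criterion.forward(dist, reward)
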
def _test():
    import doctest
    from pyspark import SparkContext
    from bigdl.nn import criterion
    from bigdl.util.common import init_engine
    from bigdl.util.common import create_spark_conf
    globs = criterion.__dict__.copy()
    sc = SparkContext(master="local[4]", appName="test criterion",
                      conf=create_spark_conf())
    globs['sc'] = sc
    init_engine()
    (failure_count, test_count) = doctest.testmod(globs=globs,
                                                  optionflags=doctest.ELLIPSIS)
    if failure_count:
        exit(-1)


if __name__ == "__main__":
    _test()