构建TensorFlow双射器(Bijector)时出错

我是TensorFlow的新手。我想构建一个具有以下属性的双射器:它接受一个n维概率分布p(x1, x2, …, xn),并且只变换两个特定的维度i和j,使得xi' = xi,xj' = xj*exp(s(xi)) + t(xi),其中s和t是通过神经网络实现的两个函数(与下面的代码一致,s和t都以xi作为输入)。它输出p(x1, x2, …, xi', …, xj', …, xn)。我的基本代码如下所示:

  def net(x, out_size, block_w_id, block_d_id, layer_id):
      """Stack three fully connected layers and return the last layer's output.

      The first two layers have 256 units each; the final layer has
      ``out_size`` units. Every layer is created with ``reuse=tf.AUTO_REUSE``
      under a scope name derived from the three id arguments, so calling this
      function again with the same ids (inside the same enclosing variable
      scope) reuses the same variables.

      Args:
          x: Input tensor passed to the first fully connected layer.
          out_size: Number of units of the final layer.
          block_w_id: Id formatted into the layer scope names.
          block_d_id: Id formatted into the layer scope names.
          layer_id: Id formatted into the layer scope names.

      Returns:
          The output tensor of the final fully connected layer.
      """
      ids = (block_w_id, block_d_id, layer_id)
      dense = tf.contrib.layers.fully_connected

      hidden = dense(
          x, 256, reuse=tf.AUTO_REUSE,
          scope='x1_block_w_{}_block_d_{}_layer_{}'.format(*ids))
      hidden = dense(
          hidden, 256, reuse=tf.AUTO_REUSE,
          scope='x2_block_w_{}_block_d_{}_layer_{}'.format(*ids))
      # Alternative left disabled by the author:
      #     return layers.stack(x, layers.fully_connected(reuse=tf.AUTO_REUSE), [512, 512, out_size])
      return dense(
          hidden, out_size, reuse=tf.AUTO_REUSE,
          scope='y_block_w_{}_block_d_{}_layer_{}'.format(*ids))

class NVPCoupling(tfb.Bijector):
    """NVP affine coupling layer for 2D units.

    Column ``idx1`` of the input is passed through unchanged and is used to
    compute a log-scale ``s`` and a shift ``t``; column ``idx2`` is mapped to
    ``x[idx2] * exp(s(x[idx1])) + t(x[idx1])``.
    """

    def __init__(self, input_idx1, input_idx2, block_w_id=0, block_d_id=0,
                 layer_id=0, validate_args=False, name="NVPCoupling"):
        """Set up a coupling that only operates on two inputs, idx1 and idx2.

        Args:
            input_idx1: Column index of the conditioning (unchanged) input.
            input_idx2: Column index of the transformed input.
            block_w_id: Id formatted into the variable scope names.
            block_d_id: Id formatted into the variable scope names.
            layer_id: Id formatted into the variable scope names.
            validate_args: Forwarded to the ``tfb.Bijector`` constructor.
            name: Forwarded to the ``tfb.Bijector`` constructor.
        """
        super(NVPCoupling, self).__init__(
            event_ndims=1, validate_args=validate_args, name=name)
        self.idx1 = input_idx1
        self.idx2 = input_idx2
        self.block_w_id = block_w_id
        self.block_d_id = block_d_id
        self.layer_id = layer_id
        # Create the variables up front by running both networks once on a
        # dummy [1, 1] placeholder.
        dummy_input = tf.placeholder(dtype=DTYPE, shape=[1, 1])
        self.s(dummy_input)
        self.t(dummy_input)

    def s(self, xd):
        """Apply the log-scale network to ``xd`` (single output unit)."""
        ids = (self.block_w_id, self.block_d_id, self.layer_id)
        scope_name = 's_block_w_id_{}_block_d_id_{}_layer_{}'.format(*ids)
        with tf.variable_scope(scope_name, reuse=tf.AUTO_REUSE):
            return net(xd, 1, *ids)

    def t(self, xd):
        """Apply the shift network to ``xd`` (single output unit)."""
        ids = (self.block_w_id, self.block_d_id, self.layer_id)
        scope_name = 't_block_w_id_{}_block_d_id_{}_layer_{}'.format(*ids)
        with tf.variable_scope(scope_name, reuse=tf.AUTO_REUSE):
            return net(xd, 1, *ids)

    def _forward(self, x):
        """Return ``x`` with column idx2 replaced by its affine transform."""
        i, j = self.idx1, self.idx2
        x_left = x[:, i:(i + 1)]
        x_right = x[:, j:(j + 1)]
        y_right = x_right * tf.exp(self.s(x_left)) + self.t(x_left)
        # NOTE(review): the pieces below are laid out as
        # [before idx1 | idx1 | between idx1 and idx2 | idx2 | after idx2],
        # which only partitions the columns of x when idx1 < idx2.
        pieces = [
            x[:, 0:i],
            x_left,
            x[:, i + 1:j],
            y_right,
            x[:, (j + 1):],
        ]
        return tf.concat(pieces, axis=1)

    def _inverse(self, y):
        """Return ``y`` with column idx2 mapped back through the inverse."""
        i, j = self.idx1, self.idx2
        y_left = y[:, i:(i + 1)]
        y_right = y[:, j:(j + 1)]
        x_right = (y_right - self.t(y_left)) * tf.exp(-self.s(y_left))
        # NOTE(review): same layout as in _forward; it only partitions the
        # columns of y when idx1 < idx2.
        pieces = [
            y[:, 0:i],
            y_left,
            y[:, i + 1:j],
            x_right,
            y[:, (j + 1):],
        ]
        return tf.concat(pieces, axis=1)

    def _forward_log_det_jacobian(self, x):
        """Return ``s(x[:, idx1])`` summed over the event dimensions."""
        event_dims = self._event_dims_tensor(x)
        x_left = x[:, self.idx1:(self.idx1 + 1)]
        log_scale = self.s(x_left)
        return tf.reduce_sum(log_scale, axis=event_dims)

但它并没有按我预期的那样工作。当我使用这个类时,出现了一个错误:

# Base distribution: a 2-dimensional diagonal normal with zero mean.
base_dist = tfd.MultivariateNormalDiag(loc=tf.zeros([2], DTYPE))
num_bijectors = 4

# (input_idx1, input_idx2) for each coupling layer; the list position is used
# as layer_id. Note that the second layer swaps the two indices.
index_pairs = [(0, 1), (1, 0), (0, 1), (0, 1)]
bijectors = [
    NVPCoupling(input_idx1=idx1, input_idx2=idx2,
                block_w_id=0, block_d_id=0, layer_id=layer_id)
    for layer_id, (idx1, idx2) in enumerate(index_pairs)
]

# The list is reversed before being handed to tfb.Chain.
flow_bijector = tfb.Chain(list(reversed(bijectors)))
dist = tfd.TransformedDistribution(
    distribution=base_dist,
    bijector=flow_bijector)
dist.sample(1000)

出现错误如下:

---------------------------------------------------------------------------ValueError                                Traceback (most recent call last)<ipython-input-16-04da05d30f8d> in <module>()----> 1 dist.sample(1000)/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/tensorflow/python/ops/distributions/distribution.pyc in sample(self, sample_shape, seed, name)    708       samples: a `Tensor` with prepended dimensions `sample_shape`.    709     """--> 710     return self._call_sample_n(sample_shape, seed, name)    711     712   def _log_prob(self, value):/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/tensorflow/python/ops/distributions/transformed_distribution.pyc in _call_sample_n(self, sample_shape, seed, name, **kwargs)    412       # returned result.    413       y = self.bijector.forward(x, **kwargs)--> 414       y = self._set_sample_static_shape(y, sample_shape)    415     416       return y/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/tensorflow/python/ops/distributions/distribution.pyc in _set_sample_static_shape(self, x, sample_shape)   1220       shape = tensor_shape.TensorShape(   1221           [None]*(ndims - event_ndims)).concatenate(self.event_shape)-> 1222       x.set_shape(x.get_shape().merge_with(shape))   1223    1224     # Infer batch shape./Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/tensorflow/python/framework/tensor_shape.pyc in merge_with(self, other)    671         return TensorShape(new_dims)    672       except ValueError:--> 673         raise ValueError("Shapes %s and %s are not compatible" % (self, other))    674     675   def concatenate(self, other):ValueError: Shapes (1000, 4) and (?, 2) are not compatible

真心希望有专家能帮助我理解我哪里做错了以及如何修复这个问题。非常感谢!H.


回答:

我认为问题出在这里(为了清晰起见稍作重新格式化):

# Excerpt from NVPCoupling._forward: the five pieces are concatenated in the
# order [before idx1 | idx1 | between idx1 and idx2 | idx2 | after idx2].
output_tensor = tf.concat(
    [
        x[:, 0:self.idx1],
        x_left,
        x[:, self.idx1+1:self.idx2],
        y_right,
        x[:, (self.idx2+1):],
    ],
    axis=1)

这段代码假设idx2 > idx1,但在你传入idx1=1、idx2=0的情况下,这个假设并不成立。此时x[:, 0:self.idx1]已经包含了第idx2列,而x[:, (self.idx2+1):]又把第idx1列再取了一遍,于是拼接进去的元素比你预期的要多,使得第二个维度变成4而不是2。

我在_forward中打印了形状如下:

# Debug output added inside _forward: print both indices and the static shape
# of every piece that goes into the concatenation, then the result's shape.
head = x[:,0:self.idx1]
middle = x[:, self.idx1+1:self.idx2]
tail = x[:, (self.idx2+1):]
print("self.idx1: %s" % self.idx1)
print("self.idx2: %s" % self.idx2)
print("x[:,0:self.idx1]: %s" % head.shape)
print("x_left: %s" % x_left.shape)
print("x[:, self.idx1+1:self.idx2]: %s" % middle.shape)
print("x_right.shape: %s" % x_right.shape)
print("y_right: %s" % y_right.shape)
print("x[:, (self.idx2+1):]: %s" % tail.shape)
print("output_tensor.shape: %s" % output_tensor.shape)

得到的输出是:

self.idx1: 0
self.idx2: 1
x[:,0:self.idx1]: (1000, 0)
x_left: (1000, 1)
x[:, self.idx1+1:self.idx2]: (1000, 0)
x_right.shape: (1000, 1)
y_right: (1000, 1)
x[:, (self.idx2+1):]: (1000, 0)
output_tensor.shape: (1000, 2)
self.idx1: 1
self.idx2: 0
x[:,0:self.idx1]: (1000, 1)
x_left: (1000, 1)
x[:, self.idx1+1:self.idx2]: (1000, 0)
x_right.shape: (1000, 1)
y_right: (1000, 1)
x[:, (self.idx2+1):]: (1000, 1)
output_tensor.shape: (1000, 4)
self.idx1: 0
self.idx2: 1
x[:,0:self.idx1]: (1000, 0)
x_left: (1000, 1)
x[:, self.idx1+1:self.idx2]: (1000, 0)
x_right.shape: (1000, 1)
y_right: (1000, 1)
x[:, (self.idx2+1):]: (1000, 2)
output_tensor.shape: (1000, 4)
self.idx1: 0
self.idx2: 1
x[:,0:self.idx1]: (1000, 0)
x_left: (1000, 1)
x[:, self.idx1+1:self.idx2]: (1000, 0)
x_right.shape: (1000, 1)
y_right: (1000, 1)
x[:, (self.idx2+1):]: (1000, 2)
output_tensor.shape: (1000, 4)

我认为你需要在idx1 > idx2的情况下更仔细地考虑如何重新组装拼接的部分。

希望这能让你重新回到正轨!

Related Posts

L1-L2正则化的不同系数

我想对网络的权重同时应用L1和L2正则化。然而,我找不…

使用scikit-learn的无监督方法将列表分类成不同组别,有没有办法?

我有一系列实例,每个实例都有一份列表,代表它所遵循的不…

f1_score metric in lightgbm

我想使用自定义指标f1_score来训练一个lgb模型…

通过相关系数矩阵进行特征选择

我在测试不同的算法时,如逻辑回归、高斯朴素贝叶斯、随机…

可以将机器学习库用于流式输入和输出吗?

已关闭。此问题需要更加聚焦。目前不接受回答。 想要改进…

在TensorFlow中,queue.dequeue_up_to()方法的用途是什么?

我对这个方法感到非常困惑,特别是当我发现这个令人费解的…

发表回复

您的邮箱地址不会被公开。 必填项已用 * 标注