Tensorflow中的非确定性行为和输出

我在Tensorflow(2.3版本)中编写了一段代码来执行自定义操作,但遇到了问题。虽然代码通常运行正常,但在某些情况下,即使输入相同,也会抛出意外的错误和异常。

我尝试排查问题,几乎可以确定这是求值顺序上的依赖问题。我尝试添加了一些控制依赖(control dependencies),但没有效果。抱歉代码有点长,但我实在无法在更小的示例中重现这个问题。以下是我的代码:

  import numpy as np
  import tensorflow.compat.v1 as tf

  tf.compat.v1.disable_eager_execution()
  # NOTE: the answer that accompanies this snippet reports that removing the
  # following call makes the intermittent errors go away. It is kept here
  # because this script is the reproduction of the problem.
  tf.disable_v2_behavior()

  # Components of a rank-2 ragged tensor, fed at session run time:
  # flat values, inner (L2) row splits and outer (L1) row splits.
  myTensor_values = tf.placeholder(dtype=tf.float32)
  myTensor_l2_splits = tf.placeholder(dtype=tf.int32)
  myTensor_l1_splits = tf.placeholder(dtype=tf.int32)


  def innerloop_processing(begin_index, end_index, input1):
      """Stack the inner rows of ``input1`` starting at ``begin_index``.

      ``input1`` is indexed as ``input1[0]`` (flat values) and ``input1[1]``
      (inner row splits). Starting at ``begin_index``, each iteration slices
      ``input1[0]`` between two consecutive entries of ``input1[1]`` and
      writes the slice into a TensorArray. Iteration continues while
      ``input1[1][counter] < input1[1][end_index] - 1``.

      Returns:
          The stacked TensorArray contents (one slice per iteration).
      """
      innerloop_counter = begin_index
      ta = tf.TensorArray(
          tf.float32,
          size=0,
          dynamic_size=True,
          clear_after_read=False,
          infer_shape=False,
      )

      def innerloop_body(counter, begin_index, end_index, input1, ta):
          inner_being_index = input1[1][counter]
          inner_end_index = input1[1][counter + 1]
          row = tf.slice(
              input1[0],
              [inner_being_index],
              [inner_end_index - inner_being_index],
          )
          # TensorArray positions are relative to the first processed row.
          ta = ta.write(counter - begin_index, row)
          counter = counter + 1
          return counter, begin_index, end_index, input1, ta

      def innerloop_cond(counter, begin_index, end_index, input1, ta):
          # Stop at the next pointer of the l2_splits.
          return input1[1][counter] < input1[1][end_index] - 1

      results = tf.while_loop(
          innerloop_cond,
          innerloop_body,
          [innerloop_counter, begin_index, end_index, input1, ta],
      )
      # Debug output; the returned op handle is intentionally not kept.
      tf.print("this is the component result  :", results[4].stack())
      return results[4].stack()


  def generateL1Tensor_writeback(start_offest, step, num):
      """Build a 1-D int32 tensor of offsets ``start_offest + k * step``.

      ``k`` starts at 0 and increases by 1 while ``k * step <= num * step``;
      each value is written as a one-element list and the entries are
      concatenated at the end.

      Returns:
          The concatenation of all written one-element entries.
      """
      counter = tf.constant(0, tf.int32)
      values = tf.TensorArray(
          tf.int32,
          size=0,
          dynamic_size=True,
          clear_after_read=False,
          infer_shape=False,
      )

      def cond(values, start_offest, num, counter):
          return counter * step <= num * step

      def body(values, start_offest, num, counter):
          values = values.write(counter, [(counter * step) + start_offest])
          counter = counter + 1
          return values, start_offest, num, counter

      final_values, _, _, _ = tf.while_loop(
          cond, body, [values, start_offest, num, counter]
      )
      final = final_values.concat()
      return final


  def multiply2n_ragged(tensor1, tensor2):
      """Multiply two rank-2 ragged tensors, outer row by outer row.

      Each argument is indexed as ``[0]`` flat values, ``[1]`` inner (L2) row
      splits and ``[2]`` outer (L1) row splits. For every outer row the
      matching components of both inputs are gathered, the second one is
      transposed, and the two are combined with ``tf.matmul``. The outermost
      dimensions of the two tensors must be equal.

      Returns:
          A 3-tuple ``(values, l2_splits, l1_splits)`` in the same layout as
          the inputs, where ``l2_splits`` has been passed through
          ``tf.unique``.
      """
      # Loop state: counters and accumulators for the result components.
      outerloop_counter = tf.constant(0, dtype=tf.int32)
      carry_on = tf.constant(0, dtype=tf.int32)
      taValues = tf.TensorArray(
          tf.float32,
          size=0,
          dynamic_size=True,
          clear_after_read=False,
          infer_shape=False,
      )
      taL2Splits = tf.TensorArray(
          tf.int32,
          size=0,
          dynamic_size=True,
          clear_after_read=False,
          infer_shape=False,
      )
      taL1Splits = tf.TensorArray(
          tf.int32,
          size=0,
          dynamic_size=True,
          clear_after_read=False,
          infer_shape=False,
      )
      # Required initialization for the L1 split only.
      taL1Splits = taL1Splits.write(0, [0])

      innerloop_processing_graphed = tf.function(innerloop_processing)
      generateL1Tensor_writeback_graphed = tf.function(
          generateL1Tensor_writeback
      )

      def outerloop_cond(
          counter, input1, input2, taValues, taL2Splits, taL1Splits, carry_on
      ):
          # Number of outer rows is one less than the length of the outer
          # row-splits tensor; stop once all of them have been visited.
          value = tf.shape(input1[2])[0] - 1
          return counter < value

      def outloop_body(
          counter, input1, input2, taValues, taL2Splits, taL1Splits, carry_on
      ):
          # Begin/end positions of the current row in the outer row split
          # (the i-th and (i+1)-th entries), for both operands.
          l1_comp_begin = input1[2][counter]
          l1_comp_end = input1[2][counter + 1]
          l1_comp2_begin = input2[2][counter]
          l1_comp2_end = input2[2][counter + 1]

          # Retrieve the data to be processed for the selected rows.
          comp = innerloop_processing_graphed(
              l1_comp_begin, l1_comp_end, input1
          )
          comp2 = innerloop_processing_graphed(
              l1_comp2_begin, l1_comp2_end, input2
          )

          # The desired operation: comp x comp2^T.
          comp2 = tf.transpose(comp2)
          multiply = tf.matmul(comp, comp2)

          # Shape of the result, needed to write it back in ragged format.
          myshape = tf.shape(multiply)

          # HACK: TensorArray.concat fails on an empty array, so check the
          # size before calling it.
          # NOTE(review): the two branches return different shapes (a scalar
          # vs. a one-element list) -- confirm this is intended.
          offset = tf.cond(
              taValues.size() > 0,
              lambda: tf.shape(taValues.concat())[0],
              lambda: [0],
          )

          # Inner row split of the result for the current element.
          l2v = generateL1Tensor_writeback_graphed(
              offset, myshape[1], myshape[0]
          )
          taL2Splits = taL2Splits.write(counter, l2v)
          # Flattened result values for the current element.
          taValues = taValues.write(counter, tf.reshape(multiply, [-1]))
          # Running row count, used to build the outermost row split.
          carry_on = carry_on + myshape[0]
          taL1Splits = taL1Splits.write(counter + 1, [carry_on])

          counter = counter + 1
          return (
              counter,
              input1,
              input2,
              taValues,
              taL2Splits,
              taL1Splits,
              carry_on,
          )

      _, _, _, ta1, ta2, ta3, _ = tf.while_loop(
          outerloop_cond,
          outloop_body,
          [
              outerloop_counter,
              tensor1,
              tensor2,
              taValues,
              taL2Splits,
              taL1Splits,
              carry_on,
          ],
      )
      # Some values might be duplicated in the row split itself.
      uinquie_ta2, _ = tf.unique(ta2.concat())
      final_values = ta1.concat(), uinquie_ta2, ta3.concat()
      return final_values


  t = myTensor_values, myTensor_l2_splits, myTensor_l1_splits
  oo = multiply2n_ragged(t, t)
  new_oo = multiply2n_ragged(oo, oo)

  sess = tf.Session(
      config=tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))
  )
  sess.run(tf.global_variables_initializer())

  vals = np.array(
      [1.0, 2.2, 1.1, 4.0, 5.0, 1.1, 6.0, 7.0, 1.1,
       8.0, 9.0, 1.1, 10.0, 11.0, 1.1]
  )
  l2_splits = np.array([0, 3, 6, 9, 12, 15])
  l1_splits = np.array([0, 2, 5])

  re = sess.run(
      [new_oo],
      feed_dict={
          myTensor_values: vals,
          myTensor_l1_splits: l1_splits,
          myTensor_l2_splits: l2_splits,
      },
  )
  print(re)

如我所说,代码在很多情况下运行正常,但有时对于相同的输入会生成以下错误。我得到的一些不同错误的堆栈跟踪如下:

this is the component result  : [[1 2.2 1.1] [4 5 1.1]]
this is the component result  : [[1 2.2 1.1] [4 5 1.1]]
this is the component result  : [[6 7 1.1] [8 9 1.1] [10 11 1.1]]
this is the component result  : [[6 7 1.1] [8 9 1.1] [10 11 1.1]]
this is the component result  : [[7.05 16.21] [16.21 42.21]]
this is the component result  : [[7.05 16.21] [16.21 42.21]]
---------------------------------------------------------------------------
InvalidArgumentError                      Traceback (most recent call last)
C:\ProgramData\Anaconda3\envs\AutoEncoder\lib\site-packages\tensorflow\python\client\session.py in _do_call(self, fn, *args)
   1364     try:
-> 1365       return fn(*args)
   1366     except errors.OpError as e:

C:\ProgramData\Anaconda3\envs\AutoEncoder\lib\site-packages\tensorflow\python\client\session.py in _run_fn(feed_dict, fetch_list, target_list, options, run_metadata)
   1349       return self._call_tf_sessionrun(options, feed_dict, fetch_list,
-> 1350                                       target_list, run_metadata)
   1351 

C:\ProgramData\Anaconda3\envs\AutoEncoder\lib\site-packages\tensorflow\python\client\session.py in _call_tf_sessionrun(self, options, feed_dict, fetch_list, target_list, run_metadata)
   1442                                             fetch_list, target_list,
-> 1443                                             run_metadata)
   1444 

InvalidArgumentError: {{function_node __inference_innerloop_processing_13658}} {{function_node __inference_innerloop_processing_13658}} Expected size[0] in [0, 0], but got 3
     [[{{node while/body/_1/while/Slice}}]]
     [[while_33/StatefulPartitionedCall_1]]

During handling of the above exception, another exception occurred:

InvalidArgumentError                      Traceback (most recent call last)
<ipython-input-18-238a2ce9a03a> in <module>
     94 l2_splits = np.array([0,3,6,9,12,15])
     95 l1_splits = np.array([0, 2, 5  ])
---> 96 re       = sess.run([new_oo  ] , feed_dict={myTensor_values:vals ,myTensor_l1_splits:l1_splits ,myTensor_l2_splits:l2_splits  } )
     97 print(re)

C:\ProgramData\Anaconda3\envs\AutoEncoder\lib\site-packages\tensorflow\python\client\session.py in run(self, fetches, feed_dict, options, run_metadata)
    956     try:
    957       result = self._run(None, fetches, feed_dict, options_ptr,
--> 958                          run_metadata_ptr)
    959       if run_metadata:
    960         proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)

C:\ProgramData\Anaconda3\envs\AutoEncoder\lib\site-packages\tensorflow\python\client\session.py in _run(self, handle, fetches, feed_dict, options, run_metadata)
   1179     if final_fetches or final_targets or (handle and feed_dict_tensor):
   1180       results = self._do_run(handle, final_targets, final_fetches,
-> 1181                              feed_dict_tensor, options, run_metadata)
   1182     else:
   1183       results = []

C:\ProgramData\Anaconda3\envs\AutoEncoder\lib\site-packages\tensorflow\python\client\session.py in _do_run(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)
   1357     if handle is None:
   1358       return self._do_call(_run_fn, feeds, fetches, targets, options,
-> 1359                            run_metadata)
   1360     else:
   1361       return self._do_call(_prun_fn, handle, feeds, fetches)

C:\ProgramData\Anaconda3\envs\AutoEncoder\lib\site-packages\tensorflow\python\client\session.py in _do_call(self, fn, *args)
   1382                     '\nsession_config.graph_options.rewrite_options.'
   1383                     'disable_meta_optimizer = True')
-> 1384       raise type(e)(node_def, op, message)
   1385 
   1386   def _extend_graph(self):

InvalidArgumentError:   Expected size[0] in [0, 0], but got 3
     [[{{node while/body/_1/while/Slice}}]]
     [[while_33/StatefulPartitionedCall_1]]

以及以下错误:

CancelledError                            Traceback (most recent call last)
C:\ProgramData\Anaconda3\envs\AutoEncoder\lib\site-packages\tensorflow\python\client\session.py in _do_call(self, fn, *args)
   1364     try:
-> 1365       return fn(*args)
   1366     except errors.OpError as e:

C:\ProgramData\Anaconda3\envs\AutoEncoder\lib\site-packages\tensorflow\python\client\session.py in _run_fn(feed_dict, fetch_list, target_list, options, run_metadata)
   1349       return self._call_tf_sessionrun(options, feed_dict, fetch_list,
-> 1350                                       target_list, run_metadata)
   1351 

C:\ProgramData\Anaconda3\envs\AutoEncoder\lib\site-packages\tensorflow\python\client\session.py in _call_tf_sessionrun(self, options, feed_dict, fetch_list, target_list, run_metadata)
   1442                                             fetch_list, target_list,
-> 1443                                             run_metadata)
   1444 

CancelledError: {{function_node __inference_innerloop_processing_11240}} {{function_node __inference_innerloop_processing_11240}} [_Derived_]Loop execution was cancelled.
     [[{{node while/LoopCond/_20}}]]
     [[while_27/StatefulPartitionedCall_1]]

During handling of the above exception, another exception occurred:

CancelledError                            Traceback (most recent call last)
<ipython-input-15-238a2ce9a03a> in <module>
     94 l2_splits = np.array([0,3,6,9,12,15])
     95 l1_splits = np.array([0, 2, 5  ])
---> 96 re       = sess.run([new_oo  ] , feed_dict={myTensor_values:vals ,myTensor_l1_splits:l1_splits ,myTensor_l2_splits:l2_splits  } )
     97 print(re)

C:\ProgramData\Anaconda3\envs\AutoEncoder\lib\site-packages\tensorflow\python\client\session.py in run(self, fetches, feed_dict, options, run_metadata)
    956     try:
    957       result = self._run(None, fetches, feed_dict, options_ptr,
--> 958                          run_metadata_ptr)
    959       if run_metadata:
    960         proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)

C:\ProgramData\Anaconda3\envs\AutoEncoder\lib\site-packages\tensorflow\python\client\session.py in _run(self, handle, fetches, feed_dict, options, run_metadata)
   1179     if final_fetches or final_targets or (handle and feed_dict_tensor):
   1180       results = self._do_run(handle, final_targets, final_fetches,
-> 1181                              feed_dict_tensor, options, run_metadata)
   1182     else:
   1183       results = []

C:\ProgramData\Anaconda3\envs\AutoEncoder\lib\site-packages\tensorflow\python\client\session.py in _do_run(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)
   1357     if handle is None:
   1358       return self._do_call(_run_fn, feeds, fetches, targets, options,
-> 1359                            run_metadata)
   1360     else:
   1361       return self._do_call(_prun_fn, handle, feeds, fetches)

C:\ProgramData\Anaconda3\envs\AutoEncoder\lib\site-packages\tensorflow\python\client\session.py in _do_call(self, fn, *args)
   1382                     '\nsession_config.graph_options.rewrite_options.'
   1383                     'disable_meta_optimizer = True')
-> 1384       raise type(e)(node_def, op, message)
   1385 
   1386   def _extend_graph(self):

CancelledError:   [_Derived_]Loop execution was cancelled.
     [[{{node while/LoopCond/_20}}]]
     [[while_27/StatefulPartitionedCall_1]]

我认为所有错误都是在innerloop_processing中抛出的。我还在Tensorflow的GitHub上就此开了一个issue。


回答:

看起来问题出自tf.cond,幸运的是该操作在Tensorflow 2中已被重新实现。因此,移除以下调用:

tf.disable_v2_behavior()

可以解决这个问题。

Related Posts

L1-L2正则化的不同系数

我想对网络的权重同时应用L1和L2正则化。然而,我找不…

使用scikit-learn的无监督方法将列表分类成不同组别,有没有办法?

我有一系列实例,每个实例都有一份列表,代表它所遵循的不…

f1_score metric in lightgbm

我想使用自定义指标f1_score来训练一个lgb模型…

通过相关系数矩阵进行特征选择

我在测试不同的算法时,如逻辑回归、高斯朴素贝叶斯、随机…

可以将机器学习库用于流式输入和输出吗?

已关闭。此问题需要更加聚焦。目前不接受回答。 想要改进…

在TensorFlow中,queue.dequeue_up_to()方法的用途是什么?

我对这个方法感到非常困惑,特别是当我发现这个令人费解的…

发表回复

您的邮箱地址不会被公开。 必填项已用 * 标注