TensorFlow GradientBoostedDecisionTreeClassifier error: "Dense float feature must be a matrix"

I am running into the following error during training:

"tensorflow.python.framework.errors_impl.InvalidArgumentError: Dense float feature must be a matrix." The error is raised when using the tensorflow.contrib.boosted_trees.estimator_batch.estimator.GradientBoostedDecisionTreeClassifier estimator. I am on TensorFlow 1.4.0. The same code works fine if I swap the estimator for tf.contrib.learn.DNNClassifier. In the code, the feature dictionary is passed in through the train_input_fn of a tf.contrib.learn.Experiment.

Has anyone seen a similar error before?

#'tensorflow==1.4.0'
import tensorflow as tf
import argparse
import sys
import os
from tensorflow.contrib.boosted_trees.estimator_batch.estimator import GradientBoostedDecisionTreeClassifier
from tensorflow.contrib.boosted_trees.proto import learner_pb2
from tensorflow_transform.tf_metadata import metadata_io
from tensorflow_transform.saved import input_fn_maker
from tensorflow.contrib.learn.python.learn import learn_runner

RAW_METADATA_DIR = "raw_metadata"
CONTRACTED_METADATA_DIR = "contracted_metadata"
TRANSFORMED_METADATA_DIR = "transformed_metadata"
TRANSFORMED_TRAIN_DATA_FILE_PREFIX = "train"
TRANSFORMED_EVAL_DATA_FILE_PREFIX = "eval"
DATA_FILE_SUFFIX = ".tfrecord.gz"
TRANSFORM_FN_DIR = "transform_fn"
TARGET_FEATURE_COLUMN = 'target_field'

FEATURE_NUMERICAL_COLUMN_NAMES = [
    'feature1',
    'feature2',
    'feature3',
    'feature4',
    'feature5'
]

FEATURE_INTEGER_COLUMN_NAMES = [  # comment out fields that are not features
    'feature6',
    'feature7',
    'feature8',
    'feature9',
    'feature10'
]


def _parse_arguments(argv):
    """Parses command line arguments."""
    parser = argparse.ArgumentParser(
        description="Runs training on data.")
    parser.add_argument(
        "--model_dir", required=True, type=str,
        help="The directory where model outputs will be written")
    parser.add_argument(
        "--input_dir", required=True, type=str,
        help=("GCS or local directory containing tensorflow-transform outputs."))
    parser.add_argument(
        "--batch_size", default=30, required=False, type=int,
        help=("Batch size to use during training."))
    parser.add_argument(
        "--num_epochs", default=100, required=False, type=int,
        help=("Number of epochs through the training set"))
    args, _ = parser.parse_known_args(args=argv[1:])
    return args


def get_eval_metrics():
    return {
        "accuracy":
            tf.contrib.learn.MetricSpec(
                metric_fn=tf.contrib.metrics.streaming_accuracy,
                prediction_key=tf.contrib.learn.PredictionKey.CLASSES),
        "precision":
            tf.contrib.learn.MetricSpec(
                metric_fn=tf.contrib.metrics.streaming_precision,
                prediction_key=tf.contrib.learn.PredictionKey.CLASSES),
        "recall":
            tf.contrib.learn.MetricSpec(
                metric_fn=tf.contrib.metrics.streaming_recall,
                prediction_key=tf.contrib.learn.PredictionKey.CLASSES)
    }


def read_and_decode_single_record(input_dir, num_epochs,
                                  mode=tf.contrib.learn.ModeKeys.TRAIN):
    if mode == tf.contrib.learn.ModeKeys.TRAIN:
        num_epochs = num_epochs
        file_prefix = TRANSFORMED_TRAIN_DATA_FILE_PREFIX
    else:
        num_epochs = 1
        file_prefix = TRANSFORMED_EVAL_DATA_FILE_PREFIX
    transformed_metadata = metadata_io.read_metadata(
        os.path.join(input_dir, TRANSFORMED_METADATA_DIR))
    input_file_names = tf.train.match_filenames_once(
        os.path.join(input_dir, '{}*{}'.format(file_prefix, DATA_FILE_SUFFIX)))
    filename_queue = tf.train.string_input_producer(
        input_file_names, num_epochs=num_epochs, shuffle=True)
    reader = tf.TFRecordReader(options=tf.python_io.TFRecordOptions(
        tf.python_io.TFRecordCompressionType.GZIP))
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(
        serialized=serialized_example,
        features=transformed_metadata.schema.as_feature_spec())
    return features


def read_dataset(input_dir, num_epochs, batch_size,
                 mode=tf.contrib.learn.ModeKeys.TRAIN):
    def _input_fn():
        min_after_dequeue = 10000
        features = read_and_decode_single_record(input_dir, num_epochs, mode)
        features = tf.train.shuffle_batch(
            tensors=features,
            batch_size=batch_size,
            min_after_dequeue=min_after_dequeue,
            capacity=(min_after_dequeue + 3) * batch_size)
        target = features.pop(TARGET_FEATURE_COLUMN)
        return features, target
    return _input_fn


def specify_feature_columns():
    feature_columns = [
        tf.contrib.layers.real_valued_column(column_name=column_name)
        for column_name in FEATURE_NUMERICAL_COLUMN_NAMES]
    feature_columns.extend([
        tf.contrib.layers.real_valued_column(column_name=column_name)
        for column_name in FEATURE_INTEGER_COLUMN_NAMES])
    return feature_columns


def build_estimator(model_dir, config, params):
    print "Using gradient boosted decision trees estimator \n"
    learner_config = learner_pb2.LearnerConfig()
    learner_config.learning_rate_tuner.fixed.learning_rate = 0.1
    learner_config.regularization.l1 = 0.0
    learner_config.regularization.l2 = 4.0 / params.batch_size
    learner_config.constraints.max_tree_depth = 4
    learner_config.growing_mode = learner_pb2.LearnerConfig.WHOLE_TREE
    return GradientBoostedDecisionTreeClassifier(
        learner_config=learner_config,
        examples_per_layer=params.batch_size,
        num_trees=100,
        center_bias=False,
        feature_columns=specify_feature_columns()
        # feature_engineering_fn=feature_engineering_fn
    )


def get_experiment_fn(args):
    config = tf.contrib.learn.RunConfig(save_checkpoints_steps=1000)

    def experiment_fn(output_dir):
        return tf.contrib.learn.Experiment(
            estimator=build_estimator(model_dir=output_dir,
                                      config=config,
                                      params=args),
            train_input_fn=read_dataset(args.input_dir,
                                        args.num_epochs, args.batch_size,
                                        mode=tf.contrib.learn.ModeKeys.TRAIN),
            eval_input_fn=read_dataset(args.input_dir,
                                       args.num_epochs, args.batch_size,
                                       mode=tf.contrib.learn.ModeKeys.EVAL),
            eval_metrics=get_eval_metrics())
    return experiment_fn


def run(args):
    learn_runner.run(get_experiment_fn(args), args.model_dir)


if __name__ == '__main__':
    args = _parse_arguments(sys.argv)
    run(args)

Full error trace:

WARNING:tensorflow:Using temporary folder as model directory: /var/folders/mg/sd4_qlyj4_lbh5ggfn6frvcr00fk8_/T/tmpPFhins
WARNING:tensorflow:From /Users/amolsharma/anaconda/envs/oldpython/lib/python2.7/site-packages/tensorflow/contrib/learn/python/learn/monitors.py:267: __init__ (from tensorflow.contrib.learn.python.learn.monitors) is deprecated and will be removed after 2016-12-05.
Instructions for updating:
Monitors are deprecated. Please use tf.train.SessionRunHook.
WARNING:tensorflow:Casting <dtype: 'int64'> labels to bool.
WARNING:tensorflow:Casting <dtype: 'int64'> labels to bool.
WARNING:tensorflow:Error encountered when serializing resources.
Type is unsupported, or the types of the items don't match field type in CollectionDef.
'_Resource' object has no attribute 'name'
2017-11-16 13:38:39.919664: I tensorflow/core/platform/cpu_feature_guard.cc:137] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2 FMA
WARNING:tensorflow:Error encountered when serializing resources.
Type is unsupported, or the types of the items don't match field type in CollectionDef.
'_Resource' object has no attribute 'name'
2017-11-16 13:38:48.810825: W tensorflow/core/framework/op_kernel.cc:1192] Invalid argument: Dense float feature must be a matrix.
2017-11-16 13:38:48.810825: W tensorflow/core/framework/op_kernel.cc:1192] Invalid argument: Dense float feature must be a matrix.
Traceback (most recent call last):
  File "./trainer/task.py", line 162, in <module>
    run(args)
  File "./trainer/task.py", line 157, in run
    learn_runner.run(get_experiment_fn(args), args.model_dir)
  File "/Users/amolsharma/anaconda/envs/oldpython/lib/python2.7/site-packages/tensorflow/contrib/learn/python/learn/learn_runner.py", line 218, in run
    return _execute_schedule(experiment, schedule)
  File "/Users/amolsharma/anaconda/envs/oldpython/lib/python2.7/site-packages/tensorflow/contrib/learn/python/learn/learn_runner.py", line 46, in _execute_schedule
    return task()
  File "/Users/amolsharma/anaconda/envs/oldpython/lib/python2.7/site-packages/tensorflow/contrib/learn/python/learn/experiment.py", line 625, in train_and_evaluate
    self.train(delay_secs=0)
  File "/Users/amolsharma/anaconda/envs/oldpython/lib/python2.7/site-packages/tensorflow/contrib/learn/python/learn/experiment.py", line 367, in train
    hooks=self._train_monitors + extra_hooks)
  File "/Users/amolsharma/anaconda/envs/oldpython/lib/python2.7/site-packages/tensorflow/contrib/learn/python/learn/experiment.py", line 812, in _call_train
    monitors=hooks)
  File "/Users/amolsharma/anaconda/envs/oldpython/lib/python2.7/site-packages/tensorflow/python/util/deprecation.py", line 316, in new_func
    return func(*args, **kwargs)
  File "/Users/amolsharma/anaconda/envs/oldpython/lib/python2.7/site-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py", line 480, in fit
    loss = self._train_model(input_fn=input_fn, hooks=hooks)
  File "/Users/amolsharma/anaconda/envs/oldpython/lib/python2.7/site-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py", line 1040, in _train_model
    _, loss = mon_sess.run([model_fn_ops.train_op, model_fn_ops.loss])
  File "/Users/amolsharma/anaconda/envs/oldpython/lib/python2.7/site-packages/tensorflow/python/training/monitored_session.py", line 521, in run
    run_metadata=run_metadata)
  File "/Users/amolsharma/anaconda/envs/oldpython/lib/python2.7/site-packages/tensorflow/python/training/monitored_session.py", line 892, in run
    run_metadata=run_metadata)
  File "/Users/amolsharma/anaconda/envs/oldpython/lib/python2.7/site-packages/tensorflow/python/training/monitored_session.py", line 967, in run
    raise six.reraise(*original_exc_info)
  File "/Users/amolsharma/anaconda/envs/oldpython/lib/python2.7/site-packages/tensorflow/python/training/monitored_session.py", line 952, in run
    return self._sess.run(*args, **kwargs)
  File "/Users/amolsharma/anaconda/envs/oldpython/lib/python2.7/site-packages/tensorflow/python/training/monitored_session.py", line 1024, in run
    run_metadata=run_metadata)
  File "/Users/amolsharma/anaconda/envs/oldpython/lib/python2.7/site-packages/tensorflow/python/training/monitored_session.py", line 827, in run
    return self._sess.run(*args, **kwargs)
  File "/Users/amolsharma/anaconda/envs/oldpython/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 889, in run
    run_metadata_ptr)
  File "/Users/amolsharma/anaconda/envs/oldpython/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 1120, in _run
    feed_dict_tensor, options, run_metadata)
  File "/Users/amolsharma/anaconda/envs/oldpython/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 1317, in _do_run
    options, run_metadata)
  File "/Users/amolsharma/anaconda/envs/oldpython/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 1336, in _do_call
    raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InvalidArgumentError: Dense float feature must be a matrix.
     [[Node: gbdt_1/GradientTreesPartitionExamples = GradientTreesPartitionExamples[num_dense_float_features=10, num_sparse_float_features=0, num_sparse_int_features=0, use_locking=true, _device="/job:localhost/replica:0/task:0/device:CPU:0"](ensemble_model, shuffle_batch:16, shuffle_batch:18, shuffle_batch:20, shuffle_batch:21, shuffle_batch:22, shuffle_batch:23, shuffle_batch:24, shuffle_batch:25, shuffle_batch:26, shuffle_batch:27, ^gbdt_1/TreeEnsembleStats)]]

Caused by op u'gbdt_1/GradientTreesPartitionExamples', defined at:
  File "./trainer/task.py", line 162, in <module>
    run(args)
  File "./trainer/task.py", line 157, in run
    learn_runner.run(get_experiment_fn(args), args.model_dir)
  File "/Users/amolsharma/anaconda/envs/oldpython/lib/python2.7/site-packages/tensorflow/contrib/learn/python/learn/learn_runner.py", line 218, in run
    return _execute_schedule(experiment, schedule)
  File "/Users/amolsharma/anaconda/envs/oldpython/lib/python2.7/site-packages/tensorflow/contrib/learn/python/learn/learn_runner.py", line 46, in _execute_schedule
    return task()
  File "/Users/amolsharma/anaconda/envs/oldpython/lib/python2.7/site-packages/tensorflow/contrib/learn/python/learn/experiment.py", line 625, in train_and_evaluate
    self.train(delay_secs=0)
  File "/Users/amolsharma/anaconda/envs/oldpython/lib/python2.7/site-packages/tensorflow/contrib/learn/python/learn/experiment.py", line 367, in train
    hooks=self._train_monitors + extra_hooks)
  File "/Users/amolsharma/anaconda/envs/oldpython/lib/python2.7/site-packages/tensorflow/contrib/learn/python/learn/experiment.py", line 812, in _call_train
    monitors=hooks)
  File "/Users/amolsharma/anaconda/envs/oldpython/lib/python2.7/site-packages/tensorflow/python/util/deprecation.py", line 316, in new_func
    return func(*args, **kwargs)
  File "/Users/amolsharma/anaconda/envs/oldpython/lib/python2.7/site-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py", line 480, in fit
    loss = self._train_model(input_fn=input_fn, hooks=hooks)
  File "/Users/amolsharma/anaconda/envs/oldpython/lib/python2.7/site-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py", line 986, in _train_model
    model_fn_ops = self._get_train_ops(features, labels)
  File "/Users/amolsharma/anaconda/envs/oldpython/lib/python2.7/site-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py", line 1202, in _get_train_ops
    return self._call_model_fn(features, labels, model_fn_lib.ModeKeys.TRAIN)
  File "/Users/amolsharma/anaconda/envs/oldpython/lib/python2.7/site-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py", line 1166, in _call_model_fn
    model_fn_results = self._model_fn(features, labels, **kwargs)
  File "/Users/amolsharma/anaconda/envs/oldpython/lib/python2.7/site-packages/tensorflow/contrib/boosted_trees/estimator_batch/model.py", line 98, in model_builder
    predictions_dict = gbdt_model.predict(mode)
  File "/Users/amolsharma/anaconda/envs/oldpython/lib/python2.7/site-packages/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py", line 463, in predict
    ensemble_stamp, mode)
  File "/Users/amolsharma/anaconda/envs/oldpython/lib/python2.7/site-packages/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py", line 392, in _predict_and_return_dict
    use_locking=True)
  File "/Users/amolsharma/anaconda/envs/oldpython/lib/python2.7/site-packages/tensorflow/contrib/boosted_trees/python/ops/gen_prediction_ops.py", line 117, in gradient_trees_partition_examples
    use_locking=use_locking, name=name)
  File "/Users/amolsharma/anaconda/envs/oldpython/lib/python2.7/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/Users/amolsharma/anaconda/envs/oldpython/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 2956, in create_op
    op_def=op_def)
  File "/Users/amolsharma/anaconda/envs/oldpython/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 1470, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

InvalidArgumentError (see above for traceback): Dense float feature must be a matrix.
     [[Node: gbdt_1/GradientTreesPartitionExamples = GradientTreesPartitionExamples[num_dense_float_features=10, num_sparse_float_features=0, num_sparse_int_features=0, use_locking=true, _device="/job:localhost/replica:0/task:0/device:CPU:0"](ensemble_model, shuffle_batch:16, shuffle_batch:18, shuffle_batch:20, shuffle_batch:21, shuffle_batch:22, shuffle_batch:23, shuffle_batch:24, shuffle_batch:25, shuffle_batch:26, shuffle_batch:27, ^gbdt_1/TreeEnsembleStats)]]

Answer:

My guess is that the parsing spec created by tf.transform is not the one we usually get. Could you share the output of transformed_metadata.schema.as_feature_spec()?
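To check that quickly, something like the minimal sketch below would print the spec per column. It reuses the imports and constants from your script (metadata_io, os, TRANSFORMED_METADATA_DIR); input_dir is assumed to be your tf.transform output directory, and the print statement follows the Python 2 style of your code.

# Minimal sketch (assumption: input_dir points at the tf.transform outputs)
transformed_metadata = metadata_io.read_metadata(
    os.path.join(input_dir, TRANSFORMED_METADATA_DIR))
feature_spec = transformed_metadata.schema.as_feature_spec()
for name, spec in sorted(feature_spec.items()):
    # Prints e.g. FixedLenFeature(shape=(), dtype=tf.float32, ...) per column
    print name, spec

The suspicion is that scalar columns come back as FixedLenFeature entries with shape [] rather than [1]. With shape [], tf.parse_single_example yields a scalar per example, so tf.train.shuffle_batch produces rank-1 [batch_size] tensors, while the GBDT op in your trace (GradientTreesPartitionExamples) requires its dense float features to be rank-2 [batch_size, 1] matrices.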

As a workaround, try adding the following line in your input_fn, right after features = tf.train.shuffle_batch(...):

features = {feature_name: tf.reshape(feature_value, [-1, 1]) for feature_name, feature_value in features.items()}
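For context, here is a sketch of how that line would slot into the _input_fn from your script; everything else is unchanged, and the closure still picks up input_dir, num_epochs, batch_size and mode from the enclosing read_dataset.

def _input_fn():
    min_after_dequeue = 10000
    features = read_and_decode_single_record(input_dir, num_epochs, mode)
    features = tf.train.shuffle_batch(
        tensors=features,
        batch_size=batch_size,
        min_after_dequeue=min_after_dequeue,
        capacity=(min_after_dequeue + 3) * batch_size)
    # Reshape every batched tensor to [batch_size, 1] so the dense float
    # features reach the GBDT ops as matrices instead of rank-1 vectors.
    features = {feature_name: tf.reshape(feature_value, [-1, 1])
                for feature_name, feature_value in features.items()}
    target = features.pop(TARGET_FEATURE_COLUMN)
    return features, target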
