I am trying to count the number of extended fingers in a static image. First, I used the TensorFlow object detection API, trained on the egohands dataset, to detect the hand, and read the [ymin, xmin, ymax, xmax] values from output_dict['detection_boxes'].
Then I used OpenCV with those coordinates to draw a rectangle. That part works: the rectangle is drawn exactly over the detected region. But when I try to select that same region as an ROI, it fails.
The lines of code I am stuck on are:
image = Image.open(image_path)
# The array based representation of the image will be used later in order to prepare the
# result image with boxes and labels on it.
image_np = load_image_into_numpy_array(image)
# Expand dimensions since the model expects images to have shape: [1, None, None, 3]
image_np_expanded = np.expand_dims(image_np, axis=0)
# Actual detection.
output_dict = run_inference_for_single_image(image_np, detection_graph)
# Visualization of the results of a detection.
h = vis_util.visualize_boxes_and_labels_on_image_array(
    image_np,
    output_dict['detection_boxes'],
    output_dict['detection_classes'],
    output_dict['detection_scores'],
    category_index,
    instance_masks=output_dict.get('detection_masks'),
    use_normalized_coordinates=True,
    line_thickness=8)
#plt.figure(figsize=IMAGE_SIZE)
im_width, im_height = image.size
ymin = output_dict['detection_boxes'][0][0] * im_height
xmin = output_dict['detection_boxes'][0][1] * im_width
ymax = output_dict['detection_boxes'][0][2] * im_height
xmax = output_dict['detection_boxes'][0][3] * im_width
cv2.rectangle(image_np, (int(xmin), int(ymin)), (int(xmax), int(ymax)), (255, 0, 0), 5)
roi = image_np[int(xmin):int(ymin), int(xmax):int(ymax)]
cv2.rectangle(image_np, (int(xmin), int(ymin)), (int(xmax), int(ymax)), (0, 0, 255), -1)
#cv2.circle(image_np, (int(xmin),int(xmax)), 55, (0,0,255), -1)
####################################
I don't know whether I am on the right track. Below is my full code.
# coding: utf-8

# # Object Detection Demo
# Welcome to the object detection inference walkthrough! This notebook will walk you step by
# step through the process of using a pre-trained model to detect objects in an image. Make sure
# to follow the [installation instructions](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/installation.md) before you start.

# # Imports

# In[1]:
import numpy as np
import os
import six.moves.urllib as urllib
import sys
import tarfile
import tensorflow as tf
import zipfile
import math

from collections import defaultdict
from io import StringIO
#from matplotlib import pyplot as plt
from PIL import Image
import cv2

# This is needed since the notebook is stored in the object_detection folder.
sys.path.append("..")
from object_detection.utils import ops as utils_ops

if tf.__version__ < '1.4.0':
    raise ImportError('Please upgrade your tensorflow installation to v1.4.* or later!')

from utils import label_map_util
from utils import visualization_utils as vis_util

# # Model preparation

# ## Variables
#
# Any model exported using the `export_inference_graph.py` tool can be loaded here simply by
# changing `PATH_TO_CKPT` to point to a new .pb file.
#
# By default we use the "SSD with Mobilenet" model here. See the
# [detection model zoo](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/detection_model_zoo.md)
# for a list of other models that can be run out-of-the-box with varying speeds and accuracies.

# In[4]:
# What model to download.
MODEL_NAME = 'hand_inference_graph'
#MODEL_FILE = MODEL_NAME + '.tar.gz'
#DOWNLOAD_BASE = 'http://download.tensorflow.org/models/object_detection/'

# Path to the frozen detection graph. This is the actual model that is used for the object detection.
PATH_TO_CKPT = MODEL_NAME + '/frozen_inference_graph.pb'

# List of the strings that is used to add the correct label for each box.
PATH_TO_LABELS = os.path.join('hand_inference_graph', 'hand_label_map.pbtxt')

NUM_CLASSES = 90

# ## Download Model

# In[ ]:
'''
opener = urllib.request.URLopener()
opener.retrieve(DOWNLOAD_BASE + MODEL_FILE, MODEL_FILE)
tar_file = tarfile.open(MODEL_FILE)
for file in tar_file.getmembers():
    file_name = os.path.basename(file.name)
    if 'frozen_inference_graph.pb' in file_name:
        tar_file.extract(file, os.getcwd())
'''

# ## Load a (frozen) Tensorflow model into memory.

# In[ ]:
detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
        serialized_graph = fid.read()
        od_graph_def.ParseFromString(serialized_graph)
        tf.import_graph_def(od_graph_def, name='')

# ## Loading label map
# Label maps map indices to category names, so that when our convolution network predicts `5`, we
# know that this corresponds to `airplane`. Here we use an internal utility function, but anything
# that returns a dictionary mapping integers to appropriate string labels would be fine.

# In[ ]:
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
category_index = label_map_util.create_category_index(categories)

# ## Helper code

# In[ ]:
def load_image_into_numpy_array(image):
    (im_width, im_height) = image.size
    return np.array(image.getdata()).reshape(
        (im_height, im_width, 3)).astype(np.uint8)

# # Detection

# In[ ]:
# For the sake of simplicity we will use only 2 images:
# image1.jpg
# image2.jpg
# If you want to test the code with your own images, just add the image paths to TEST_IMAGE_PATHS.
PATH_TO_TEST_IMAGES_DIR = 'pics'
image_path = 'pics/image1.jpg'

# Size, in inches, of the output images.
IMAGE_SIZE = (12, 8)

# In[ ]:
def run_inference_for_single_image(image, graph):
    with graph.as_default():
        with tf.Session() as sess:
            # Get handles to the input and output tensors
            ops = tf.get_default_graph().get_operations()
            all_tensor_names = {output.name for op in ops for output in op.outputs}
            tensor_dict = {}
            for key in [
                'num_detections', 'detection_boxes', 'detection_scores',
                'detection_classes', 'detection_masks'
            ]:
                tensor_name = key + ':0'
                if tensor_name in all_tensor_names:
                    tensor_dict[key] = tf.get_default_graph().get_tensor_by_name(
                        tensor_name)
            if 'detection_masks' in tensor_dict:
                # The following processing is only for a single image
                detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0])
                detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0])
                # Reframing is required to translate the masks from box coordinates to image
                # coordinates and fit the image size.
                real_num_detection = tf.cast(tensor_dict['num_detections'][0], tf.int32)
                detection_boxes = tf.slice(detection_boxes, [0, 0],
                                           [real_num_detection, -1])
                detection_masks = tf.slice(detection_masks, [0, 0, 0],
                                           [real_num_detection, -1, -1])
                detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
                    detection_masks, detection_boxes, image.shape[0], image.shape[1])
                detection_masks_reframed = tf.cast(
                    tf.greater(detection_masks_reframed, 0.5), tf.uint8)
                # Follow the convention by adding back the batch dimension
                tensor_dict['detection_masks'] = tf.expand_dims(
                    detection_masks_reframed, 0)
            image_tensor = tf.get_default_graph().get_tensor_by_name('image_tensor:0')

            # Run inference
            output_dict = sess.run(tensor_dict,
                                   feed_dict={image_tensor: np.expand_dims(image, 0)})

            # All outputs are float32 numpy arrays, so convert types as appropriate
            output_dict['num_detections'] = int(output_dict['num_detections'][0])
            output_dict['detection_classes'] = output_dict[
                'detection_classes'][0].astype(np.uint8)
            output_dict['detection_boxes'] = output_dict['detection_boxes'][0]
            output_dict['detection_scores'] = output_dict['detection_scores'][0]
            if 'detection_masks' in output_dict:
                output_dict['detection_masks'] = output_dict['detection_masks'][0]
    return output_dict

# In[ ]:
image = Image.open(image_path)
# The array based representation of the image will be used later in order to prepare the
# result image with boxes and labels on it.
image_np = load_image_into_numpy_array(image)
# Expand dimensions since the model expects images to have shape: [1, None, None, 3]
image_np_expanded = np.expand_dims(image_np, axis=0)
# Actual detection.
output_dict = run_inference_for_single_image(image_np, detection_graph)
# Visualization of the results of a detection.
h = vis_util.visualize_boxes_and_labels_on_image_array(
    image_np,
    output_dict['detection_boxes'],
    output_dict['detection_classes'],
    output_dict['detection_scores'],
    category_index,
    instance_masks=output_dict.get('detection_masks'),
    use_normalized_coordinates=True,
    line_thickness=8)
#plt.figure(figsize=IMAGE_SIZE)
im_width, im_height = image.size
ymin = output_dict['detection_boxes'][0][0] * im_height
xmin = output_dict['detection_boxes'][0][1] * im_width
ymax = output_dict['detection_boxes'][0][2] * im_height
xmax = output_dict['detection_boxes'][0][3] * im_width
cv2.rectangle(image_np, (int(xmin), int(ymin)), (int(xmax), int(ymax)), (255, 0, 0), 5)
roi = image_np[int(xmin):int(ymin), int(xmax):int(ymax)]
cv2.rectangle(image_np, (int(xmin), int(ymin)), (int(xmax), int(ymax)), (0, 0, 255), -1)
#cv2.circle(image_np, (int(xmin),int(xmax)), 55, (0,0,255), -1)
####################################
cv2.imshow('original', cv2.resize(image_np, (800, 600)))
cv2.waitKey(0)
cv2.destroyAllWindows()
Also, please check whether the way I am selecting the ROI is correct.
Thanks in advance.
(I am very, very new to machine learning and OpenCV.)
Answer:
roi = image_np[int(ymin):int(ymax), int(xmin):int(xmax)]
Try this. Also, I see in your code that you set NUM_CLASSES to 90. Why is that, when you only have a single class (the hand, if I am not mistaken)?
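To make that concrete, here is a minimal sketch of the cropping step, assuming the variables from your code above (output_dict, image_np, im_width, im_height). It converts the first normalized box to integer pixel coordinates and then slices rows first, because NumPy indexes an image as image[y, x]:

# Normalized box order from the object detection API: [ymin, xmin, ymax, xmax].
box = output_dict['detection_boxes'][0]
ymin = int(box[0] * im_height)
xmin = int(box[1] * im_width)
ymax = int(box[2] * im_height)
xmax = int(box[3] * im_width)

# NumPy images are indexed image[row, col], i.e. image[y, x]:
# rows come from ymin:ymax, columns from xmin:xmax.
# .copy() detaches the crop from image_np, so anything drawn on image_np
# afterwards (e.g. the filled cv2.rectangle) does not paint over the ROI.
roi = image_np[ymin:ymax, xmin:xmax].copy()

cv2.imshow('roi', roi)
cv2.waitKey(0)
cv2.destroyAllWindows()

Without the .copy(), roi is just a view into image_np, so the filled red rectangle you draw right after the crop would overwrite the ROI as well. And regarding the label map: with only a hand class, NUM_CLASSES = 1 should match the single entry in hand_label_map.pbtxt.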