我正在按照这个GitHub仓库使用TensorFlow学习YOLO v3。代码在单张图像上运行得很顺畅(我从电脑上加载的图像),但我正在尝试将模型连接到我的网络摄像头。
我尝试查看了OpenCV关于从摄像头捕获视频的教程,但我无法弄清楚如何将它与启动TensorFlow会话的语句结合起来运行我的模型:
# Load the input images and the class names used by the detector.
batch_size = len(img_names)
batch = load_images(img_names, model_size=_MODEL_SIZE)
class_names = load_class_names('files/coco.names')
n_classes = len(class_names)
max_output_size = 10
iou_threshold = 0.5
confidence_threshold = 0.5

# Build the YOLO v3 graph from scratch.
tf.reset_default_graph()
model = Yolo_v3(n_classes=n_classes, model_size=_MODEL_SIZE,
                max_output_size=max_output_size,
                iou_threshold=iou_threshold,
                confidence_threshold=confidence_threshold)

# I think the batch size would be 1, since we process one frame at a time.
inputs = tf.placeholder(tf.float32, [batch_size, 416, 416, 3])
detections = model(inputs, training=False)

# Ops that assign the weights read from 'files/yolov3.weights' to the
# variables under the 'yolo_v3_model' scope.
model_vars = tf.global_variables(scope='yolo_v3_model')
assign_ops = load_weights(model_vars, 'files/yolov3.weights')

with tf.Session() as sess:
    sess.run(assign_ops)
    detection_result = sess.run(detections, feed_dict={inputs: batch})

draw_boxes(img_names, detection_result, class_names, _MODEL_SIZE)
编辑:
我尝试运行以下代码:
def generator():
    """Yield webcam frames from camera index 0 until 'q' is pressed.

    Each frame is shown in a window named 'frame' before it is yielded.
    """
    cap = cv2.VideoCapture(0)
    # Request capture properties 3 and 4 to be 416 (presumably frame width
    # and height - TODO confirm). NOTE(review): the reported ValueError shows
    # frames of shape (240, 320, 3), so the request was not honoured.
    cap.set(3,416)
    cap.set(4,416)
    time.sleep(10)
    while(True):
        # Capture frame-by-frame
        ret, frame = cap.read()
        # Display the resulting frame
        cv2.imshow('frame',frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
        yield frame


with tf.Session() as sess:
    sess.run(assign_ops)
    for frame in generator():
        # expand_dims adds a leading batch axis; the frame is fed as-is,
        # without resizing it to the placeholder's 416x416 shape.
        detection_result = sess.run(detections, feed_dict={inputs: np.expand_dims(frame, 0)})
        draw_boxes(img_names, detection_result, class_names, _MODEL_SIZE)
但我遇到了图像未能正确加载(空帧)的错误,或者是这个错误:
ValueError: Cannot feed value of shape (1, 240, 320, 3) for Tensor 'Placeholder:0', which has shape '(1, 416, 416, 3)'
编辑 2
我感觉它几乎可以工作。我运行了
def generator():
    """Yield webcam frames resized to 416x416 until 'q' is pressed.

    Each resized frame is shown in a window named 'frame' before it is
    yielded. The capture is released and the windows are destroyed once the
    loop ends.
    """
    cap = cv2.VideoCapture(0)
    while(True):
        # Capture frame-by-frame
        ret, frame = cap.read()
        frame = cv2.resize(frame, (416, 416))
        # Display the resulting frame
        cv2.imshow('frame',frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
        yield frame
    # When everything is done, release the capture
    cap.release()
    cv2.destroyAllWindows()


with tf.Session() as sess:
    sess.run(assign_ops)
    for frame in generator():
        # expand_dims adds a leading batch axis of size 1.
        detection_result = sess.run(detections, feed_dict={inputs: np.expand_dims(frame, 0)})
        # NOTE(review): draw_boxes receives the frame array here, not a list
        # of image names.
        draw_boxes(frame, detection_result, class_names, _MODEL_SIZE)
但我得到了这个错误:
AttributeError: 'numpy.ndarray' object has no attribute 'read'
我尝试运行没有最后一部分的代码:
draw_boxes(frame, detection_result, class_names, _MODEL_SIZE)
我的摄像头确实打开了,尽管没有进行任何对象检测(YOLO模型)
顺便说一下,这是 draw_boxes 函数:
def draw_boxes(img_names, boxes_dicts, class_names, model_size):
    """Draw the detected boxes on each image and display it.

    Args:
        img_names: A list of input image names; each entry is passed to
            ``Image.open``.
        boxes_dicts: One class-to-boxes dictionary per image.
        class_names: A class names list.
        model_size: The input size of the model.

    Returns:
        None.
    """
    for num, img_name, boxes_dict in zip(range(len(img_names)), img_names,
                                         boxes_dicts):
        img = Image.open(img_name)
        draw = ImageDraw.Draw(img)
        font = ImageFont.truetype(font='files/futur.ttf',
                                  size=(img.size[0] + img.size[1]) // 100)
        # Ratio between this image's size and the model input size, used to
        # rescale the box coordinates below.
        resize_factor = \
            (img.size[0] / model_size[0], img.size[1] / model_size[1])
        for cls in range(len(class_names)):
            boxes = boxes_dict[cls]
            if np.size(boxes) != 0:
                color = np.random.permutation([np.random.randint(256), 255, 0])
                for box in boxes:
                    # xy - the four box coordinates; box[4] is the confidence.
                    xy, confidence = box[:4], box[4]
                    xy = [xy[i] * resize_factor[i % 2] for i in range(4)]
                    # Keep the first corner: it anchors the text label below.
                    x0, y0 = xy[0], xy[1]
                    thickness = (img.size[0] + img.size[1]) // 200
                    # Draw nested rectangles, shrinking the box a little on
                    # every step, to get a thicker outline.
                    for t in np.linspace(0, 1, thickness):
                        xy[0], xy[1] = xy[0] + t, xy[1] + t
                        xy[2], xy[3] = xy[2] - t, xy[3] - t
                        draw.rectangle(xy, outline=tuple(color))
                    # text - the text to be drawn. Distance_To_Obect is
                    # defined outside this snippet; it is given
                    # xy[3] - xy[1] as one of its arguments.
                    if class_names[cls] =='car':
                        text = '{} {:.1f}% 大约 {:.1f} 厘米远'.format(class_names[cls],
                                           confidence * 100,
                                           Distance_To_Obect(4.3,121,780,xy[3]-xy[1],3.5).distance())
                    elif class_names[cls] =='person':
                        width, height = img.size
                        print(width, height)
                        text = '{} {:.1f}% 大约 {:.1f} 厘米远'.format(class_names[cls],
                                           confidence * 100,
                                           Distance_To_Obect(4.3,170,height,xy[3]-xy[1],3.5).distance())
                    else:
                        text = '{} {:.1f}%'.format(class_names[cls],
                                                   confidence * 100)
                    text_size = draw.textsize(text, font=font)
                    print ('[x0, y0, x1, y1]', xy[0], xy[1] ,xy[2], xy[3])
                    # fill - the colour of the label background rectangle.
                    draw.rectangle(
                        [x0, y0 - text_size[1], x0 + text_size[0], y0],
                        fill=tuple(color))
                    draw.text((x0, y0 - text_size[1]), text, fill='black',
                              font=font)

        # NOTE(review): indentation was lost in the paste; display(img) is
        # assumed to run once per image - confirm against the original file.
        display(img)
我尝试将
img = Image.open(img_name)
替换为
Image.fromarray(img_name)
但我再次运行文件后得到了错误:
TypeError: function takes exactly 1 argument (3 given)
另外,我运行了
print (detection_result)
它确实包含点
编辑 3
我尝试将 draw_boxes 方法替换为这个链接中的方法,
但我得到了这个错误:
OSError Traceback (most recent call last)<ipython-input-5-fa46870a1059> in <module> 105 detection_result = sess.run(detections, feed_dict={inputs: np.expand_dims(frame, 0)}) 106 print(detection_result)--> 107 draw_boxes(frame, detection_result, class_names, _MODEL_SIZE)<ipython-input-5-fa46870a1059> in draw_boxes(image, boxes, box_classes, class_names, scores) 36 font = ImageFont.truetype( 37 font='font/FiraMono-Medium.otf',---> 38 size=np.floor(3e-2 * image.size[1] + 0.5).astype('int32')) 39 thickness = (image.size[0] + image.size[1]) // 300 40~\AppData\Local\Programs\Python\Python36\lib\site-packages\PIL\ImageFont.py in truetype(font, size, index, encoding, layout_engine) 278 279 try:--> 280 return FreeTypeFont(font, size, index, encoding, layout_engine) 281 except IOError: 282 ttf_filename = os.path.basename(font)~\AppData\Local\Programs\Python\Python36\lib\site-packages\PIL\ImageFont.py in __init__(self, font, size, index, encoding, layout_engine) 143 if isPath(font): 144 self.font = core.getfont(font, size, index, encoding,--> 145 layout_engine=layout_engine) 146 else: 147 self.font_bytes = font.read()OSError: cannot open resource
编辑 4
顺便说一下,这是 result_box:
[{0: array([[131.96371 , 131.70601 , 341.41946 , 358.6781 , 0.68467134]], dtype=float32), 1: array([], shape=(0, 5), dtype=float32), 2: array([], shape=(0, 5), dtype=float32), 3: array([], shape=(0, 5), dtype=float32), 4: array([], shape=(0, 5), dtype=float32), 5: array([], shape=(0, 5), dtype=float32), 6: array([], shape=(0, 5), dtype=float32), 7: array([], shape=(0, 5), dtype=float32), 8: array([], shape=(0, 5), dtype=float32), 9: array([], shape=(0, 5), dtype=float32), 10: array([], shape=(0, 5), dtype=float32), 11: array([], shape=(0, 5), dtype=float32), 12: array([], shape=(0, 5), dtype=float32), 13: array([], shape=(0, 5), dtype=float32), 14: array([], shape=(0, 5), dtype=float32), 15: array([], shape=(0, 5), dtype=float32), 16: array([], shape=(0, 5), dtype=float32), 17: array([], shape=(0, 5), dtype=float32), 18: array([], shape=(0, 5), dtype=float32), 19: array([], shape=(0, 5), dtype=float32), 20: array([], shape=(0, 5), dtype=float32), 21: array([], shape=(0, 5), dtype=float32), 22: array([], shape=(0, 5), dtype=float32), 23: array([], shape=(0, 5), dtype=float32), 24: array([], shape=(0, 5), dtype=float32), 25: array([], shape=(0, 5), dtype=float32), 26: array([], shape=(0, 5), dtype=float32), 27: array([], shape=(0, 5), dtype=float32), 28: array([], shape=(0, 5), dtype=float32), 29: array([], shape=(0, 5), dtype=float32), 30: array([], shape=(0, 5), dtype=float32), 31: array([], shape=(0, 5), dtype=float32), 32: array([], shape=(0, 5), dtype=float32), 33: array([], shape=(0, 5), dtype=float32), 34: array([], shape=(0, 5), dtype=float32), 35: array([], shape=(0, 5), dtype=float32), 36: array([], shape=(0, 5), dtype=float32), 37: array([], shape=(0, 5), dtype=float32), 38: array([], shape=(0, 5), dtype=float32), 39: array([], shape=(0, 5), dtype=float32), 40: array([], shape=(0, 5), dtype=float32), 41: array([], shape=(0, 5), dtype=float32), 42: array([], shape=(0, 5), dtype=float32), 43: array([], shape=(0, 5), dtype=float32), 44: array([], shape=(0, 
5), dtype=float32), 45: array([], shape=(0, 5), dtype=float32), 46: array([], shape=(0, 5), dtype=float32), 47: array([], shape=(0, 5), dtype=float32), 48: array([], shape=(0, 5), dtype=float32), 49: array([], shape=(0, 5), dtype=float32), 50: array([], shape=(0, 5), dtype=float32), 51: array([], shape=(0, 5), dtype=float32), 52: array([], shape=(0, 5), dtype=float32), 53: array([], shape=(0, 5), dtype=float32), 54: array([], shape=(0, 5), dtype=float32), 55: array([], shape=(0, 5), dtype=float32), 56: array([], shape=(0, 5), dtype=float32), 57: array([], shape=(0, 5), dtype=float32), 58: array([], shape=(0, 5), dtype=float32), 59: array([], shape=(0, 5), dtype=float32), 60: array([], shape=(0, 5), dtype=float32), 61: array([], shape=(0, 5), dtype=float32), 62: array([], shape=(0, 5), dtype=float32), 63: array([], shape=(0, 5), dtype=float32), 64: array([], shape=(0, 5), dtype=float32), 65: array([], shape=(0, 5), dtype=float32), 66: array([], shape=(0, 5), dtype=float32), 67: array([], shape=(0, 5), dtype=float32), 68: array([], shape=(0, 5), dtype=float32), 69: array([], shape=(0, 5), dtype=float32), 70: array([], shape=(0, 5), dtype=float32), 71: array([], shape=(0, 5), dtype=float32), 72: array([], shape=(0, 5), dtype=float32), 73: array([], shape=(0, 5), dtype=float32), 74: array([], shape=(0, 5), dtype=float32), 75: array([], shape=(0, 5), dtype=float32), 76: array([], shape=(0, 5), dtype=float32), 77: array([], shape=(0, 5), dtype=float32), 78: array([], shape=(0, 5), dtype=float32), 79: array([], shape=(0, 5), dtype=float32)}]
回答:
在你提供的链接中,`frame` 就是需要输入给 YOLO 的图像。你可以把链接中的整个 while 循环放进会话(`with tf.Session()` 代码块)里,并用下面这行代码逐帧运行检测:
detection_result = sess.run(detections, feed_dict={inputs: np.expand_dims(frame, 0)})
或者编写一个提供帧的生成器,这样会更干净一些。
def generator():
    """Yield webcam frames resized to the 416x416 network input size.

    Opens camera index 0 and yields one resized frame per iteration. The
    generator stops when the camera fails to deliver a frame, and the capture
    is released when the generator finishes or is closed.

    Yields:
        The current frame after ``cv2.resize(frame, (416, 416))``.
    """
    cap = cv2.VideoCapture(0)
    try:
        while True:
            # Capture frame-by-frame
            ret, frame = cap.read()
            if not ret:
                # No frame was grabbed; resizing an empty frame would raise,
                # so stop producing frames instead.
                break
            yield cv2.resize(frame, (416, 416))
    finally:
        # Runs on normal exit and when the consumer stops iterating early,
        # so the camera is always handed back.
        cap.release()
然后在你的代码中可以这样做:
# Run the detector on every frame produced by the webcam generator.
with tf.Session() as sess:
    # Execute the weight-assignment ops once, before any inference.
    sess.run(assign_ops)
    for frame in generator():
        # Prepend a batch axis so the single frame becomes a batch of size 1.
        feed = {inputs: np.expand_dims(frame, 0)}
        detection_result = sess.run(detections, feed_dict=feed)
expand_dims是为了创建一个批量大小为1的批次,例如:将399x399x3变成1x399x399x3。
大致如此,希望对你有所帮助
编辑
import numpy as np
import cv2


def convert_bbox_to_absolute(bbox_list, w_img, h_img):
    """Scale relative bounding boxes (values < 1) to absolute pixel values.

    :param bbox_list: (list) bounding boxes (x, y, w, h, probability) whose
        x, y, w, h are fractions of the image size
    :param w_img: (int) width of the image in pixels
    :param h_img: (int) height of the image in pixels
    :return: (list) tuples (x, y, w, h, probability); x and w are multiplied
        by w_img, y and h by h_img, each truncated to int, and the
        probability is passed through unchanged. An empty input gives [].
    """
    # Horizontal quantities scale with the width, vertical ones with the
    # height.
    return [
        (int(x * w_img), int(y * h_img), int(w * w_img), int(h * h_img), c)
        for x, y, w, h, c in bbox_list
    ]


def draw_boxes(image, bbox_list):
    """Draw rectangles with their probabilities on the image and show it.

    The image is drawn on in place, displayed in a window, and the call
    blocks until a key is pressed.

    :param image: (np.ndarray) colour image of shape (height, width, channels)
    :param bbox_list: (list) relative bounding boxes
        (x, y, w, h, probability); may be empty
    :raises TypeError: if image is not an ndarray or bbox_list is not a list
    :raises ValueError: if image is not 3-dimensional or the boxes do not
        have 5 values
    """
    if not isinstance(image, np.ndarray):
        raise TypeError("image must be a numpy.ndarray")
    if not isinstance(bbox_list, list):
        raise TypeError("bbox_list must be a list")
    if image.ndim != 3:
        raise ValueError("image must have shape (height, width, channels)")
    if bbox_list and len(bbox_list[0]) != 5:
        raise ValueError("each bounding box must have 5 values")

    # NumPy images are indexed (row, column), i.e. (height, width).
    h_img, w_img = image.shape[:2]

    # Convert the relative box coordinates to absolute pixel values
    bbox_list = convert_bbox_to_absolute(bbox_list, w_img, h_img)

    for x, y, w, h, c in bbox_list:
        cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 3)
        cv2.putText(image, str(c), (x, y), cv2.FONT_HERSHEY_SIMPLEX, 1,
                    (255, 255, 255), 2)

    cv2.imshow("", image)
    cv2.waitKey(0)


# Create a black 8-bit test image (height 512, width 416)
test_image = np.zeros((512, 416, 3), dtype=np.uint8)
# Fake some bounding boxes: x, y, w, h, confidence
b_boxes = [[0.5, 0.1, 0.5, 0.9, 0.8], [0.4, 0.1, 0.1, 0.1, 0.4]]
draw_boxes(test_image, b_boxes)
这是一个非常简单的绘制边界框的示例。根据这个链接,我假设 YOLO 输出的框坐标是归一化的(取值在 0 到 1 之间)。如果你知道如何确定每个框对应哪个标签,我也会把标签加到代码中。
编辑 2:
import numpy as np
import cv2
from random import choices
from string import ascii_lowercase


def draw_boxes(image, bbox_list, label_list, wait_ms=0):
    """Draw labelled rectangles with their probabilities and show the image.

    The image is drawn on in place and displayed in a window.

    :param image: (np.ndarray) colour image of shape (height, width, channels)
    :param bbox_list: (list[dict]) list whose first element maps a class
        index to an array of boxes, one row (x1, y1, x2, y2, probability)
        per detection; an empty array means no detection for that class
    :param label_list: (list) label names, indexed by the dictionary keys
    :param wait_ms: (int) delay passed to ``cv2.waitKey``; the default 0
        blocks until a key is pressed, so pass 1 inside a webcam loop
    :raises TypeError: if an argument has the wrong type
    :raises ValueError: if image is not 3-dimensional or the number of
        classes differs from the number of labels
    """
    bbox_dict = bbox_list[0]

    if not isinstance(image, np.ndarray):
        raise TypeError("image must be a numpy.ndarray")
    if not isinstance(bbox_dict, dict):
        raise TypeError("bbox_list[0] must be a dict")
    if not isinstance(label_list, list):
        raise TypeError("label_list must be a list")
    if image.ndim != 3:
        raise ValueError("image must have shape (height, width, channels)")
    if len(bbox_dict) != len(label_list):
        raise ValueError("need exactly one label per class in bbox_list[0]")

    for class_id, boxes in bbox_dict.items():
        boxes = np.asarray(boxes)
        if boxes.size == 0:
            continue

        # One row per detection, so a class with several boxes is drawn
        # completely; a single flat box becomes one row as well.
        for x1, y1, x2, y2, c in boxes.reshape(-1, 5):
            # OpenCV drawing functions need integer pixel coordinates.
            x1, y1, x2, y2 = (int(val) for val in (x1, y1, x2, y2))
            label = label_list[class_id] + ": {}".format(c)

            cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 3)
            cv2.putText(image, label, (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, 1,
                        (255, 255, 255), 2)

    cv2.imshow("", image)
    cv2.waitKey(wait_ms)


# Create a black 8-bit test image (height 512, width 416)
test_image = np.zeros((512, 416, 3), dtype=np.uint8)

# Fake one bounding box (x1, y1, x2, y2, confidence) and generate empty
# arrays for the remaining keys so the structure matches your data
b_boxes = [{
    0: np.array([[131.96371, 131.70601, 341.41946, 358.6781, 0.68467134]]),
    1: np.array([]),
}]
for i in range(2, 80):
    b_boxes[0][i] = np.array([])

# Get 80 random 10-letter strings to stand in for the labels
labels = ["".join(choices(ascii_lowercase, k=10)) for _ in range(80)]

draw_boxes(test_image, b_boxes, labels)
根据你的数据,我做了一些更改,希望这对你有帮助