如何将我的YOLO v3模型连接到我的网络摄像头？

我正在按照这个GitHub仓库使用TensorFlow学习YOLO v3。代码在单张图像上运行得很顺畅（我从电脑上加载的图像），但我正在尝试将模型连接到我的网络摄像头。

我尝试查看了OpenCV关于从摄像头捕获视频的教程，但我无法弄清楚如何将它与启动TensorFlow会话的语句结合起来运行我的模型：

# Detection hyper-parameters handed to the model constructor.
max_output_size = 10
iou_threshold = 0.5
confidence_threshold = 0.5

# Input batch (one entry per image name) and the class labels.
batch_size = len(img_names)
batch = load_images(img_names, model_size=_MODEL_SIZE)
class_names = load_class_names('files/coco.names')
n_classes = len(class_names)

# Start from a clean default graph before building the model.
tf.reset_default_graph()
model = Yolo_v3(
    n_classes=n_classes,
    model_size=_MODEL_SIZE,
    max_output_size=max_output_size,
    iou_threshold=iou_threshold,
    confidence_threshold=confidence_threshold,
)

# I think the batch size will be 1, since we process one frame at a time.
inputs = tf.placeholder(tf.float32, [batch_size, 416, 416, 3])
detections = model(inputs, training=False)

# Ops built by load_weights for the variables under the 'yolo_v3_model' scope.
model_vars = tf.global_variables(scope='yolo_v3_model')
assign_ops = load_weights(model_vars, 'files/yolov3.weights')

with tf.Session() as sess:
    sess.run(assign_ops)
    detection_result = sess.run(detections, feed_dict={inputs: batch})

draw_boxes(img_names, detection_result, class_names, _MODEL_SIZE)

编辑：
我尝试运行以下代码：

def generator():
    """Yield webcam frames, previewing each one, until 'q' is pressed.

    The capture device is asked for 416x416 frames and the first read is
    delayed by 10 seconds. Frames are yielded exactly as they were read;
    nothing here resizes them or checks that the read succeeded.
    """
    camera = cv2.VideoCapture(0)
    # Property ids 3 and 4 are the frame width and the frame height.
    # NOTE(review): a driver may not honour the requested size, in which
    # case the frames keep the camera's native resolution.
    camera.set(3, 416)
    camera.set(4, 416)
    time.sleep(10)
    while True:
        # Capture frame-by-frame.
        _grabbed, frame = camera.read()
        # Display the resulting frame.
        cv2.imshow('frame', frame)
        if (cv2.waitKey(1) & 0xFF) == ord('q'):
            return
        yield frame


with tf.Session() as sess:
    sess.run(assign_ops)
    for frame in generator():
        # Add a leading batch axis so the frame is fed as a batch of one.
        detection_result = sess.run(
            detections,
            feed_dict={inputs: np.expand_dims(frame, axis=0)},
        )
        draw_boxes(img_names, detection_result, class_names, _MODEL_SIZE)

但我遇到了图像未能正确加载（空帧）的错误，或者是这个错误：

ValueError: Cannot feed value of shape (1, 240, 320, 3) for Tensor 'Placeholder:0', which has shape '(1, 416, 416, 3)'

编辑 2
我感觉它几乎可以工作。我运行了

def generator():
    """Yield 416x416 webcam frames, previewing each one, until 'q' is pressed.

    Every frame is resized to 416x416 before it is shown and yielded. Once
    the loop ends, the capture device is released and the preview window is
    closed.
    """
    camera = cv2.VideoCapture(0)
    while True:
        # Capture frame-by-frame.
        _grabbed, raw_frame = camera.read()
        frame = cv2.resize(raw_frame, (416, 416))
        # Display the resulting frame.
        cv2.imshow('frame', frame)
        if (cv2.waitKey(1) & 0xFF) == ord('q'):
            break
        yield frame
    # When everything is done, release the capture.
    camera.release()
    cv2.destroyAllWindows()


with tf.Session() as sess:
    sess.run(assign_ops)
    for frame in generator():
        # Add a leading batch axis so the frame is fed as a batch of one.
        detection_result = sess.run(
            detections,
            feed_dict={inputs: np.expand_dims(frame, axis=0)},
        )
        draw_boxes(frame, detection_result, class_names, _MODEL_SIZE)

但我得到了这个错误：

AttributeError: 'numpy.ndarray' object has no attribute 'read'

我尝试运行没有最后一部分的代码：

draw_boxes(frame, detection_result, class_names, _MODEL_SIZE)

我的摄像头确实打开了，尽管没有进行任何对象检测（YOLO模型）

顺便说一下，这是draw_boxes函数：

def draw_boxes(img_names, boxes_dicts, class_names, model_size):
    """Draw the detected boxes on each image and display the result.

    Args:
        img_names: List of input image names; each entry is opened with
            ``Image.open``.
        boxes_dicts: One class-to-boxes dictionary per image, keyed by class
            index. Every box is read as four coordinates followed by a
            confidence value.
        class_names: List of class names.
        model_size: The input size of the model; the box coordinates are
            rescaled from this size to the size of the opened image.

    Returns:
        None.
    """
    for img_name, boxes_dict in zip(img_names, boxes_dicts):
        img = Image.open(img_name)
        draw = ImageDraw.Draw(img)
        img_w, img_h = img.size
        font = ImageFont.truetype(font='files/futur.ttf',
                                  size=(img_w + img_h) // 100)
        thickness = (img_w + img_h) // 200
        # Even coordinate indices scale with the width, odd with the height.
        resize_factor = (img_w / model_size[0], img_h / model_size[1])

        for cls, cls_name in enumerate(class_names):
            boxes = boxes_dict[cls]
            if np.size(boxes) == 0:
                continue

            # One random colour per class, shared by the outline and label.
            color = np.random.permutation([np.random.randint(256), 255, 0])
            rgb = tuple(color)

            for box in boxes:
                corners, confidence = box[:4], box[4]
                xy = [corners[i] * resize_factor[i % 2] for i in range(4)]
                # xy[:2] is the top-left corner, where the label is anchored.
                x0, y0 = xy[0], xy[1]

                # The outline is drawn `thickness` times, moving inward each
                # time; the offsets accumulate in xy, and the values below
                # read the modified coordinates.
                for t in np.linspace(0, 1, thickness):
                    xy[0] += t
                    xy[1] += t
                    xy[2] -= t
                    xy[3] -= t
                    draw.rectangle(xy, outline=rgb)

                # Distance_To_Obect is defined elsewhere; its arguments are
                # passed through unchanged.
                if cls_name == 'car':
                    distance = Distance_To_Obect(
                        4.3, 121, 780, xy[3] - xy[1], 3.5).distance()
                    text = '{} {:.1f}% 大约 {:.1f} 厘米远'.format(
                        cls_name, confidence * 100, distance)
                elif cls_name == 'person':
                    print(img_w, img_h)
                    distance = Distance_To_Obect(
                        4.3, 170, img_h, xy[3] - xy[1], 3.5).distance()
                    text = '{} {:.1f}% 大约 {:.1f} 厘米远'.format(
                        cls_name, confidence * 100, distance)
                else:
                    text = '{} {:.1f}%'.format(cls_name, confidence * 100)

                # NOTE(review): ImageDraw.textsize was removed in Pillow 10;
                # newer versions need textbbox/textlength instead.
                text_size = draw.textsize(text, font=font)
                print('[x0, y0, x1, y1]', xy[0], xy[1], xy[2], xy[3])

                # Filled label background just above the box, then the text.
                label_top = y0 - text_size[1]
                draw.rectangle([x0, label_top, x0 + text_size[0], y0],
                               fill=rgb)
                draw.text((x0, label_top), text, fill='black', font=font)

        display(img)

我尝试将

img = Image.open(img_name)

替换为

Image.fromarray(img_name)

但我再次运行文件后得到了错误：

TypeError: function takes exactly 1 argument (3 given)

另外，我运行了

print (detection_result)

它确实包含点

编辑 3
我尝试将draw_boxes方法更改为这个链接中的方法
但我得到了这个错误：

OSError                                   Traceback (most recent call last)
<ipython-input-5-fa46870a1059> in <module>
    105         detection_result = sess.run(detections, feed_dict={inputs: np.expand_dims(frame, 0)})
    106         print(detection_result)
--> 107         draw_boxes(frame, detection_result, class_names, _MODEL_SIZE)

<ipython-input-5-fa46870a1059> in draw_boxes(image, boxes, box_classes, class_names, scores)
     36     font = ImageFont.truetype(
     37         font='font/FiraMono-Medium.otf',
---> 38         size=np.floor(3e-2 * image.size[1] + 0.5).astype('int32'))
     39     thickness = (image.size[0] + image.size[1]) // 300
     40

~\AppData\Local\Programs\Python\Python36\lib\site-packages\PIL\ImageFont.py in truetype(font, size, index, encoding, layout_engine)
    278
    279     try:
--> 280         return FreeTypeFont(font, size, index, encoding, layout_engine)
    281     except IOError:
    282         ttf_filename = os.path.basename(font)

~\AppData\Local\Programs\Python\Python36\lib\site-packages\PIL\ImageFont.py in __init__(self, font, size, index, encoding, layout_engine)
    143         if isPath(font):
    144             self.font = core.getfont(font, size, index, encoding,
--> 145                                      layout_engine=layout_engine)
    146         else:
    147             self.font_bytes = font.read()

OSError: cannot open resource

编辑 4
顺便说一下，这是result_box：

[{0: array([[131.96371   , 131.70601   , 341.41946   , 358.6781    ,          0.68467134]], dtype=float32), 1: array([], shape=(0, 5), dtype=float32), 2: array([], shape=(0, 5), dtype=float32), 3: array([], shape=(0, 5), dtype=float32), 4: array([], shape=(0, 5), dtype=float32), 5: array([], shape=(0, 5), dtype=float32), 6: array([], shape=(0, 5), dtype=float32), 7: array([], shape=(0, 5), dtype=float32), 8: array([], shape=(0, 5), dtype=float32), 9: array([], shape=(0, 5), dtype=float32), 10: array([], shape=(0, 5), dtype=float32), 11: array([], shape=(0, 5), dtype=float32), 12: array([], shape=(0, 5), dtype=float32), 13: array([], shape=(0, 5), dtype=float32), 14: array([], shape=(0, 5), dtype=float32), 15: array([], shape=(0, 5), dtype=float32), 16: array([], shape=(0, 5), dtype=float32), 17: array([], shape=(0, 5), dtype=float32), 18: array([], shape=(0, 5), dtype=float32), 19: array([], shape=(0, 5), dtype=float32), 20: array([], shape=(0, 5), dtype=float32), 21: array([], shape=(0, 5), dtype=float32), 22: array([], shape=(0, 5), dtype=float32), 23: array([], shape=(0, 5), dtype=float32), 24: array([], shape=(0, 5), dtype=float32), 25: array([], shape=(0, 5), dtype=float32), 26: array([], shape=(0, 5), dtype=float32), 27: array([], shape=(0, 5), dtype=float32), 28: array([], shape=(0, 5), dtype=float32), 29: array([], shape=(0, 5), dtype=float32), 30: array([], shape=(0, 5), dtype=float32), 31: array([], shape=(0, 5), dtype=float32), 32: array([], shape=(0, 5), dtype=float32), 33: array([], shape=(0, 5), dtype=float32), 34: array([], shape=(0, 5), dtype=float32), 35: array([], shape=(0, 5), dtype=float32), 36: array([], shape=(0, 5), dtype=float32), 37: array([], shape=(0, 5), dtype=float32), 38: array([], shape=(0, 5), dtype=float32), 39: array([], shape=(0, 5), dtype=float32), 40: array([], shape=(0, 5), dtype=float32), 41: array([], shape=(0, 5), dtype=float32), 42: array([], shape=(0, 5), dtype=float32), 43: array([], shape=(0, 5), dtype=float32), 44: 
array([], shape=(0, 5), dtype=float32), 45: array([], shape=(0, 5), dtype=float32), 46: array([], shape=(0, 5), dtype=float32), 47: array([], shape=(0, 5), dtype=float32), 48: array([], shape=(0, 5), dtype=float32), 49: array([], shape=(0, 5), dtype=float32), 50: array([], shape=(0, 5), dtype=float32), 51: array([], shape=(0, 5), dtype=float32), 52: array([], shape=(0, 5), dtype=float32), 53: array([], shape=(0, 5), dtype=float32), 54: array([], shape=(0, 5), dtype=float32), 55: array([], shape=(0, 5), dtype=float32), 56: array([], shape=(0, 5), dtype=float32), 57: array([], shape=(0, 5), dtype=float32), 58: array([], shape=(0, 5), dtype=float32), 59: array([], shape=(0, 5), dtype=float32), 60: array([], shape=(0, 5), dtype=float32), 61: array([], shape=(0, 5), dtype=float32), 62: array([], shape=(0, 5), dtype=float32), 63: array([], shape=(0, 5), dtype=float32), 64: array([], shape=(0, 5), dtype=float32), 65: array([], shape=(0, 5), dtype=float32), 66: array([], shape=(0, 5), dtype=float32), 67: array([], shape=(0, 5), dtype=float32), 68: array([], shape=(0, 5), dtype=float32), 69: array([], shape=(0, 5), dtype=float32), 70: array([], shape=(0, 5), dtype=float32), 71: array([], shape=(0, 5), dtype=float32), 72: array([], shape=(0, 5), dtype=float32), 73: array([], shape=(0, 5), dtype=float32), 74: array([], shape=(0, 5), dtype=float32), 75: array([], shape=(0, 5), dtype=float32), 76: array([], shape=(0, 5), dtype=float32), 77: array([], shape=(0, 5), dtype=float32), 78: array([], shape=(0, 5), dtype=float32), 79: array([], shape=(0, 5), dtype=float32)}]

回答：

在你提供的链接中，帧是你需要输入到YOLO的图像。你可以将链接中的整个while循环放入会话中，并通过运行以下代码逐帧处理：

detection_result = sess.run(detections, feed_dict={inputs: np.expand_dims(frame, 0)})

或者编写一个提供帧的生成器，这样会更干净一些。

def generator():
    """Yield frames from the default webcam, resized to 416x416.

    Iteration stops when the camera fails to deliver a frame. The capture
    device is released when the generator finishes or is closed, so the
    webcam does not stay locked after the consuming loop ends.

    Yields:
        numpy.ndarray: the captured frame resized to 416x416.
    """
    cap = cv2.VideoCapture(0)
    try:
        while True:
            # Capture frame-by-frame; `ret` is False when no frame arrived.
            ret, frame = cap.read()
            if not ret:
                # Resizing a missing frame would raise inside cv2.resize.
                break
            # NOTE(review): OpenCV delivers frames in BGR channel order;
            # convert first if the model was fed RGB images - confirm.
            yield cv2.resize(frame, (416, 416))
    finally:
        cap.release()

然后在你的代码中可以这样做：

with tf.Session() as sess:
    # Run the weight-assignment ops once, before any detection.
    sess.run(assign_ops)
    for frame in generator():
        # Add a leading batch axis so the frame is fed as a batch of one.
        detection_result = sess.run(
            detections,
            feed_dict={inputs: np.expand_dims(frame, axis=0)},
        )

expand_dims是为了创建一个批量大小为1的批次，例如：将399x399x3变成1x399x399x3。

大致如此，希望对你有所帮助

编辑

import numpy as np
import cv2


def convert_bbox_to_absolute(bbox_list, w_img, h_img):
    """Convert relative box coordinates (< 1) to absolute pixel values.

    :param bbox_list: (list) bounding boxes as (x, y, w, h, probability),
        where x, y, w and h are fractions of the image size
    :param w_img: (int) width of the image in pixels
    :param h_img: (int) height of the image in pixels
    :return: (list) the boxes as tuples, with x and w scaled by the image
        width and y and h scaled by the image height, truncated to int;
        the probability is passed through unchanged
    """
    # Horizontal values scale with the width, vertical ones with the height.
    return [
        (int(x * w_img), int(y * h_img), int(w * w_img), int(h * h_img), c)
        for x, y, w, h, c in bbox_list
    ]


def draw_boxes(image, bbox_list):
    """Draw each box and its probability on the image, then show it.

    The image is modified in place and displayed in a window that stays
    open until a key is pressed.

    :param image: (np.ndarray) a colour image with three dimensions
    :param bbox_list: (list) bounding boxes as (x, y, w, h, probability)
        in relative coordinates; an empty list just shows the image
    """
    assert isinstance(image, np.ndarray)
    assert isinstance(bbox_list, list)
    assert image.ndim == 3
    assert all(len(bbox) == 5 for bbox in bbox_list)

    # NumPy image arrays are indexed (row, column, channel): height first.
    h_img, w_img, _ = image.shape

    # Convert the relative box coordinates to absolute pixel values.
    for x, y, w, h, c in convert_bbox_to_absolute(bbox_list, w_img, h_img):
        cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 3)
        cv2.putText(image, str(c), (x, y), cv2.FONT_HERSHEY_SIMPLEX, 1,
                    (255, 255, 255), 2)

    cv2.imshow("", image)
    cv2.waitKey(0)


# Create a black RGB test image.
test_image = np.zeros((512, 416, 3))
# Fake some bounding boxes: x, y, w, h, confidence.
b_boxes = [[0.5, 0.1, 0.5, 0.9, 0.8], [0.4, 0.1, 0.1, 0.1, 0.4]]
draw_boxes(test_image, b_boxes)

这是一个非常简单的绘制边界框的示例。根据这个链接，我假设YOLO输出的边界框坐标是归一化的（取值在0到1之间）。如果你知道如何确定每个框对应哪个标签，我也可以把标签加到代码中。

编辑 2:

import numpy as np
import cv2
from random import choices
from string import ascii_lowercase


def draw_boxes(image, bbox_list, label_list, wait_ms=0):
    """Draw every detected box with its label and confidence, then show it.

    The image is modified in place. Only the first dictionary in
    ``bbox_list`` is used.

    :param image: (np.ndarray) a colour image with three dimensions
    :param bbox_list: (list[dict]) class index -> array of boxes, each box
        being (x1, y1, x2, y2, probability) in pixel coordinates; a class
        may hold zero, one or several boxes
    :param label_list: (list) label names, indexed by the dictionary keys
    :param wait_ms: (int) delay passed to ``cv2.waitKey``; the default 0
        waits for a key press, a positive value (e.g. 1) suits a live
        webcam loop
    """
    bbox_dict = bbox_list[0]
    assert isinstance(image, np.ndarray)
    assert isinstance(bbox_dict, dict)
    assert isinstance(label_list, list)
    assert image.ndim == 3
    # One label per class; this no longer insists on exactly 80 classes.
    assert len(bbox_dict) == len(label_list)

    for class_id, boxes in bbox_dict.items():
        if boxes.size == 0:
            continue
        # Treat a single box and several boxes alike: one row per box.
        for x1, y1, x2, y2, c in boxes.reshape(-1, 5):
            x1, y1, x2, y2 = (int(val) for val in (x1, y1, x2, y2))
            label = label_list[class_id] + ": {}".format(c)
            cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 3)
            cv2.putText(image, label, (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, 1,
                        (255, 255, 255), 2)

    cv2.imshow("", image)
    cv2.waitKey(wait_ms)


# Create a black RGB test image.
test_image = np.zeros((512, 416, 3))
# Fake one bounding box (x1, y1, x2, y2, confidence) and fill the other
# keys with empty arrays so the dictionary matches your data.
b_boxes = [{
    0: np.array([[131.96371, 131.70601, 341.41946, 358.6781, 0.68467134]]),
    1: np.array([])
}]
for i in range(2, 80):
    b_boxes[0][i] = np.array([])
# Get 80 random 10-letter strings to stand in for the labels.
labels = ["".join(choices(ascii_lowercase, k=10)) for _ in range(80)]
draw_boxes(test_image, b_boxes, labels)

根据你的数据，我做了一些更改，希望这对你有帮助

学技术

如何将我的YOLO v3模型连接到我的网络摄像头？

发表回复取消回复

相关文章：

Related Posts

为什么我们在K-means聚类方法中使用kmeans.fit函数？

如何获取Keras中ImageDataGenerator的.flow_from_directory函数扫描的类名？

如何查看每个词的tf-idf得分

如何修复 ‘ValueError: Found input variables with inconsistent numbers of samples: [32979, 21602]’？

如何向神经网络输入两个不同大小的输入？

逻辑回归与机器学习有何关联

发表回复 取消回复

发表回复取消回复