我正在使用pytesseract从图像中提取文本。但是对于倾斜的图像,它无法正常工作。请看下面的图像:
这是提取文本的代码,对于未倾斜的图像运行正常。
img = cv2.imread(<path_to_image>)gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)blur = cv2.GaussianBlur(gray, (5,5),0)ret3, thresh = cv2.threshold(blur,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)def findSignificantContours (img, edgeImg): contours, heirarchy = cv2.findContours(edgeImg, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE) # Find level 1 contours level1 = [] for i, tupl in enumerate(heirarchy[0]): # Each array is in format (Next, Prev, First child, Parent) # Filter the ones without parent if tupl[3] == -1: tupl = np.insert(tupl, 0, [i]) level1.append(tupl) significant = [] tooSmall = edgeImg.size * 5 / 100 # If contour isn't covering 5% of total area of image then it probably is too small for tupl in level1: contour = contours[tupl[0]]; area = cv2.contourArea(contour) if area > tooSmall: significant.append([contour, area]) # Draw the contour on the original image cv2.drawContours(img, [contour], 0, (0,255,0),2, cv2.LINE_AA, maxLevel=1) significant.sort(key=lambda x: x[1]) #print ([x[1] for x in significant]); mx = (0,0,0,0) # biggest bounding box so far mx_area = 0 for cont in contours: x,y,w,h = cv2.boundingRect(cont) area = w*h if area > mx_area: mx = x,y,w,h mx_area = area x,y,w,h = mx # Output to files roi = img[y:y+h,x:x+w] gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) blur = cv2.GaussianBlur(gray, (5,5),0) ret3, thresh = cv2.threshold(blur,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU) cv2_imshow(thresh) text = pytesseract.image_to_string(roi); print(text); print("\n"); print(pytesseract.image_to_string(thresh)); print("\n") return [x[0] for x in significant];edgeImg_8u = np.asarray(thresh, np.uint8)# Find contourssignificant = findSignificantContours(img, edgeImg_8u)mask = thresh.copy()mask[mask > 0] = 0cv2.fillPoly(mask, significant, 255)# Invert maskmask = np.logical_not(mask)#Finally remove the backgroundimg[mask] = 0;
Tesseract无法从此图像中提取文本。有没有办法可以旋转它以完美对齐文本,然后将其输入到pytesseract中?请告诉我我的问题是否需要进一步澄清。
回答:
这里有一个简单的方法:
-
查找轮廓并按最大轮廓排序。我们查找轮廓,然后使用
cv2.contourArea()
通过轮廓面积进行过滤,以隔离矩形轮廓。 -
执行透视变换。接下来,我们使用
cv2.approxPolyDP()
进行轮廓近似以获得矩形轮廓。最后,我们利用imutils.perspective.four_point_transform
实际获得图像的鸟瞰图。
二值图像
结果
要实际提取文本,请查看
代码
from imutils.perspective import four_point_transform
import cv2
import numpy

# Load image, grayscale, Gaussian blur, Otsu's threshold.
image = cv2.imread("1.jpg")
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (7, 7), 0)
thresh = cv2.threshold(blur, 0, 255,
                       cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]

# Find contours and sort by area so the document is tried first.
cnts = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]  # OpenCV 2/4 vs 3 return shape
cnts = sorted(cnts, key=cv2.contourArea, reverse=True)

# Approximate each contour; the first 4-point polygon is the document.
displayCnt = None
for c in cnts:
    peri = cv2.arcLength(c, True)
    approx = cv2.approxPolyDP(c, 0.02 * peri, True)
    if len(approx) == 4:
        displayCnt = approx
        break

# Fail loudly instead of crashing on None.reshape() below.
if displayCnt is None:
    raise RuntimeError("No 4-point document contour found in the image")

# Obtain a top-down (bird's-eye) view of the document.
warped = four_point_transform(image, displayCnt.reshape(4, 2))
cv2.imshow("thresh", thresh)
cv2.imshow("warped", warped)
cv2.waitKey()