# -*- coding:utf-8 -*-
# image_preprocess.py
"""
TODO: add an algorithm that decides which of two vertically adjacent words a
run of dark pixels lies closer to, so that marks are no longer attributed to
the row above or below the one they belong to (vertical misalignment).
"""
import json
import re
import sys
import time
from pathlib import Path

import cv2
import numpy as np
from PIL import Image, ImageFilter

from baidu_ocr import high_ocr_location
  13. def test_log(text: str):
  14. if type(text) == dict:
  15. text = json.dumps(text, ensure_ascii=False)
  16. with open("log.txt", "w", encoding="utf-8") as f:
  17. f.write(str(text))
  18. class PreprocessImage:
  19. def __init__(self, image_path):
  20. self.image_path = image_path
  21. self.template_image_path = "template.jpg"
  22. self.image = cv2.imread(image_path)
  23. self.template_image = cv2.imread(self.template_image_path)
  24. self.temp_h, self.temp_w = self.template_image.shape[:2]
  25. def correct_image(self, point_tuple,image_path='sharpen_image.jpg'):
  26. """图像矫正
  27. point_tuple:传过来的4个点坐标的元组"""
  28. sharpen_image = cv2.imread(image_path)
  29. src_points = np.float32(point_tuple)
  30. dst_points = np.float32([[122, 78], [1070, 78], [122, 2730], [1070, 2730]])
  31. M = cv2.getPerspectiveTransform(src_points, dst_points)
  32. transformed_image = cv2.warpPerspective(sharpen_image, M, (self.temp_w, self.temp_h))
  33. gray = cv2.cvtColor(transformed_image, cv2.COLOR_BGR2GRAY)
  34. blurred = cv2.GaussianBlur(gray, (5, 5), 0)
  35. image_rgb = cv2.cvtColor(blurred, cv2.COLOR_BGR2RGB)
  36. cv2.imwrite('transformed_image.jpg', image_rgb)
  37. def sharpen_image(self):
  38. img = Image.open(self.image_path)
  39. sharpened_img = img.filter(ImageFilter.SHARPEN)
  40. sharpened_img.save('sharpen_image.jpg')
  41. @staticmethod
  42. def parser_ocr(ocr_data):
  43. p1, p2, p3, p4 = None, None, None, None
  44. for word_item in ocr_data['words_result']:
  45. text: str = word_item['words']
  46. if text.startswith("1."):
  47. left_char_location = word_item['chars'][0]['location']
  48. p1 = (left_char_location['left'], left_char_location['top'])
  49. elif text.startswith("51."):
  50. left_char_location = word_item['chars'][0]['location']
  51. p2 = (left_char_location['left'], left_char_location['top'])
  52. elif text.startswith("50."):
  53. left_char_location = word_item['chars'][0]['location']
  54. p3 = (left_char_location['left'], left_char_location['top'])
  55. elif text.startswith("100."):
  56. left_char_location = word_item['chars'][0]['location']
  57. p4 = (left_char_location['left'], left_char_location['top'])
  58. if any([not p1, not p2, not p3, not p4]):
  59. print([p1, p2, p3, p4])
  60. raise Exception("矫正坐标不对")
  61. return [p1, p2, p3, p4]
  62. def run(self):
  63. self.sharpen_image()
  64. ocr_data = high_ocr_location(self.image_path)
  65. point_tuple = self.parser_ocr(ocr_data)
  66. self.correct_image(point_tuple)
  67. class ComparisonAlgorithm:
  68. """比较算法核心"""
  69. def __init__(self, transformed_image, ocr_data):
  70. self.transformed_image = cv2.imread(transformed_image)
  71. self.ocr_data = ocr_data
  72. self.order_ocr_data = {}
  73. self.already_find_index = set()
  74. self.image = Image.open(transformed_image)
  75. @staticmethod
  76. def separate_numbers_and_letters(text):
  77. """正则提取数字和字母"""
  78. numbers = "".join(re.findall(r'\d+', text))
  79. letters = "".join(re.findall(r'[a-zA-Z]+', text))
  80. return numbers, letters
  81. def is_line_word(self, x, y):
  82. """判断点的颜色是否符合标准; cv2取点速度没有pillow快
  83. 指定要查询的点的坐标 (x, y)"""
  84. rgb_color = self.image.getpixel((x, y))
  85. r, g, b = rgb_color
  86. if all([r < 130, g < 130, b < 130]):
  87. return 1
  88. return 0
  89. def __make_order_ocr_data(self):
  90. for word_item in self.ocr_data['words_result']:
  91. word = word_item['words']
  92. if word[0].isdigit() and len(word) >= 2:
  93. word_text = word_item['words']
  94. location = word_item['location']
  95. first_char_location = word_item['chars'][0]['location']
  96. end_char_location = word_item['chars'][-1]['location']
  97. chars_location = word_item['chars']
  98. numbers, letters = self.separate_numbers_and_letters(word_text)
  99. if numbers not in self.order_ocr_data:
  100. self.order_ocr_data[numbers] = {"word": letters, "location": location, "chars_location": chars_location,
  101. "first_char_location": first_char_location, "end_char_location": end_char_location}
  102. def color_algorithm_1(self, int_index, word_location, first_char_location, word):
  103. """
  104. 颜色算法1,正常单词
  105. int_index:整数序号
  106. word_location:这个序号的单词的整行位置;对应 self.order_ocr_data[current_index]['location']
  107. first_char_location: 第一个字母的位置;对应 self.order_ocr_data[current_index]['first_char_location']
  108. word:具体序号的单词,标识用
  109. """
  110. next_index = str(int_index + 1)
  111. black_count_1 = 0
  112. for x in range(word_location['left'], word_location['left'] + word_location['width']):
  113. b_top, b_height = first_char_location['top'], int(first_char_location['height'])
  114. bottom_location_y = b_top + b_height
  115. if int_index == 50 or int_index == 100:
  116. next_word_top_location = bottom_location_y + b_height * 2
  117. elif next_index in self.order_ocr_data and (
  118. self.order_ocr_data[next_index]['first_char_location']['top'] - bottom_location_y) < b_height:
  119. next_word_location = self.order_ocr_data[next_index]['first_char_location']
  120. next_word_top_location = next_word_location['top'] + int(next_word_location['height'] / 8)
  121. else:
  122. next_word_top_location = bottom_location_y + int(b_height * 0.5)
  123. for y in range(bottom_location_y, next_word_top_location):
  124. result = self.is_line_word(x, y)
  125. if result:
  126. black_count_1 += 1
  127. break
  128. black_count_per = black_count_1 / (word_location['width'])
  129. if black_count_per > 0.8:
  130. print(f"{int_index}正常划线{black_count_per:.2f}", word)
  131. self.already_find_index.add(int_index)
  132. return int_index
  133. def color_algorithm_2(self, int_index, word_location, word):
  134. """颜色算法2,单词自身中间的黑点率
  135. int_index:整数序号
  136. word_location:这个序号的单词的整行位置;对应 self.order_ocr_data[current_index]['location']
  137. word:具体序号的单词,标识用
  138. """
  139. black_count_2 = 0
  140. for x in range(word_location['left'], word_location['left'] + word_location['width']):
  141. mid = word_location['top'] + int(word_location['height'] / 2)
  142. bottom = word_location['top'] + int(word_location['height']) + 5
  143. for y in range(mid, bottom):
  144. result = self.is_line_word(x, y)
  145. if result:
  146. black_count_2 += 1
  147. break
  148. black_count_per = black_count_2 / (word_location['width'])
  149. if black_count_per > 0.92:
  150. print(f"{int_index}中间划线{black_count_per:.2f}", word)
  151. self.already_find_index.add(int_index)
  152. return int_index
  153. def color_algorithm_3(self, int_index, word_location, end_char_location, word):
  154. """
  155. 颜色算法3,正常单词的左右各推20个像素点,判断下黑点率
  156. int_index:整数序号
  157. word_location:这个序号的单词的整行位置;对应 self.order_ocr_data[current_index]['location']
  158. end_char_location: 最后一个字母的位置;对应 self.order_ocr_data[current_index]['end_char_location']
  159. word:具体序号的单词,标识用
  160. """
  161. next_index = str(int_index + 1)
  162. black_count_1 = 0
  163. moving_distance = 20
  164. """这是在获取所有需要的横向左右x坐标"""
  165. all_x = []
  166. for i in range(word_location['left'] - moving_distance, word_location['left']):
  167. all_x.append(i)
  168. word_right_loca = word_location['left'] + word_location['width'] + 2
  169. for i in range(word_right_loca, word_right_loca + moving_distance):
  170. all_x.append(i)
  171. b_top, b_height = word_location['top'], int(word_location['height'])
  172. bottom_location_y = b_top + b_height
  173. bottom_location_y_half = end_char_location['top'] + int(end_char_location['height'] * 0.8)
  174. for x in all_x:
  175. if int_index == 50 or int_index == 100:
  176. next_word_top_location = bottom_location_y + b_height * 2
  177. elif next_index in self.order_ocr_data and (
  178. self.order_ocr_data[next_index]['first_char_location']['top'] - bottom_location_y) < b_height:
  179. next_word_location = self.order_ocr_data[next_index]['first_char_location']
  180. next_word_top_location = next_word_location['top'] + 3
  181. else:
  182. next_word_top_location = bottom_location_y + int(b_height * 0.3)
  183. for y in range(bottom_location_y_half, next_word_top_location):
  184. result = self.is_line_word(x, y)
  185. if result:
  186. black_count_1 += 1
  187. break
  188. black_count_per = black_count_1 / len(all_x)
  189. if black_count_per > 0.4:
  190. print(f"{int_index}前后双边划线{black_count_per:.2f}", word)
  191. self.already_find_index.add(int_index)
  192. return int_index
  193. def color_algorithm_4(self, int_index, word_location, chars_location, word):
  194. """灰度图极差算法"""
  195. for char_index, char_dict in enumerate(chars_location):
  196. if char_dict['char'] == '.' or char_dict['char'] == ',':
  197. point_location, point_char_index = char_dict['location'], char_index
  198. break
  199. else:
  200. char_index = 2
  201. point_location, point_char_index = chars_location[char_index]['location'], char_index
  202. white_block = 0
  203. point_location_half = point_location['top'] + point_location['height']//2
  204. y1, y2 = point_location_half, point_location_half + point_location['height']
  205. for x in range(point_location['left'], point_location['left'] + point_location['width']):
  206. roi_image = self.transformed_image[y1:y2, x:x + 1]
  207. min_val = np.min(roi_image)
  208. max_val = np.max(roi_image)
  209. range_value = max_val - min_val
  210. if min_val>110 or range_value < 90:
  211. white_block +=1
  212. if white_block/point_location['width'] < 0.1:
  213. print(f"{int_index}极差算法 {word},左{point_location['left']},宽{point_location['width']},高{point_location['height']},{y1},{y2}")
  214. self.already_find_index.add(int_index)
  215. return int_index
  216. white_block = 0
  217. end_char_location = chars_location[-2]['location']
  218. bottom = end_char_location['top'] + end_char_location['height']
  219. y1, y2 = bottom+2, bottom + end_char_location['height']-10
  220. for x in range(end_char_location['left'], end_char_location['left'] + point_location['width']):
  221. roi_image = self.transformed_image[y1:y2, x:x + 1]
  222. min_val = np.min(roi_image)
  223. max_val = np.max(roi_image)
  224. range_value = max_val - min_val
  225. if min_val>110 or range_value < 90:
  226. white_block +=1
  227. if white_block/point_location['width'] < 0.1:
  228. print(f"{int_index}极差算法二 {word},左{point_location['left']},宽{point_location['width']},高{point_location['height']},{y1},{y2}")
  229. self.already_find_index.add(int_index)
  230. return int_index
  231. def core_algorithm(self):
  232. self.__make_order_ocr_data()
  233. for int_index in range(1, 101):
  234. current_index = str(int_index)
  235. if current_index not in self.order_ocr_data:
  236. continue
  237. current_dict = self.order_ocr_data[current_index]
  238. word = current_dict['word']
  239. word_location = current_dict['location']
  240. first_char_location = current_dict['first_char_location']
  241. end_char_location = current_dict['end_char_location']
  242. chars_location = current_dict['chars_location']
  243. if self.color_algorithm_1(int_index=int_index, word_location=word_location, first_char_location=first_char_location, word=word):
  244. continue
  245. if self.color_algorithm_2(int_index=int_index, word_location=word_location, word=word):
  246. continue
  247. if self.color_algorithm_3(int_index=int_index, word_location=word_location, end_char_location=end_char_location, word=word):
  248. continue
  249. if self.color_algorithm_4(int_index=int_index, word_location=word_location, chars_location=chars_location, word=word):
  250. continue
  251. if __name__ == '__main__':
  252. image_path = r"C:\Users\86131\Desktop\4.jpg"
  253. script_path = Path(__file__).resolve()
  254. script_directory = script_path.parent
  255. transformed_image_path = str(Path(script_directory, r"transformed_image.jpg"))
  256. pi = PreprocessImage(image_path)
  257. pi.run()
  258. transformed_image_ocr_data = high_ocr_location(transformed_image_path)
  259. test_log(transformed_image_ocr_data)
  260. ca = ComparisonAlgorithm(transformed_image=transformed_image_path, ocr_data=transformed_image_ocr_data)
  261. ca.core_algorithm()