# image_preprocess.py (14 KB)
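# -*- coding:utf-8 -*-
"""
TODO: add an algorithm that, for dark pixels lying between two vertically
adjacent words, decides which of the two words they are closer to, so that a
mark is no longer attributed to the wrong row (vertical misalignment).
"""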
  5. import json
  6. import re
  7. from pathlib import Path
  8. import cv2
  9. import numpy as np
  10. from PIL import Image, ImageFilter
  11. from baidu_ocr import high_ocr_location
  12. def test_log(text: str):
  13. if type(text) == dict:
  14. text = json.dumps(text, ensure_ascii=False)
  15. with open("log.txt", "w", encoding="utf-8") as f:
  16. f.write(str(text))
  17. class PreprocessImage:
  18. def __init__(self, image_path):
  19. self.image_path = image_path
  20. self.template_image_path = "template.jpg"
  21. self.image = cv2.imread(image_path)
  22. self.template_image = cv2.imread(self.template_image_path)
  23. self.temp_h, self.temp_w = self.template_image.shape[:2]
  24. def correct_image(self, point_tuple, image_path='sharpen_image.jpg'):
  25. """图像矫正
  26. point_tuple:传过来的4个点坐标的元组"""
  27. sharpen_image = cv2.imread(image_path)
  28. src_points = np.float32(point_tuple)
  29. dst_points = np.float32([[122, 78], [1070, 78], [122, 2730], [1070, 2730]])
  30. M = cv2.getPerspectiveTransform(src_points, dst_points)
  31. transformed_image = cv2.warpPerspective(sharpen_image, M, (self.temp_w, self.temp_h))
  32. gray = cv2.cvtColor(transformed_image, cv2.COLOR_BGR2GRAY)
  33. blurred = cv2.GaussianBlur(gray, (5, 5), 0)
  34. image_rgb = cv2.cvtColor(blurred, cv2.COLOR_BGR2RGB)
  35. cv2.imwrite('transformed_image.jpg', image_rgb)
  36. def sharpen_image(self):
  37. img = Image.open(self.image_path)
  38. sharpened_img = img.filter(ImageFilter.SHARPEN)
  39. sharpened_img.save('sharpen_image.jpg')
  40. @staticmethod
  41. def parser_ocr(ocr_data):
  42. p1, p2, p3, p4 = None, None, None, None
  43. for word_item in ocr_data['words_result']:
  44. text: str = word_item['words']
  45. if text.startswith("1."):
  46. left_char_location = word_item['chars'][0]['location']
  47. p1 = (left_char_location['left'], left_char_location['top'])
  48. elif text.startswith("51."):
  49. left_char_location = word_item['chars'][0]['location']
  50. p2 = (left_char_location['left'], left_char_location['top'])
  51. elif text.startswith("50."):
  52. left_char_location = word_item['chars'][0]['location']
  53. p3 = (left_char_location['left'], left_char_location['top'])
  54. elif text.startswith("100."):
  55. left_char_location = word_item['chars'][0]['location']
  56. p4 = (left_char_location['left'], left_char_location['top'])
  57. if any([not p1, not p2, not p3, not p4]):
  58. print([p1, p2, p3, p4])
  59. raise Exception("矫正坐标不对")
  60. return [p1, p2, p3, p4]
  61. def run(self):
  62. self.sharpen_image()
  63. ocr_data = high_ocr_location(self.image_path)
  64. point_tuple = self.parser_ocr(ocr_data)
  65. self.correct_image(point_tuple)
  66. class ComparisonAlgorithm:
  67. """比较算法核心"""
  68. def __init__(self, transformed_image, ocr_data):
  69. self.transformed_image = cv2.imread(transformed_image)
  70. self.ocr_data = ocr_data
  71. self.order_ocr_data = {}
  72. self.already_find_index = set()
  73. self.image = Image.open(transformed_image)
  74. @staticmethod
  75. def separate_numbers_and_letters(text):
  76. """正则提取数字和字母"""
  77. numbers = "".join(re.findall(r'\d+', text))
  78. letters = "".join(re.findall(r'[a-zA-Z]+', text))
  79. return numbers, letters
  80. def is_line_word(self, x, y):
  81. """判断点的颜色是否符合标准; cv2取点速度没有pillow快
  82. 指定要查询的点的坐标 (x, y)"""
  83. rgb_color = self.image.getpixel((x, y))
  84. r, g, b = rgb_color
  85. if all([r < 130, g < 130, b < 130]):
  86. return 1
  87. return 0
  88. def __make_order_ocr_data(self):
  89. for word_item in self.ocr_data['words_result']:
  90. word = word_item['words']
  91. if word[0].isdigit() and len(word) >= 2:
  92. word_text = word_item['words']
  93. location = word_item['location']
  94. first_char_location = word_item['chars'][0]['location']
  95. end_char_location = word_item['chars'][-1]['location']
  96. chars_location = word_item['chars']
  97. numbers, letters = self.separate_numbers_and_letters(word_text)
  98. if numbers not in self.order_ocr_data:
  99. self.order_ocr_data[numbers] = {"word": letters, "location": location, "chars_location": chars_location,
  100. "first_char_location": first_char_location, "end_char_location": end_char_location}
  101. def color_algorithm_1(self, int_index, word_location, first_char_location, word):
  102. """
  103. 颜色算法1,正常单词
  104. int_index:整数序号
  105. word_location:这个序号的单词的整行位置;对应 self.order_ocr_data[current_index]['location']
  106. first_char_location: 第一个字母的位置;对应 self.order_ocr_data[current_index]['first_char_location']
  107. word:具体序号的单词,标识用
  108. """
  109. next_index = str(int_index + 1)
  110. black_count_1 = 0
  111. for x in range(word_location['left'], word_location['left'] + word_location['width']):
  112. b_top, b_height = first_char_location['top'], int(first_char_location['height'])
  113. bottom_location_y = b_top + b_height
  114. if int_index == 50 or int_index == 100:
  115. next_word_top_location = bottom_location_y + b_height * 2
  116. elif next_index in self.order_ocr_data and (
  117. self.order_ocr_data[next_index]['first_char_location']['top'] - bottom_location_y) < b_height:
  118. next_word_location = self.order_ocr_data[next_index]['first_char_location']
  119. next_word_top_location = next_word_location['top'] + int(next_word_location['height'] / 8)
  120. else:
  121. next_word_top_location = bottom_location_y + int(b_height * 0.5)
  122. for y in range(bottom_location_y, next_word_top_location):
  123. result = self.is_line_word(x, y)
  124. if result:
  125. black_count_1 += 1
  126. break
  127. black_count_per = black_count_1 / (word_location['width'])
  128. if black_count_per > 0.8:
  129. print(f"{int_index}正常划线{black_count_per:.2f}", word)
  130. self.already_find_index.add(int_index)
  131. return int_index
  132. def color_algorithm_2(self, int_index, word_location, word):
  133. """颜色算法2,单词自身中间的黑点率
  134. int_index:整数序号
  135. word_location:这个序号的单词的整行位置;对应 self.order_ocr_data[current_index]['location']
  136. word:具体序号的单词,标识用
  137. """
  138. black_count_2 = 0
  139. for x in range(word_location['left'], word_location['left'] + word_location['width']):
  140. mid = word_location['top'] + int(word_location['height'] / 2)
  141. bottom = word_location['top'] + int(word_location['height']) + 5
  142. for y in range(mid, bottom):
  143. result = self.is_line_word(x, y)
  144. if result:
  145. black_count_2 += 1
  146. break
  147. black_count_per = black_count_2 / (word_location['width'])
  148. if black_count_per > 0.92:
  149. print(f"{int_index}中间划线{black_count_per:.2f}", word)
  150. self.already_find_index.add(int_index)
  151. return int_index
  152. def color_algorithm_3(self, int_index, word_location, end_char_location, word):
  153. """
  154. 颜色算法3,正常单词的左右各推20个像素点,判断下黑点率
  155. int_index:整数序号
  156. word_location:这个序号的单词的整行位置;对应 self.order_ocr_data[current_index]['location']
  157. end_char_location: 最后一个字母的位置;对应 self.order_ocr_data[current_index]['end_char_location']
  158. word:具体序号的单词,标识用
  159. """
  160. next_index = str(int_index + 1)
  161. black_count_1 = 0
  162. moving_distance = 20
  163. """这是在获取所有需要的横向左右x坐标"""
  164. all_x = []
  165. for i in range(word_location['left'] - moving_distance, word_location['left']):
  166. all_x.append(i)
  167. word_right_loca = word_location['left'] + word_location['width'] + 2
  168. for i in range(word_right_loca, word_right_loca + moving_distance):
  169. all_x.append(i)
  170. b_top, b_height = word_location['top'], int(word_location['height'])
  171. bottom_location_y = b_top + b_height
  172. bottom_location_y_half = end_char_location['top'] + int(end_char_location['height'] * 0.8)
  173. for x in all_x:
  174. if int_index == 50 or int_index == 100:
  175. next_word_top_location = bottom_location_y + b_height * 2
  176. elif next_index in self.order_ocr_data and (
  177. self.order_ocr_data[next_index]['first_char_location']['top'] - bottom_location_y) < b_height:
  178. next_word_location = self.order_ocr_data[next_index]['first_char_location']
  179. next_word_top_location = next_word_location['top'] + 3
  180. else:
  181. next_word_top_location = bottom_location_y + int(b_height * 0.3)
  182. for y in range(bottom_location_y_half, next_word_top_location):
  183. result = self.is_line_word(x, y)
  184. if result:
  185. black_count_1 += 1
  186. break
  187. black_count_per = black_count_1 / len(all_x)
  188. if black_count_per > 0.4:
  189. print(f"{int_index}前后双边划线{black_count_per:.2f}", word)
  190. self.already_find_index.add(int_index)
  191. return int_index
  192. def color_algorithm_4(self, int_index, word_location, chars_location, word):
  193. """灰度图极差算法"""
  194. for char_index, char_dict in enumerate(chars_location):
  195. if char_dict['char'] == '.' or char_dict['char'] == ',':
  196. point_location, point_char_index = char_dict['location'], char_index
  197. break
  198. else:
  199. char_index = 2
  200. point_location, point_char_index = chars_location[char_index]['location'], char_index
  201. white_block = 0
  202. point_location_half = point_location['top'] + point_location['height'] // 2
  203. y1, y2 = point_location_half, point_location_half + point_location['height']
  204. for x in range(point_location['left'], point_location['left'] + point_location['width']):
  205. roi_image = self.transformed_image[y1:y2, x:x + 1]
  206. min_val = np.min(roi_image)
  207. max_val = np.max(roi_image)
  208. range_value = max_val - min_val
  209. if min_val > 110 or range_value < 90:
  210. white_block += 1
  211. if white_block / point_location['width'] < 0.1:
  212. print(f"{int_index}极差算法 {word},左{point_location['left']},宽{point_location['width']},高{point_location['height']},{y1},{y2}")
  213. self.already_find_index.add(int_index)
  214. return int_index
  215. white_block = 0
  216. end_char_location = chars_location[-2]['location']
  217. bottom = end_char_location['top'] + end_char_location['height']
  218. y1, y2 = bottom + 2, bottom + end_char_location['height'] - 10
  219. for x in range(end_char_location['left'], end_char_location['left'] + point_location['width']):
  220. roi_image = self.transformed_image[y1:y2, x:x + 1]
  221. min_val = np.min(roi_image)
  222. max_val = np.max(roi_image)
  223. range_value = max_val - min_val
  224. if min_val > 110 or range_value < 90:
  225. white_block += 1
  226. if white_block / point_location['width'] < 0.1:
  227. print(f"{int_index}极差算法二 {word},左{point_location['left']},宽{point_location['width']},高{point_location['height']},{y1},{y2}")
  228. self.already_find_index.add(int_index)
  229. return int_index
  230. def core_algorithm(self):
  231. self.__make_order_ocr_data()
  232. for int_index in range(1, 101):
  233. current_index = str(int_index)
  234. if current_index not in self.order_ocr_data:
  235. continue
  236. current_dict = self.order_ocr_data[current_index]
  237. word = current_dict['word']
  238. word_location = current_dict['location']
  239. first_char_location = current_dict['first_char_location']
  240. end_char_location = current_dict['end_char_location']
  241. chars_location = current_dict['chars_location']
  242. if self.color_algorithm_1(int_index=int_index, word_location=word_location, first_char_location=first_char_location, word=word):
  243. continue
  244. if self.color_algorithm_2(int_index=int_index, word_location=word_location, word=word):
  245. continue
  246. if self.color_algorithm_3(int_index=int_index, word_location=word_location, end_char_location=end_char_location, word=word):
  247. continue
  248. if self.color_algorithm_4(int_index=int_index, word_location=word_location, chars_location=chars_location, word=word):
  249. continue
  250. if __name__ == '__main__':
  251. image_path = r"C:\Users\86131\Desktop\4.jpg"
  252. script_path = Path(__file__).resolve()
  253. script_directory = script_path.parent
  254. transformed_image_path = str(Path(script_directory, r"transformed_image.jpg"))
  255. pi = PreprocessImage(image_path)
  256. pi.run()
  257. transformed_image_ocr_data = high_ocr_location(transformed_image_path)
  258. test_log(transformed_image_ocr_data)
  259. ca = ComparisonAlgorithm(transformed_image=transformed_image_path, ocr_data=transformed_image_ocr_data)
  260. ca.core_algorithm()