# image_preprocess2.py
# -*- coding:utf-8 -*-
"""
2025-01-14: Words on the sheet are crossed out with a pen stroke in one of
three ways (a slash, a backslash, or a horizontal line); find where they are.
"""
  5. import json
  6. import re
  7. from pathlib import Path
  8. import cv2
  9. import numpy as np
  10. from PIL import Image, ImageFilter
  11. from baidu_ocr import high_ocr_location
  12. def test_log(text: str):
  13. if type(text) == dict:
  14. text = json.dumps(text, ensure_ascii=False)
  15. with open("log.txt", "w", encoding="utf-8") as f:
  16. f.write(str(text))
  17. class PreprocessImage:
  18. def __init__(self, image_path):
  19. self.image_path = image_path
  20. self.template_image_path = "template.jpg"
  21. self.image = cv2.imread(image_path)
  22. self.template_image = cv2.imread(self.template_image_path)
  23. self.temp_h, self.temp_w = self.template_image.shape[:2]
  24. def correct_image(self, point_tuple, image_path='sharpen_image.jpg'):
  25. """图像矫正
  26. point_tuple:传过来的4个点坐标的元组"""
  27. sharpen_image = cv2.imread(image_path)
  28. src_points = np.float32(point_tuple)
  29. dst_points = np.float32([[122, 78], [1070, 78], [122, 2730], [1070, 2730]])
  30. M = cv2.getPerspectiveTransform(src_points, dst_points)
  31. transformed_image = cv2.warpPerspective(sharpen_image, M, (self.temp_w, self.temp_h))
  32. gray = cv2.cvtColor(transformed_image, cv2.COLOR_BGR2GRAY)
  33. blurred = cv2.GaussianBlur(gray, (5, 5), 0)
  34. image_rgb = cv2.cvtColor(blurred, cv2.COLOR_BGR2RGB)
  35. cv2.imwrite('transformed_image.jpg', image_rgb)
  36. def sharpen_image(self):
  37. img = Image.open(self.image_path)
  38. sharpened_img = img.filter(ImageFilter.SHARPEN)
  39. sharpened_img.save('sharpen_image.jpg')
  40. @staticmethod
  41. def parser_ocr(ocr_data):
  42. for word_item in ocr_data['words_result']:
  43. for char_item in word_item['chars']:
  44. pass
  45. def run(self):
  46. self.sharpen_image()
  47. ocr_data = high_ocr_location(self.image_path)
  48. point_tuple = self.parser_ocr(ocr_data)
  49. self.correct_image(point_tuple)
  50. class ComparisonAlgorithm:
  51. """比较算法核心"""
  52. def __init__(self, transformed_image, ocr_data):
  53. self.transformed_image = cv2.imread(transformed_image)
  54. self.ocr_data = ocr_data
  55. self.order_ocr_data = {}
  56. self.already_find_index = set()
  57. self.image = Image.open(transformed_image)
  58. @staticmethod
  59. def separate_numbers_and_letters(text):
  60. """正则提取数字和字母"""
  61. numbers = "".join(re.findall(r'\d+', text))
  62. letters = "".join(re.findall(r'[a-zA-Z]+', text))
  63. return numbers, letters
  64. def is_line_word(self, x, y):
  65. """判断点的颜色是否符合标准; cv2取点速度没有pillow快
  66. 指定要查询的点的坐标 (x, y)"""
  67. rgb_color = self.image.getpixel((x, y))
  68. r, g, b = rgb_color
  69. if all([r < 130, g < 130, b < 130]):
  70. return 1
  71. return 0
  72. def __make_order_ocr_data(self):
  73. for word_item in self.ocr_data['words_result']:
  74. word = word_item['words']
  75. if word[0].isdigit() and len(word) >= 2:
  76. word_text = word_item['words']
  77. location = word_item['location']
  78. first_char_location = word_item['chars'][0]['location']
  79. end_char_location = word_item['chars'][-1]['location']
  80. chars_location = word_item['chars']
  81. numbers, letters = self.separate_numbers_and_letters(word_text)
  82. if numbers not in self.order_ocr_data:
  83. self.order_ocr_data[numbers] = {"word": letters, "location": location, "chars_location": chars_location,
  84. "first_char_location": first_char_location, "end_char_location": end_char_location}
  85. def color_algorithm_1(self, int_index, word_location, first_char_location, word):
  86. """
  87. 颜色算法1,正常单词
  88. int_index:整数序号
  89. word_location:这个序号的单词的整行位置;对应 self.order_ocr_data[current_index]['location']
  90. first_char_location: 第一个字母的位置;对应 self.order_ocr_data[current_index]['first_char_location']
  91. word:具体序号的单词,标识用
  92. """
  93. next_index = str(int_index + 1)
  94. black_count_1 = 0
  95. for x in range(word_location['left'], word_location['left'] + word_location['width']):
  96. b_top, b_height = first_char_location['top'], int(first_char_location['height'])
  97. bottom_location_y = b_top + b_height
  98. if int_index == 50 or int_index == 100:
  99. next_word_top_location = bottom_location_y + b_height * 2
  100. elif next_index in self.order_ocr_data and (
  101. self.order_ocr_data[next_index]['first_char_location']['top'] - bottom_location_y) < b_height:
  102. next_word_location = self.order_ocr_data[next_index]['first_char_location']
  103. next_word_top_location = next_word_location['top'] + int(next_word_location['height'] / 8)
  104. else:
  105. next_word_top_location = bottom_location_y + int(b_height * 0.5)
  106. for y in range(bottom_location_y, next_word_top_location):
  107. result = self.is_line_word(x, y)
  108. if result:
  109. black_count_1 += 1
  110. break
  111. black_count_per = black_count_1 / (word_location['width'])
  112. if black_count_per > 0.8:
  113. print(f"{int_index}正常划线{black_count_per:.2f}", word)
  114. self.already_find_index.add(int_index)
  115. return int_index
  116. def color_algorithm_2(self, int_index, word_location, word):
  117. """颜色算法2,单词自身中间的黑点率
  118. int_index:整数序号
  119. word_location:这个序号的单词的整行位置;对应 self.order_ocr_data[current_index]['location']
  120. word:具体序号的单词,标识用
  121. """
  122. black_count_2 = 0
  123. for x in range(word_location['left'], word_location['left'] + word_location['width']):
  124. mid = word_location['top'] + int(word_location['height'] / 2)
  125. bottom = word_location['top'] + int(word_location['height']) + 5
  126. for y in range(mid, bottom):
  127. result = self.is_line_word(x, y)
  128. if result:
  129. black_count_2 += 1
  130. break
  131. black_count_per = black_count_2 / (word_location['width'])
  132. if black_count_per > 0.92:
  133. print(f"{int_index}中间划线{black_count_per:.2f}", word)
  134. self.already_find_index.add(int_index)
  135. return int_index
  136. def color_algorithm_3(self, int_index, word_location, end_char_location, word):
  137. """
  138. 颜色算法3,正常单词的左右各推20个像素点,判断下黑点率
  139. int_index:整数序号
  140. word_location:这个序号的单词的整行位置;对应 self.order_ocr_data[current_index]['location']
  141. end_char_location: 最后一个字母的位置;对应 self.order_ocr_data[current_index]['end_char_location']
  142. word:具体序号的单词,标识用
  143. """
  144. next_index = str(int_index + 1)
  145. black_count_1 = 0
  146. moving_distance = 20
  147. """这是在获取所有需要的横向左右x坐标"""
  148. all_x = []
  149. for i in range(word_location['left'] - moving_distance, word_location['left']):
  150. all_x.append(i)
  151. word_right_loca = word_location['left'] + word_location['width'] + 2
  152. for i in range(word_right_loca, word_right_loca + moving_distance):
  153. all_x.append(i)
  154. b_top, b_height = word_location['top'], int(word_location['height'])
  155. bottom_location_y = b_top + b_height
  156. bottom_location_y_half = end_char_location['top'] + int(end_char_location['height'] * 0.8)
  157. for x in all_x:
  158. if int_index == 50 or int_index == 100:
  159. next_word_top_location = bottom_location_y + b_height * 2
  160. elif next_index in self.order_ocr_data and (
  161. self.order_ocr_data[next_index]['first_char_location']['top'] - bottom_location_y) < b_height:
  162. next_word_location = self.order_ocr_data[next_index]['first_char_location']
  163. next_word_top_location = next_word_location['top'] + 3
  164. else:
  165. next_word_top_location = bottom_location_y + int(b_height * 0.3)
  166. for y in range(bottom_location_y_half, next_word_top_location):
  167. result = self.is_line_word(x, y)
  168. if result:
  169. black_count_1 += 1
  170. break
  171. black_count_per = black_count_1 / len(all_x)
  172. if black_count_per > 0.4:
  173. print(f"{int_index}前后双边划线{black_count_per:.2f}", word)
  174. self.already_find_index.add(int_index)
  175. return int_index
  176. def color_algorithm_4(self, int_index, word_location, chars_location, word):
  177. """灰度图极差算法"""
  178. for char_index, char_dict in enumerate(chars_location):
  179. if char_dict['char'] == '.' or char_dict['char'] == ',':
  180. point_location, point_char_index = char_dict['location'], char_index
  181. break
  182. else:
  183. char_index = 2
  184. point_location, point_char_index = chars_location[char_index]['location'], char_index
  185. white_block = 0
  186. point_location_half = point_location['top'] + point_location['height'] // 2
  187. y1, y2 = point_location_half, point_location_half + point_location['height']
  188. for x in range(point_location['left'], point_location['left'] + point_location['width']):
  189. roi_image = self.transformed_image[y1:y2, x:x + 1]
  190. min_val = np.min(roi_image)
  191. max_val = np.max(roi_image)
  192. range_value = max_val - min_val
  193. if min_val > 110 or range_value < 90:
  194. white_block += 1
  195. if white_block / point_location['width'] < 0.1:
  196. print(f"{int_index}极差算法 {word},左{point_location['left']},宽{point_location['width']},高{point_location['height']},{y1},{y2}")
  197. self.already_find_index.add(int_index)
  198. return int_index
  199. white_block = 0
  200. end_char_location = chars_location[-2]['location']
  201. bottom = end_char_location['top'] + end_char_location['height']
  202. y1, y2 = bottom + 2, bottom + end_char_location['height'] - 10
  203. for x in range(end_char_location['left'], end_char_location['left'] + point_location['width']):
  204. roi_image = self.transformed_image[y1:y2, x:x + 1]
  205. min_val = np.min(roi_image)
  206. max_val = np.max(roi_image)
  207. range_value = max_val - min_val
  208. if min_val > 110 or range_value < 90:
  209. white_block += 1
  210. if white_block / point_location['width'] < 0.1:
  211. print(f"{int_index}极差算法二 {word},左{point_location['left']},宽{point_location['width']},高{point_location['height']},{y1},{y2}")
  212. self.already_find_index.add(int_index)
  213. return int_index
  214. def core_algorithm(self):
  215. self.__make_order_ocr_data()
  216. for int_index in range(1, 101):
  217. current_index = str(int_index)
  218. if current_index not in self.order_ocr_data:
  219. continue
  220. current_dict = self.order_ocr_data[current_index]
  221. word = current_dict['word']
  222. word_location = current_dict['location']
  223. first_char_location = current_dict['first_char_location']
  224. end_char_location = current_dict['end_char_location']
  225. chars_location = current_dict['chars_location']
  226. if self.color_algorithm_1(int_index=int_index, word_location=word_location, first_char_location=first_char_location, word=word):
  227. continue
  228. if self.color_algorithm_2(int_index=int_index, word_location=word_location, word=word):
  229. continue
  230. if self.color_algorithm_3(int_index=int_index, word_location=word_location, end_char_location=end_char_location, word=word):
  231. continue
  232. if self.color_algorithm_4(int_index=int_index, word_location=word_location, chars_location=chars_location, word=word):
  233. continue
  234. if __name__ == '__main__':
  235. image_path = r"C:\Users\86131\Desktop\4.jpg"
  236. script_path = Path(__file__).resolve()
  237. script_directory = script_path.parent
  238. transformed_image_path = str(Path(script_directory, r"transformed_image.jpg"))
  239. pi = PreprocessImage(image_path)
  240. pi.run()
  241. transformed_image_ocr_data = high_ocr_location(transformed_image_path)
  242. test_log(transformed_image_ocr_data)
  243. ca = ComparisonAlgorithm(transformed_image=transformed_image_path, ocr_data=transformed_image_ocr_data)
  244. ca.core_algorithm()