image_preprocess2.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363
  1. # -*- coding:utf-8 -*-
"""
2025-01-14: Words on the sheet are struck out in one of three ways - with a
forward slash, a backslash, or a horizontal line. Locate those strokes.
"""
  5. import re
  6. import time
  7. from PIL import Image, ImageFilter
  8. import numpy as np
  9. import cv2
  10. import json
  11. from pathlib import Path
  12. from baidu_ocr import high_ocr_location
  13. def test_log(text: str):
  14. if type(text) == dict:
  15. text = json.dumps(text, ensure_ascii=False)
  16. with open("log.txt", "w", encoding="utf-8") as f:
  17. f.write(str(text))
  18. class PreprocessImage:
  19. def __init__(self, image_path):
  20. self.image_path = image_path
  21. self.template_image_path = "template.jpg"
  22. self.image = cv2.imread(image_path)
  23. self.template_image = cv2.imread(self.template_image_path)
  24. self.temp_h, self.temp_w = self.template_image.shape[:2]
  25. def correct_image(self, point_tuple,image_path='sharpen_image.jpg'):
  26. """图像矫正
  27. point_tuple:传过来的4个点坐标的元组"""
  28. sharpen_image = cv2.imread(image_path)
  29. src_points = np.float32(point_tuple)
  30. dst_points = np.float32([[122, 78], [1070, 78], [122, 2730], [1070, 2730]])
  31. M = cv2.getPerspectiveTransform(src_points, dst_points)
  32. transformed_image = cv2.warpPerspective(sharpen_image, M, (self.temp_w, self.temp_h))
  33. gray = cv2.cvtColor(transformed_image, cv2.COLOR_BGR2GRAY)
  34. blurred = cv2.GaussianBlur(gray, (5, 5), 0)
  35. image_rgb = cv2.cvtColor(blurred, cv2.COLOR_BGR2RGB)
  36. cv2.imwrite('transformed_image.jpg', image_rgb)
  37. def sharpen_image(self):
  38. img = Image.open(self.image_path)
  39. sharpened_img = img.filter(ImageFilter.SHARPEN)
  40. sharpened_img.save('sharpen_image.jpg')
  41. @staticmethod
  42. def parser_ocr(ocr_data):
  43. for word_item in ocr_data['words_result']:
  44. for char_item in word_item['chars']:
  45. pass
  46. def run(self):
  47. self.sharpen_image()
  48. ocr_data = high_ocr_location(self.image_path)
  49. point_tuple = self.parser_ocr(ocr_data)
  50. self.correct_image(point_tuple)
  51. class ComparisonAlgorithm:
  52. """比较算法核心"""
  53. def __init__(self, transformed_image, ocr_data):
  54. self.transformed_image = cv2.imread(transformed_image)
  55. self.ocr_data = ocr_data
  56. self.order_ocr_data = {}
  57. self.already_find_index = set()
  58. self.image = Image.open(transformed_image)
  59. @staticmethod
  60. def separate_numbers_and_letters(text):
  61. """正则提取数字和字母"""
  62. numbers = "".join(re.findall(r'\d+', text))
  63. letters = "".join(re.findall(r'[a-zA-Z]+', text))
  64. return numbers, letters
  65. def is_line_word(self, x, y):
  66. """判断点的颜色是否符合标准; cv2取点速度没有pillow快
  67. 指定要查询的点的坐标 (x, y)"""
  68. rgb_color = self.image.getpixel((x, y))
  69. r, g, b = rgb_color
  70. if all([r < 130, g < 130, b < 130]):
  71. return 1
  72. return 0
  73. def __make_order_ocr_data(self):
  74. for word_item in self.ocr_data['words_result']:
  75. word = word_item['words']
  76. if word[0].isdigit() and len(word) >= 2:
  77. word_text = word_item['words']
  78. location = word_item['location']
  79. first_char_location = word_item['chars'][0]['location']
  80. end_char_location = word_item['chars'][-1]['location']
  81. chars_location = word_item['chars']
  82. numbers, letters = self.separate_numbers_and_letters(word_text)
  83. if numbers not in self.order_ocr_data:
  84. self.order_ocr_data[numbers] = {"word": letters, "location": location, "chars_location": chars_location,
  85. "first_char_location": first_char_location, "end_char_location": end_char_location}
  86. def color_algorithm_1(self, int_index, word_location, first_char_location, word):
  87. """
  88. 颜色算法1,正常单词
  89. int_index:整数序号
  90. word_location:这个序号的单词的整行位置;对应 self.order_ocr_data[current_index]['location']
  91. first_char_location: 第一个字母的位置;对应 self.order_ocr_data[current_index]['first_char_location']
  92. word:具体序号的单词,标识用
  93. """
  94. next_index = str(int_index + 1)
  95. black_count_1 = 0
  96. for x in range(word_location['left'], word_location['left'] + word_location['width']):
  97. b_top, b_height = first_char_location['top'], int(first_char_location['height'])
  98. bottom_location_y = b_top + b_height
  99. if int_index == 50 or int_index == 100:
  100. next_word_top_location = bottom_location_y + b_height * 2
  101. elif next_index in self.order_ocr_data and (
  102. self.order_ocr_data[next_index]['first_char_location']['top'] - bottom_location_y) < b_height:
  103. next_word_location = self.order_ocr_data[next_index]['first_char_location']
  104. next_word_top_location = next_word_location['top'] + int(next_word_location['height'] / 8)
  105. else:
  106. next_word_top_location = bottom_location_y + int(b_height * 0.5)
  107. for y in range(bottom_location_y, next_word_top_location):
  108. result = self.is_line_word(x, y)
  109. if result:
  110. black_count_1 += 1
  111. break
  112. black_count_per = black_count_1 / (word_location['width'])
  113. if black_count_per > 0.8:
  114. print(f"{int_index}正常划线{black_count_per:.2f}", word)
  115. self.already_find_index.add(int_index)
  116. return int_index
  117. def color_algorithm_2(self, int_index, word_location, word):
  118. """颜色算法2,单词自身中间的黑点率
  119. int_index:整数序号
  120. word_location:这个序号的单词的整行位置;对应 self.order_ocr_data[current_index]['location']
  121. word:具体序号的单词,标识用
  122. """
  123. black_count_2 = 0
  124. for x in range(word_location['left'], word_location['left'] + word_location['width']):
  125. mid = word_location['top'] + int(word_location['height'] / 2)
  126. bottom = word_location['top'] + int(word_location['height']) + 5
  127. for y in range(mid, bottom):
  128. result = self.is_line_word(x, y)
  129. if result:
  130. black_count_2 += 1
  131. break
  132. black_count_per = black_count_2 / (word_location['width'])
  133. if black_count_per > 0.92:
  134. print(f"{int_index}中间划线{black_count_per:.2f}", word)
  135. self.already_find_index.add(int_index)
  136. return int_index
  137. def color_algorithm_3(self, int_index, word_location, end_char_location, word):
  138. """
  139. 颜色算法3,正常单词的左右各推20个像素点,判断下黑点率
  140. int_index:整数序号
  141. word_location:这个序号的单词的整行位置;对应 self.order_ocr_data[current_index]['location']
  142. end_char_location: 最后一个字母的位置;对应 self.order_ocr_data[current_index]['end_char_location']
  143. word:具体序号的单词,标识用
  144. """
  145. next_index = str(int_index + 1)
  146. black_count_1 = 0
  147. moving_distance = 20
  148. """这是在获取所有需要的横向左右x坐标"""
  149. all_x = []
  150. for i in range(word_location['left'] - moving_distance, word_location['left']):
  151. all_x.append(i)
  152. word_right_loca = word_location['left'] + word_location['width'] + 2
  153. for i in range(word_right_loca, word_right_loca + moving_distance):
  154. all_x.append(i)
  155. b_top, b_height = word_location['top'], int(word_location['height'])
  156. bottom_location_y = b_top + b_height
  157. bottom_location_y_half = end_char_location['top'] + int(end_char_location['height'] * 0.8)
  158. for x in all_x:
  159. if int_index == 50 or int_index == 100:
  160. next_word_top_location = bottom_location_y + b_height * 2
  161. elif next_index in self.order_ocr_data and (
  162. self.order_ocr_data[next_index]['first_char_location']['top'] - bottom_location_y) < b_height:
  163. next_word_location = self.order_ocr_data[next_index]['first_char_location']
  164. next_word_top_location = next_word_location['top'] + 3
  165. else:
  166. next_word_top_location = bottom_location_y + int(b_height * 0.3)
  167. for y in range(bottom_location_y_half, next_word_top_location):
  168. result = self.is_line_word(x, y)
  169. if result:
  170. black_count_1 += 1
  171. break
  172. black_count_per = black_count_1 / len(all_x)
  173. if black_count_per > 0.4:
  174. print(f"{int_index}前后双边划线{black_count_per:.2f}", word)
  175. self.already_find_index.add(int_index)
  176. return int_index
  177. def color_algorithm_4(self, int_index, word_location, chars_location, word):
  178. """灰度图极差算法"""
  179. for char_index, char_dict in enumerate(chars_location):
  180. if char_dict['char'] == '.' or char_dict['char'] == ',':
  181. point_location, point_char_index = char_dict['location'], char_index
  182. break
  183. else:
  184. char_index = 2
  185. point_location, point_char_index = chars_location[char_index]['location'], char_index
  186. white_block = 0
  187. point_location_half = point_location['top'] + point_location['height']//2
  188. y1, y2 = point_location_half, point_location_half + point_location['height']
  189. for x in range(point_location['left'], point_location['left'] + point_location['width']):
  190. roi_image = self.transformed_image[y1:y2, x:x + 1]
  191. min_val = np.min(roi_image)
  192. max_val = np.max(roi_image)
  193. range_value = max_val - min_val
  194. if min_val>110 or range_value < 90:
  195. white_block +=1
  196. if white_block/point_location['width'] < 0.1:
  197. print(f"{int_index}极差算法 {word},左{point_location['left']},宽{point_location['width']},高{point_location['height']},{y1},{y2}")
  198. self.already_find_index.add(int_index)
  199. return int_index
  200. white_block = 0
  201. end_char_location = chars_location[-2]['location']
  202. bottom = end_char_location['top'] + end_char_location['height']
  203. y1, y2 = bottom+2, bottom + end_char_location['height']-10
  204. for x in range(end_char_location['left'], end_char_location['left'] + point_location['width']):
  205. roi_image = self.transformed_image[y1:y2, x:x + 1]
  206. min_val = np.min(roi_image)
  207. max_val = np.max(roi_image)
  208. range_value = max_val - min_val
  209. if min_val>110 or range_value < 90:
  210. white_block +=1
  211. if white_block/point_location['width'] < 0.1:
  212. print(f"{int_index}极差算法二 {word},左{point_location['left']},宽{point_location['width']},高{point_location['height']},{y1},{y2}")
  213. self.already_find_index.add(int_index)
  214. return int_index
  215. def core_algorithm(self):
  216. self.__make_order_ocr_data()
  217. for int_index in range(1, 101):
  218. current_index = str(int_index)
  219. if current_index not in self.order_ocr_data:
  220. continue
  221. current_dict = self.order_ocr_data[current_index]
  222. word = current_dict['word']
  223. word_location = current_dict['location']
  224. first_char_location = current_dict['first_char_location']
  225. end_char_location = current_dict['end_char_location']
  226. chars_location = current_dict['chars_location']
  227. if self.color_algorithm_1(int_index=int_index, word_location=word_location, first_char_location=first_char_location, word=word):
  228. continue
  229. if self.color_algorithm_2(int_index=int_index, word_location=word_location, word=word):
  230. continue
  231. if self.color_algorithm_3(int_index=int_index, word_location=word_location, end_char_location=end_char_location, word=word):
  232. continue
  233. if self.color_algorithm_4(int_index=int_index, word_location=word_location, chars_location=chars_location, word=word):
  234. continue
  235. if __name__ == '__main__':
  236. image_path = r"C:\Users\86131\Desktop\4.jpg"
  237. script_path = Path(__file__).resolve()
  238. script_directory = script_path.parent
  239. transformed_image_path = str(Path(script_directory, r"transformed_image.jpg"))
  240. pi = PreprocessImage(image_path)
  241. pi.run()
  242. transformed_image_ocr_data = high_ocr_location(transformed_image_path)
  243. test_log(transformed_image_ocr_data)
  244. ca = ComparisonAlgorithm(transformed_image=transformed_image_path, ocr_data=transformed_image_ocr_data)
  245. ca.core_algorithm()