# -*- coding:utf-8 -*-
"""Detect hand-drawn marking lines on a scanned, numbered 100-item word sheet.

Pipeline (see the ``__main__`` section):

1. ``PreprocessImage`` sharpens the photo, locates items 1 / 51 / 50 / 100
   through OCR and warps the sheet onto fixed template coordinates.
2. ``ComparisonAlgorithm`` runs OCR on the rectified sheet and applies four
   pixel heuristics per numbered item to decide whether a dark stroke was
   drawn under / through / next to the word.

TODO: add an algorithm that, for dark pixels lying between two vertically
adjacent words, decides which of the two words they are closer to, so that
vertically misaligned strokes are attributed to the right item.
"""
import json
import re
import sys
import time
from pathlib import Path

import cv2
import numpy as np
from PIL import Image, ImageFilter

from baidu_ocr import high_ocr_location


def test_log(text: str):
    """Overwrite ``log.txt`` with *text*; a dict is serialised to JSON first."""
    if isinstance(text, dict):
        text = json.dumps(text, ensure_ascii=False)
    with open("log.txt", "w", encoding="utf-8") as f:
        f.write(str(text))


class PreprocessImage:
    """Sharpen a photographed sheet and rectify it onto the template geometry."""

    # Target positions (x, y) of the first characters of items 1, 51, 50 and
    # 100, i.e. top-left, top-right, bottom-left, bottom-right anchors.
    DST_POINTS = [[122, 78], [1070, 78], [122, 2730], [1070, 2730]]

    def __init__(self, image_path):
        """Load the input image and the template that defines the output size.

        Raises:
            FileNotFoundError: if ``template.jpg`` cannot be read.
        """
        self.image_path = image_path
        self.template_image_path = "template.jpg"
        self.image = cv2.imread(image_path)
        self.template_image = cv2.imread(self.template_image_path)
        # cv2.imread returns None instead of raising when the file is missing.
        if self.template_image is None:
            raise FileNotFoundError(
                f"cannot read template image: {self.template_image_path}")
        self.temp_h, self.temp_w = self.template_image.shape[:2]

    def correct_image(self, point_tuple, image_path='sharpen_image.jpg'):
        """Perspective-correct the sharpened image.

        Args:
            point_tuple: four (x, y) source points, in the order produced by
                ``parser_ocr`` (items 1, 51, 50, 100).
            image_path: image to rectify; defaults to the file written by
                ``sharpen_image``.

        The result is converted to grayscale, blurred and written to
        ``transformed_image.jpg`` as a 3-channel image.

        Raises:
            FileNotFoundError: if *image_path* cannot be read.
        """
        sharpen_image = cv2.imread(image_path)
        if sharpen_image is None:
            raise FileNotFoundError(f"cannot read image: {image_path}")
        src_points = np.float32(point_tuple)
        dst_points = np.float32(self.DST_POINTS)
        matrix = cv2.getPerspectiveTransform(src_points, dst_points)
        transformed_image = cv2.warpPerspective(
            sharpen_image, matrix, (self.temp_w, self.temp_h))
        gray = cv2.cvtColor(transformed_image, cv2.COLOR_BGR2GRAY)
        blurred = cv2.GaussianBlur(gray, (5, 5), 0)
        # `blurred` has a single channel, so a BGR->RGB conversion is invalid
        # here; expand gray to three channels instead (downstream code reads
        # (r, g, b) triples from the saved file).
        image_bgr = cv2.cvtColor(blurred, cv2.COLOR_GRAY2BGR)
        cv2.imwrite('transformed_image.jpg', image_bgr)

    def sharpen_image(self):
        """Apply Pillow's SHARPEN filter and save as ``sharpen_image.jpg``."""
        with Image.open(self.image_path) as img:
            img.filter(ImageFilter.SHARPEN).save('sharpen_image.jpg')

    @staticmethod
    def parser_ocr(ocr_data):
        """Extract the four anchor points from an OCR response.

        The anchors are the top-left corners of the first character of the
        lines starting with "1.", "51.", "50." and "100.".

        Returns:
            ``[p1, p51, p50, p100]`` as (left, top) tuples.

        Raises:
            Exception: if any of the four anchors was not recognised.
        """
        slot_by_prefix = {"1.": 0, "51.": 1, "50.": 2, "100.": 3}
        points = [None, None, None, None]
        for word_item in ocr_data['words_result']:
            text: str = word_item['words']
            for prefix, slot in slot_by_prefix.items():
                if text.startswith(prefix):
                    loc = word_item['chars'][0]['location']
                    points[slot] = (loc['left'], loc['top'])
                    break
        if any(point is None for point in points):
            print(points)
            raise Exception("矫正坐标不对")
        return points

    def run(self):
        """Sharpen, OCR the original image for anchors, then rectify."""
        self.sharpen_image()
        ocr_data = high_ocr_location(self.image_path)
        point_tuple = self.parser_ocr(ocr_data)
        self.correct_image(point_tuple)


class ComparisonAlgorithm:
    """Core comparison heuristics that look for dark strokes around each word."""

    DARK_THRESHOLD = 130  # a pixel is "dark" when r, g and b are all below this

    def __init__(self, transformed_image, ocr_data):
        """
        Args:
            transformed_image: path of the rectified image.
            ocr_data: OCR response for that image (``words_result`` entries
                with ``words``, ``location`` and ``chars``).

        Raises:
            FileNotFoundError: if the image cannot be read.
        """
        self.transformed_image = cv2.imread(transformed_image)
        if self.transformed_image is None:
            raise FileNotFoundError(f"cannot read image: {transformed_image}")
        self.ocr_data = ocr_data
        self.order_ocr_data = {}
        self.already_find_index = set()
        # Normalise to RGB so getpixel always yields an (r, g, b) triple,
        # whatever mode the file was stored in.
        self.image = Image.open(transformed_image).convert("RGB")

    @staticmethod
    def separate_numbers_and_letters(text):
        """Return (all digits joined, all ASCII letters joined) found in *text*."""
        numbers = "".join(re.findall(r'\d+', text))
        letters = "".join(re.findall(r'[a-zA-Z]+', text))
        return numbers, letters

    def is_line_word(self, x, y):
        """Return 1 if pixel (x, y) is dark enough to count as ink, else 0.

        Pillow is used because single-pixel access through cv2 was found to be
        slower (original author's note). Coordinates outside the image count
        as not dark.
        """
        width, height = self.image.size
        if not (0 <= x < width and 0 <= y < height):
            return 0
        r, g, b = self.image.getpixel((x, y))
        return 1 if max(r, g, b) < self.DARK_THRESHOLD else 0

    def __make_order_ocr_data(self):
        """Index OCR lines that start with a digit by their item number.

        Fills ``self.order_ocr_data`` with ``{number_str: {...boxes...}}``;
        the first occurrence of a number wins.
        """
        for word_item in self.ocr_data['words_result']:
            word_text = word_item['words']
            # Length is checked first so an empty string cannot be indexed.
            if len(word_text) < 2 or not word_text[0].isdigit():
                continue
            numbers, letters = self.separate_numbers_and_letters(word_text)
            if numbers in self.order_ocr_data:
                continue
            chars_location = word_item['chars']
            self.order_ocr_data[numbers] = {
                "word": letters,
                "location": word_item['location'],
                "chars_location": chars_location,
                "first_char_location": chars_location[0]['location'],
                "end_char_location": chars_location[-1]['location'],
            }

    def _count_dark_columns(self, xs, y_start, y_end):
        """Count the x columns in *xs* holding a dark pixel in [y_start, y_end)."""
        return sum(
            1 for x in xs
            if any(self.is_line_word(x, y) for y in range(y_start, y_end))
        )

    def _close_next_first_char(self, int_index, bottom_y, row_height):
        """Return the first-char box of item ``int_index + 1`` when its top is
        less than *row_height* below *bottom_y*; otherwise None."""
        next_entry = self.order_ocr_data.get(str(int_index + 1))
        if next_entry is None:
            return None
        next_first = next_entry['first_char_location']
        if next_first['top'] - bottom_y < row_height:
            return next_first
        return None

    def color_algorithm_1(self, int_index, word_location, first_char_location, word):
        """Colour algorithm 1: stroke directly below a normally placed word.

        Args:
            int_index: integer item number.
            word_location: box of the whole line
                (``order_ocr_data[idx]['location']``).
            first_char_location: box of the first character
                (``order_ocr_data[idx]['first_char_location']``).
            word: the item's word, used only in the log line.

        Returns:
            *int_index* when more than 80% of the columns under the line
            contain a dark pixel, otherwise None.
        """
        line_width = word_location['width']
        if line_width <= 0:
            return None
        char_height = int(first_char_location['height'])
        bottom_y = first_char_location['top'] + char_height

        # How far down to scan. Items 50 and 100 are scanned a fixed two
        # character heights (presumably because they end their columns).
        if int_index == 50 or int_index == 100:
            scan_end = bottom_y + char_height * 2
        else:
            next_first = self._close_next_first_char(int_index, bottom_y, char_height)
            if next_first is not None:
                # Stop slightly inside the next line's first character.
                scan_end = next_first['top'] + int(next_first['height'] / 8)
            else:
                scan_end = bottom_y + int(char_height * 0.5)

        columns = range(word_location['left'], word_location['left'] + line_width)
        black_count_per = self._count_dark_columns(columns, bottom_y, scan_end) / line_width
        if black_count_per > 0.8:
            print(f"{int_index}正常划线{black_count_per:.2f}", word)
            self.already_find_index.add(int_index)
            return int_index
        return None

    def color_algorithm_2(self, int_index, word_location, word):
        """Colour algorithm 2: dark-pixel ratio through the word's lower half.

        Args:
            int_index: integer item number.
            word_location: box of the whole line
                (``order_ocr_data[idx]['location']``).
            word: the item's word, used only in the log line.

        Returns:
            *int_index* when more than 92% of the columns contain a dark pixel
            between the vertical middle of the box and 5 px below it,
            otherwise None.
        """
        line_width = word_location['width']
        if line_width <= 0:
            return None
        mid = word_location['top'] + int(word_location['height'] / 2)
        bottom = word_location['top'] + int(word_location['height']) + 5

        columns = range(word_location['left'], word_location['left'] + line_width)
        black_count_per = self._count_dark_columns(columns, mid, bottom) / line_width
        if black_count_per > 0.92:
            print(f"{int_index}中间划线{black_count_per:.2f}", word)
            self.already_find_index.add(int_index)
            return int_index
        return None

    def color_algorithm_3(self, int_index, word_location, end_char_location, word):
        """Colour algorithm 3: look 20 px to the left and right of the line.

        Args:
            int_index: integer item number.
            word_location: box of the whole line
                (``order_ocr_data[idx]['location']``).
            end_char_location: box of the last character
                (``order_ocr_data[idx]['end_char_location']``).
            word: the item's word, used only in the log line.

        Returns:
            *int_index* when more than 40% of the flanking columns contain a
            dark pixel, otherwise None.
        """
        moving_distance = 20

        # x coordinates of both flanks: 20 px before the box and 20 px
        # starting 2 px after its right edge.
        left_edge = word_location['left']
        right_start = left_edge + word_location['width'] + 2
        all_x = list(range(left_edge - moving_distance, left_edge))
        all_x += range(right_start, right_start + moving_distance)

        line_height = int(word_location['height'])
        bottom_y = word_location['top'] + line_height
        # Start scanning at 80% of the last character's height.
        scan_start = end_char_location['top'] + int(end_char_location['height'] * 0.8)

        if int_index == 50 or int_index == 100:
            scan_end = bottom_y + line_height * 2
        else:
            next_first = self._close_next_first_char(int_index, bottom_y, line_height)
            if next_first is not None:
                scan_end = next_first['top'] + 3
            else:
                scan_end = bottom_y + int(line_height * 0.3)

        black_count_per = self._count_dark_columns(all_x, scan_start, scan_end) / len(all_x)
        if black_count_per > 0.4:
            print(f"{int_index}前后双边划线{black_count_per:.2f}", word)
            self.already_find_index.add(int_index)
            return int_index
        return None

    def _white_column_ratio(self, x_start, width, y1, y2):
        """Return the share of 1-px columns in [x_start, x_start + width) that
        look blank between rows y1 and y2.

        A column is blank when its darkest value is above 110 or its value
        range is below 90. A column whose region is empty (outside the image
        or y1 >= y2) is treated as blank, since it shows no stroke.
        """
        white_block = 0
        for x in range(x_start, x_start + width):
            roi_image = self.transformed_image[y1:y2, x:x + 1]
            if roi_image.size == 0:
                white_block += 1
                continue
            min_val = int(np.min(roi_image))
            max_val = int(np.max(roi_image))
            if min_val > 110 or max_val - min_val < 90:
                white_block += 1
        return white_block / width

    def color_algorithm_4(self, int_index, word_location, chars_location, word):
        """Grayscale value-range algorithm.

        First inspects the columns of the '.' / ',' separator character (or
        the third character when none is found), from the middle of its box
        to one box height below. If that fails, inspects the region under
        the second-to-last character.

        Args:
            int_index: integer item number.
            word_location: box of the whole line (unused here; kept for a
                uniform signature).
            chars_location: per-character OCR entries of the line.
            word: the item's word, used only in the log line.

        Returns:
            *int_index* when fewer than 10% of the inspected columns are
            blank in either pass, otherwise None.
        """
        for char_dict in chars_location:
            if char_dict['char'] == '.' or char_dict['char'] == ',':
                point_location = char_dict['location']
                break
        else:
            # No separator recognised: fall back to the third character,
            # clamped so short entries do not index out of range.
            point_location = chars_location[min(2, len(chars_location) - 1)]['location']

        point_width = point_location['width']
        if point_width <= 0:
            return None

        point_location_half = point_location['top'] + point_location['height'] // 2
        y1, y2 = point_location_half, point_location_half + point_location['height']
        if self._white_column_ratio(point_location['left'], point_width, y1, y2) < 0.1:
            print(
                f"{int_index}极差算法 {word},左{point_location['left']},宽{point_location['width']},高{point_location['height']},{y1},{y2}")
            self.already_find_index.add(int_index)
            return int_index

        end_char_location = chars_location[-2]['location']
        bottom = end_char_location['top'] + end_char_location['height']
        y1, y2 = bottom + 2, bottom + end_char_location['height'] - 10
        # NOTE(review): this pass scans `point_location['width']` columns, not
        # the end character's own width; kept as in the original - confirm
        # whether that is intended.
        if self._white_column_ratio(end_char_location['left'], point_width, y1, y2) < 0.1:
            print(
                f"{int_index}极差算法二 {word},左{point_location['left']},宽{point_location['width']},高{point_location['height']},{y1},{y2}")
            self.already_find_index.add(int_index)
            return int_index
        return None

    def core_algorithm(self):
        """Run the four heuristics, in order, on items 1..100.

        An item stops at the first heuristic that reports a hit.

        Returns:
            ``self.already_find_index``, the set of item numbers with a hit.
        """
        self.__make_order_ocr_data()
        for int_index in range(1, 101):
            current_dict = self.order_ocr_data.get(str(int_index))
            if current_dict is None:
                continue
            word = current_dict['word']
            word_location = current_dict['location']

            if self.color_algorithm_1(int_index=int_index, word_location=word_location,
                                      first_char_location=current_dict['first_char_location'],
                                      word=word):
                continue
            if self.color_algorithm_2(int_index=int_index, word_location=word_location,
                                      word=word):
                continue
            if self.color_algorithm_3(int_index=int_index, word_location=word_location,
                                      end_char_location=current_dict['end_char_location'],
                                      word=word):
                continue
            self.color_algorithm_4(int_index=int_index, word_location=word_location,
                                   chars_location=current_dict['chars_location'],
                                   word=word)
        return self.already_find_index


if __name__ == '__main__':
    # Optional first CLI argument overrides the default input image.
    image_path = sys.argv[1] if len(sys.argv) > 1 else r"C:\Users\86131\Desktop\4.jpg"
    script_path = Path(__file__).resolve()
    script_directory = script_path.parent
    # NOTE(review): PreprocessImage writes 'transformed_image.jpg' relative to
    # the current working directory, while it is read here from the script's
    # directory - run the script from its own directory, or confirm.
    transformed_image_path = str(Path(script_directory, r"transformed_image.jpg"))

    pi = PreprocessImage(image_path)
    pi.run()

    transformed_image_ocr_data = high_ocr_location(transformed_image_path)
    test_log(transformed_image_ocr_data)
    ca = ComparisonAlgorithm(transformed_image=transformed_image_path,
                             ocr_data=transformed_image_ocr_data)
    ca.core_algorithm()