123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388 |
- # -*- coding:utf-8 -*-
- """
- 需要增加,2个上下单词的黑点,靠近哪一边的算法,从而解决上下错位的问题
- """
- import re
- import time
- from PIL import Image, ImageFilter
- import numpy as np
- import cv2
- import json
- from pathlib import Path
- from baidu_ocr import high_ocr_location
def test_log(text: object) -> None:
    """Dump ``text`` to ``log.txt`` in the current working directory.

    The file is opened in write mode, so each call replaces the previous
    content. Dictionaries are serialised as JSON with non-ASCII characters
    kept readable; every other value is written through ``str()``.

    Args:
        text: Value to log, e.g. an OCR response dict or a plain string.
    """
    # isinstance (instead of an exact type comparison) also covers dict
    # subclasses such as OrderedDict.
    if isinstance(text, dict):
        text = json.dumps(text, ensure_ascii=False)
    with open("log.txt", "w", encoding="utf-8") as f:
        f.write(str(text))
class PreprocessImage:
    """Sharpen a photographed sheet and warp it onto the template layout.

    ``run`` drives the whole pipeline. It writes two intermediate files into
    the current working directory: ``sharpen_image.jpg`` and
    ``transformed_image.jpg``.
    """

    def __init__(self, image_path):
        """Remember the input path and load the reference template.

        Args:
            image_path: Path of the photographed sheet.

        Raises:
            FileNotFoundError: If ``template.jpg`` cannot be read by OpenCV.
        """
        self.image_path = image_path
        self.template_image_path = "template.jpg"
        # Not used by the methods of this class; kept as a public attribute.
        # May be None when OpenCV cannot decode the path.
        self.image = cv2.imread(image_path)
        self.template_image = self._read_image(self.template_image_path)
        # The warped output is produced at the template's size.
        self.temp_h, self.temp_w = self.template_image.shape[:2]

    @staticmethod
    def _read_image(path):
        """Read ``path`` with OpenCV, failing loudly instead of returning None."""
        image = cv2.imread(path)
        if image is None:
            raise FileNotFoundError(f"cannot read image: {path}")
        return image

    def correct_image(self, point_tuple, image_path='sharpen_image.jpg'):
        """Perspective-correct the sharpened image and save the result.

        The warped image is converted to grayscale, blurred with a 5x5
        Gaussian kernel and written to ``transformed_image.jpg`` as a
        three-channel image.

        Args:
            point_tuple: Four (x, y) source points, in the order returned by
                ``parser_ocr`` (anchors "1.", "51.", "50.", "100.").
            image_path: Image to warp; defaults to the file written by
                ``sharpen_image``.

        Raises:
            FileNotFoundError: If ``image_path`` cannot be read by OpenCV.
        """
        sharpen_image = self._read_image(image_path)
        src_points = np.float32(point_tuple)
        # Destination of each anchor in the output image.
        # NOTE(review): presumably measured on template.jpg - confirm.
        dst_points = np.float32([[122, 78], [1070, 78], [122, 2730], [1070, 2730]])
        M = cv2.getPerspectiveTransform(src_points, dst_points)
        transformed_image = cv2.warpPerspective(sharpen_image, M, (self.temp_w, self.temp_h))
        gray = cv2.cvtColor(transformed_image, cv2.COLOR_BGR2GRAY)
        blurred = cv2.GaussianBlur(gray, (5, 5), 0)
        # ``blurred`` has a single channel, so a BGR->RGB conversion is
        # rejected by OpenCV; expand gray to three equal channels instead.
        image_rgb = cv2.cvtColor(blurred, cv2.COLOR_GRAY2BGR)
        cv2.imwrite('transformed_image.jpg', image_rgb)

    def sharpen_image(self):
        """Apply PIL's SHARPEN filter and save ``sharpen_image.jpg``."""
        # The context manager releases the source file handle; the filtered
        # image is an independent copy and stays usable afterwards.
        with Image.open(self.image_path) as img:
            sharpened_img = img.filter(ImageFilter.SHARPEN)
        sharpened_img.save('sharpen_image.jpg')

    @staticmethod
    def parser_ocr(ocr_data):
        """Extract the four anchor points used for perspective correction.

        For every OCR line starting with "1.", "51.", "50." or "100." the
        top-left corner of its first character is taken. If a prefix occurs
        more than once, the last occurrence wins.

        Args:
            ocr_data: OCR response with a ``words_result`` list whose items
                carry ``words`` and per-character ``chars`` locations.

        Returns:
            ``[p1, p2, p3, p4]`` as (left, top) tuples for the anchors
            "1.", "51.", "50." and "100." respectively.

        Raises:
            ValueError: If any of the four anchors was not found.
        """
        anchor_prefixes = ("1.", "51.", "50.", "100.")
        points = [None, None, None, None]
        for word_item in ocr_data['words_result']:
            text: str = word_item['words']
            for slot, prefix in enumerate(anchor_prefixes):
                if not text.startswith(prefix):
                    continue
                chars = word_item['chars']
                # An anchor line without character boxes cannot provide a
                # point; skip it so the missing-anchor check reports it.
                if chars:
                    left_char_location = chars[0]['location']
                    points[slot] = (left_char_location['left'], left_char_location['top'])
                # The prefixes are mutually exclusive, no need to test more.
                break
        if any(point is None for point in points):
            print(points)
            raise ValueError("矫正坐标不对")
        return points

    def run(self):
        """Run sharpening, OCR anchor detection and perspective correction."""
        self.sharpen_image()
        # OCR runs on the original photo; the resulting points are applied to
        # the sharpened copy written by sharpen_image().
        ocr_data = high_ocr_location(self.image_path)
        point_tuple = self.parser_ocr(ocr_data)
        self.correct_image(point_tuple)
class ComparisonAlgorithm:
    """Core of the comparison algorithm.

    For every numbered word found by OCR (indexes 1..100) up to four pixel
    heuristics are tried in turn to decide whether a pen line was drawn at
    the word. Indexes that match are collected in ``already_find_index``.
    """

    def __init__(self, transformed_image, ocr_data):
        """Load the corrected image and keep the OCR result.

        Args:
            transformed_image: Path of the perspective-corrected image.
            ocr_data: OCR response for that image (``words_result`` list with
                ``words``, ``location`` and ``chars`` per item).
        """
        # OpenCV array, used by the range heuristic (algorithm 4).
        self.transformed_image = cv2.imread(transformed_image)
        self.ocr_data = ocr_data
        # index string -> word, line location and character locations.
        self.order_ocr_data = {}
        self.already_find_index = set()
        # PIL image, used for single-pixel look-ups. Forcing RGB guarantees
        # getpixel() yields an (r, g, b) triple even for a grayscale file.
        self.image = Image.open(transformed_image).convert("RGB")

    @staticmethod
    def separate_numbers_and_letters(text):
        """Split ``text`` into its digits and its ASCII letters.

        Returns:
            ``(numbers, letters)``: all digit runs joined together and all
            letter runs joined together, each in order of appearance.
        """
        numbers = "".join(re.findall(r'\d+', text))
        letters = "".join(re.findall(r'[a-zA-Z]+', text))
        return numbers, letters

    def is_line_word(self, x, y):
        """Return 1 if the pixel at (x, y) is dark, otherwise 0.

        A pixel counts as dark when all three channels are below 130.
        Coordinates outside the image count as not dark, so scans that run
        past an edge neither wrap around nor raise.
        """
        width, height = self.image.size
        if not (0 <= x < width and 0 <= y < height):
            return 0
        r, g, b = self.image.getpixel((x, y))
        if r < 130 and g < 130 and b < 130:
            return 1
        return 0

    def _dark_column_ratio(self, xs, y_start, y_stop):
        """Return the share of columns in ``xs`` with a dark pixel in [y_start, y_stop)."""
        xs = list(xs)
        if not xs:
            # Nothing to scan (e.g. zero-width box): no evidence of a line.
            return 0.0
        hits = sum(
            1 for x in xs
            if any(self.is_line_word(x, y) for y in range(y_start, y_stop))
        )
        return hits / len(xs)

    def _white_column_ratio(self, left, width, y1, y2):
        """Return the share of 1-pixel columns in the strip that look blank.

        A column is blank when its darkest value is above 110 or when its
        max-min range is below 90. Columns that fall outside the image, or an
        empty vertical span, are counted as blank.
        """
        if width <= 0:
            return 1.0
        y1 = max(y1, 0)
        if y2 <= y1:
            return 1.0
        white_block = 0
        for x in range(left, left + width):
            # Negative x would wrap around in NumPy slicing.
            roi_image = self.transformed_image[y1:y2, x:x + 1] if x >= 0 else None
            if roi_image is None or roi_image.size == 0:
                white_block += 1
                continue
            # Plain ints avoid any uint8 arithmetic surprises.
            min_val = int(np.min(roi_image))
            max_val = int(np.max(roi_image))
            if min_val > 110 or (max_val - min_val) < 90:
                white_block += 1
        return white_block / width

    def __make_order_ocr_data(self):
        """Index the OCR lines that start with a digit by their number.

        Fills ``self.order_ocr_data``; the first line seen for a given number
        wins. Lines shorter than two characters or without character boxes
        are ignored.
        """
        for word_item in self.ocr_data['words_result']:
            word_text = word_item['words']
            # Length is checked first so an empty string never reaches [0].
            if len(word_text) < 2 or not word_text[0].isdigit():
                continue
            chars_location = word_item['chars']
            if not chars_location:
                continue
            numbers, letters = self.separate_numbers_and_letters(word_text)
            if numbers in self.order_ocr_data:
                continue
            self.order_ocr_data[numbers] = {
                "word": letters,
                "location": word_item['location'],
                "chars_location": chars_location,
                "first_char_location": chars_location[0]['location'],
                "end_char_location": chars_location[-1]['location'],
            }

    def color_algorithm_1(self, int_index, word_location, first_char_location, word):
        """Heuristic 1: a line drawn in the gap directly below the word.

        Scans every column of the word's box, from the bottom of the first
        character down to a limit derived from the next word, and flags the
        word when more than 80% of the columns contain a dark pixel.

        Args:
            int_index: Integer index of the word.
            word_location: Location of the whole line, i.e.
                ``self.order_ocr_data[index]['location']``.
            first_char_location: Location of the first character, i.e.
                ``self.order_ocr_data[index]['first_char_location']``.
            word: The word itself, only used in the log line.

        Returns:
            ``int_index`` when the word is flagged, otherwise ``None``.
        """
        next_index = str(int_index + 1)
        b_top, b_height = first_char_location['top'], int(first_char_location['height'])
        bottom_location_y = b_top + b_height
        # The scan limit does not depend on the column, so compute it once.
        if int_index == 50 or int_index == 100:
            # Indexes 50 and 100 get a fixed, taller window instead of one
            # derived from the following index.
            next_word_top_location = bottom_location_y + b_height * 2
        elif next_index in self.order_ocr_data and (
                self.order_ocr_data[next_index]['first_char_location']['top'] - bottom_location_y) < b_height:
            # Next word is close: scan down to slightly inside its top edge.
            next_word_location = self.order_ocr_data[next_index]['first_char_location']
            next_word_top_location = next_word_location['top'] + int(next_word_location['height'] / 8)
        else:
            # Next word missing or far away: scan half a character height.
            next_word_top_location = bottom_location_y + int(b_height * 0.5)
        columns = range(word_location['left'], word_location['left'] + word_location['width'])
        black_count_per = self._dark_column_ratio(columns, bottom_location_y, next_word_top_location)
        if black_count_per > 0.8:
            print(f"{int_index}正常划线{black_count_per:.2f}", word)
            self.already_find_index.add(int_index)
            return int_index
        return None

    def color_algorithm_2(self, int_index, word_location, word):
        """Heuristic 2: a line drawn through the lower half of the word.

        Scans every column of the word's box from its vertical middle to 5
        pixels below its bottom and flags the word when more than 92% of the
        columns contain a dark pixel.

        Args:
            int_index: Integer index of the word.
            word_location: Location of the whole line, i.e.
                ``self.order_ocr_data[index]['location']``.
            word: The word itself, only used in the log line.

        Returns:
            ``int_index`` when the word is flagged, otherwise ``None``.
        """
        mid = word_location['top'] + int(word_location['height'] / 2)
        bottom = word_location['top'] + int(word_location['height']) + 5
        columns = range(word_location['left'], word_location['left'] + word_location['width'])
        black_count_per = self._dark_column_ratio(columns, mid, bottom)
        if black_count_per > 0.92:
            print(f"{int_index}中间划线{black_count_per:.2f}", word)
            self.already_find_index.add(int_index)
            return int_index
        return None

    def color_algorithm_3(self, int_index, word_location, end_char_location, word):
        """Heuristic 3: a line sticking out to the left and right of the word.

        Scans 20 columns left of the word and 20 columns right of it and
        flags the word when more than 40% of them contain a dark pixel.

        Args:
            int_index: Integer index of the word.
            word_location: Location of the whole line, i.e.
                ``self.order_ocr_data[index]['location']``.
            end_char_location: Location of the last character, i.e.
                ``self.order_ocr_data[index]['end_char_location']``.
            word: The word itself, only used in the log line.

        Returns:
            ``int_index`` when the word is flagged, otherwise ``None``.
        """
        next_index = str(int_index + 1)
        moving_distance = 20
        # All x coordinates to inspect: a strip left of the word and a strip
        # starting 2 px right of it.
        word_right_loca = word_location['left'] + word_location['width'] + 2
        all_x = list(range(word_location['left'] - moving_distance, word_location['left']))
        all_x.extend(range(word_right_loca, word_right_loca + moving_distance))

        b_top, b_height = word_location['top'], int(word_location['height'])
        bottom_location_y = b_top + b_height
        # Start at 80% of the last character's height.
        bottom_location_y_half = end_char_location['top'] + int(end_char_location['height'] * 0.8)
        if int_index == 50 or int_index == 100:
            next_word_top_location = bottom_location_y + b_height * 2
        elif next_index in self.order_ocr_data and (
                self.order_ocr_data[next_index]['first_char_location']['top'] - bottom_location_y) < b_height:
            next_word_location = self.order_ocr_data[next_index]['first_char_location']
            next_word_top_location = next_word_location['top'] + 3
        else:
            next_word_top_location = bottom_location_y + int(b_height * 0.3)
        black_count_per = self._dark_column_ratio(all_x, bottom_location_y_half, next_word_top_location)
        if black_count_per > 0.4:
            print(f"{int_index}前后双边划线{black_count_per:.2f}", word)
            self.already_find_index.add(int_index)
            return int_index
        return None

    def color_algorithm_4(self, int_index, word_location, chars_location, word):
        """Heuristic 4: value-range test on narrow strips of the OpenCV image.

        Pass one inspects the strip below the separator character (the first
        '.' or ',', falling back to the third character). Pass two inspects a
        strip of the same width below the second-to-last character. The word
        is flagged when fewer than 10% of a strip's columns look blank.

        Args:
            int_index: Integer index of the word.
            word_location: Unused; kept for a uniform call signature.
            chars_location: Per-character entries with ``char`` and
                ``location``.
            word: The word itself, only used in the log line.

        Returns:
            ``int_index`` when the word is flagged, otherwise ``None``.
        """
        if not chars_location:
            return None
        point_location = next(
            (char_dict['location'] for char_dict in chars_location
             if char_dict['char'] == '.' or char_dict['char'] == ','),
            None,
        )
        if point_location is None:
            # No separator recognised: use the third character, or the last
            # one when the line is shorter than that.
            point_location = chars_location[min(2, len(chars_location) - 1)]['location']
        if point_location['width'] <= 0:
            return None

        point_location_half = point_location['top'] + point_location['height'] // 2
        y1, y2 = point_location_half, point_location_half + point_location['height']
        white_ratio = self._white_column_ratio(point_location['left'], point_location['width'], y1, y2)
        if white_ratio < 0.1:
            print(f"{int_index}极差算法 {word},左{point_location['left']},宽{point_location['width']},高{point_location['height']},{y1},{y2}")
            self.already_find_index.add(int_index)
            return int_index

        if len(chars_location) < 2:
            return None
        end_char_location = chars_location[-2]['location']
        bottom = end_char_location['top'] + end_char_location['height']
        # For characters up to 12 px tall this span is empty; the helper then
        # reports the strip as blank instead of failing on an empty array.
        y1, y2 = bottom + 2, bottom + end_char_location['height'] - 10
        # NOTE(review): the strip width is taken from the separator character,
        # not from end_char_location - confirm this is intended.
        white_ratio = self._white_column_ratio(end_char_location['left'], point_location['width'], y1, y2)
        if white_ratio < 0.1:
            print(f"{int_index}极差算法二 {word},左{point_location['left']},宽{point_location['width']},高{point_location['height']},{y1},{y2}")
            self.already_find_index.add(int_index)
            return int_index
        return None

    def core_algorithm(self):
        """Run the heuristics for indexes 1..100, stopping at the first hit.

        Results are accumulated in ``self.already_find_index``.
        """
        self.__make_order_ocr_data()
        for int_index in range(1, 101):
            current_dict = self.order_ocr_data.get(str(int_index))
            if current_dict is None:
                continue
            word = current_dict['word']
            word_location = current_dict['location']
            first_char_location = current_dict['first_char_location']
            end_char_location = current_dict['end_char_location']
            chars_location = current_dict['chars_location']
            if self.color_algorithm_1(int_index=int_index, word_location=word_location,
                                      first_char_location=first_char_location, word=word):
                continue
            if self.color_algorithm_2(int_index=int_index, word_location=word_location, word=word):
                continue
            if self.color_algorithm_3(int_index=int_index, word_location=word_location,
                                      end_char_location=end_char_location, word=word):
                continue
            self.color_algorithm_4(int_index=int_index, word_location=word_location,
                                   chars_location=chars_location, word=word)
if __name__ == '__main__':
    # Photo of the sheet to analyse (absolute path on the developer machine).
    image_path = r"C:\Users\86131\Desktop\4.jpg"

    # The corrected image is looked up next to this script.
    # NOTE(review): PreprocessImage writes 'transformed_image.jpg' relative to
    # the current working directory - confirm the script is started from its
    # own directory, otherwise the two locations differ.
    script_directory = Path(__file__).resolve().parent
    transformed_image_path = str(script_directory / r"transformed_image.jpg")

    # Step 1: sharpen and perspective-correct the photo.
    preprocessor = PreprocessImage(image_path)
    preprocessor.run()

    # Step 2: OCR the corrected image and keep a copy of the raw response.
    transformed_image_ocr_data = high_ocr_location(transformed_image_path)
    test_log(transformed_image_ocr_data)

    # Step 3: run the pixel heuristics on every recognised word.
    comparison = ComparisonAlgorithm(
        transformed_image=transformed_image_path,
        ocr_data=transformed_image_ocr_data,
    )
    comparison.core_algorithm()
|