wx-ocr 0.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- wx_ocr/__init__.py +139 -0
- wx_ocr/cli.py +296 -0
- wx_ocr/default_callback.py +45 -0
- wx_ocr/mmmojo_dll.py +175 -0
- wx_ocr/ocr_manager.py +160 -0
- wx_ocr/ocr_protobuf_pb2.py +39 -0
- wx_ocr/simple_api.py +303 -0
- wx_ocr/utility_protobuf_pb2.py +39 -0
- wx_ocr/utils.py +161 -0
- wx_ocr/wco_data/Model/FPOCRRecog.xnet +0 -0
- wx_ocr/wco_data/Model/OCRDetFP32.xnet.nas +0 -0
- wx_ocr/wco_data/Model/OCRParaDetV1.1.0.26.xnet +0 -0
- wx_ocr/wco_data/Model/OCRRecogFP32V1.1.0.26.xnet +0 -0
- wx_ocr/wco_data/Model/RecDict +12043 -0
- wx_ocr/wco_data/Model/sohu_simp.txt +5031 -0
- wx_ocr/wco_data/WeChatOCR.exe +0 -0
- wx_ocr/wco_data/mmmojo.dll +0 -0
- wx_ocr/wco_data/mmmojo_64.dll +0 -0
- wx_ocr/wco_data/x64.config +1 -0
- wx_ocr/winapi.py +195 -0
- wx_ocr/xplugin_manager.py +194 -0
- wx_ocr-0.1.3.dist-info/METADATA +84 -0
- wx_ocr-0.1.3.dist-info/RECORD +27 -0
- wx_ocr-0.1.3.dist-info/WHEEL +5 -0
- wx_ocr-0.1.3.dist-info/entry_points.txt +2 -0
- wx_ocr-0.1.3.dist-info/licenses/LICENSE +21 -0
- wx_ocr-0.1.3.dist-info/top_level.txt +1 -0
wx_ocr/ocr_manager.py
ADDED
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import json
|
|
3
|
+
import time
|
|
4
|
+
import base64
|
|
5
|
+
from enum import Enum
|
|
6
|
+
from typing import Dict, Callable
|
|
7
|
+
from multiprocessing import Queue, Value
|
|
8
|
+
from google.protobuf.json_format import MessageToJson
|
|
9
|
+
|
|
10
|
+
from . import ocr_protobuf_pb2
|
|
11
|
+
from .winapi import *
|
|
12
|
+
from .mmmojo_dll import MMMojoInfoMethod
|
|
13
|
+
from .xplugin_manager import XPluginManager
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
# Capacity of the task-id queue created by OcrManager.
OCR_MAX_TASK_ID = 32
|
|
17
|
+
|
|
18
|
+
class RequestIdOCR(Enum):
    """Request identifiers used for messages sent to the OCR service."""
    # Passed as the request id when OcrManager.SendOCRTask pushes a task.
    OCRPush = 1
|
|
20
|
+
|
|
21
|
+
def OCRRemoteOnConnect(is_connected:c_bool, user_data:py_object):
    """Callback fired when the remote OCR side connects.

    Marks the owning manager as connected. ``is_connected`` is accepted
    but not consulted; the state is always set to True.

    Args:
        is_connected: Flag supplied by the caller (unused).
        user_data: Address of the owning OcrManager; falsy means no owner.
    """
    if not user_data:
        return
    owner:OcrManager = cast(user_data, py_object).value
    owner.SetConnectState(True)
|
|
25
|
+
|
|
26
|
+
def OCRRemoteOnDisConnect(user_data:py_object):
    """Callback fired when the remote OCR side disconnects.

    Marks the owning manager as disconnected.

    Args:
        user_data: Address of the owning OcrManager; falsy means no owner.
    """
    if not user_data:
        return
    owner:OcrManager = cast(user_data, py_object).value
    owner.SetConnectState(False)
|
|
30
|
+
|
|
31
|
+
def OCRReadOnPush(request_id:c_uint32, request_info:c_void_p, user_data:py_object):
    """Callback fired when the OCR service pushes a message.

    Fetches the serialized protobuf payload for ``request_info``, forwards
    it to the owning manager's ``CallUsrCallback`` and then releases the
    read info.

    Args:
        request_id: Request id supplied with the pushed message.
        request_info: Opaque handle of the pushed message.
        user_data: Address of the owning OcrManager; falsy means no owner.
    """
    if not user_data:
        return
    manager_obj:OcrManager = cast(user_data, py_object).value
    try:
        pb_size = c_uint32()
        pb_data = manager_obj.GetPbSerializedData(request_info, pb_size)
        # Payloads of 10 bytes or fewer are not forwarded.
        # NOTE(review): presumably these carry no OCR result - confirm.
        if pb_size.value > 10:
            manager_obj.CallUsrCallback(request_id, pb_data, pb_size.value)
    finally:
        # Always release the read info, even when payload handling or the
        # user callback raises; otherwise the handle is never freed.
        manager_obj.RemoveReadInfo(request_info)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class OcrManager(XPluginManager):
    """Manage the WeChatOCR service and dispatch OCR tasks to it.

    Each task is tagged with an id taken from a bounded pool of
    ``OCR_MAX_TASK_ID`` ids. An id is taken by ``DoOCRTask`` and returned
    to the pool once the matching response has been handled by
    ``CallUsrCallback``.
    """

    # Class-level defaults only. The mutable members (task-id pool, id->path
    # table, connection flag) are created per instance in __init__ so that
    # two managers never share them.
    m_task_id: Queue = None
    m_id_path: Dict[int, str] = None
    m_usr_lib_dir: str = None
    m_wechatocr_running: bool = False
    m_connect_state: Value = None
    m_usr_callback: Callable = None

    def __init__(self, wechat_path) -> None:
        """Create the manager and fill the pool with ids 1..OCR_MAX_TASK_ID."""
        super().__init__(wechat_path)
        self.m_task_id = Queue(OCR_MAX_TASK_ID)
        self.m_id_path = {}
        self.m_connect_state = Value('b', False)
        for task_id in range(1, OCR_MAX_TASK_ID + 1):
            self.m_task_id.put(task_id)

    def __del__(self):
        """Stop the OCR service if it is still marked as running."""
        if self.m_wechatocr_running:
            self.KillWeChatOCR()

    def SetUsrLibDir(self, usr_lib_dir:str):
        """Remember ``usr_lib_dir`` and pass it as the ``user-lib-dir`` switch."""
        self.m_usr_lib_dir = usr_lib_dir
        self.AppendSwitchNativeCmdLine("user-lib-dir", usr_lib_dir)

    def SetOcrResultCallback(self, func:Callable):
        """Register ``func(pic_path, result_dict)`` as the result callback.

        Only one callback is kept; a later call replaces the earlier one.
        """
        self.m_usr_callback = func

    def StartWeChatOCR(self):
        """Initialise the MMMojo environment and mark the service as running."""
        self.SetCallbackUsrData(self)
        self.InitMMMojoEnv()
        self.m_wechatocr_running = True

    def KillWeChatOCR(self):
        """Mark the service as stopped/disconnected and stop the MMMojo environment."""
        self.m_connect_state.value = False
        self.m_wechatocr_running = False
        self.StopMMMojoEnv()

    def DoOCRTask(self, pic_path:str):
        """Submit ``pic_path`` for recognition.

        Blocks (polling once per second) until the service reports that it
        is connected. If no task id becomes free, a message is printed and
        the task is not submitted.

        Raises:
            Exception: if the service was not started or the file is missing.
        """
        if not self.m_wechatocr_running:
            raise Exception("请先调用StartWeChatOCR启动")
        if not os.path.exists(pic_path):
            raise Exception(f"给定图片路径pic_path不存在: {pic_path}")
        pic_path = os.path.abspath(pic_path)
        while not self.m_connect_state.value:
            print("等待Ocr服务连接成功!")
            time.sleep(1)
        _id = self.GetIdleTaskId()
        if not _id:
            print("当前队列已满,请等待后重试")
            return
        self.SendOCRTask(_id, pic_path)

    def SetConnectState(self, connect:bool):
        """Record whether the OCR service is currently connected."""
        self.m_connect_state.value = connect

    def SendOCRTask(self, task_id:int, pic_path:str):
        """Serialize an OcrRequest for ``pic_path`` and push it to the service."""
        # Remember which picture the id belongs to so that the response can
        # be reported with its path.
        self.m_id_path[task_id] = pic_path
        ocr_request = ocr_protobuf_pb2.OcrRequest()
        ocr_request.unknow = 0
        ocr_request.task_id = task_id

        pic_paths = ocr_request.pic_path
        pic_paths.pic_path.extend([pic_path])
        serialized_data = ocr_request.SerializeToString()
        self.SendPbSerializedData(serialized_data, len(serialized_data), MMMojoInfoMethod.kMMPush.value, 0, RequestIdOCR.OCRPush.value)

    def CallUsrCallback(self, request_id:c_uint32, serialized_data: c_void_p, data_size: int):
        """Parse a serialized OcrResponse and hand it to the user callback.

        Args:
            request_id: Request id of the pushed message (unused here).
            serialized_data: Address of the serialized protobuf bytes.
            data_size: Number of bytes at ``serialized_data``.
        """
        # Copy the bytes out of the foreign buffer before parsing.
        raw = bytes((c_ubyte * data_size).from_address(serialized_data))
        ocr_response = ocr_protobuf_pb2.OcrResponse()
        ocr_response.ParseFromString(raw)
        task_id = ocr_response.task_id
        # Drop the id->path entry as the task is consumed, so a duplicate
        # response for the same id cannot return the id to the pool twice.
        pic_path = self.m_id_path.pop(task_id, None)
        if not pic_path:
            return
        try:
            if self.m_usr_callback:
                json_response_str = MessageToJson(ocr_response)
                self.m_usr_callback(pic_path, self.parse_json_response(json_response_str))
        finally:
            # Release the id even if the user callback raised; otherwise
            # the pool would shrink permanently.
            self.SetTaskIdIdle(task_id)

    def parse_json_response(self, json_response_str:str):
        """Convert the JSON form of an OcrResponse into a plain result dict.

        Returns:
            ``{"taskId": ..., "ocrResult": [{"text", "location", "pos"}, ...]}``
            where ``location`` holds ``left``/``top``/``right``/``bottom``.
        """
        json_response = json.loads(json_response_str)
        results = {
            # A missing key is tolerated instead of raising KeyError.
            "taskId": json_response.get("taskId", 0),
            "ocrResult": []
        }
        singleResult = json_response.get("ocrResult", {}).get("singleResult")
        if not singleResult:
            return results

        for entry in singleResult:
            pos = entry.get('singlePos', {}).get('pos')
            # A one-element position list is unwrapped to the element itself.
            if isinstance(pos, list) and len(pos) == 1:
                pos = pos[0]
            # The text arrives base64-encoded in the JSON form.
            text = base64.b64decode(entry.get("singleStrUtf8", '')).decode('utf-8')
            results["ocrResult"].append({
                "text": text,
                "location": {
                    "left": entry.get('left'),
                    "top": entry.get("top"),
                    "right": entry.get('right'),
                    "bottom": entry.get('bottom')
                },
                "pos": pos
            })
        return results

    def GetIdleTaskId(self):
        """Return a free task id, or None if none is free within one second."""
        from queue import Empty  # multiprocessing queues raise queue.Empty

        try:
            return self.m_task_id.get(timeout=1)
        except Empty:
            return None

    def SetTaskIdIdle(self, _id):
        """Return ``_id`` to the pool of free task ids."""
        self.m_task_id.put(_id)

    def SetDefaultCallbaks(self):
        """Register the OCR connect/disconnect/push callbacks, then the base defaults."""
        super().SetOneCallback("kMMRemoteConnect", OCRRemoteOnConnect)
        super().SetOneCallback("kMMRemoteDisconnect", OCRRemoteOnDisConnect)
        super().SetOneCallback("kMMReadPush", OCRReadOnPush)
        super().SetDefaultCallbaks()
|
|
159
|
+
|
|
160
|
+
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
# Generated by the protocol buffer compiler. DO NOT EDIT!
|
|
3
|
+
# source: ocr_protobuf.proto
|
|
4
|
+
"""Generated protocol buffer code."""
|
|
5
|
+
from google.protobuf.internal import builder as _builder
|
|
6
|
+
from google.protobuf import descriptor as _descriptor
|
|
7
|
+
from google.protobuf import descriptor_pool as _descriptor_pool
|
|
8
|
+
from google.protobuf import symbol_database as _symbol_database
|
|
9
|
+
# @@protoc_insertion_point(imports)
|
|
10
|
+
|
|
11
|
+
_sym_db = _symbol_database.Default()
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x12ocr_protobuf.proto\x12\x0cocr_protobuf\"\xa8\x06\n\x0bOcrResponse\x12\x0c\n\x04type\x18\x01 \x01(\x05\x12\x0f\n\x07task_id\x18\x02 \x01(\x05\x12\x10\n\x08\x65rr_code\x18\x03 \x01(\x05\x12\x37\n\nocr_result\x18\x04 \x01(\x0b\x32#.ocr_protobuf.OcrResponse.OcrResult\x1a\xae\x05\n\tOcrResult\x12G\n\rsingle_result\x18\x01 \x03(\x0b\x32\x30.ocr_protobuf.OcrResponse.OcrResult.SingleResult\x12\x11\n\tunknown_1\x18\x02 \x01(\x05\x12\x11\n\tunknown_2\x18\x03 \x01(\x05\x1al\n\tResultPos\x12@\n\x03pos\x18\x01 \x03(\x0b\x32\x33.ocr_protobuf.OcrResponse.OcrResult.ResultPos.PosXY\x1a\x1d\n\x05PosXY\x12\t\n\x01x\x18\x01 \x01(\x02\x12\t\n\x01y\x18\x02 \x01(\x02\x1a\xc3\x03\n\x0cSingleResult\x12\x41\n\nsingle_pos\x18\x01 \x01(\x0b\x32-.ocr_protobuf.OcrResponse.OcrResult.ResultPos\x12\x17\n\x0fsingle_str_utf8\x18\x02 \x01(\x0c\x12\x13\n\x0bsingle_rate\x18\x03 \x01(\x02\x12N\n\none_result\x18\x04 \x03(\x0b\x32:.ocr_protobuf.OcrResponse.OcrResult.SingleResult.OneResult\x12\x0c\n\x04left\x18\x05 \x01(\x02\x12\x0b\n\x03top\x18\x06 \x01(\x02\x12\r\n\x05right\x18\x07 \x01(\x02\x12\x0e\n\x06\x62ottom\x18\x08 \x01(\x02\x12\x11\n\tunknown_0\x18\t \x01(\x05\x12\x42\n\x0bunknown_pos\x18\n \x01(\x0b\x32-.ocr_protobuf.OcrResponse.OcrResult.ResultPos\x1a\x61\n\tOneResult\x12>\n\x07one_pos\x18\x01 \x01(\x0b\x32-.ocr_protobuf.OcrResponse.OcrResult.ResultPos\x12\x14\n\x0cone_str_utf8\x18\x02 \x01(\x0c\"\x80\x01\n\nOcrRequest\x12\x0e\n\x06unknow\x18\x01 \x01(\x05\x12\x0f\n\x07task_id\x18\x02 \x01(\x05\x12\x33\n\x08pic_path\x18\x03 \x01(\x0b\x32!.ocr_protobuf.OcrRequest.PicPaths\x1a\x1c\n\x08PicPaths\x12\x10\n\x08pic_path\x18\x01 \x03(\tb\x06proto3')
|
|
17
|
+
|
|
18
|
+
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals())
|
|
19
|
+
_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'ocr_protobuf_pb2', globals())
|
|
20
|
+
if _descriptor._USE_C_DESCRIPTORS == False:
|
|
21
|
+
|
|
22
|
+
DESCRIPTOR._options = None
|
|
23
|
+
_OCRRESPONSE._serialized_start=37
|
|
24
|
+
_OCRRESPONSE._serialized_end=845
|
|
25
|
+
_OCRRESPONSE_OCRRESULT._serialized_start=159
|
|
26
|
+
_OCRRESPONSE_OCRRESULT._serialized_end=845
|
|
27
|
+
_OCRRESPONSE_OCRRESULT_RESULTPOS._serialized_start=283
|
|
28
|
+
_OCRRESPONSE_OCRRESULT_RESULTPOS._serialized_end=391
|
|
29
|
+
_OCRRESPONSE_OCRRESULT_RESULTPOS_POSXY._serialized_start=362
|
|
30
|
+
_OCRRESPONSE_OCRRESULT_RESULTPOS_POSXY._serialized_end=391
|
|
31
|
+
_OCRRESPONSE_OCRRESULT_SINGLERESULT._serialized_start=394
|
|
32
|
+
_OCRRESPONSE_OCRRESULT_SINGLERESULT._serialized_end=845
|
|
33
|
+
_OCRRESPONSE_OCRRESULT_SINGLERESULT_ONERESULT._serialized_start=748
|
|
34
|
+
_OCRRESPONSE_OCRRESULT_SINGLERESULT_ONERESULT._serialized_end=845
|
|
35
|
+
_OCRREQUEST._serialized_start=848
|
|
36
|
+
_OCRREQUEST._serialized_end=976
|
|
37
|
+
_OCRREQUEST_PICPATHS._serialized_start=948
|
|
38
|
+
_OCRREQUEST_PICPATHS._serialized_end=976
|
|
39
|
+
# @@protoc_insertion_point(module_scope)
|
wx_ocr/simple_api.py
ADDED
|
@@ -0,0 +1,303 @@
|
|
|
1
|
+
"""
|
|
2
|
+
超级简单的 OCR API - 只需要传入图片路径,返回识别结果
|
|
3
|
+
"""
|
|
4
|
+
import os
|
|
5
|
+
import time
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import List, Dict, Union, Optional
|
|
8
|
+
from .ocr_manager import OcrManager, OCR_MAX_TASK_ID
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
# Global OCR manager (singleton); set by _get_ocr_manager().
_global_ocr_manager = None
# Cached wco_data directory path; set by _get_wco_data_dir().
_global_wco_data_dir = None
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def _get_wco_data_dir():
|
|
17
|
+
"""获取 wco_data 目录路径."""
|
|
18
|
+
global _global_wco_data_dir
|
|
19
|
+
|
|
20
|
+
if _global_wco_data_dir is None:
|
|
21
|
+
# 尝试多个可能的位置
|
|
22
|
+
possible_paths = [
|
|
23
|
+
Path(__file__).parent / "wco_data", # 包内部(pip 安装后)
|
|
24
|
+
Path(__file__).parent.parent / "wco_data", # 开发环境:项目根目录
|
|
25
|
+
Path.cwd() / "wco_data", # 当前工作目录
|
|
26
|
+
]
|
|
27
|
+
|
|
28
|
+
# 如果是通过 pip 安装的,检查 site-packages 中的位置
|
|
29
|
+
try:
|
|
30
|
+
import site
|
|
31
|
+
for site_dir in site.getsitepackages():
|
|
32
|
+
site_wco_data = Path(site_dir) / "wechat_ocr" / "wco_data"
|
|
33
|
+
if site_wco_data.exists():
|
|
34
|
+
possible_paths.insert(0, site_wco_data)
|
|
35
|
+
except:
|
|
36
|
+
pass
|
|
37
|
+
|
|
38
|
+
for path in possible_paths:
|
|
39
|
+
if path.exists() and (path / "WeChatOCR.exe").exists():
|
|
40
|
+
_global_wco_data_dir = str(path)
|
|
41
|
+
break
|
|
42
|
+
|
|
43
|
+
if _global_wco_data_dir is None:
|
|
44
|
+
raise FileNotFoundError(
|
|
45
|
+
"找不到 wco_data 目录!\n"
|
|
46
|
+
"请确保 wco_data 目录存在,并包含 WeChatOCR.exe 文件。\n"
|
|
47
|
+
"可能的位置:\n"
|
|
48
|
+
"1. wechat_ocr/wco_data (包内部)\n"
|
|
49
|
+
"2. 项目根目录/wco_data\n"
|
|
50
|
+
"3. 当前工作目录/wco_data"
|
|
51
|
+
)
|
|
52
|
+
|
|
53
|
+
return _global_wco_data_dir
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _get_ocr_manager():
|
|
57
|
+
"""获取全局 OCR 管理器(单例)."""
|
|
58
|
+
global _global_ocr_manager
|
|
59
|
+
|
|
60
|
+
if _global_ocr_manager is None:
|
|
61
|
+
wco_data_dir = _get_wco_data_dir()
|
|
62
|
+
wechat_ocr_exe = os.path.join(wco_data_dir, "WeChatOCR.exe")
|
|
63
|
+
|
|
64
|
+
# 创建管理器
|
|
65
|
+
_global_ocr_manager = OcrManager(wco_data_dir)
|
|
66
|
+
_global_ocr_manager.SetExePath(wechat_ocr_exe)
|
|
67
|
+
_global_ocr_manager.SetUsrLibDir(wco_data_dir)
|
|
68
|
+
|
|
69
|
+
# 启动服务
|
|
70
|
+
_global_ocr_manager.StartWeChatOCR()
|
|
71
|
+
|
|
72
|
+
# 等待连接
|
|
73
|
+
max_wait = 10
|
|
74
|
+
waited = 0
|
|
75
|
+
while not _global_ocr_manager.m_connect_state.value and waited < max_wait:
|
|
76
|
+
time.sleep(0.5)
|
|
77
|
+
waited += 0.5
|
|
78
|
+
|
|
79
|
+
if not _global_ocr_manager.m_connect_state.value:
|
|
80
|
+
_global_ocr_manager = None
|
|
81
|
+
raise RuntimeError(
|
|
82
|
+
"OCR 服务连接失败!\n"
|
|
83
|
+
"可能的原因:\n"
|
|
84
|
+
"1. WeChatOCR.exe 无法启动\n"
|
|
85
|
+
"2. mmmojo.dll 版本不兼容\n"
|
|
86
|
+
"3. 缺少依赖文件"
|
|
87
|
+
)
|
|
88
|
+
|
|
89
|
+
return _global_ocr_manager
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def _format_text_with_lines(ocr_result: List[Dict], line_threshold: int = 10) -> str:
|
|
93
|
+
"""
|
|
94
|
+
将 OCR 结果格式化为带换行的文本
|
|
95
|
+
|
|
96
|
+
Args:
|
|
97
|
+
ocr_result: OCR 识别结果列表
|
|
98
|
+
line_threshold: 判断是否为新行的垂直距离阈值(像素)
|
|
99
|
+
|
|
100
|
+
Returns:
|
|
101
|
+
格式化后的文本,包含换行
|
|
102
|
+
"""
|
|
103
|
+
if not ocr_result:
|
|
104
|
+
return ""
|
|
105
|
+
|
|
106
|
+
# 按垂直位置(top)排序
|
|
107
|
+
sorted_items = sorted(ocr_result, key=lambda x: (x['location']['top'], x['location']['left']))
|
|
108
|
+
|
|
109
|
+
lines = []
|
|
110
|
+
current_line = []
|
|
111
|
+
last_bottom = 0
|
|
112
|
+
|
|
113
|
+
for item in sorted_items:
|
|
114
|
+
top = item['location']['top']
|
|
115
|
+
|
|
116
|
+
# 如果 top 位置差距较大,说明是新的一行
|
|
117
|
+
if current_line and abs(top - last_bottom) > line_threshold:
|
|
118
|
+
lines.append(' '.join([i['text'] for i in current_line]))
|
|
119
|
+
current_line = []
|
|
120
|
+
|
|
121
|
+
current_line.append(item)
|
|
122
|
+
last_bottom = item['location']['bottom']
|
|
123
|
+
|
|
124
|
+
# 添加最后一行
|
|
125
|
+
if current_line:
|
|
126
|
+
lines.append(' '.join([i['text'] for i in current_line]))
|
|
127
|
+
|
|
128
|
+
return '\n'.join(lines)
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def ocr(image_path: str, return_text_only: bool = False, format_text: bool = False) -> Union[List[str], str, Dict]:
    """Recognise a single image (the simplest interface).

    Args:
        image_path: Path of the image.
        return_text_only: Return only the recognised text instead of the
            full result (default False).
        format_text: Format the text with line breaks (only used when
            ``return_text_only`` is True).

    Returns:
        * ``return_text_only=False``: the full result dict.
        * ``return_text_only=True, format_text=False``: list of text
          strings, e.g. ``["text1", "text2", ...]``.
        * ``return_text_only=True, format_text=True``: one string with
          line breaks.

    Raises:
        FileNotFoundError: if ``image_path`` does not exist.
        TimeoutError: if no result arrives within 10 seconds.

    Examples:
        >>> # text list only
        >>> texts = ocr("image.png", return_text_only=True)
        >>> # formatted text with line breaks
        >>> text = ocr("image.png", return_text_only=True, format_text=True)
        >>> # full result including positions
        >>> result = ocr("image.png")
        >>> for item in result['ocrResult']:
        ...     print(item['text'], item['location'])
    """
    # Check that the image exists.
    if not os.path.exists(image_path):
        raise FileNotFoundError(f"图片不存在: {image_path}")

    manager = _get_ocr_manager()
    target_path = os.path.abspath(image_path)

    result_container = []

    def callback(img_path, result):
        # The manager keeps a single callback, so a late result from an
        # earlier call can arrive here; accept only this call's image.
        if img_path == target_path:
            result_container.append(result)

    manager.SetOcrResultCallback(callback)
    manager.DoOCRTask(target_path)

    # Wait for the result (10 seconds at most).
    max_wait = 10
    deadline = time.monotonic() + max_wait
    while not result_container and time.monotonic() < deadline:
        time.sleep(0.1)

    if not result_container:
        raise TimeoutError(f"识别超时: {image_path}")

    result = result_container[0]

    if not return_text_only:
        return result

    ocr_result = result.get('ocrResult', [])
    if format_text:
        return _format_text_with_lines(ocr_result)
    return [item['text'] for item in ocr_result]
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
def ocr_batch(image_paths: List[str], return_text_only: bool = False, format_text: bool = False) -> List[Union[List[str], str, Dict]]:
    """Recognise several images in one call.

    Args:
        image_paths: List of image paths. A path may appear more than
            once; it is submitted once and its result is reused.
        return_text_only: Return only the recognised text for each image.
        format_text: Format the text with line breaks (only used when
            ``return_text_only`` is True).

    Returns:
        One entry per input path, in input order. An empty input returns
        an empty list without starting the OCR service.

    Raises:
        FileNotFoundError: if any path does not exist.
        TimeoutError: if not all results arrive within 30 seconds.

    Examples:
        >>> results = ocr_batch(["1.png", "2.png", "3.png"], return_text_only=True)
        >>> for texts in results:
        ...     print(texts)
    """
    image_paths = list(image_paths)

    # Check that every image exists.
    for img_path in image_paths:
        if not os.path.exists(img_path):
            raise FileNotFoundError(f"图片不存在: {img_path}")

    if not image_paths:
        return []

    abs_paths = [os.path.abspath(img_path) for img_path in image_paths]
    # Results are keyed by path, so completion is counted over distinct
    # paths; counting raw inputs would never finish with duplicates.
    unique_paths = list(dict.fromkeys(abs_paths))
    expected = set(unique_paths)

    manager = _get_ocr_manager()

    results = {}

    def callback(img_path, result):
        # Ignore results that belong to an earlier call.
        if img_path in expected:
            results[img_path] = result

    manager.SetOcrResultCallback(callback)

    # Submit every distinct image once.
    for abs_path in unique_paths:
        manager.DoOCRTask(abs_path)

    # Wait for all results (30 seconds at most).
    max_wait = 30
    deadline = time.monotonic() + max_wait
    while len(results) < len(expected) and time.monotonic() < deadline:
        time.sleep(0.1)

    if len(results) < len(expected):
        raise TimeoutError(f"部分图片识别超时,完成 {len(results)}/{len(expected)}")

    # Return the results in input order.
    ordered_results = []
    for abs_path in abs_paths:
        result = results[abs_path]
        if not return_text_only:
            ordered_results.append(result)
            continue
        ocr_result = result.get('ocrResult', [])
        if format_text:
            ordered_results.append(_format_text_with_lines(ocr_result))
        else:
            ordered_results.append([item['text'] for item in ocr_result])

    return ordered_results
|
|
282
|
+
|
|
283
|
+
|
|
284
|
+
def cleanup():
    """Shut down the global OCR manager, if one exists (optional).

    Normally this does not need to be called by hand because it is also
    run at interpreter exit; call it to release the service earlier.
    Errors raised while stopping the service are ignored, and the global
    manager is cleared in every case.
    """
    global _global_ocr_manager

    manager, _global_ocr_manager = _global_ocr_manager, None
    if manager is None:
        return

    try:
        manager.KillWeChatOCR()
    except Exception:
        # Best-effort teardown. Only ordinary errors are ignored;
        # KeyboardInterrupt / SystemExit still propagate.
        pass
|
|
299
|
+
|
|
300
|
+
|
|
301
|
+
# 程序退出时自动清理
|
|
302
|
+
import atexit
|
|
303
|
+
atexit.register(cleanup)
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
# Generated by the protocol buffer compiler. DO NOT EDIT!
|
|
3
|
+
# source: utility_protobuf.proto
|
|
4
|
+
"""Generated protocol buffer code."""
|
|
5
|
+
from google.protobuf.internal import builder as _builder
|
|
6
|
+
from google.protobuf import descriptor as _descriptor
|
|
7
|
+
from google.protobuf import descriptor_pool as _descriptor_pool
|
|
8
|
+
from google.protobuf import symbol_database as _symbol_database
|
|
9
|
+
# @@protoc_insertion_point(imports)
|
|
10
|
+
|
|
11
|
+
_sym_db = _symbol_database.Default()
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x16utility_protobuf.proto\x12\x10utility_protobuf\"8\n\x0eInitReqMessage\x12\x18\n\x10\x65xpire_timestamp\x18\x01 \x01(\x05\x12\x0c\n\x04type\x18\x02 \x01(\x05\"!\n\x0fInitRespMessage\x12\x0e\n\x06status\x18\x01 \x01(\x05\"x\n\x17ResampleImageReqMessage\x12\x12\n\ndecode_pic\x18\x01 \x01(\t\x12\x12\n\nencode_pic\x18\x02 \x01(\t\x12\x10\n\x08re_pic_x\x18\x03 \x01(\x05\x12\x10\n\x08re_pic_y\x18\x04 \x01(\x05\x12\x11\n\tunknown_0\x18\x05 \x01(\x05\",\n\x18ResampleImageRespMessage\x12\x10\n\x08\x65rr_code\x18\x01 \x01(\x05\"m\n\x0fTextScanMessage\x12\x14\n\x0ctext_scan_id\x18\x01 \x01(\x05\x12\x10\n\x08pic_path\x18\x02 \x01(\t\x12\x11\n\thave_text\x18\x03 \x01(\x05\x12\x11\n\tunknown_0\x18\x04 \x01(\x05\x12\x0c\n\x04rate\x18\x05 \x01(\x02\"\xce\x01\n\x10QRScanReqMessage\x12\x12\n\norigin_pic\x18\x01 \x01(\t\x12\x12\n\ndecode_pic\x18\x02 \x01(\t\x12\x11\n\tunknown_0\x18\x03 \x01(\x05\x12\x17\n\x0f\x65ncode_pic_data\x18\x04 \x01(\x0c\x12\x14\n\x0c\x65ncode_pic_x\x18\x05 \x01(\x05\x12\x14\n\x0c\x65ncode_pic_y\x18\x06 \x01(\x05\x12\x11\n\tunknown_1\x18\x07 \x01(\x05\x12\x14\n\x0ctext_scan_id\x18\x08 \x01(\x05\x12\x11\n\tunknown_3\x18\t \x01(\x05\"\xd6\x01\n\x11QRScanRespMessage\x12\x43\n\tqr_result\x18\x01 \x03(\x0b\x32\x30.utility_protobuf.QRScanRespMessage.QRScanResult\x12\x11\n\tunknown_0\x18\x02 \x01(\x05\x1ai\n\x0cQRScanResult\x12\x0e\n\x06result\x18\x01 \x01(\x0c\x12\x10\n\x08unknow_0\x18\x02 \x01(\x05\x12\x11\n\tunknown_1\x18\x03 \x01(\x01\x12\x11\n\tunknown_2\x18\x04 \x01(\x01\x12\x11\n\tunknown_3\x18\x05 \x01(\x05\x62\x06proto3')
|
|
17
|
+
|
|
18
|
+
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals())
|
|
19
|
+
_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'utility_protobuf_pb2', globals())
|
|
20
|
+
if _descriptor._USE_C_DESCRIPTORS == False:
|
|
21
|
+
|
|
22
|
+
DESCRIPTOR._options = None
|
|
23
|
+
_INITREQMESSAGE._serialized_start=44
|
|
24
|
+
_INITREQMESSAGE._serialized_end=100
|
|
25
|
+
_INITRESPMESSAGE._serialized_start=102
|
|
26
|
+
_INITRESPMESSAGE._serialized_end=135
|
|
27
|
+
_RESAMPLEIMAGEREQMESSAGE._serialized_start=137
|
|
28
|
+
_RESAMPLEIMAGEREQMESSAGE._serialized_end=257
|
|
29
|
+
_RESAMPLEIMAGERESPMESSAGE._serialized_start=259
|
|
30
|
+
_RESAMPLEIMAGERESPMESSAGE._serialized_end=303
|
|
31
|
+
_TEXTSCANMESSAGE._serialized_start=305
|
|
32
|
+
_TEXTSCANMESSAGE._serialized_end=414
|
|
33
|
+
_QRSCANREQMESSAGE._serialized_start=417
|
|
34
|
+
_QRSCANREQMESSAGE._serialized_end=623
|
|
35
|
+
_QRSCANRESPMESSAGE._serialized_start=626
|
|
36
|
+
_QRSCANRESPMESSAGE._serialized_end=840
|
|
37
|
+
_QRSCANRESPMESSAGE_QRSCANRESULT._serialized_start=735
|
|
38
|
+
_QRSCANRESPMESSAGE_QRSCANRESULT._serialized_end=840
|
|
39
|
+
# @@protoc_insertion_point(module_scope)
|