webull-openapi-python-sdk 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (295) hide show
  1. samples/__init__.py +1 -0
  2. samples/data/__init__.py +1 -0
  3. samples/data/data_client.py +57 -0
  4. samples/data/data_streaming_client.py +86 -0
  5. samples/data/data_streaming_client_async.py +101 -0
  6. samples/trade/__init__.py +0 -0
  7. samples/trade/trade_client.py +163 -0
  8. samples/trade/trade_client_v2.py +181 -0
  9. samples/trade/trade_event_client.py +47 -0
  10. webull/__init__.py +1 -0
  11. webull/core/__init__.py +12 -0
  12. webull/core/auth/__init__.py +0 -0
  13. webull/core/auth/algorithm/__init__.py +0 -0
  14. webull/core/auth/algorithm/sha_hmac1.py +65 -0
  15. webull/core/auth/algorithm/sha_hmac256.py +75 -0
  16. webull/core/auth/composer/__init__.py +0 -0
  17. webull/core/auth/composer/default_signature_composer.py +125 -0
  18. webull/core/auth/credentials.py +46 -0
  19. webull/core/auth/signers/__init__.py +0 -0
  20. webull/core/auth/signers/app_key_signer.py +72 -0
  21. webull/core/auth/signers/signer.py +48 -0
  22. webull/core/auth/signers/signer_factory.py +58 -0
  23. webull/core/cache/__init__.py +225 -0
  24. webull/core/client.py +410 -0
  25. webull/core/common/__init__.py +0 -0
  26. webull/core/common/api_type.py +19 -0
  27. webull/core/common/easy_enum.py +35 -0
  28. webull/core/common/region.py +7 -0
  29. webull/core/compat.py +85 -0
  30. webull/core/context/__init__.py +0 -0
  31. webull/core/context/request_context_holder.py +33 -0
  32. webull/core/data/endpoints.json +22 -0
  33. webull/core/data/retry_config.json +15 -0
  34. webull/core/endpoint/__init__.py +8 -0
  35. webull/core/endpoint/chained_endpoint_resolver.py +57 -0
  36. webull/core/endpoint/default_endpoint_resolver.py +60 -0
  37. webull/core/endpoint/local_config_regional_endpoint_resolver.py +77 -0
  38. webull/core/endpoint/resolver_endpoint_request.py +46 -0
  39. webull/core/endpoint/user_customized_endpoint_resolver.py +55 -0
  40. webull/core/exception/__init__.py +0 -0
  41. webull/core/exception/error_code.py +23 -0
  42. webull/core/exception/error_msg.py +21 -0
  43. webull/core/exception/exceptions.py +53 -0
  44. webull/core/headers.py +57 -0
  45. webull/core/http/__init__.py +0 -0
  46. webull/core/http/initializer/__init__.py +0 -0
  47. webull/core/http/initializer/client_initializer.py +79 -0
  48. webull/core/http/initializer/token/__init__.py +0 -0
  49. webull/core/http/initializer/token/bean/__init__.py +0 -0
  50. webull/core/http/initializer/token/bean/access_token.py +40 -0
  51. webull/core/http/initializer/token/bean/check_token_request.py +44 -0
  52. webull/core/http/initializer/token/bean/create_token_request.py +45 -0
  53. webull/core/http/initializer/token/bean/refresh_token_request.py +44 -0
  54. webull/core/http/initializer/token/token_manager.py +208 -0
  55. webull/core/http/initializer/token/token_operation.py +72 -0
  56. webull/core/http/method_type.py +43 -0
  57. webull/core/http/protocol_type.py +43 -0
  58. webull/core/http/request.py +121 -0
  59. webull/core/http/response.py +166 -0
  60. webull/core/request.py +278 -0
  61. webull/core/retry/__init__.py +0 -0
  62. webull/core/retry/backoff_strategy.py +102 -0
  63. webull/core/retry/retry_condition.py +214 -0
  64. webull/core/retry/retry_policy.py +63 -0
  65. webull/core/retry/retry_policy_context.py +51 -0
  66. webull/core/utils/__init__.py +0 -0
  67. webull/core/utils/common.py +62 -0
  68. webull/core/utils/data.py +25 -0
  69. webull/core/utils/desensitize.py +33 -0
  70. webull/core/utils/validation.py +49 -0
  71. webull/core/vendored/__init__.py +0 -0
  72. webull/core/vendored/requests/__init__.py +94 -0
  73. webull/core/vendored/requests/__version__.py +28 -0
  74. webull/core/vendored/requests/_internal_utils.py +56 -0
  75. webull/core/vendored/requests/adapters.py +539 -0
  76. webull/core/vendored/requests/api.py +166 -0
  77. webull/core/vendored/requests/auth.py +307 -0
  78. webull/core/vendored/requests/certs.py +34 -0
  79. webull/core/vendored/requests/compat.py +85 -0
  80. webull/core/vendored/requests/cookies.py +555 -0
  81. webull/core/vendored/requests/exceptions.py +136 -0
  82. webull/core/vendored/requests/help.py +134 -0
  83. webull/core/vendored/requests/hooks.py +48 -0
  84. webull/core/vendored/requests/models.py +960 -0
  85. webull/core/vendored/requests/packages/__init__.py +17 -0
  86. webull/core/vendored/requests/packages/certifi/__init__.py +17 -0
  87. webull/core/vendored/requests/packages/certifi/__main__.py +16 -0
  88. webull/core/vendored/requests/packages/certifi/cacert.pem +4433 -0
  89. webull/core/vendored/requests/packages/certifi/core.py +51 -0
  90. webull/core/vendored/requests/packages/chardet/__init__.py +53 -0
  91. webull/core/vendored/requests/packages/chardet/big5freq.py +400 -0
  92. webull/core/vendored/requests/packages/chardet/big5prober.py +61 -0
  93. webull/core/vendored/requests/packages/chardet/chardistribution.py +247 -0
  94. webull/core/vendored/requests/packages/chardet/charsetgroupprober.py +120 -0
  95. webull/core/vendored/requests/packages/chardet/charsetprober.py +159 -0
  96. webull/core/vendored/requests/packages/chardet/cli/__init__.py +1 -0
  97. webull/core/vendored/requests/packages/chardet/cli/chardetect.py +99 -0
  98. webull/core/vendored/requests/packages/chardet/codingstatemachine.py +102 -0
  99. webull/core/vendored/requests/packages/chardet/compat.py +48 -0
  100. webull/core/vendored/requests/packages/chardet/cp949prober.py +63 -0
  101. webull/core/vendored/requests/packages/chardet/enums.py +90 -0
  102. webull/core/vendored/requests/packages/chardet/escprober.py +115 -0
  103. webull/core/vendored/requests/packages/chardet/escsm.py +260 -0
  104. webull/core/vendored/requests/packages/chardet/eucjpprober.py +106 -0
  105. webull/core/vendored/requests/packages/chardet/euckrfreq.py +209 -0
  106. webull/core/vendored/requests/packages/chardet/euckrprober.py +61 -0
  107. webull/core/vendored/requests/packages/chardet/euctwfreq.py +401 -0
  108. webull/core/vendored/requests/packages/chardet/euctwprober.py +60 -0
  109. webull/core/vendored/requests/packages/chardet/gb2312freq.py +297 -0
  110. webull/core/vendored/requests/packages/chardet/gb2312prober.py +60 -0
  111. webull/core/vendored/requests/packages/chardet/hebrewprober.py +306 -0
  112. webull/core/vendored/requests/packages/chardet/jisfreq.py +339 -0
  113. webull/core/vendored/requests/packages/chardet/jpcntx.py +247 -0
  114. webull/core/vendored/requests/packages/chardet/langbulgarianmodel.py +242 -0
  115. webull/core/vendored/requests/packages/chardet/langcyrillicmodel.py +347 -0
  116. webull/core/vendored/requests/packages/chardet/langgreekmodel.py +239 -0
  117. webull/core/vendored/requests/packages/chardet/langhebrewmodel.py +214 -0
  118. webull/core/vendored/requests/packages/chardet/langhungarianmodel.py +239 -0
  119. webull/core/vendored/requests/packages/chardet/langthaimodel.py +213 -0
  120. webull/core/vendored/requests/packages/chardet/langturkishmodel.py +207 -0
  121. webull/core/vendored/requests/packages/chardet/latin1prober.py +159 -0
  122. webull/core/vendored/requests/packages/chardet/mbcharsetprober.py +105 -0
  123. webull/core/vendored/requests/packages/chardet/mbcsgroupprober.py +68 -0
  124. webull/core/vendored/requests/packages/chardet/mbcssm.py +586 -0
  125. webull/core/vendored/requests/packages/chardet/sbcharsetprober.py +146 -0
  126. webull/core/vendored/requests/packages/chardet/sbcsgroupprober.py +87 -0
  127. webull/core/vendored/requests/packages/chardet/sjisprober.py +106 -0
  128. webull/core/vendored/requests/packages/chardet/universaldetector.py +300 -0
  129. webull/core/vendored/requests/packages/chardet/utf8prober.py +96 -0
  130. webull/core/vendored/requests/packages/chardet/version.py +23 -0
  131. webull/core/vendored/requests/packages/urllib3/__init__.py +114 -0
  132. webull/core/vendored/requests/packages/urllib3/_collections.py +346 -0
  133. webull/core/vendored/requests/packages/urllib3/connection.py +405 -0
  134. webull/core/vendored/requests/packages/urllib3/connectionpool.py +910 -0
  135. webull/core/vendored/requests/packages/urllib3/contrib/__init__.py +0 -0
  136. webull/core/vendored/requests/packages/urllib3/contrib/_appengine_environ.py +44 -0
  137. webull/core/vendored/requests/packages/urllib3/contrib/_securetransport/__init__.py +0 -0
  138. webull/core/vendored/requests/packages/urllib3/contrib/_securetransport/bindings.py +607 -0
  139. webull/core/vendored/requests/packages/urllib3/contrib/_securetransport/low_level.py +360 -0
  140. webull/core/vendored/requests/packages/urllib3/contrib/appengine.py +303 -0
  141. webull/core/vendored/requests/packages/urllib3/contrib/ntlmpool.py +125 -0
  142. webull/core/vendored/requests/packages/urllib3/contrib/pyopenssl.py +484 -0
  143. webull/core/vendored/requests/packages/urllib3/contrib/securetransport.py +818 -0
  144. webull/core/vendored/requests/packages/urllib3/contrib/socks.py +206 -0
  145. webull/core/vendored/requests/packages/urllib3/exceptions.py +260 -0
  146. webull/core/vendored/requests/packages/urllib3/fields.py +192 -0
  147. webull/core/vendored/requests/packages/urllib3/filepost.py +112 -0
  148. webull/core/vendored/requests/packages/urllib3/packages/__init__.py +19 -0
  149. webull/core/vendored/requests/packages/urllib3/packages/backports/__init__.py +0 -0
  150. webull/core/vendored/requests/packages/urllib3/packages/backports/makefile.py +67 -0
  151. webull/core/vendored/requests/packages/urllib3/packages/ordered_dict.py +273 -0
  152. webull/core/vendored/requests/packages/urllib3/packages/six.py +882 -0
  153. webull/core/vendored/requests/packages/urllib3/packages/socks.py +887 -0
  154. webull/core/vendored/requests/packages/urllib3/packages/ssl_match_hostname/__init__.py +19 -0
  155. webull/core/vendored/requests/packages/urllib3/packages/ssl_match_hostname/_implementation.py +170 -0
  156. webull/core/vendored/requests/packages/urllib3/poolmanager.py +467 -0
  157. webull/core/vendored/requests/packages/urllib3/request.py +164 -0
  158. webull/core/vendored/requests/packages/urllib3/response.py +721 -0
  159. webull/core/vendored/requests/packages/urllib3/util/__init__.py +68 -0
  160. webull/core/vendored/requests/packages/urllib3/util/connection.py +148 -0
  161. webull/core/vendored/requests/packages/urllib3/util/queue.py +35 -0
  162. webull/core/vendored/requests/packages/urllib3/util/request.py +132 -0
  163. webull/core/vendored/requests/packages/urllib3/util/response.py +101 -0
  164. webull/core/vendored/requests/packages/urllib3/util/retry.py +426 -0
  165. webull/core/vendored/requests/packages/urllib3/util/selectors.py +601 -0
  166. webull/core/vendored/requests/packages/urllib3/util/ssl_.py +396 -0
  167. webull/core/vendored/requests/packages/urllib3/util/timeout.py +256 -0
  168. webull/core/vendored/requests/packages/urllib3/util/url.py +252 -0
  169. webull/core/vendored/requests/packages/urllib3/util/wait.py +164 -0
  170. webull/core/vendored/requests/packages.py +28 -0
  171. webull/core/vendored/requests/sessions.py +750 -0
  172. webull/core/vendored/requests/status_codes.py +105 -0
  173. webull/core/vendored/requests/structures.py +119 -0
  174. webull/core/vendored/requests/utils.py +916 -0
  175. webull/core/vendored/six.py +905 -0
  176. webull/data/__init__.py +3 -0
  177. webull/data/common/__init__.py +0 -0
  178. webull/data/common/category.py +26 -0
  179. webull/data/common/connect_ack.py +29 -0
  180. webull/data/common/direction.py +25 -0
  181. webull/data/common/exchange_code.py +33 -0
  182. webull/data/common/exercise_style.py +22 -0
  183. webull/data/common/expiration_cycle.py +26 -0
  184. webull/data/common/instrument_status.py +23 -0
  185. webull/data/common/option_type.py +20 -0
  186. webull/data/common/subscribe_type.py +22 -0
  187. webull/data/common/timespan.py +29 -0
  188. webull/data/data_client.py +35 -0
  189. webull/data/data_streaming_client.py +89 -0
  190. webull/data/internal/__init__.py +0 -0
  191. webull/data/internal/default_retry_policy.py +84 -0
  192. webull/data/internal/exceptions.py +60 -0
  193. webull/data/internal/quotes_client.py +314 -0
  194. webull/data/internal/quotes_decoder.py +40 -0
  195. webull/data/internal/quotes_payload_decoder.py +35 -0
  196. webull/data/internal/quotes_topic.py +36 -0
  197. webull/data/quotes/__init__.py +0 -0
  198. webull/data/quotes/instrument.py +33 -0
  199. webull/data/quotes/market_data.py +187 -0
  200. webull/data/quotes/market_streaming_data.py +66 -0
  201. webull/data/quotes/subscribe/__init__.py +0 -0
  202. webull/data/quotes/subscribe/ask_bid_result.py +49 -0
  203. webull/data/quotes/subscribe/basic_result.py +45 -0
  204. webull/data/quotes/subscribe/broker_result.py +33 -0
  205. webull/data/quotes/subscribe/message_pb2.py +37 -0
  206. webull/data/quotes/subscribe/order_result.py +30 -0
  207. webull/data/quotes/subscribe/payload_type.py +19 -0
  208. webull/data/quotes/subscribe/quote_decoder.py +28 -0
  209. webull/data/quotes/subscribe/quote_result.py +47 -0
  210. webull/data/quotes/subscribe/snapshot_decoder.py +30 -0
  211. webull/data/quotes/subscribe/snapshot_result.py +69 -0
  212. webull/data/quotes/subscribe/tick_decoder.py +29 -0
  213. webull/data/quotes/subscribe/tick_result.py +47 -0
  214. webull/data/request/__init__.py +0 -0
  215. webull/data/request/get_batch_historical_bars_request.py +43 -0
  216. webull/data/request/get_corp_action_request.py +47 -0
  217. webull/data/request/get_eod_bars_request.py +32 -0
  218. webull/data/request/get_historical_bars_request.py +43 -0
  219. webull/data/request/get_instruments_request.py +30 -0
  220. webull/data/request/get_quotes_request.py +35 -0
  221. webull/data/request/get_snapshot_request.py +38 -0
  222. webull/data/request/get_tick_request.py +37 -0
  223. webull/data/request/subscribe_request.py +43 -0
  224. webull/data/request/unsubscribe_request.py +42 -0
  225. webull/trade/__init__.py +2 -0
  226. webull/trade/common/__init__.py +0 -0
  227. webull/trade/common/account_type.py +22 -0
  228. webull/trade/common/category.py +29 -0
  229. webull/trade/common/combo_ticker_type.py +23 -0
  230. webull/trade/common/combo_type.py +31 -0
  231. webull/trade/common/currency.py +24 -0
  232. webull/trade/common/forbid_reason.py +27 -0
  233. webull/trade/common/instrument_type.py +27 -0
  234. webull/trade/common/markets.py +27 -0
  235. webull/trade/common/order_entrust_type.py +21 -0
  236. webull/trade/common/order_side.py +23 -0
  237. webull/trade/common/order_status.py +25 -0
  238. webull/trade/common/order_tif.py +24 -0
  239. webull/trade/common/order_type.py +30 -0
  240. webull/trade/common/trade_policy.py +22 -0
  241. webull/trade/common/trading_date_type.py +24 -0
  242. webull/trade/common/trailing_type.py +23 -0
  243. webull/trade/events/__init__.py +0 -0
  244. webull/trade/events/default_retry_policy.py +64 -0
  245. webull/trade/events/events_pb2.py +43 -0
  246. webull/trade/events/events_pb2_grpc.py +66 -0
  247. webull/trade/events/signature_composer.py +61 -0
  248. webull/trade/events/types.py +21 -0
  249. webull/trade/request/__init__.py +0 -0
  250. webull/trade/request/cancel_order_request.py +28 -0
  251. webull/trade/request/get_account_balance_request.py +28 -0
  252. webull/trade/request/get_account_positions_request.py +30 -0
  253. webull/trade/request/get_account_profile_request.py +26 -0
  254. webull/trade/request/get_app_subscriptions.py +28 -0
  255. webull/trade/request/get_open_orders_request.py +30 -0
  256. webull/trade/request/get_order_detail_request.py +27 -0
  257. webull/trade/request/get_today_orders_request.py +31 -0
  258. webull/trade/request/get_trade_calendar_request.py +30 -0
  259. webull/trade/request/get_trade_instrument_detail_request.py +24 -0
  260. webull/trade/request/get_trade_security_detail_request.py +42 -0
  261. webull/trade/request/get_tradeable_instruments_request.py +27 -0
  262. webull/trade/request/palce_order_request.py +91 -0
  263. webull/trade/request/place_order_request_v2.py +58 -0
  264. webull/trade/request/replace_order_request.py +73 -0
  265. webull/trade/request/replace_order_request_v2.py +38 -0
  266. webull/trade/request/v2/__init__.py +0 -0
  267. webull/trade/request/v2/cancel_option_request.py +28 -0
  268. webull/trade/request/v2/cancel_order_request.py +28 -0
  269. webull/trade/request/v2/get_account_balance_request.py +28 -0
  270. webull/trade/request/v2/get_account_list.py +23 -0
  271. webull/trade/request/v2/get_account_positions_request.py +24 -0
  272. webull/trade/request/v2/get_order_detail_request.py +26 -0
  273. webull/trade/request/v2/get_order_history_request.py +35 -0
  274. webull/trade/request/v2/palce_order_request.py +87 -0
  275. webull/trade/request/v2/place_option_request.py +64 -0
  276. webull/trade/request/v2/preview_option_request.py +28 -0
  277. webull/trade/request/v2/preview_order_request.py +59 -0
  278. webull/trade/request/v2/replace_option_request.py +28 -0
  279. webull/trade/request/v2/replace_order_request.py +57 -0
  280. webull/trade/trade/__init__.py +0 -0
  281. webull/trade/trade/account_info.py +83 -0
  282. webull/trade/trade/order_operation.py +246 -0
  283. webull/trade/trade/trade_calendar.py +37 -0
  284. webull/trade/trade/trade_instrument.py +72 -0
  285. webull/trade/trade/v2/__init__.py +0 -0
  286. webull/trade/trade/v2/account_info_v2.py +55 -0
  287. webull/trade/trade/v2/order_operation_v2.py +206 -0
  288. webull/trade/trade_client.py +43 -0
  289. webull/trade/trade_events_client.py +233 -0
  290. webull_openapi_python_sdk-1.0.0.dist-info/METADATA +28 -0
  291. webull_openapi_python_sdk-1.0.0.dist-info/RECORD +295 -0
  292. webull_openapi_python_sdk-1.0.0.dist-info/WHEEL +5 -0
  293. webull_openapi_python_sdk-1.0.0.dist-info/licenses/LICENSE +202 -0
  294. webull_openapi_python_sdk-1.0.0.dist-info/licenses/NOTICE +56 -0
  295. webull_openapi_python_sdk-1.0.0.dist-info/top_level.txt +2 -0
@@ -0,0 +1,146 @@
1
+ # Copyright 2022 Webull
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ ######################## BEGIN LICENSE BLOCK ########################
16
+ # The Original Code is Mozilla Universal charset detector code.
17
+ #
18
+ # The Initial Developer of the Original Code is
19
+ # Netscape Communications Corporation.
20
+ # Portions created by the Initial Developer are Copyright (C) 2001
21
+ # the Initial Developer. All Rights Reserved.
22
+ #
23
+ # Contributor(s):
24
+ # Mark Pilgrim - port to Python
25
+ # Shy Shalom - original C code
26
+ #
27
+ # This library is free software; you can redistribute it and/or
28
+ # modify it under the terms of the GNU Lesser General Public
29
+ # License as published by the Free Software Foundation; either
30
+ # version 2.1 of the License, or (at your option) any later version.
31
+ #
32
+ # This library is distributed in the hope that it will be useful,
33
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
34
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35
+ # Lesser General Public License for more details.
36
+ #
37
+ # You should have received a copy of the GNU Lesser General Public
38
+ # License along with this library; if not, write to the Free Software
39
+ # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
40
+ # 02110-1301 USA
41
+ ######################### END LICENSE BLOCK #########################
42
+
43
+ from .charsetprober import CharSetProber
44
+ from .enums import CharacterCategory, ProbingState, SequenceLikelihood
45
+
46
+
47
class SingleByteCharSetProber(CharSetProber):
    """Score a byte stream against one single-byte charset model.

    The model is read as a mapping with the keys ``'char_to_order_map'``,
    ``'precedence_matrix'``, ``'keep_english_letter'``, ``'charset_name'``,
    ``'typical_positive_ratio'`` and (optionally) ``'language'``.

    :param model: the language model mapping described above.
    :param reversed: when true, the two character orders of every pair are
        swapped before the precedence-matrix lookup.
    :param name_prober: optional prober whose ``charset_name`` and
        ``language`` are reported instead of the model's own.
    """

    SAMPLE_SIZE = 64
    SB_ENOUGH_REL_THRESHOLD = 1024  # 0.25 * SAMPLE_SIZE^2
    POSITIVE_SHORTCUT_THRESHOLD = 0.95
    NEGATIVE_SHORTCUT_THRESHOLD = 0.05

    def __init__(self, model, reversed=False, name_prober=None):
        super(SingleByteCharSetProber, self).__init__()
        self._model = model
        self._reversed = reversed
        self._name_prober = name_prober
        # Declared here for readability; reset() assigns the real
        # starting values right below.
        self._last_order = None
        self._seq_counters = None
        self._total_seqs = None
        self._total_char = None
        self._freq_char = None
        self.reset()

    def reset(self):
        """Return every counter to its initial value."""
        super(SingleByteCharSetProber, self).reset()
        # 255 is outside the sampling range, so the first character fed
        # after a reset never completes a pair.
        self._last_order = 255
        self._seq_counters = [0] * SequenceLikelihood.get_num_categories()
        self._total_seqs = 0
        self._total_char = 0
        # Number of characters whose order falls in the sampling range.
        self._freq_char = 0

    @property
    def charset_name(self):
        """Charset name from the name prober if set, else from the model."""
        name_prober = self._name_prober
        if name_prober:
            return name_prober.charset_name
        return self._model['charset_name']

    @property
    def language(self):
        """Language from the name prober if set, else from the model.

        Uses ``.get`` on the model, so a model without a ``'language'``
        entry yields ``None``.
        """
        name_prober = self._name_prober
        if name_prober:
            return name_prober.language
        return self._model.get('language')

    def feed(self, byte_str):
        """Update the statistics with ``byte_str`` and return the state.

        Once more than ``SB_ENOUGH_REL_THRESHOLD`` pairs have been counted
        while the prober is still detecting, a confidence above
        ``POSITIVE_SHORTCUT_THRESHOLD`` switches the state to ``FOUND_IT``
        and one below ``NEGATIVE_SHORTCUT_THRESHOLD`` to ``NOT_ME``.
        """
        lang_model = self._model
        if not lang_model['keep_english_letter']:
            byte_str = self.filter_international_words(byte_str)
        if not byte_str:
            return self.state

        order_of = lang_model['char_to_order_map']
        sample_size = self.SAMPLE_SIZE
        for byte in byte_str:
            # XXX: Order is in range 1-64, so one would think we want 0-63
            #      here, but that leads to 27 more test failures than
            #      before.
            order = order_of[byte]
            # XXX: This was SYMBOL_CAT_ORDER before, with a value of 250,
            #      but CharacterCategory.SYMBOL is actually 253, so CONTROL
            #      is used to stay closer to the original intent. The only
            #      difference is whether digits and control characters
            #      count towards _total_char.
            if order < CharacterCategory.CONTROL:
                self._total_char += 1
            if order < sample_size:
                self._freq_char += 1
                previous = self._last_order
                if previous < sample_size:
                    self._total_seqs += 1
                    if self._reversed:
                        # Look the pair up with the letters swapped.
                        index = (order * sample_size) + previous
                    else:
                        index = (previous * sample_size) + order
                    likelihood = lang_model['precedence_matrix'][index]
                    self._seq_counters[likelihood] += 1
            self._last_order = order

        charset_name = lang_model['charset_name']
        still_detecting = self.state == ProbingState.DETECTING
        if still_detecting and self._total_seqs > self.SB_ENOUGH_REL_THRESHOLD:
            confidence = self.get_confidence()
            if confidence > self.POSITIVE_SHORTCUT_THRESHOLD:
                self.logger.debug('%s confidence = %s, we have a winner',
                                  charset_name, confidence)
                self._state = ProbingState.FOUND_IT
            elif confidence < self.NEGATIVE_SHORTCUT_THRESHOLD:
                self.logger.debug('%s confidence = %s, below negative '
                                  'shortcut threshhold %s', charset_name,
                                  confidence,
                                  self.NEGATIVE_SHORTCUT_THRESHOLD)
                self._state = ProbingState.NOT_ME

        return self.state

    def get_confidence(self):
        """Return the confidence for this model.

        The value is 0.01 while no pair has been counted; a computed value
        of 1.0 or more is reported as 0.99.
        """
        if self._total_seqs <= 0:
            return 0.01
        positive_seqs = self._seq_counters[SequenceLikelihood.POSITIVE]
        ratio = ((1.0 * positive_seqs) /
                 self._total_seqs / self._model['typical_positive_ratio'])
        ratio = ratio * self._freq_char / self._total_char
        if ratio >= 1.0:
            return 0.99
        return ratio
@@ -0,0 +1,87 @@
1
+ # Copyright 2022 Webull
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ ######################## BEGIN LICENSE BLOCK ########################
16
+ # The Original Code is Mozilla Universal charset detector code.
17
+ #
18
+ # The Initial Developer of the Original Code is
19
+ # Netscape Communications Corporation.
20
+ # Portions created by the Initial Developer are Copyright (C) 2001
21
+ # the Initial Developer. All Rights Reserved.
22
+ #
23
+ # Contributor(s):
24
+ # Mark Pilgrim - port to Python
25
+ # Shy Shalom - original C code
26
+ #
27
+ # This library is free software; you can redistribute it and/or
28
+ # modify it under the terms of the GNU Lesser General Public
29
+ # License as published by the Free Software Foundation; either
30
+ # version 2.1 of the License, or (at your option) any later version.
31
+ #
32
+ # This library is distributed in the hope that it will be useful,
33
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
34
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35
+ # Lesser General Public License for more details.
36
+ #
37
+ # You should have received a copy of the GNU Lesser General Public
38
+ # License along with this library; if not, write to the Free Software
39
+ # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
40
+ # 02110-1301 USA
41
+ ######################### END LICENSE BLOCK #########################
42
+
43
+ from .charsetgroupprober import CharSetGroupProber
44
+ from .sbcharsetprober import SingleByteCharSetProber
45
+ from .langcyrillicmodel import (Win1251CyrillicModel, Koi8rModel,
46
+ Latin5CyrillicModel, MacCyrillicModel,
47
+ Ibm866Model, Ibm855Model)
48
+ from .langgreekmodel import Latin7GreekModel, Win1253GreekModel
49
+ from .langbulgarianmodel import Latin5BulgarianModel, Win1251BulgarianModel
50
+ # from .langhungarianmodel import Latin2HungarianModel, Win1250HungarianModel
51
+ from .langthaimodel import TIS620ThaiModel
52
+ from .langhebrewmodel import Win1255HebrewModel
53
+ from .hebrewprober import HebrewProber
54
+ from .langturkishmodel import Latin5TurkishModel
55
+
56
+
57
class SBCSGroupProber(CharSetGroupProber):
    """Group prober holding one prober per single-byte language model.

    The Hebrew model is registered through three cooperating probers:
    a ``HebrewProber`` plus a non-reversed ("logical") and a reversed
    ("visual") ``SingleByteCharSetProber``, both of which report their
    name through the ``HebrewProber``.
    """

    def __init__(self):
        super(SBCSGroupProber, self).__init__()

        # TODO: Restore Hungarian encodings (iso-8859-2 and windows-1250)
        #       after we retrain model (Latin2HungarianModel and
        #       Win1250HungarianModel are intentionally left out).
        plain_models = (
            Win1251CyrillicModel,
            Koi8rModel,
            Latin5CyrillicModel,
            MacCyrillicModel,
            Ibm866Model,
            Ibm855Model,
            Latin7GreekModel,
            Win1253GreekModel,
            Latin5BulgarianModel,
            Win1251BulgarianModel,
            TIS620ThaiModel,
            Latin5TurkishModel,
        )
        self.probers = [SingleByteCharSetProber(model)
                        for model in plain_models]

        # The Hebrew probers reference each other, so they are wired up
        # by hand and appended after the plain ones.
        hebrew_prober = HebrewProber()
        logical_prober = SingleByteCharSetProber(
            Win1255HebrewModel, False, hebrew_prober)
        visual_prober = SingleByteCharSetProber(
            Win1255HebrewModel, True, hebrew_prober)
        hebrew_prober.set_model_probers(logical_prober, visual_prober)
        self.probers.extend(
            (hebrew_prober, logical_prober, visual_prober))

        self.reset()
@@ -0,0 +1,106 @@
1
+ # Copyright 2022 Webull
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ ######################## BEGIN LICENSE BLOCK ########################
16
+ # The Original Code is mozilla.org code.
17
+ #
18
+ # The Initial Developer of the Original Code is
19
+ # Netscape Communications Corporation.
20
+ # Portions created by the Initial Developer are Copyright (C) 1998
21
+ # the Initial Developer. All Rights Reserved.
22
+ #
23
+ # Contributor(s):
24
+ # Mark Pilgrim - port to Python
25
+ #
26
+ # This library is free software; you can redistribute it and/or
27
+ # modify it under the terms of the GNU Lesser General Public
28
+ # License as published by the Free Software Foundation; either
29
+ # version 2.1 of the License, or (at your option) any later version.
30
+ #
31
+ # This library is distributed in the hope that it will be useful,
32
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
33
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
34
+ # Lesser General Public License for more details.
35
+ #
36
+ # You should have received a copy of the GNU Lesser General Public
37
+ # License along with this library; if not, write to the Free Software
38
+ # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
39
+ # 02110-1301 USA
40
+ ######################### END LICENSE BLOCK #########################
41
+
42
+ from .mbcharsetprober import MultiByteCharSetProber
43
+ from .codingstatemachine import CodingStateMachine
44
+ from .chardistribution import SJISDistributionAnalysis
45
+ from .jpcntx import SJISContextAnalysis
46
+ from .mbcssm import SJIS_SM_MODEL
47
+ from .enums import ProbingState, MachineState
48
+
49
+
50
class SJISProber(MultiByteCharSetProber):
    """Multi-byte prober combining the SJIS state machine with a
    character-distribution analyzer and a context analyzer.
    """

    def __init__(self):
        super(SJISProber, self).__init__()
        self.coding_sm = CodingStateMachine(SJIS_SM_MODEL)
        self.distribution_analyzer = SJISDistributionAnalysis()
        self.context_analyzer = SJISContextAnalysis()
        self.reset()

    def reset(self):
        """Reset the inherited state and the context analyzer."""
        super(SJISProber, self).reset()
        self.context_analyzer.reset()

    @property
    def charset_name(self):
        """Charset name as reported by the context analyzer."""
        return self.context_analyzer.charset_name

    @property
    def language(self):
        """Language handled by this prober."""
        return "Japanese"

    def feed(self, byte_str):
        """Run ``byte_str`` through the state machine and both analyzers.

        ``byte_str`` must not be empty: its last byte is stored
        unconditionally after the loop. Returns the prober state.
        """
        state_machine = self.coding_sm
        for pos, byte in enumerate(byte_str):
            coding_state = state_machine.next_state(byte)
            if coding_state == MachineState.ERROR:
                self.logger.debug('%s %s prober hit error at byte %s',
                                  self.charset_name, self.language, pos)
                self._state = ProbingState.NOT_ME
                break
            if coding_state == MachineState.ITS_ME:
                self._state = ProbingState.FOUND_IT
                break
            if coding_state != MachineState.START:
                continue

            # The machine is back at START: hand the analyzers a window of
            # bytes around the current position.
            char_len = state_machine.get_current_charlen()
            if pos == 0:
                # Uses the byte saved in _last_char[0] by an earlier call
                # (presumably initialised by the base class - TODO confirm).
                self._last_char[1] = byte_str[0]
                context_bytes = self._last_char[2 - char_len:]
                distribution_bytes = self._last_char
            else:
                context_start = pos + 1 - char_len
                context_bytes = byte_str[context_start:context_start + 2]
                distribution_bytes = byte_str[pos - 1:pos + 1]
            self.context_analyzer.feed(context_bytes, char_len)
            self.distribution_analyzer.feed(distribution_bytes, char_len)

        # Remember the final byte for the pos == 0 case of the next call.
        self._last_char[0] = byte_str[-1]

        if (self.state == ProbingState.DETECTING and
                self.context_analyzer.got_enough_data() and
                self.get_confidence() > self.SHORTCUT_THRESHOLD):
            self._state = ProbingState.FOUND_IT

        return self.state

    def get_confidence(self):
        """Return the larger of the context and distribution confidences."""
        return max(self.context_analyzer.get_confidence(),
                   self.distribution_analyzer.get_confidence())
@@ -0,0 +1,300 @@
1
+ # Copyright 2022 Webull
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ ######################## BEGIN LICENSE BLOCK ########################
16
+ # The Original Code is Mozilla Universal charset detector code.
17
+ #
18
+ # The Initial Developer of the Original Code is
19
+ # Netscape Communications Corporation.
20
+ # Portions created by the Initial Developer are Copyright (C) 2001
21
+ # the Initial Developer. All Rights Reserved.
22
+ #
23
+ # Contributor(s):
24
+ # Mark Pilgrim - port to Python
25
+ # Shy Shalom - original C code
26
+ #
27
+ # This library is free software; you can redistribute it and/or
28
+ # modify it under the terms of the GNU Lesser General Public
29
+ # License as published by the Free Software Foundation; either
30
+ # version 2.1 of the License, or (at your option) any later version.
31
+ #
32
+ # This library is distributed in the hope that it will be useful,
33
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
34
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35
+ # Lesser General Public License for more details.
36
+ #
37
+ # You should have received a copy of the GNU Lesser General Public
38
+ # License along with this library; if not, write to the Free Software
39
+ # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
40
+ # 02110-1301 USA
41
+ ######################### END LICENSE BLOCK #########################
42
+ """
43
+ Module containing the UniversalDetector detector class, which is the primary
44
+ class a user of ``chardet`` should use.
45
+
46
+ :author: Mark Pilgrim (initial port to Python)
47
+ :author: Shy Shalom (original C code)
48
+ :author: Dan Blanchard (major refactoring for 3.0)
49
+ :author: Ian Cordasco
50
+ """
51
+
52
+
53
+ import codecs
54
+ import logging
55
+ import re
56
+
57
+ from .charsetgroupprober import CharSetGroupProber
58
+ from .enums import InputState, LanguageFilter, ProbingState
59
+ from .escprober import EscCharSetProber
60
+ from .latin1prober import Latin1Prober
61
+ from .mbcsgroupprober import MBCSGroupProber
62
+ from .sbcsgroupprober import SBCSGroupProber
63
+
64
+
65
class UniversalDetector(object):
    """
    The ``UniversalDetector`` class underlies the ``chardet.detect`` function
    and coordinates all of the different charset probers.

    To get a ``dict`` containing an encoding and its confidence, you can simply
    run:

    .. code::

            u = UniversalDetector()
            u.feed(some_bytes)
            u.close()
            detected = u.result

    """

    # A prober's confidence must exceed this for ``close`` to report it.
    MINIMUM_THRESHOLD = 0.20
    HIGH_BYTE_DETECTOR = re.compile(b'[\x80-\xFF]')
    ESC_DETECTOR = re.compile(b'(\033|~{)')
    WIN_BYTE_DETECTOR = re.compile(b'[\x80-\x9F]')
    # Lower-cased ISO-8859 name -> Windows code page name reported instead
    # when bytes in the 0x80-0x9F range were seen.
    ISO_WIN_MAP = {'iso-8859-1': 'Windows-1252',
                   'iso-8859-2': 'Windows-1250',
                   'iso-8859-5': 'Windows-1251',
                   'iso-8859-6': 'Windows-1256',
                   'iso-8859-7': 'Windows-1253',
                   'iso-8859-8': 'Windows-1255',
                   'iso-8859-9': 'Windows-1254',
                   'iso-8859-13': 'Windows-1257'}

    def __init__(self, lang_filter=LanguageFilter.ALL):
        """
        :param lang_filter: ``LanguageFilter`` value passed on to the
            escape and multi-byte probers; it also decides whether the
            single-byte group prober is used.
        """
        self._esc_charset_prober = None
        self._charset_probers = []
        self.result = None
        self.done = None
        self._got_data = None
        self._input_state = None
        self._last_char = None
        self.lang_filter = lang_filter
        self.logger = logging.getLogger(__name__)
        self._has_win_bytes = None
        self.reset()

    def reset(self):
        """
        Reset the UniversalDetector and all of its probers back to their
        initial states.  This is called by ``__init__``, so you only need to
        call this directly in between analyses of different documents.
        """
        self.result = {'encoding': None, 'confidence': 0.0, 'language': None}
        self.done = False
        self._got_data = False
        self._has_win_bytes = False
        self._input_state = InputState.PURE_ASCII
        self._last_char = b''
        if self._esc_charset_prober:
            self._esc_charset_prober.reset()
        for prober in self._charset_probers:
            prober.reset()

    def feed(self, byte_str):
        """
        Takes a chunk of a document and feeds it through all of the relevant
        charset probers.

        After calling ``feed``, you can check the value of the ``done``
        attribute to see if you need to continue feeding the
        ``UniversalDetector`` more data, or if it has made a prediction
        (in the ``result`` attribute).

        .. note::
           You should always call ``close`` when you're done feeding in your
           document if ``done`` is not already ``True``.

        :param byte_str: chunk of the document; converted to ``bytearray``
            if it is not one already. Empty chunks are ignored.
        """
        if self.done:
            return

        if not byte_str:
            return

        if not isinstance(byte_str, bytearray):
            byte_str = bytearray(byte_str)

        # First check for known BOMs, since these are guaranteed to be correct
        if not self._got_data:
            # If the data starts with BOM, we know it is UTF
            if byte_str.startswith(codecs.BOM_UTF8):
                # EF BB BF  UTF-8 with BOM
                self.result = {'encoding': "UTF-8-SIG",
                               'confidence': 1.0,
                               'language': ''}
            elif byte_str.startswith((codecs.BOM_UTF32_LE,
                                      codecs.BOM_UTF32_BE)):
                # FF FE 00 00  UTF-32, little-endian BOM
                # 00 00 FE FF  UTF-32, big-endian BOM
                self.result = {'encoding': "UTF-32",
                               'confidence': 1.0,
                               'language': ''}
            elif byte_str.startswith(b'\xFE\xFF\x00\x00'):
                # FE FF 00 00  UCS-4, unusual octet order BOM (3412)
                self.result = {'encoding': "X-ISO-10646-UCS-4-3412",
                               'confidence': 1.0,
                               'language': ''}
            elif byte_str.startswith(b'\x00\x00\xFF\xFE'):
                # 00 00 FF FE  UCS-4, unusual octet order BOM (2143)
                self.result = {'encoding': "X-ISO-10646-UCS-4-2143",
                               'confidence': 1.0,
                               'language': ''}
            elif byte_str.startswith((codecs.BOM_LE, codecs.BOM_BE)):
                # FF FE  UTF-16, little endian BOM
                # FE FF  UTF-16, big endian BOM
                self.result = {'encoding': "UTF-16",
                               'confidence': 1.0,
                               'language': ''}

            self._got_data = True
            if self.result['encoding'] is not None:
                self.done = True
                return

        # If none of those matched and we've only seen ASCII so far, check
        # for high bytes and escape sequences
        if self._input_state == InputState.PURE_ASCII:
            if self.HIGH_BYTE_DETECTOR.search(byte_str):
                self._input_state = InputState.HIGH_BYTE
            elif self.ESC_DETECTOR.search(self._last_char + byte_str):
                # The previous chunk's last byte is prepended so that a
                # two-byte "~{" split across chunks is still found.
                self._input_state = InputState.ESC_ASCII

        self._last_char = byte_str[-1:]

        # If we've seen escape sequences, use the EscCharSetProber, which
        # uses a simple state machine to check for known escape sequences in
        # HZ and ISO-2022 encodings, since those are the only encodings that
        # use such sequences.
        if self._input_state == InputState.ESC_ASCII:
            if not self._esc_charset_prober:
                self._esc_charset_prober = EscCharSetProber(self.lang_filter)
            if self._esc_charset_prober.feed(byte_str) == ProbingState.FOUND_IT:
                self.result = {'encoding':
                               self._esc_charset_prober.charset_name,
                               'confidence':
                               self._esc_charset_prober.get_confidence(),
                               'language':
                               self._esc_charset_prober.language}
                self.done = True
        # If we've seen high bytes (i.e., those with values greater than 127),
        # we need to do more complicated checks using all our multi-byte and
        # single-byte probers that are left.  The single-byte probers
        # use character bigram distributions to determine the encoding, whereas
        # the multi-byte probers use a combination of character unigram and
        # bigram distributions.
        elif self._input_state == InputState.HIGH_BYTE:
            if not self._charset_probers:
                self._charset_probers = [MBCSGroupProber(self.lang_filter)]
                # If we're checking non-CJK encodings, use single-byte prober
                if self.lang_filter & LanguageFilter.NON_CJK:
                    self._charset_probers.append(SBCSGroupProber())
                self._charset_probers.append(Latin1Prober())
            for prober in self._charset_probers:
                if prober.feed(byte_str) == ProbingState.FOUND_IT:
                    self.result = {'encoding': prober.charset_name,
                                   'confidence': prober.get_confidence(),
                                   'language': prober.language}
                    self.done = True
                    break
            if self.WIN_BYTE_DETECTOR.search(byte_str):
                self._has_win_bytes = True

    def close(self):
        """
        Stop analyzing the current document and come up with a final
        prediction.

        :returns:  The ``result`` attribute, a ``dict`` with the keys
                   `encoding`, `confidence`, and `language`.
        """
        # Don't bother with checks if we're already done
        if self.done:
            return self.result
        self.done = True

        if not self._got_data:
            self.logger.debug('no data received!')

        # Default to ASCII if it is all we've seen so far
        elif self._input_state == InputState.PURE_ASCII:
            self.result = {'encoding': 'ascii',
                           'confidence': 1.0,
                           'language': ''}

        # If we have seen non-ASCII, return the best that met MINIMUM_THRESHOLD
        elif self._input_state == InputState.HIGH_BYTE:
            max_prober_confidence = 0.0
            max_prober = None
            for prober in self._charset_probers:
                if not prober:
                    continue
                prober_confidence = prober.get_confidence()
                if prober_confidence > max_prober_confidence:
                    max_prober_confidence = prober_confidence
                    max_prober = prober
            if max_prober and (max_prober_confidence > self.MINIMUM_THRESHOLD):
                charset_name = max_prober.charset_name
                lower_charset_name = max_prober.charset_name.lower()
                confidence = max_prober.get_confidence()
                # Use Windows encoding name instead of ISO-8859 if we saw any
                # extra Windows-specific bytes
                if lower_charset_name.startswith('iso-8859'):
                    if self._has_win_bytes:
                        charset_name = self.ISO_WIN_MAP.get(lower_charset_name,
                                                            charset_name)
                self.result = {'encoding': charset_name,
                               'confidence': confidence,
                               'language': max_prober.language}

        # Log all prober confidences if none met MINIMUM_THRESHOLD.
        # isEnabledFor() is used so that effective levels below DEBUG also
        # produce the dump, not only a level that is exactly DEBUG.
        if (self.result['encoding'] is None
                and self.logger.isEnabledFor(logging.DEBUG)):
            self.logger.debug('no probers hit minimum threshold')
            for group_prober in self._charset_probers:
                if not group_prober:
                    continue
                if isinstance(group_prober, CharSetGroupProber):
                    for prober in group_prober.probers:
                        self.logger.debug('%s %s confidence = %s',
                                          prober.charset_name,
                                          prober.language,
                                          prober.get_confidence())
                else:
                    # Report the stand-alone prober itself, not whichever
                    # prober an earlier loop happened to leave bound.
                    self.logger.debug('%s %s confidence = %s',
                                      group_prober.charset_name,
                                      group_prober.language,
                                      group_prober.get_confidence())
        return self.result