webull-openapi-python-sdk 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (295) hide show
  1. samples/__init__.py +1 -0
  2. samples/data/__init__.py +1 -0
  3. samples/data/data_client.py +57 -0
  4. samples/data/data_streaming_client.py +86 -0
  5. samples/data/data_streaming_client_async.py +101 -0
  6. samples/trade/__init__.py +0 -0
  7. samples/trade/trade_client.py +163 -0
  8. samples/trade/trade_client_v2.py +181 -0
  9. samples/trade/trade_event_client.py +47 -0
  10. webull/__init__.py +1 -0
  11. webull/core/__init__.py +12 -0
  12. webull/core/auth/__init__.py +0 -0
  13. webull/core/auth/algorithm/__init__.py +0 -0
  14. webull/core/auth/algorithm/sha_hmac1.py +65 -0
  15. webull/core/auth/algorithm/sha_hmac256.py +75 -0
  16. webull/core/auth/composer/__init__.py +0 -0
  17. webull/core/auth/composer/default_signature_composer.py +125 -0
  18. webull/core/auth/credentials.py +46 -0
  19. webull/core/auth/signers/__init__.py +0 -0
  20. webull/core/auth/signers/app_key_signer.py +72 -0
  21. webull/core/auth/signers/signer.py +48 -0
  22. webull/core/auth/signers/signer_factory.py +58 -0
  23. webull/core/cache/__init__.py +225 -0
  24. webull/core/client.py +410 -0
  25. webull/core/common/__init__.py +0 -0
  26. webull/core/common/api_type.py +19 -0
  27. webull/core/common/easy_enum.py +35 -0
  28. webull/core/common/region.py +7 -0
  29. webull/core/compat.py +85 -0
  30. webull/core/context/__init__.py +0 -0
  31. webull/core/context/request_context_holder.py +33 -0
  32. webull/core/data/endpoints.json +22 -0
  33. webull/core/data/retry_config.json +15 -0
  34. webull/core/endpoint/__init__.py +8 -0
  35. webull/core/endpoint/chained_endpoint_resolver.py +57 -0
  36. webull/core/endpoint/default_endpoint_resolver.py +60 -0
  37. webull/core/endpoint/local_config_regional_endpoint_resolver.py +77 -0
  38. webull/core/endpoint/resolver_endpoint_request.py +46 -0
  39. webull/core/endpoint/user_customized_endpoint_resolver.py +55 -0
  40. webull/core/exception/__init__.py +0 -0
  41. webull/core/exception/error_code.py +23 -0
  42. webull/core/exception/error_msg.py +21 -0
  43. webull/core/exception/exceptions.py +53 -0
  44. webull/core/headers.py +57 -0
  45. webull/core/http/__init__.py +0 -0
  46. webull/core/http/initializer/__init__.py +0 -0
  47. webull/core/http/initializer/client_initializer.py +79 -0
  48. webull/core/http/initializer/token/__init__.py +0 -0
  49. webull/core/http/initializer/token/bean/__init__.py +0 -0
  50. webull/core/http/initializer/token/bean/access_token.py +40 -0
  51. webull/core/http/initializer/token/bean/check_token_request.py +44 -0
  52. webull/core/http/initializer/token/bean/create_token_request.py +45 -0
  53. webull/core/http/initializer/token/bean/refresh_token_request.py +44 -0
  54. webull/core/http/initializer/token/token_manager.py +208 -0
  55. webull/core/http/initializer/token/token_operation.py +72 -0
  56. webull/core/http/method_type.py +43 -0
  57. webull/core/http/protocol_type.py +43 -0
  58. webull/core/http/request.py +121 -0
  59. webull/core/http/response.py +166 -0
  60. webull/core/request.py +278 -0
  61. webull/core/retry/__init__.py +0 -0
  62. webull/core/retry/backoff_strategy.py +102 -0
  63. webull/core/retry/retry_condition.py +214 -0
  64. webull/core/retry/retry_policy.py +63 -0
  65. webull/core/retry/retry_policy_context.py +51 -0
  66. webull/core/utils/__init__.py +0 -0
  67. webull/core/utils/common.py +62 -0
  68. webull/core/utils/data.py +25 -0
  69. webull/core/utils/desensitize.py +33 -0
  70. webull/core/utils/validation.py +49 -0
  71. webull/core/vendored/__init__.py +0 -0
  72. webull/core/vendored/requests/__init__.py +94 -0
  73. webull/core/vendored/requests/__version__.py +28 -0
  74. webull/core/vendored/requests/_internal_utils.py +56 -0
  75. webull/core/vendored/requests/adapters.py +539 -0
  76. webull/core/vendored/requests/api.py +166 -0
  77. webull/core/vendored/requests/auth.py +307 -0
  78. webull/core/vendored/requests/certs.py +34 -0
  79. webull/core/vendored/requests/compat.py +85 -0
  80. webull/core/vendored/requests/cookies.py +555 -0
  81. webull/core/vendored/requests/exceptions.py +136 -0
  82. webull/core/vendored/requests/help.py +134 -0
  83. webull/core/vendored/requests/hooks.py +48 -0
  84. webull/core/vendored/requests/models.py +960 -0
  85. webull/core/vendored/requests/packages/__init__.py +17 -0
  86. webull/core/vendored/requests/packages/certifi/__init__.py +17 -0
  87. webull/core/vendored/requests/packages/certifi/__main__.py +16 -0
  88. webull/core/vendored/requests/packages/certifi/cacert.pem +4433 -0
  89. webull/core/vendored/requests/packages/certifi/core.py +51 -0
  90. webull/core/vendored/requests/packages/chardet/__init__.py +53 -0
  91. webull/core/vendored/requests/packages/chardet/big5freq.py +400 -0
  92. webull/core/vendored/requests/packages/chardet/big5prober.py +61 -0
  93. webull/core/vendored/requests/packages/chardet/chardistribution.py +247 -0
  94. webull/core/vendored/requests/packages/chardet/charsetgroupprober.py +120 -0
  95. webull/core/vendored/requests/packages/chardet/charsetprober.py +159 -0
  96. webull/core/vendored/requests/packages/chardet/cli/__init__.py +1 -0
  97. webull/core/vendored/requests/packages/chardet/cli/chardetect.py +99 -0
  98. webull/core/vendored/requests/packages/chardet/codingstatemachine.py +102 -0
  99. webull/core/vendored/requests/packages/chardet/compat.py +48 -0
  100. webull/core/vendored/requests/packages/chardet/cp949prober.py +63 -0
  101. webull/core/vendored/requests/packages/chardet/enums.py +90 -0
  102. webull/core/vendored/requests/packages/chardet/escprober.py +115 -0
  103. webull/core/vendored/requests/packages/chardet/escsm.py +260 -0
  104. webull/core/vendored/requests/packages/chardet/eucjpprober.py +106 -0
  105. webull/core/vendored/requests/packages/chardet/euckrfreq.py +209 -0
  106. webull/core/vendored/requests/packages/chardet/euckrprober.py +61 -0
  107. webull/core/vendored/requests/packages/chardet/euctwfreq.py +401 -0
  108. webull/core/vendored/requests/packages/chardet/euctwprober.py +60 -0
  109. webull/core/vendored/requests/packages/chardet/gb2312freq.py +297 -0
  110. webull/core/vendored/requests/packages/chardet/gb2312prober.py +60 -0
  111. webull/core/vendored/requests/packages/chardet/hebrewprober.py +306 -0
  112. webull/core/vendored/requests/packages/chardet/jisfreq.py +339 -0
  113. webull/core/vendored/requests/packages/chardet/jpcntx.py +247 -0
  114. webull/core/vendored/requests/packages/chardet/langbulgarianmodel.py +242 -0
  115. webull/core/vendored/requests/packages/chardet/langcyrillicmodel.py +347 -0
  116. webull/core/vendored/requests/packages/chardet/langgreekmodel.py +239 -0
  117. webull/core/vendored/requests/packages/chardet/langhebrewmodel.py +214 -0
  118. webull/core/vendored/requests/packages/chardet/langhungarianmodel.py +239 -0
  119. webull/core/vendored/requests/packages/chardet/langthaimodel.py +213 -0
  120. webull/core/vendored/requests/packages/chardet/langturkishmodel.py +207 -0
  121. webull/core/vendored/requests/packages/chardet/latin1prober.py +159 -0
  122. webull/core/vendored/requests/packages/chardet/mbcharsetprober.py +105 -0
  123. webull/core/vendored/requests/packages/chardet/mbcsgroupprober.py +68 -0
  124. webull/core/vendored/requests/packages/chardet/mbcssm.py +586 -0
  125. webull/core/vendored/requests/packages/chardet/sbcharsetprober.py +146 -0
  126. webull/core/vendored/requests/packages/chardet/sbcsgroupprober.py +87 -0
  127. webull/core/vendored/requests/packages/chardet/sjisprober.py +106 -0
  128. webull/core/vendored/requests/packages/chardet/universaldetector.py +300 -0
  129. webull/core/vendored/requests/packages/chardet/utf8prober.py +96 -0
  130. webull/core/vendored/requests/packages/chardet/version.py +23 -0
  131. webull/core/vendored/requests/packages/urllib3/__init__.py +114 -0
  132. webull/core/vendored/requests/packages/urllib3/_collections.py +346 -0
  133. webull/core/vendored/requests/packages/urllib3/connection.py +405 -0
  134. webull/core/vendored/requests/packages/urllib3/connectionpool.py +910 -0
  135. webull/core/vendored/requests/packages/urllib3/contrib/__init__.py +0 -0
  136. webull/core/vendored/requests/packages/urllib3/contrib/_appengine_environ.py +44 -0
  137. webull/core/vendored/requests/packages/urllib3/contrib/_securetransport/__init__.py +0 -0
  138. webull/core/vendored/requests/packages/urllib3/contrib/_securetransport/bindings.py +607 -0
  139. webull/core/vendored/requests/packages/urllib3/contrib/_securetransport/low_level.py +360 -0
  140. webull/core/vendored/requests/packages/urllib3/contrib/appengine.py +303 -0
  141. webull/core/vendored/requests/packages/urllib3/contrib/ntlmpool.py +125 -0
  142. webull/core/vendored/requests/packages/urllib3/contrib/pyopenssl.py +484 -0
  143. webull/core/vendored/requests/packages/urllib3/contrib/securetransport.py +818 -0
  144. webull/core/vendored/requests/packages/urllib3/contrib/socks.py +206 -0
  145. webull/core/vendored/requests/packages/urllib3/exceptions.py +260 -0
  146. webull/core/vendored/requests/packages/urllib3/fields.py +192 -0
  147. webull/core/vendored/requests/packages/urllib3/filepost.py +112 -0
  148. webull/core/vendored/requests/packages/urllib3/packages/__init__.py +19 -0
  149. webull/core/vendored/requests/packages/urllib3/packages/backports/__init__.py +0 -0
  150. webull/core/vendored/requests/packages/urllib3/packages/backports/makefile.py +67 -0
  151. webull/core/vendored/requests/packages/urllib3/packages/ordered_dict.py +273 -0
  152. webull/core/vendored/requests/packages/urllib3/packages/six.py +882 -0
  153. webull/core/vendored/requests/packages/urllib3/packages/socks.py +887 -0
  154. webull/core/vendored/requests/packages/urllib3/packages/ssl_match_hostname/__init__.py +19 -0
  155. webull/core/vendored/requests/packages/urllib3/packages/ssl_match_hostname/_implementation.py +170 -0
  156. webull/core/vendored/requests/packages/urllib3/poolmanager.py +467 -0
  157. webull/core/vendored/requests/packages/urllib3/request.py +164 -0
  158. webull/core/vendored/requests/packages/urllib3/response.py +721 -0
  159. webull/core/vendored/requests/packages/urllib3/util/__init__.py +68 -0
  160. webull/core/vendored/requests/packages/urllib3/util/connection.py +148 -0
  161. webull/core/vendored/requests/packages/urllib3/util/queue.py +35 -0
  162. webull/core/vendored/requests/packages/urllib3/util/request.py +132 -0
  163. webull/core/vendored/requests/packages/urllib3/util/response.py +101 -0
  164. webull/core/vendored/requests/packages/urllib3/util/retry.py +426 -0
  165. webull/core/vendored/requests/packages/urllib3/util/selectors.py +601 -0
  166. webull/core/vendored/requests/packages/urllib3/util/ssl_.py +396 -0
  167. webull/core/vendored/requests/packages/urllib3/util/timeout.py +256 -0
  168. webull/core/vendored/requests/packages/urllib3/util/url.py +252 -0
  169. webull/core/vendored/requests/packages/urllib3/util/wait.py +164 -0
  170. webull/core/vendored/requests/packages.py +28 -0
  171. webull/core/vendored/requests/sessions.py +750 -0
  172. webull/core/vendored/requests/status_codes.py +105 -0
  173. webull/core/vendored/requests/structures.py +119 -0
  174. webull/core/vendored/requests/utils.py +916 -0
  175. webull/core/vendored/six.py +905 -0
  176. webull/data/__init__.py +3 -0
  177. webull/data/common/__init__.py +0 -0
  178. webull/data/common/category.py +26 -0
  179. webull/data/common/connect_ack.py +29 -0
  180. webull/data/common/direction.py +25 -0
  181. webull/data/common/exchange_code.py +33 -0
  182. webull/data/common/exercise_style.py +22 -0
  183. webull/data/common/expiration_cycle.py +26 -0
  184. webull/data/common/instrument_status.py +23 -0
  185. webull/data/common/option_type.py +20 -0
  186. webull/data/common/subscribe_type.py +22 -0
  187. webull/data/common/timespan.py +29 -0
  188. webull/data/data_client.py +35 -0
  189. webull/data/data_streaming_client.py +89 -0
  190. webull/data/internal/__init__.py +0 -0
  191. webull/data/internal/default_retry_policy.py +84 -0
  192. webull/data/internal/exceptions.py +60 -0
  193. webull/data/internal/quotes_client.py +314 -0
  194. webull/data/internal/quotes_decoder.py +40 -0
  195. webull/data/internal/quotes_payload_decoder.py +35 -0
  196. webull/data/internal/quotes_topic.py +36 -0
  197. webull/data/quotes/__init__.py +0 -0
  198. webull/data/quotes/instrument.py +33 -0
  199. webull/data/quotes/market_data.py +187 -0
  200. webull/data/quotes/market_streaming_data.py +66 -0
  201. webull/data/quotes/subscribe/__init__.py +0 -0
  202. webull/data/quotes/subscribe/ask_bid_result.py +49 -0
  203. webull/data/quotes/subscribe/basic_result.py +45 -0
  204. webull/data/quotes/subscribe/broker_result.py +33 -0
  205. webull/data/quotes/subscribe/message_pb2.py +37 -0
  206. webull/data/quotes/subscribe/order_result.py +30 -0
  207. webull/data/quotes/subscribe/payload_type.py +19 -0
  208. webull/data/quotes/subscribe/quote_decoder.py +28 -0
  209. webull/data/quotes/subscribe/quote_result.py +47 -0
  210. webull/data/quotes/subscribe/snapshot_decoder.py +30 -0
  211. webull/data/quotes/subscribe/snapshot_result.py +69 -0
  212. webull/data/quotes/subscribe/tick_decoder.py +29 -0
  213. webull/data/quotes/subscribe/tick_result.py +47 -0
  214. webull/data/request/__init__.py +0 -0
  215. webull/data/request/get_batch_historical_bars_request.py +43 -0
  216. webull/data/request/get_corp_action_request.py +47 -0
  217. webull/data/request/get_eod_bars_request.py +32 -0
  218. webull/data/request/get_historical_bars_request.py +43 -0
  219. webull/data/request/get_instruments_request.py +30 -0
  220. webull/data/request/get_quotes_request.py +35 -0
  221. webull/data/request/get_snapshot_request.py +38 -0
  222. webull/data/request/get_tick_request.py +37 -0
  223. webull/data/request/subscribe_request.py +43 -0
  224. webull/data/request/unsubscribe_request.py +42 -0
  225. webull/trade/__init__.py +2 -0
  226. webull/trade/common/__init__.py +0 -0
  227. webull/trade/common/account_type.py +22 -0
  228. webull/trade/common/category.py +29 -0
  229. webull/trade/common/combo_ticker_type.py +23 -0
  230. webull/trade/common/combo_type.py +31 -0
  231. webull/trade/common/currency.py +24 -0
  232. webull/trade/common/forbid_reason.py +27 -0
  233. webull/trade/common/instrument_type.py +27 -0
  234. webull/trade/common/markets.py +27 -0
  235. webull/trade/common/order_entrust_type.py +21 -0
  236. webull/trade/common/order_side.py +23 -0
  237. webull/trade/common/order_status.py +25 -0
  238. webull/trade/common/order_tif.py +24 -0
  239. webull/trade/common/order_type.py +30 -0
  240. webull/trade/common/trade_policy.py +22 -0
  241. webull/trade/common/trading_date_type.py +24 -0
  242. webull/trade/common/trailing_type.py +23 -0
  243. webull/trade/events/__init__.py +0 -0
  244. webull/trade/events/default_retry_policy.py +64 -0
  245. webull/trade/events/events_pb2.py +43 -0
  246. webull/trade/events/events_pb2_grpc.py +66 -0
  247. webull/trade/events/signature_composer.py +61 -0
  248. webull/trade/events/types.py +21 -0
  249. webull/trade/request/__init__.py +0 -0
  250. webull/trade/request/cancel_order_request.py +28 -0
  251. webull/trade/request/get_account_balance_request.py +28 -0
  252. webull/trade/request/get_account_positions_request.py +30 -0
  253. webull/trade/request/get_account_profile_request.py +26 -0
  254. webull/trade/request/get_app_subscriptions.py +28 -0
  255. webull/trade/request/get_open_orders_request.py +30 -0
  256. webull/trade/request/get_order_detail_request.py +27 -0
  257. webull/trade/request/get_today_orders_request.py +31 -0
  258. webull/trade/request/get_trade_calendar_request.py +30 -0
  259. webull/trade/request/get_trade_instrument_detail_request.py +24 -0
  260. webull/trade/request/get_trade_security_detail_request.py +42 -0
  261. webull/trade/request/get_tradeable_instruments_request.py +27 -0
  262. webull/trade/request/palce_order_request.py +91 -0
  263. webull/trade/request/place_order_request_v2.py +58 -0
  264. webull/trade/request/replace_order_request.py +73 -0
  265. webull/trade/request/replace_order_request_v2.py +38 -0
  266. webull/trade/request/v2/__init__.py +0 -0
  267. webull/trade/request/v2/cancel_option_request.py +28 -0
  268. webull/trade/request/v2/cancel_order_request.py +28 -0
  269. webull/trade/request/v2/get_account_balance_request.py +28 -0
  270. webull/trade/request/v2/get_account_list.py +23 -0
  271. webull/trade/request/v2/get_account_positions_request.py +24 -0
  272. webull/trade/request/v2/get_order_detail_request.py +26 -0
  273. webull/trade/request/v2/get_order_history_request.py +35 -0
  274. webull/trade/request/v2/palce_order_request.py +87 -0
  275. webull/trade/request/v2/place_option_request.py +64 -0
  276. webull/trade/request/v2/preview_option_request.py +28 -0
  277. webull/trade/request/v2/preview_order_request.py +59 -0
  278. webull/trade/request/v2/replace_option_request.py +28 -0
  279. webull/trade/request/v2/replace_order_request.py +57 -0
  280. webull/trade/trade/__init__.py +0 -0
  281. webull/trade/trade/account_info.py +83 -0
  282. webull/trade/trade/order_operation.py +246 -0
  283. webull/trade/trade/trade_calendar.py +37 -0
  284. webull/trade/trade/trade_instrument.py +72 -0
  285. webull/trade/trade/v2/__init__.py +0 -0
  286. webull/trade/trade/v2/account_info_v2.py +55 -0
  287. webull/trade/trade/v2/order_operation_v2.py +206 -0
  288. webull/trade/trade_client.py +43 -0
  289. webull/trade/trade_events_client.py +233 -0
  290. webull_openapi_python_sdk-1.0.0.dist-info/METADATA +28 -0
  291. webull_openapi_python_sdk-1.0.0.dist-info/RECORD +295 -0
  292. webull_openapi_python_sdk-1.0.0.dist-info/WHEEL +5 -0
  293. webull_openapi_python_sdk-1.0.0.dist-info/licenses/LICENSE +202 -0
  294. webull_openapi_python_sdk-1.0.0.dist-info/licenses/NOTICE +56 -0
  295. webull_openapi_python_sdk-1.0.0.dist-info/top_level.txt +2 -0
@@ -0,0 +1,247 @@
1
+ # Copyright 2022 Webull
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ ######################## BEGIN LICENSE BLOCK ########################
16
+ # The Original Code is Mozilla Communicator client code.
17
+ #
18
+ # The Initial Developer of the Original Code is
19
+ # Netscape Communications Corporation.
20
+ # Portions created by the Initial Developer are Copyright (C) 1998
21
+ # the Initial Developer. All Rights Reserved.
22
+ #
23
+ # Contributor(s):
24
+ # Mark Pilgrim - port to Python
25
+ #
26
+ # This library is free software; you can redistribute it and/or
27
+ # modify it under the terms of the GNU Lesser General Public
28
+ # License as published by the Free Software Foundation; either
29
+ # version 2.1 of the License, or (at your option) any later version.
30
+ #
31
+ # This library is distributed in the hope that it will be useful,
32
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
33
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
34
+ # Lesser General Public License for more details.
35
+ #
36
+ # You should have received a copy of the GNU Lesser General Public
37
+ # License along with this library; if not, write to the Free Software
38
+ # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
39
+ # 02110-1301 USA
40
+ ######################### END LICENSE BLOCK #########################
41
+
42
+ from .euctwfreq import (EUCTW_CHAR_TO_FREQ_ORDER, EUCTW_TABLE_SIZE,
43
+ EUCTW_TYPICAL_DISTRIBUTION_RATIO)
44
+ from .euckrfreq import (EUCKR_CHAR_TO_FREQ_ORDER, EUCKR_TABLE_SIZE,
45
+ EUCKR_TYPICAL_DISTRIBUTION_RATIO)
46
+ from .gb2312freq import (GB2312_CHAR_TO_FREQ_ORDER, GB2312_TABLE_SIZE,
47
+ GB2312_TYPICAL_DISTRIBUTION_RATIO)
48
+ from .big5freq import (BIG5_CHAR_TO_FREQ_ORDER, BIG5_TABLE_SIZE,
49
+ BIG5_TYPICAL_DISTRIBUTION_RATIO)
50
+ from .jisfreq import (JIS_CHAR_TO_FREQ_ORDER, JIS_TABLE_SIZE,
51
+ JIS_TYPICAL_DISTRIBUTION_RATIO)
52
+
53
+
54
class CharDistributionAnalysis(object):
    """Collect frequency statistics over two-byte characters.

    On its own this base class has no frequency table and its ``get_order``
    rejects every character.  The per-encoding subclasses assign
    ``_char_to_freq_order``, ``_table_size`` and
    ``typical_distribution_ratio`` and override ``get_order``.
    """

    # got_enough_data() turns True once more characters than this were counted.
    ENOUGH_DATA_THRESHOLD = 1024
    # Highest confidence get_confidence() will ever report.
    SURE_YES = 0.99
    # Confidence reported while too little usable data has been seen.
    SURE_NO = 0.01
    # get_confidence() answers SURE_NO until the number of frequent
    # characters exceeds this value.
    MINIMUM_DATA_THRESHOLD = 3

    def __init__(self):
        # Table translating a character order (as produced by get_order())
        # into a frequency order, together with the size of that table.
        self._char_to_freq_order = None
        self._table_size = None
        # Language-dependent constant used when computing the confidence. See
        # http://www.mozilla.org/projects/intl/UniversalCharsetDetection.html
        # for further detail.
        self.typical_distribution_ratio = None
        # Counters; reset() gives them their real starting values.
        self._done = None
        self._total_chars = None
        self._freq_chars = None
        self.reset()

    def reset(self):
        """Clear all accumulated state so the analyser can be reused."""
        # Flag meaning "detection finished, a conclusion was reached".
        self._done = False
        # Number of characters with a valid (non-negative) order.
        self._total_chars = 0
        # Number of characters whose frequency order is below 512.
        self._freq_chars = 0

    def feed(self, char, char_len):
        """Account for one character whose byte length is already known.

        Only characters of length 2 are looked up through ``get_order``;
        every other length is ignored.
        """
        order = self.get_order(char) if char_len == 2 else -1
        if order < 0:
            # Not a character this analyser cares about.
            return
        self._total_chars += 1
        # Orders beyond the table count towards the total only.
        if order < self._table_size and self._char_to_freq_order[order] < 512:
            self._freq_chars += 1

    def get_confidence(self):
        """Return a confidence value derived from the data fed so far."""
        total = self._total_chars
        frequent = self._freq_chars
        # Nothing usable (or too few frequent characters): negative answer.
        if total <= 0 or frequent <= self.MINIMUM_DATA_THRESHOLD:
            return self.SURE_NO

        rare = total - frequent
        if rare:
            ratio = frequent / (rare * self.typical_distribution_ratio)
            if ratio < self.SURE_YES:
                return ratio

        # Cap the answer: the analyser never claims to be 100% sure.
        return self.SURE_YES

    def got_enough_data(self):
        """Tell whether enough characters were counted to draw a conclusion."""
        # Seeing the whole input is unnecessary; a certain amount suffices.
        return self._total_chars > self.ENOUGH_DATA_THRESHOLD

    def get_order(self, byte_str):
        """Map a two-byte character to its order, or -1 if not of interest.

        Characters are not handled through their raw encoded bytes; they are
        converted to a number (the "order") so that several encodings of one
        language can share a single frequency table.  The base
        implementation rejects everything.
        """
        return -1
125
+
126
+
127
class EUCTWDistributionAnalysis(CharDistributionAnalysis):
    """Distribution analysis backed by the EUC-TW frequency table."""

    def __init__(self):
        super(EUCTWDistributionAnalysis, self).__init__()
        self.typical_distribution_ratio = EUCTW_TYPICAL_DISTRIBUTION_RATIO
        self._table_size = EUCTW_TABLE_SIZE
        self._char_to_freq_order = EUCTW_CHAR_TO_FREQ_ORDER

    def get_order(self, byte_str):
        """Return the order of an EUC-TW character, or -1 if out of range.

        Characters of interest have
          first  byte range: 0xc4 -- 0xfe
          second byte range: 0xa1 -- 0xfe
        No validation is done here; the state machine has done that.
        """
        lead = byte_str[0]
        if lead < 0xC4:
            return -1
        # 94 second-byte slots per lead byte.
        return 94 * (lead - 0xC4) + byte_str[1] - 0xA1
144
+
145
+
146
class EUCKRDistributionAnalysis(CharDistributionAnalysis):
    """Distribution analysis backed by the EUC-KR frequency table."""

    def __init__(self):
        super(EUCKRDistributionAnalysis, self).__init__()
        self.typical_distribution_ratio = EUCKR_TYPICAL_DISTRIBUTION_RATIO
        self._table_size = EUCKR_TABLE_SIZE
        self._char_to_freq_order = EUCKR_CHAR_TO_FREQ_ORDER

    def get_order(self, byte_str):
        """Return the order of an EUC-KR character, or -1 if out of range.

        Characters of interest have
          first  byte range: 0xb0 -- 0xfe
          second byte range: 0xa1 -- 0xfe
        No validation is done here; the state machine has done that.
        """
        lead = byte_str[0]
        if lead < 0xB0:
            return -1
        # 94 second-byte slots per lead byte.
        return 94 * (lead - 0xB0) + byte_str[1] - 0xA1
163
+
164
+
165
class GB2312DistributionAnalysis(CharDistributionAnalysis):
    """Distribution analysis backed by the GB2312 frequency table."""

    def __init__(self):
        super(GB2312DistributionAnalysis, self).__init__()
        self.typical_distribution_ratio = GB2312_TYPICAL_DISTRIBUTION_RATIO
        self._table_size = GB2312_TABLE_SIZE
        self._char_to_freq_order = GB2312_CHAR_TO_FREQ_ORDER

    def get_order(self, byte_str):
        """Return the order of a GB2312 character, or -1 if out of range.

        Characters of interest have
          first  byte range: 0xb0 -- 0xfe
          second byte range: 0xa1 -- 0xfe
        No validation is done here; the state machine has done that.
        """
        lead, trail = byte_str[0], byte_str[1]
        if lead < 0xB0 or trail < 0xA1:
            return -1
        # 94 second-byte slots per lead byte.
        return 94 * (lead - 0xB0) + trail - 0xA1
182
+
183
+
184
class Big5DistributionAnalysis(CharDistributionAnalysis):
    """Distribution analysis backed by the Big5 frequency table."""

    def __init__(self):
        super(Big5DistributionAnalysis, self).__init__()
        self.typical_distribution_ratio = BIG5_TYPICAL_DISTRIBUTION_RATIO
        self._table_size = BIG5_TABLE_SIZE
        self._char_to_freq_order = BIG5_CHAR_TO_FREQ_ORDER

    def get_order(self, byte_str):
        """Return the order of a Big5 character, or -1 if out of range.

        Characters of interest have
          first  byte range: 0xa4 -- 0xfe
          second byte range: 0x40 -- 0x7e , 0xa1 -- 0xfe
        No validation is done here; the state machine has done that.
        """
        lead, trail = byte_str[0], byte_str[1]
        if lead < 0xA4:
            return -1
        # 157 second-byte slots per lead byte.
        row_start = 157 * (lead - 0xA4)
        if trail >= 0xA1:
            # Upper second-byte range is placed after the 63 slots used by
            # the lower range.
            return row_start + trail - 0xA1 + 63
        return row_start + trail - 0x40
204
+
205
+
206
class SJISDistributionAnalysis(CharDistributionAnalysis):
    """Distribution analysis for Shift_JIS using the shared JIS table."""

    def __init__(self):
        super(SJISDistributionAnalysis, self).__init__()
        self.typical_distribution_ratio = JIS_TYPICAL_DISTRIBUTION_RATIO
        self._table_size = JIS_TABLE_SIZE
        self._char_to_freq_order = JIS_CHAR_TO_FREQ_ORDER

    def get_order(self, byte_str):
        """Return the order of a Shift_JIS character, or -1 if not counted.

        Lead bytes accepted by this method:
          0x81 -- 0x9f and 0xe0 -- 0xef
        Second bytes that yield an order:
          up to 0x7f (anything above 0x7f is rejected with -1)
        No validation is done here; the state machine has done that.
        """
        lead, trail = byte_str[0], byte_str[1]
        # 188 second-byte slots per lead byte; the second lead-byte range
        # continues after the 31 rows of the first one.
        if 0x81 <= lead <= 0x9F:
            row_start = 188 * (lead - 0x81)
        elif 0xE0 <= lead <= 0xEF:
            row_start = 188 * (lead - 0xE0 + 31)
        else:
            return -1
        # NOTE(review): second bytes above 0x7F are discarded entirely here.
        # Presumably the reference implementation only shifts the order down
        # by one for them (to skip the unused 0x7F slot) - confirm against
        # the Mozilla sources before changing, as it affects detection.
        if trail > 0x7F:
            return -1
        return row_start + trail - 0x40
229
+
230
+
231
class EUCJPDistributionAnalysis(CharDistributionAnalysis):
    """Distribution analysis for EUC-JP using the shared JIS table."""

    def __init__(self):
        super(EUCJPDistributionAnalysis, self).__init__()
        self.typical_distribution_ratio = JIS_TYPICAL_DISTRIBUTION_RATIO
        self._table_size = JIS_TABLE_SIZE
        self._char_to_freq_order = JIS_CHAR_TO_FREQ_ORDER

    def get_order(self, byte_str):
        """Return the order of an EUC-JP character, or -1 if out of range.

        Characters of interest have
          first  byte range: 0xa0 -- 0xfe
          second byte range: 0xa1 -- 0xfe
        No validation is done here; the state machine has done that.
        """
        lead = byte_str[0]
        if lead < 0xA0:
            return -1
        # Rows are counted from 0xA1, so a lead byte of exactly 0xA0 produces
        # a negative order.
        return 94 * (lead - 0xA1) + byte_str[1] - 0xA1
@@ -0,0 +1,120 @@
1
+ # Copyright 2022 Webull
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ ######################## BEGIN LICENSE BLOCK ########################
16
+ # The Original Code is Mozilla Communicator client code.
17
+ #
18
+ # The Initial Developer of the Original Code is
19
+ # Netscape Communications Corporation.
20
+ # Portions created by the Initial Developer are Copyright (C) 1998
21
+ # the Initial Developer. All Rights Reserved.
22
+ #
23
+ # Contributor(s):
24
+ # Mark Pilgrim - port to Python
25
+ #
26
+ # This library is free software; you can redistribute it and/or
27
+ # modify it under the terms of the GNU Lesser General Public
28
+ # License as published by the Free Software Foundation; either
29
+ # version 2.1 of the License, or (at your option) any later version.
30
+ #
31
+ # This library is distributed in the hope that it will be useful,
32
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
33
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
34
+ # Lesser General Public License for more details.
35
+ #
36
+ # You should have received a copy of the GNU Lesser General Public
37
+ # License along with this library; if not, write to the Free Software
38
+ # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
39
+ # 02110-1301 USA
40
+ ######################### END LICENSE BLOCK #########################
41
+
42
+ from .enums import ProbingState
43
+ from .charsetprober import CharSetProber
44
+
45
+
46
class CharSetGroupProber(CharSetProber):
    """Prober that delegates to a list of child probers.

    Input is fed to every active child in ``self.probers``.  The group
    tracks how many children are still active and which child currently
    is the best guess; ``charset_name`` and ``language`` are answered by
    that child.
    """

    def __init__(self, lang_filter=None):
        super(CharSetGroupProber, self).__init__(lang_filter=lang_filter)
        # Number of children that have not been ruled out yet.
        self._active_num = 0
        # Child probers; this class leaves the list empty.
        self.probers = []
        # Child that reported FOUND_IT or had the highest confidence so far.
        self._best_guess_prober = None

    def reset(self):
        """Reset the group and re-activate every child prober."""
        super(CharSetGroupProber, self).reset()
        self._active_num = 0
        for prober in self.probers:
            # Falsy entries in the list are skipped.
            if prober:
                prober.reset()
                prober.active = True
                self._active_num += 1
        self._best_guess_prober = None

    @property
    def charset_name(self):
        """Charset name of the best-guess child, or None if there is none."""
        if not self._best_guess_prober:
            # get_confidence() selects the best guess as a side effect.
            self.get_confidence()
            if not self._best_guess_prober:
                return None
        return self._best_guess_prober.charset_name

    @property
    def language(self):
        """Language of the best-guess child, or None if there is none."""
        if not self._best_guess_prober:
            # get_confidence() selects the best guess as a side effect.
            self.get_confidence()
            if not self._best_guess_prober:
                return None
        return self._best_guess_prober.language

    def feed(self, byte_str):
        """Feed ``byte_str`` to every active child and return the group state.

        Stops at the first child reporting FOUND_IT.  A child reporting
        NOT_ME is deactivated; once no active child is left the group
        itself becomes NOT_ME.
        """
        for prober in self.probers:
            if not prober:
                continue
            if not prober.active:
                continue
            state = prober.feed(byte_str)
            if not state:
                # Falsy state: this child has no verdict yet.
                continue
            if state == ProbingState.FOUND_IT:
                self._best_guess_prober = prober
                # Record the positive result on the group as well; without
                # this the returned state would still be the previous one
                # and the caller could not see that a child succeeded.
                self._state = ProbingState.FOUND_IT
                return self.state
            elif state == ProbingState.NOT_ME:
                prober.active = False
                self._active_num -= 1
                if self._active_num <= 0:
                    self._state = ProbingState.NOT_ME
                    return self.state
        return self.state

    def get_confidence(self):
        """Return the group's confidence and update the best-guess child.

        Returns 0.99 when the group state is FOUND_IT and 0.01 when it is
        NOT_ME.  Otherwise the highest confidence among the active
        children is returned (0.0 if no child has a positive confidence)
        and that child becomes the best guess.
        """
        state = self.state
        if state == ProbingState.FOUND_IT:
            return 0.99
        elif state == ProbingState.NOT_ME:
            return 0.01
        best_conf = 0.0
        self._best_guess_prober = None
        for prober in self.probers:
            if not prober:
                continue
            if not prober.active:
                self.logger.debug('%s not active', prober.charset_name)
                continue
            conf = prober.get_confidence()
            self.logger.debug('%s %s confidence = %s', prober.charset_name, prober.language, conf)
            if best_conf < conf:
                best_conf = conf
                self._best_guess_prober = prober
        if not self._best_guess_prober:
            return 0.0
        return best_conf
@@ -0,0 +1,159 @@
1
+ # Copyright 2022 Webull
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ ######################## BEGIN LICENSE BLOCK ########################
16
+ # The Original Code is Mozilla Universal charset detector code.
17
+ #
18
+ # The Initial Developer of the Original Code is
19
+ # Netscape Communications Corporation.
20
+ # Portions created by the Initial Developer are Copyright (C) 2001
21
+ # the Initial Developer. All Rights Reserved.
22
+ #
23
+ # Contributor(s):
24
+ # Mark Pilgrim - port to Python
25
+ # Shy Shalom - original C code
26
+ #
27
+ # This library is free software; you can redistribute it and/or
28
+ # modify it under the terms of the GNU Lesser General Public
29
+ # License as published by the Free Software Foundation; either
30
+ # version 2.1 of the License, or (at your option) any later version.
31
+ #
32
+ # This library is distributed in the hope that it will be useful,
33
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
34
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35
+ # Lesser General Public License for more details.
36
+ #
37
+ # You should have received a copy of the GNU Lesser General Public
38
+ # License along with this library; if not, write to the Free Software
39
+ # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
40
+ # 02110-1301 USA
41
+ ######################### END LICENSE BLOCK #########################
42
+
43
+ import logging
44
+ import re
45
+
46
+ from .enums import ProbingState
47
+
48
+
49
class CharSetProber(object):
    """
    Base class for charset probers.

    Stores the probing state, the optional language filter and a logger,
    and offers byte-filtering helpers as static methods.  ``charset_name``,
    ``feed`` and ``get_confidence`` are neutral placeholders in this class.
    """

    SHORTCUT_THRESHOLD = 0.95

    def __init__(self, lang_filter=None):
        """Store ``lang_filter`` and create the logger; no state is set yet."""
        self.logger = logging.getLogger(__name__)
        self.lang_filter = lang_filter
        # The state stays None until reset() is called.
        self._state = None

    def reset(self):
        """Put the prober into the ``DETECTING`` state."""
        self._state = ProbingState.DETECTING

    @property
    def charset_name(self):
        """Charset name; the base class has none and returns ``None``."""
        return None

    def feed(self, buf):
        """Accept ``buf``; the base implementation ignores it and returns ``None``."""
        return None

    @property
    def state(self):
        """Current probing state as stored in ``_state``."""
        return self._state

    def get_confidence(self):
        """Confidence of this prober; the base class always returns 0.0."""
        return 0.0

    @staticmethod
    def filter_high_byte_only(buf):
        """Replace every run of ASCII bytes (0x00-0x7F) in ``buf`` by one space."""
        return re.sub(b'([\x00-\x7F])+', b' ', buf)

    @staticmethod
    def filter_international_words(buf):
        """
        Keep only the words of ``buf`` that contain an international byte.

        Three kinds of bytes are distinguished:
            alphabet:      English letters, a-z and A-Z
            international: bytes 0x80-0xFF
            marker:        everything else

        Words are runs of alphabet/international bytes delimited by markers.
        Every word holding at least one international byte is copied to the
        result; a marker that directly follows such a word is emitted as a
        single ASCII space.  Returns a ``bytearray``.
        """
        # A word with at least one international byte, optionally followed
        # by one trailing marker byte.
        pattern = b'[a-zA-Z]*[\x80-\xFF]+[a-zA-Z]*[^a-zA-Z\x80-\xFF]?'

        result = bytearray()
        for match in re.findall(pattern, buf):
            body, tail = match[:-1], match[-1:]
            result.extend(body)
            # The final byte is either part of the word (letter or high
            # byte) and kept as is, or a marker, which is normalised to a
            # space because markers carry no language information.
            if tail.isalpha() or tail >= b'\x80':
                result.extend(tail)
            else:
                result.extend(b' ')
        return result

    @staticmethod
    def filter_with_english_letters(buf):
        """
        Return a ``bytearray`` with the letter/high-byte runs of ``buf``
        found outside ``<...>`` tags, separated by single spaces.

        Every ASCII byte that is not a letter acts as a delimiter.  A run is
        kept only if the delimiter ending it is seen while no tag is open.
        Because the tag flag is updated before that check, a run directly
        followed by ``<`` is dropped, while a run directly followed by ``>``
        (the tail of a tag) is kept.  A trailing run is kept if the input
        does not end inside a tag.
        """
        kept = bytearray()
        inside_tag = False
        run_start = 0

        for idx in range(len(buf)):
            # Slicing yields a bytes object of length one on Python 3,
            # whereas indexing would yield an int.
            ch = buf[idx:idx + 1]

            # Track entering / leaving a tag before deciding what to keep.
            if ch == b'>':
                inside_tag = False
            elif ch == b'<':
                inside_tag = True

            # Letters and high bytes belong to the current run.
            if ch >= b'\x80' or ch.isalpha():
                continue

            # Delimiter reached: flush the run unless a tag is open.
            if idx > run_start and not inside_tag:
                kept.extend(buf[run_start:idx])
                kept.extend(b' ')
            run_start = idx + 1

        # Flush whatever follows the last delimiter when no tag is open.
        if not inside_tag:
            kept.extend(buf[run_start:])

        return kept
@@ -0,0 +1,99 @@
1
+ # Copyright 2022 Webull
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ #!/usr/bin/env python
16
+ """
17
+ Script which takes one or more file paths and reports on their detected
18
+ encodings
19
+
20
+ Example::
21
+
22
+ % chardetect somefile someotherfile
23
+ somefile: windows-1252 with confidence 0.5
24
+ someotherfile: ascii with confidence 1.0
25
+
26
+ If no paths are provided, it takes its input from stdin.
27
+
28
+ """
29
+
30
+ from __future__ import absolute_import, print_function, unicode_literals
31
+
32
+ import argparse
33
+ import sys
34
+
35
+ from chardet import __version__
36
+ from chardet.compat import PY2
37
+ from chardet.universaldetector import UniversalDetector
38
+
39
+
40
def description_of(lines, name='stdin'):
    """
    Return a string describing the probable encoding of a file or
    list of strings.

    :param lines: The lines to get the encoding of.
    :type lines: Iterable of bytes
    :param name: Name of file or collection of lines
    :type name: str
    """
    detector = UniversalDetector()
    for chunk in lines:
        detector.feed(bytearray(chunk))
        # Stop reading as soon as the detector is done, e.g. after a BOM.
        if detector.done:
            break
    detector.close()
    outcome = detector.result

    if PY2:
        name = name.decode(sys.getfilesystemencoding(), 'ignore')

    encoding = outcome['encoding']
    if not encoding:
        return '{0}: no result'.format(name)
    return '{0}: {1} with confidence {2}'.format(name, encoding,
                                                 outcome['confidence'])
66
+
67
+
68
def main(argv=None):
    """
    Handles command line arguments and gets things started.

    Every input file is described with ``description_of`` and the result is
    printed to stdout.  Files opened by argparse are closed once they have
    been processed; standard input is left open.

    :param argv: List of arguments, as if specified on the command-line.
                 If None, ``sys.argv[1:]`` is used instead.
    :type argv: list of str
    """
    # Get command line arguments.  Adjacent string literals are used instead
    # of a backslash continuation inside the literal, so the text no longer
    # contains the source indentation.
    parser = argparse.ArgumentParser(
        description="Takes one or more file paths and reports their "
                    "detected encodings")
    parser.add_argument('input',
                        help='File whose encoding we would like to determine. '
                             '(default: stdin)',
                        type=argparse.FileType('rb'), nargs='*',
                        default=[sys.stdin if PY2 else sys.stdin.buffer])
    parser.add_argument('--version', action='version',
                        version='%(prog)s {0}'.format(__version__))
    args = parser.parse_args(argv)

    # Streams owned by the interpreter; these must not be closed here.
    shared_streams = (sys.stdin, getattr(sys.stdin, 'buffer', None))

    for f in args.input:
        try:
            if f.isatty():
                print("You are running chardetect interactively. Press " +
                      "CTRL-D twice at the start of a blank line to signal the " +
                      "end of your input. If you want help, run chardetect " +
                      "--help\n", file=sys.stderr)
            print(description_of(f, f.name))
        finally:
            # argparse.FileType opens every path while parsing and never
            # closes it, so release the handle as soon as it was handled.
            if not any(f is stream for stream in shared_streams):
                f.close()


# Run the command-line interface only when executed as a script.
if __name__ == '__main__':
    main()