ustrade 0.4.0__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ustrade/__init__.py CHANGED
@@ -5,6 +5,7 @@ from .codes import HSCode
5
5
  from .errors import *
6
6
 
7
7
  from importlib import metadata
8
+ from typing import Literal
8
9
 
9
10
  try:
10
11
  __version__ = metadata.version("ustrade")
@@ -158,6 +159,31 @@ def get_product(hs: str) -> HSCode:
158
159
  """
159
160
  return _get_default_client().get_product(hs)
160
161
 
162
def search_for_code(keyword: str | list[str],
                    mode: Literal["OR", "AND"] = "OR",
                    in_codes: str | None = None) -> pd.DataFrame:
    """
    Search the HS code descriptions for one or more keywords.

    Thin module-level wrapper: the call is forwarded unchanged to the
    ``search_for_code`` method of the default client.

    Args:
        keyword (str | list[str]):
            A single keyword or a list of keywords.
        mode (Literal["OR", "AND"]):
            How several keywords are combined. Default uses "OR".
            "OR" mode will return every code associated with at least one keyword.
            "AND" mode will return only the codes associated with all the keywords.
        in_codes (str | None):
            The code chapter or heading to look in. Default None will search
            across all chapters.

    Returns:
        pd.DataFrame:
            A dataframe containing the list of associated codes.

    Examples:
        >>> ut.search_for_code(keyword="oil", in_codes="27")
    """
    return _get_default_client().search_for_code(keyword, mode, in_codes)
186
+
161
187
 
162
188
  __all__ = [
163
189
  "CensusClient",
@@ -171,5 +197,6 @@ __all__ = [
171
197
  "get_country_by_iso2",
172
198
  "get_desc_from_code",
173
199
  "get_children_codes",
174
- "get_product"
175
- ]
200
+ "get_product",
201
+ "search_for_code",
202
+ ]
ustrade/client.py CHANGED
@@ -2,7 +2,11 @@ import requests
2
2
  import socket
3
3
  from datetime import datetime
4
4
  import pandas as pd
5
+ import re
5
6
  from urllib.parse import urlencode
7
+ from typing import Literal
8
+ import unicodedata
9
+
6
10
  from . import countries
7
11
  from .countries import Country
8
12
  from . import codes
@@ -23,7 +27,7 @@ class CensusClient:
23
27
  self.BASE_URL = "api.census.gov"
24
28
  self.BASE_PORT = 443
25
29
 
26
- self._hs_codes, self._codes_by_hs_codes = codes._load_codes()
30
+ self._hs_codes, self._codes_by_hs_codes, self._desc_by_hs_codes = codes._load_codes()
27
31
  self._code_tree = codes.build_tree_from_codes(self._hs_codes)
28
32
 
29
33
  self.col_mapping = {
@@ -397,8 +401,9 @@ class CensusClient:
397
401
  """
398
402
  Returns the description of the specified HS code
399
403
 
400
- ## Args:
401
- hs (str): the HS code (ex: '1806')
404
+ Args:
405
+ hs (str):
406
+ the HS code (ex: '1806')
402
407
  """
403
408
  if isinstance(hs, str):
404
409
  if hs in self._codes_by_hs_codes:
@@ -477,6 +482,81 @@ class CensusClient:
477
482
  raise InvalidCodeError(
478
483
  f"Code must be a str or a HSCode instance - received a {type(code).__name__!r}"
479
484
  )
485
+
486
+
487
+ def _normalize_kw(self, s: str) -> str:
488
+ s = s.lower()
489
+ s = unicodedata.normalize("NFKD", s)
490
+ s = "".join(c for c in s if not unicodedata.combining(c))
491
+ s = re.sub(r"[^a-z0-9\s]+", " ", s)
492
+ return re.sub(r"\s+", " ", s).strip()
493
+
494
def _tokenize(self, s: str) -> list[str]:
    """
    Split a string into normalized word tokens.

    Args:
        s (str):
            the text to tokenize.

    Returns:
        list[str]:
            the whitespace-separated words of ``s`` after it has been
            passed through ``_normalize_kw``.
    """
    normalized = self._normalize_kw(s)
    return normalized.split()
496
+
497
+
498
def search_for_code(self, keyword: str | list[str],
                    mode: Literal["OR", "AND"] = "OR",
                    in_codes: str | None = None) -> pd.DataFrame:
    """
    Search the HS code descriptions for one or more keywords.

    Keywords and descriptions are both normalized and split into words.
    A keyword matches a description when at least one word of the
    description starts with it (prefix match).

    Args:
        keyword (str | list[str]):
            a single keyword or a list of keywords. Every entry is split
            into words, so "crude oil" is searched as "crude" and "oil".
        mode (Literal["OR", "AND"]):
            how several keywords are combined. Default uses "OR".
            "OR" mode will return every code matching at least one keyword.
            "AND" mode will return only the codes matching all the keywords.
            Any value other than "OR" is handled as "AND".
        in_codes (str | None):
            a known HS code (e.g. a chapter or heading); only codes starting
            with it are searched. Default None will search across all codes.

    Returns:
        pd.DataFrame:
            A dataframe with a "Description" and a "Code" column, one row
            per matching code. It is empty when nothing matches or when
            ``keyword`` contains no searchable word.

    Raises:
        CodeNotFoundError: if ``in_codes`` is given but is not a known code.

    Examples:
        >>> ut.search_for_code(keyword = "oil", in_codes = "27")

    """
    if in_codes is not None and in_codes not in self._codes_by_hs_codes:
        raise CodeNotFoundError(f"Error : {in_codes} was not found as a valid code.")

    # Tokenize every entry so list items get the same treatment as a plain
    # string; entries that normalize to nothing simply drop out.
    raw_keywords = [keyword] if isinstance(keyword, str) else list(keyword)
    keywords = [tok for raw in raw_keywords for tok in self._tokenize(raw)]

    matches = []
    # Without any keyword there is nothing to look for: return no rows
    # rather than letting all([]) select every code in "AND" mode.
    if keywords:
        combine = any if mode == "OR" else all
        # Walk the full code list, not the description-keyed dict: several
        # codes can share one description and the dict keeps only one of them.
        for code in self._hs_codes:
            if in_codes is not None and not code.hscode.startswith(in_codes):
                continue

            tokens = self._tokenize(code.description)
            if combine(any(tok.startswith(k) for tok in tokens) for k in keywords):
                matches.append(code)

    return pd.DataFrame({
        "Description": [c.description for c in matches],
        "Code": [c.hscode for c in matches],
    })
554
+
555
+
556
+
557
+
558
+
559
+
480
560
 
481
561
 
482
562
 
ustrade/codes.py CHANGED
@@ -19,7 +19,7 @@ class HSCode:
19
19
  return self.parent
20
20
 
21
21
 
22
- def _load_codes() -> tuple[list[HSCode], dict[str, HSCode]]:
22
+ def _load_codes() -> tuple[list[HSCode], dict[str, HSCode], dict[str, HSCode]]:
23
23
  csv_path = files(__package__) / "data" / "harmonized-system.csv"
24
24
  codes: list[HSCode] = []
25
25
  with csv_path.open(encoding="utf-8") as f:
@@ -37,7 +37,7 @@ def _load_codes() -> tuple[list[HSCode], dict[str, HSCode]]:
37
37
  )
38
38
  )
39
39
 
40
- return codes, {c.hscode: c for c in codes}
40
+ return codes, {c.hscode: c for c in codes}, {c.description: c for c in codes}
41
41
 
42
42
 
43
43
  def _get_parent(code: str) -> str | None:
@@ -70,4 +70,3 @@ def build_tree_from_codes(codes: list[HSCode]) -> dict[str, HSCode]:
70
70
  parent_node.children.append(node.hscode)
71
71
 
72
72
  return code_dict
73
-
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ustrade
3
- Version: 0.4.0
3
+ Version: 0.5.0
4
4
  Summary: Python client for the U.S. Census Bureau International Trade API
5
5
  Author: Fantin Sibony
6
6
  License-Expression: MIT
@@ -10,6 +10,8 @@ Description-Content-Type: text/markdown
10
10
  License-File: LICENSE.txt
11
11
  Requires-Dist: requests
12
12
  Requires-Dist: pandas
13
+ Provides-Extra: dev
14
+ Requires-Dist: pytest; extra == "dev"
13
15
  Dynamic: license-file
14
16
 
15
17
  # ustrade
@@ -20,7 +22,9 @@ Dynamic: license-file
20
22
  <p align="left">
21
23
  <img src="https://img.shields.io/badge/python-3.10%2B-blue" />
22
24
  <img src="https://img.shields.io/badge/status-active-success" />
25
+ <img src="https://img.shields.io/pypi/v/ustrade" />
23
26
  <img src="https://img.shields.io/badge/license-MIT-green" />
27
+ <img src="https://static.pepy.tech/personalized-badge/ustrade?period=total&units=INTERNATIONAL_SYSTEM&left_color=BLACK&right_color=GREEN&left_text=downloads" />
24
28
  </p>
25
29
 
26
30
  ---
@@ -34,8 +38,10 @@ Dynamic: license-file
34
38
  - `"4279"` (Census code)
35
39
  - HS codes lookup + product descriptions
36
40
  - Standardized DataFrame output with clean column names
37
- - Uses a cached internal client for efficiency
38
- - Zero configuration required
41
+
42
+ For interactive exploration and non-Python usage, a Streamlit dashboard is available for this library at [https://ustrade.streamlit.app](https://ustrade.streamlit.app).
43
+
44
+ Source code: [https://github.com/fantinsib/ustrade_dashboard](https://github.com/fantinsib/ustrade_dashboard)
39
45
 
40
46
  ---
41
47
 
@@ -0,0 +1,12 @@
1
+ ustrade/__init__.py,sha256=NBsxO-lvNGvQ-iK3Qfmvg-6YsuGrdqq1QuhGoSYdHEQ,7016
2
+ ustrade/client.py,sha256=k_NyEN_IrZcIG7SfbfEqbAA6pKz3Er8ufjhFEVCm48k,19263
3
+ ustrade/codes.py,sha256=OA8thItlx0vczrzwmdPO3Skf3COwfGYE4K4hAW69oB8,1953
4
+ ustrade/countries.py,sha256=8CagTTccgs_Obr3dSpTffPKcikVe7eUdObcer-azIbU,783
5
+ ustrade/errors.py,sha256=AIKPwwb1fUmtvZkwqNpVAXevwD5jSz2NN8_iYzpM1Dk,802
6
+ ustrade/data/country_codes.csv,sha256=xwpr9MmBsIB78lIiKGDCaWDbGaO1kyDz0Vxwcxu1TTU,5794
7
+ ustrade/data/harmonized-system.csv,sha256=537p4c_RWkLR-t6ywGGNEPa57ZrsDl1mgm3H750xJqU,850576
8
+ ustrade-0.5.0.dist-info/licenses/LICENSE.txt,sha256=dwk5PMNyALOsBcKsVyrMjtoD0LvXr-Lu61cMjFJJn7I,1066
9
+ ustrade-0.5.0.dist-info/METADATA,sha256=HBXw6S5RRworsbL-8VeluS2q9ZIMw7d2mwCpY4bU5co,5084
10
+ ustrade-0.5.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
11
+ ustrade-0.5.0.dist-info/top_level.txt,sha256=sGFKm5WBJQE8rpKswtEFbRvBNzrON1Dr5UCdQptW2GE,8
12
+ ustrade-0.5.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.9.0)
2
+ Generator: setuptools (80.10.2)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,12 +0,0 @@
1
- ustrade/__init__.py,sha256=uOIWAm1QLJKPm0ajouB4t_NNh822k3g3xc8M6AITma0,5968
2
- ustrade/client.py,sha256=txGvXKFSC88T74HmBd5Q9b9L-7Bvh_eu1sEX3SWB16k,16611
3
- ustrade/codes.py,sha256=-4f54TUF9Dq93PYuzJli6d5HyGlWWmYlHqngb1GJe40,1900
4
- ustrade/countries.py,sha256=8CagTTccgs_Obr3dSpTffPKcikVe7eUdObcer-azIbU,783
5
- ustrade/errors.py,sha256=AIKPwwb1fUmtvZkwqNpVAXevwD5jSz2NN8_iYzpM1Dk,802
6
- ustrade/data/country_codes.csv,sha256=xwpr9MmBsIB78lIiKGDCaWDbGaO1kyDz0Vxwcxu1TTU,5794
7
- ustrade/data/harmonized-system.csv,sha256=537p4c_RWkLR-t6ywGGNEPa57ZrsDl1mgm3H750xJqU,850576
8
- ustrade-0.4.0.dist-info/licenses/LICENSE.txt,sha256=dwk5PMNyALOsBcKsVyrMjtoD0LvXr-Lu61cMjFJJn7I,1066
9
- ustrade-0.4.0.dist-info/METADATA,sha256=IhiWRMtf6vFlsmD88cITFl1F0f5HE_OmlZ4vGalWbsA,4602
10
- ustrade-0.4.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
11
- ustrade-0.4.0.dist-info/top_level.txt,sha256=sGFKm5WBJQE8rpKswtEFbRvBNzrON1Dr5UCdQptW2GE,8
12
- ustrade-0.4.0.dist-info/RECORD,,