ustrade 0.4.0__tar.gz → 0.5.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ustrade-0.4.0 → ustrade-0.5.0}/PKG-INFO +9 -3
- ustrade-0.4.0/ustrade.egg-info/PKG-INFO → ustrade-0.5.0/README.md +6 -16
- {ustrade-0.4.0 → ustrade-0.5.0}/pyproject.toml +6 -1
- ustrade-0.5.0/tests/test_api.py +148 -0
- ustrade-0.5.0/tests/test_client.py +54 -0
- {ustrade-0.4.0 → ustrade-0.5.0}/tests/test_hscode.py +34 -0
- {ustrade-0.4.0 → ustrade-0.5.0}/ustrade/__init__.py +29 -2
- {ustrade-0.4.0 → ustrade-0.5.0}/ustrade/client.py +83 -3
- {ustrade-0.4.0 → ustrade-0.5.0}/ustrade/codes.py +2 -3
- ustrade-0.4.0/README.md → ustrade-0.5.0/ustrade.egg-info/PKG-INFO +23 -3
- {ustrade-0.4.0 → ustrade-0.5.0}/ustrade.egg-info/requires.txt +3 -0
- ustrade-0.4.0/tests/test_api.py +0 -28
- ustrade-0.4.0/tests/test_client.py +0 -40
- {ustrade-0.4.0 → ustrade-0.5.0}/LICENSE.txt +0 -0
- {ustrade-0.4.0 → ustrade-0.5.0}/setup.cfg +0 -0
- {ustrade-0.4.0 → ustrade-0.5.0}/ustrade/countries.py +0 -0
- {ustrade-0.4.0 → ustrade-0.5.0}/ustrade/data/country_codes.csv +0 -0
- {ustrade-0.4.0 → ustrade-0.5.0}/ustrade/data/harmonized-system.csv +0 -0
- {ustrade-0.4.0 → ustrade-0.5.0}/ustrade/errors.py +0 -0
- {ustrade-0.4.0 → ustrade-0.5.0}/ustrade.egg-info/SOURCES.txt +0 -0
- {ustrade-0.4.0 → ustrade-0.5.0}/ustrade.egg-info/dependency_links.txt +0 -0
- {ustrade-0.4.0 → ustrade-0.5.0}/ustrade.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ustrade
|
|
3
|
-
Version: 0.4.0
|
|
3
|
+
Version: 0.5.0
|
|
4
4
|
Summary: Python client for the U.S. Census Bureau International Trade API
|
|
5
5
|
Author: Fantin Sibony
|
|
6
6
|
License-Expression: MIT
|
|
@@ -10,6 +10,8 @@ Description-Content-Type: text/markdown
|
|
|
10
10
|
License-File: LICENSE.txt
|
|
11
11
|
Requires-Dist: requests
|
|
12
12
|
Requires-Dist: pandas
|
|
13
|
+
Provides-Extra: dev
|
|
14
|
+
Requires-Dist: pytest; extra == "dev"
|
|
13
15
|
Dynamic: license-file
|
|
14
16
|
|
|
15
17
|
# ustrade
|
|
@@ -20,7 +22,9 @@ Dynamic: license-file
|
|
|
20
22
|
<p align="left">
|
|
21
23
|
<img src="https://img.shields.io/badge/python-3.10%2B-blue" />
|
|
22
24
|
<img src="https://img.shields.io/badge/status-active-success" />
|
|
25
|
+
<img src= "https://img.shields.io/pypi/v/ustrade" />
|
|
23
26
|
<img src="https://img.shields.io/badge/license-MIT-green" />
|
|
27
|
+
<img src="https://static.pepy.tech/personalized-badge/ustrade?period=total&units=INTERNATIONAL_SYSTEM&left_color=BLACK&right_color=GREEN&left_text=downloads" />
|
|
24
28
|
</p>
|
|
25
29
|
|
|
26
30
|
---
|
|
@@ -34,8 +38,10 @@ Dynamic: license-file
|
|
|
34
38
|
- `"4279"` (Census code)
|
|
35
39
|
- HS codes lookup + product descriptions
|
|
36
40
|
- Standardized DataFrame output with clean column names
|
|
37
|
-
|
|
38
|
-
-
|
|
41
|
+
|
|
42
|
+
For interactive exploration and non-Python usage, a Streamlit dashboard is available for this library at [https://ustrade.streamlit.app](https://ustrade.streamlit.app).
|
|
43
|
+
|
|
44
|
+
Source code : [https://github.com/fantinsib/ustrade_dashboard](https://github.com/fantinsib/ustrade_dashboard)
|
|
39
45
|
|
|
40
46
|
---
|
|
41
47
|
|
|
@@ -1,17 +1,3 @@
|
|
|
1
|
-
Metadata-Version: 2.4
|
|
2
|
-
Name: ustrade
|
|
3
|
-
Version: 0.4.0
|
|
4
|
-
Summary: Python client for the U.S. Census Bureau International Trade API
|
|
5
|
-
Author: Fantin Sibony
|
|
6
|
-
License-Expression: MIT
|
|
7
|
-
Project-URL: Homepage, https://github.com/fantinsib/ustrade
|
|
8
|
-
Requires-Python: >=3.10
|
|
9
|
-
Description-Content-Type: text/markdown
|
|
10
|
-
License-File: LICENSE.txt
|
|
11
|
-
Requires-Dist: requests
|
|
12
|
-
Requires-Dist: pandas
|
|
13
|
-
Dynamic: license-file
|
|
14
|
-
|
|
15
1
|
# ustrade
|
|
16
2
|
|
|
17
3
|
> A lightweight and intuitive Python client for the **U.S. Census Bureau International Trade API**.
|
|
@@ -20,7 +6,9 @@ Dynamic: license-file
|
|
|
20
6
|
<p align="left">
|
|
21
7
|
<img src="https://img.shields.io/badge/python-3.10%2B-blue" />
|
|
22
8
|
<img src="https://img.shields.io/badge/status-active-success" />
|
|
9
|
+
<img src= "https://img.shields.io/pypi/v/ustrade" />
|
|
23
10
|
<img src="https://img.shields.io/badge/license-MIT-green" />
|
|
11
|
+
<img src="https://static.pepy.tech/personalized-badge/ustrade?period=total&units=INTERNATIONAL_SYSTEM&left_color=BLACK&right_color=GREEN&left_text=downloads" />
|
|
24
12
|
</p>
|
|
25
13
|
|
|
26
14
|
---
|
|
@@ -34,8 +22,10 @@ Dynamic: license-file
|
|
|
34
22
|
- `"4279"` (Census code)
|
|
35
23
|
- HS codes lookup + product descriptions
|
|
36
24
|
- Standardized DataFrame output with clean column names
|
|
37
|
-
|
|
38
|
-
-
|
|
25
|
+
|
|
26
|
+
For interactive exploration and non-Python usage, a Streamlit dashboard is available for this library at [https://ustrade.streamlit.app](https://ustrade.streamlit.app).
|
|
27
|
+
|
|
28
|
+
Source code : [https://github.com/fantinsib/ustrade_dashboard](https://github.com/fantinsib/ustrade_dashboard)
|
|
39
29
|
|
|
40
30
|
---
|
|
41
31
|
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "ustrade"
|
|
7
|
-
version = "0.4.0"
|
|
7
|
+
version = "0.5.0"
|
|
8
8
|
description = "Python client for the U.S. Census Bureau International Trade API"
|
|
9
9
|
authors = [
|
|
10
10
|
{ name = "Fantin Sibony" }
|
|
@@ -28,3 +28,8 @@ include-package-data = true
|
|
|
28
28
|
|
|
29
29
|
[tool.setuptools.package-data]
|
|
30
30
|
"ustrade" = ["data/*.csv"]
|
|
31
|
+
|
|
32
|
+
[project.optional-dependencies]
|
|
33
|
+
dev = [
|
|
34
|
+
"pytest"
|
|
35
|
+
]
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
import datetime as dt
|
|
2
|
+
|
|
3
|
+
import pandas as pd
|
|
4
|
+
import pytest
|
|
5
|
+
import requests
|
|
6
|
+
|
|
7
|
+
import ustrade as ut
|
|
8
|
+
from ustrade.client import CensusClient
|
|
9
|
+
from ustrade.errors import EmptyResult
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class FakeResponse:
|
|
13
|
+
def __init__(self, url: str, payload):
|
|
14
|
+
self.url = url
|
|
15
|
+
self._payload = payload
|
|
16
|
+
|
|
17
|
+
def raise_for_status(self):
|
|
18
|
+
return None
|
|
19
|
+
|
|
20
|
+
def json(self):
|
|
21
|
+
if isinstance(self._payload, Exception):
|
|
22
|
+
raise self._payload
|
|
23
|
+
return self._payload
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@pytest.fixture(autouse=True)
|
|
27
|
+
def _reset_default_client():
|
|
28
|
+
ut._default_client = None
|
|
29
|
+
yield
|
|
30
|
+
ut._default_client = None
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def test_build_client():
|
|
34
|
+
CensusClient()
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def test_get_exports_mocks_api_call(monkeypatch):
|
|
38
|
+
called = {}
|
|
39
|
+
|
|
40
|
+
def fake_get(url, timeout):
|
|
41
|
+
called["url"] = url
|
|
42
|
+
payload = [
|
|
43
|
+
[
|
|
44
|
+
"CTY_CODE",
|
|
45
|
+
"CTY_NAME",
|
|
46
|
+
"E_COMMODITY",
|
|
47
|
+
"E_COMMODITY_SDESC",
|
|
48
|
+
"ALL_VAL_MO",
|
|
49
|
+
"YEAR",
|
|
50
|
+
"MONTH",
|
|
51
|
+
],
|
|
52
|
+
[
|
|
53
|
+
"2010",
|
|
54
|
+
"MEXICO",
|
|
55
|
+
"27",
|
|
56
|
+
"Mineral fuels, oils, distillation products, etc.",
|
|
57
|
+
"773377170",
|
|
58
|
+
"2010",
|
|
59
|
+
"01",
|
|
60
|
+
],
|
|
61
|
+
]
|
|
62
|
+
return FakeResponse(url, payload)
|
|
63
|
+
|
|
64
|
+
monkeypatch.setattr(requests, "get", fake_get)
|
|
65
|
+
|
|
66
|
+
df = ut.get_exports("Mexico", "27", "2010-01")
|
|
67
|
+
|
|
68
|
+
assert isinstance(df, pd.DataFrame)
|
|
69
|
+
assert len(df) == 1
|
|
70
|
+
assert df.loc[0, "country_name"] == "MEXICO"
|
|
71
|
+
assert df.loc[0, "product_code"] == "27"
|
|
72
|
+
assert df.loc[0, "export_value"] == 773377170.0
|
|
73
|
+
assert df.loc[0, "date"].to_pydatetime() == dt.datetime(2010, 1, 1)
|
|
74
|
+
|
|
75
|
+
url = called["url"]
|
|
76
|
+
assert url.startswith("https://api.census.gov/data/timeseries/intltrade/exports/hs?")
|
|
77
|
+
assert "CTY_CODE=2010" in url
|
|
78
|
+
assert "E_COMMODITY=27" in url
|
|
79
|
+
assert "YEAR=2010" in url and "MONTH=01" in url
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def test_get_imports_mocks_api_call(monkeypatch):
|
|
83
|
+
def fake_get(url, timeout):
|
|
84
|
+
payload = [
|
|
85
|
+
[
|
|
86
|
+
"CTY_CODE",
|
|
87
|
+
"CTY_NAME",
|
|
88
|
+
"I_COMMODITY",
|
|
89
|
+
"I_COMMODITY_SDESC",
|
|
90
|
+
"GEN_VAL_MO",
|
|
91
|
+
"CON_VAL_MO",
|
|
92
|
+
"YEAR",
|
|
93
|
+
"MONTH",
|
|
94
|
+
],
|
|
95
|
+
[
|
|
96
|
+
"1220",
|
|
97
|
+
"FRANCE",
|
|
98
|
+
"08",
|
|
99
|
+
"Edible fruit and nuts; peel of citrus fruit or melons",
|
|
100
|
+
"123.45",
|
|
101
|
+
"100.0",
|
|
102
|
+
"2018",
|
|
103
|
+
"03",
|
|
104
|
+
],
|
|
105
|
+
]
|
|
106
|
+
return FakeResponse(url, payload)
|
|
107
|
+
|
|
108
|
+
monkeypatch.setattr(requests, "get", fake_get)
|
|
109
|
+
|
|
110
|
+
df = ut.get_imports("France", "08", "2018-03")
|
|
111
|
+
assert len(df) == 1
|
|
112
|
+
assert df.loc[0, "country_name"] == "FRANCE"
|
|
113
|
+
assert df.loc[0, "product_code"] == "08"
|
|
114
|
+
assert df.loc[0, "import_value"] == 123.45
|
|
115
|
+
assert df.loc[0, "consumption_import_value"] == 100.0
|
|
116
|
+
assert df.loc[0, "date"].to_pydatetime() == dt.datetime(2018, 3, 1)
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def test_get_exports_returns_empty_df_on_json_decode_error(monkeypatch):
|
|
120
|
+
def fake_get(url, timeout):
|
|
121
|
+
err = requests.exceptions.JSONDecodeError("boom", "", 0)
|
|
122
|
+
return FakeResponse(url, err)
|
|
123
|
+
|
|
124
|
+
monkeypatch.setattr(requests, "get", fake_get)
|
|
125
|
+
|
|
126
|
+
c = CensusClient()
|
|
127
|
+
df = c.get_exports("Mexico", "27", "2010-01")
|
|
128
|
+
assert df.empty
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def test_get_exports_on_period_raises_on_json_decode_error(monkeypatch):
|
|
132
|
+
def fake_get(url, timeout):
|
|
133
|
+
err = requests.exceptions.JSONDecodeError("boom", "", 0)
|
|
134
|
+
return FakeResponse(url, err)
|
|
135
|
+
|
|
136
|
+
monkeypatch.setattr(requests, "get", fake_get)
|
|
137
|
+
|
|
138
|
+
c = CensusClient()
|
|
139
|
+
with pytest.raises(EmptyResult):
|
|
140
|
+
c.get_exports_on_period("Mexico", "27", "2010-01", "2010-03")
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def test_get_children_codes():
|
|
144
|
+
children = ut.get_children_codes("1001")
|
|
145
|
+
|
|
146
|
+
expected_keys = {"100111", "100119", "100191", "100199"}
|
|
147
|
+
assert set(children.keys()) == expected_keys
|
|
148
|
+
assert "durum wheat" in children["100111"].lower()
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
from urllib.parse import parse_qs, urlparse
|
|
2
|
+
|
|
3
|
+
import pytest
|
|
4
|
+
|
|
5
|
+
from ustrade.client import CensusClient
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def _parse(url: str):
|
|
9
|
+
parsed = urlparse(url)
|
|
10
|
+
return parsed, parse_qs(parsed.query)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def test_build_params_imports_date_month_year():
|
|
14
|
+
c = CensusClient()
|
|
15
|
+
url = c._build_params(["Mexico", "Canada"], ["08", "09"], "imports", date="2013-01")
|
|
16
|
+
|
|
17
|
+
parsed, qs = _parse(url)
|
|
18
|
+
assert parsed.scheme == "https"
|
|
19
|
+
assert parsed.netloc == "api.census.gov"
|
|
20
|
+
assert parsed.path.endswith("/data/timeseries/intltrade/imports/hs")
|
|
21
|
+
|
|
22
|
+
assert "get" in qs
|
|
23
|
+
assert "CTY_CODE" in qs and len(qs["CTY_CODE"]) == 2
|
|
24
|
+
assert "I_COMMODITY" in qs and len(qs["I_COMMODITY"]) == 2
|
|
25
|
+
assert qs["YEAR"] == ["2013"]
|
|
26
|
+
assert qs["MONTH"] == ["01"]
|
|
27
|
+
assert "time" not in qs
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def test_build_params_imports_date_range_uses_time():
|
|
31
|
+
c = CensusClient()
|
|
32
|
+
url = c._build_params(["Mexico", "Canada"], ["08", "09"], "imports", start="2013-01", end="2014-01")
|
|
33
|
+
_, qs = _parse(url)
|
|
34
|
+
|
|
35
|
+
assert "time" in qs
|
|
36
|
+
assert qs["time"] == ["from 2013-01 to 2014-01"]
|
|
37
|
+
assert "YEAR" not in qs and "MONTH" not in qs
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def test_normalize_country_accepts_name_iso_code_and_object():
|
|
41
|
+
c = CensusClient()
|
|
42
|
+
mexico = c.get_country_by_name("Mexico")
|
|
43
|
+
|
|
44
|
+
assert c._normalize_country("Mexico") == mexico.code
|
|
45
|
+
assert c._normalize_country("mx") == mexico.code
|
|
46
|
+
assert c._normalize_country(mexico.code) == mexico.code
|
|
47
|
+
assert c._normalize_country(mexico, output="iso2") == "MX"
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def test_normalize_country_rejects_unknown_country():
|
|
51
|
+
c = CensusClient()
|
|
52
|
+
with pytest.raises(ValueError):
|
|
53
|
+
c._normalize_country("Neverland")
|
|
54
|
+
|
|
@@ -2,7 +2,10 @@
|
|
|
2
2
|
|
|
3
3
|
import ustrade as ut
|
|
4
4
|
from ustrade.codes import HSCode, build_tree_from_codes
|
|
5
|
+
import pytest
|
|
6
|
+
|
|
5
7
|
from ustrade import CensusClient
|
|
8
|
+
from ustrade.errors import CodeNotFoundError
|
|
6
9
|
|
|
7
10
|
|
|
8
11
|
def test_build_tree_simple():
|
|
@@ -31,3 +34,34 @@ def test_build_tree_roots():
|
|
|
31
34
|
assert "10" in roots
|
|
32
35
|
|
|
33
36
|
|
|
37
|
+
def test_get_desc_from_code_and_product_roundtrip():
|
|
38
|
+
c = CensusClient()
|
|
39
|
+
desc = c.get_desc_from_code("1001")
|
|
40
|
+
assert isinstance(desc, str)
|
|
41
|
+
assert "wheat" in desc.lower()
|
|
42
|
+
|
|
43
|
+
product = c.get_product("1001")
|
|
44
|
+
assert product.hscode == "1001"
|
|
45
|
+
assert product.description == desc
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def test_get_desc_from_code_suggests_leading_zero_for_one_digit_code():
|
|
49
|
+
c = CensusClient()
|
|
50
|
+
with pytest.raises(CodeNotFoundError, match="Did you mean '09'\\?"):
|
|
51
|
+
c.get_desc_from_code("9")
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def test_get_children_codes_as_list_and_as_object():
|
|
55
|
+
c = CensusClient()
|
|
56
|
+
children_list = c.get_children_codes("1001", return_names=False)
|
|
57
|
+
assert set(children_list) == {"100111", "100119", "100191", "100199"}
|
|
58
|
+
|
|
59
|
+
product = c.get_product("1001")
|
|
60
|
+
assert set(c.get_children_codes(product, return_names=False)) == set(children_list)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def test_search_for_code_scoped_and_and_mode():
|
|
64
|
+
c = CensusClient()
|
|
65
|
+
res = c.search_for_code(["durum", "wheat"], mode="AND", in_codes="1001")
|
|
66
|
+
assert "100111" in set(res["Code"])
|
|
67
|
+
|
|
@@ -5,6 +5,7 @@ from .codes import HSCode
|
|
|
5
5
|
from .errors import *
|
|
6
6
|
|
|
7
7
|
from importlib import metadata
|
|
8
|
+
from typing import Literal
|
|
8
9
|
|
|
9
10
|
try:
|
|
10
11
|
__version__ = metadata.version("ustrade")
|
|
@@ -158,6 +159,31 @@ def get_product(hs: str) -> HSCode:
|
|
|
158
159
|
"""
|
|
159
160
|
return _get_default_client().get_product(hs)
|
|
160
161
|
|
|
162
|
+
def search_for_code(keyword : str | list[str],
|
|
163
|
+
mode : Literal["OR", "AND"] = "OR",
|
|
164
|
+
in_codes : str = None) -> pd.DataFrame:
|
|
165
|
+
"""
|
|
166
|
+
Research keywords in the HS Code description base.
|
|
167
|
+
|
|
168
|
+
Args:
|
|
169
|
+
keyword (str | list[str]):
|
|
170
|
+
A single keyword or a list of keywords.
|
|
171
|
+
mode (Literal["OR", "AND"]):
|
|
172
|
+
Exclusive or inclusive search if `keyword` is a list. Default uses "OR".
|
|
173
|
+
"OR" mode will return every code associated with at least one keyword.
|
|
174
|
+
"AND" mode will return only the codes associated with all the keywords.
|
|
175
|
+
in_codes (str):
|
|
176
|
+
The code chapter or heading to look in. Default None will search across all chapters.
|
|
177
|
+
|
|
178
|
+
Returns:
|
|
179
|
+
pd.DataFrame:
|
|
180
|
+
A dataframe containing the list of associated codes.
|
|
181
|
+
|
|
182
|
+
Examples:
|
|
183
|
+
>>> ut.search_for_code(keyword="oil", in_codes="27")
|
|
184
|
+
"""
|
|
185
|
+
return _get_default_client().search_for_code(keyword, mode, in_codes)
|
|
186
|
+
|
|
161
187
|
|
|
162
188
|
__all__ = [
|
|
163
189
|
"CensusClient",
|
|
@@ -171,5 +197,6 @@ __all__ = [
|
|
|
171
197
|
"get_country_by_iso2",
|
|
172
198
|
"get_desc_from_code",
|
|
173
199
|
"get_children_codes",
|
|
174
|
-
"get_product"
|
|
175
|
-
|
|
200
|
+
"get_product",
|
|
201
|
+
"search_for_code",
|
|
202
|
+
]
|
|
@@ -2,7 +2,11 @@ import requests
|
|
|
2
2
|
import socket
|
|
3
3
|
from datetime import datetime
|
|
4
4
|
import pandas as pd
|
|
5
|
+
import re
|
|
5
6
|
from urllib.parse import urlencode
|
|
7
|
+
from typing import Literal
|
|
8
|
+
import unicodedata
|
|
9
|
+
|
|
6
10
|
from . import countries
|
|
7
11
|
from .countries import Country
|
|
8
12
|
from . import codes
|
|
@@ -23,7 +27,7 @@ class CensusClient:
|
|
|
23
27
|
self.BASE_URL = "api.census.gov"
|
|
24
28
|
self.BASE_PORT = 443
|
|
25
29
|
|
|
26
|
-
self._hs_codes, self._codes_by_hs_codes = codes._load_codes()
|
|
30
|
+
self._hs_codes, self._codes_by_hs_codes, self._desc_by_hs_codes = codes._load_codes()
|
|
27
31
|
self._code_tree = codes.build_tree_from_codes(self._hs_codes)
|
|
28
32
|
|
|
29
33
|
self.col_mapping = {
|
|
@@ -397,8 +401,9 @@ class CensusClient:
|
|
|
397
401
|
"""
|
|
398
402
|
Returns the description of the specified HS code
|
|
399
403
|
|
|
400
|
-
|
|
401
|
-
hs (str):
|
|
404
|
+
Args:
|
|
405
|
+
hs (str):
|
|
406
|
+
the HS code (ex: '1806')
|
|
402
407
|
"""
|
|
403
408
|
if isinstance(hs, str):
|
|
404
409
|
if hs in self._codes_by_hs_codes:
|
|
@@ -477,6 +482,81 @@ class CensusClient:
|
|
|
477
482
|
raise InvalidCodeError(
|
|
478
483
|
f"Code must be a str or a HSCode instance - received a {type(code).__name__!r}"
|
|
479
484
|
)
|
|
485
|
+
|
|
486
|
+
|
|
487
|
+
def _normalize_kw(self, s: str) -> str:
|
|
488
|
+
s = s.lower()
|
|
489
|
+
s = unicodedata.normalize("NFKD", s)
|
|
490
|
+
s = "".join(c for c in s if not unicodedata.combining(c))
|
|
491
|
+
s = re.sub(r"[^a-z0-9\s]+", " ", s)
|
|
492
|
+
return re.sub(r"\s+", " ", s).strip()
|
|
493
|
+
|
|
494
|
+
def _tokenize(self, s: str) -> list[str]:
|
|
495
|
+
return self._normalize_kw(s).split()
|
|
496
|
+
|
|
497
|
+
|
|
498
|
+
def search_for_code(self, keyword : str | list[str],
|
|
499
|
+
mode : Literal["OR", "AND"] = "OR",
|
|
500
|
+
in_codes : str = None) -> pd.DataFrame:
|
|
501
|
+
"""
|
|
502
|
+
Research keywords in the HS Code description base.
|
|
503
|
+
|
|
504
|
+
Args:
|
|
505
|
+
keyword (str | list[str]):
|
|
506
|
+
a single keyword or a list of keywords.
|
|
507
|
+
mode (Literal["OR", "AND"]):
|
|
508
|
+
exclusive or inclusive search if keyword is a list. Default uses "OR".
|
|
509
|
+
"OR" mode will return every code associated with at least one word of the list.
|
|
510
|
+
"AND" mode will return only the codes associated with all the words of the list.
|
|
511
|
+
in_codes (str):
|
|
512
|
+
the code chapter or heading to look in. Default None will search across all chapters.
|
|
513
|
+
|
|
514
|
+
Returns:
|
|
515
|
+
pd.Dataframe:
|
|
516
|
+
A dataframe containing the list of associated codes.
|
|
517
|
+
|
|
518
|
+
Examples:
|
|
519
|
+
>>> ut.search_for_code(keyword = "oil", in_codes = "27")
|
|
520
|
+
|
|
521
|
+
"""
|
|
522
|
+
if in_codes is not None:
|
|
523
|
+
if in_codes not in self._codes_by_hs_codes:
|
|
524
|
+
raise CodeNotFoundError(f"Error : {in_codes} was not found as a valid code.")
|
|
525
|
+
|
|
526
|
+
keywords = self._tokenize(keyword) if isinstance(keyword, str) else list(keyword)
|
|
527
|
+
keywords = [self._normalize_kw(k) for k in keywords]
|
|
528
|
+
|
|
529
|
+
results_code = []
|
|
530
|
+
results_desc = []
|
|
531
|
+
|
|
532
|
+
for desc, code in self._desc_by_hs_codes.items():
|
|
533
|
+
if in_codes is not None and not code.hscode.startswith(in_codes):
|
|
534
|
+
continue
|
|
535
|
+
|
|
536
|
+
tokens = self._tokenize(desc)
|
|
537
|
+
|
|
538
|
+
def keyword_matches(k: str) -> bool:
|
|
539
|
+
return any(tok.startswith(k) for tok in tokens)
|
|
540
|
+
|
|
541
|
+
if mode == "OR":
|
|
542
|
+
ok = any(keyword_matches(k) for k in keywords)
|
|
543
|
+
else:
|
|
544
|
+
ok = all(keyword_matches(k) for k in keywords)
|
|
545
|
+
|
|
546
|
+
if ok:
|
|
547
|
+
results_code.append(code.hscode)
|
|
548
|
+
results_desc.append(code.description)
|
|
549
|
+
|
|
550
|
+
return pd.DataFrame({
|
|
551
|
+
"Description": results_desc,
|
|
552
|
+
"Code": results_code
|
|
553
|
+
})
|
|
554
|
+
|
|
555
|
+
|
|
556
|
+
|
|
557
|
+
|
|
558
|
+
|
|
559
|
+
|
|
480
560
|
|
|
481
561
|
|
|
482
562
|
|
|
@@ -19,7 +19,7 @@ class HSCode:
|
|
|
19
19
|
return self.parent
|
|
20
20
|
|
|
21
21
|
|
|
22
|
-
def _load_codes() -> tuple[list[HSCode], dict[str, HSCode]]:
|
|
22
|
+
def _load_codes() -> tuple[list[HSCode], dict[str, HSCode], dict[str, HSCode]]:
|
|
23
23
|
csv_path = files(__package__) / "data" / "harmonized-system.csv"
|
|
24
24
|
codes: list[HSCode] = []
|
|
25
25
|
with csv_path.open(encoding="utf-8") as f:
|
|
@@ -37,7 +37,7 @@ def _load_codes() -> tuple[list[HSCode], dict[str, HSCode]]:
|
|
|
37
37
|
)
|
|
38
38
|
)
|
|
39
39
|
|
|
40
|
-
return codes, {c.hscode: c for c in codes}
|
|
40
|
+
return codes, {c.hscode: c for c in codes}, {c.description: c for c in codes}
|
|
41
41
|
|
|
42
42
|
|
|
43
43
|
def _get_parent(code: str) -> str | None:
|
|
@@ -70,4 +70,3 @@ def build_tree_from_codes(codes: list[HSCode]) -> dict[str, HSCode]:
|
|
|
70
70
|
parent_node.children.append(node.hscode)
|
|
71
71
|
|
|
72
72
|
return code_dict
|
|
73
|
-
|
|
@@ -1,3 +1,19 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: ustrade
|
|
3
|
+
Version: 0.5.0
|
|
4
|
+
Summary: Python client for the U.S. Census Bureau International Trade API
|
|
5
|
+
Author: Fantin Sibony
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/fantinsib/ustrade
|
|
8
|
+
Requires-Python: >=3.10
|
|
9
|
+
Description-Content-Type: text/markdown
|
|
10
|
+
License-File: LICENSE.txt
|
|
11
|
+
Requires-Dist: requests
|
|
12
|
+
Requires-Dist: pandas
|
|
13
|
+
Provides-Extra: dev
|
|
14
|
+
Requires-Dist: pytest; extra == "dev"
|
|
15
|
+
Dynamic: license-file
|
|
16
|
+
|
|
1
17
|
# ustrade
|
|
2
18
|
|
|
3
19
|
> A lightweight and intuitive Python client for the **U.S. Census Bureau International Trade API**.
|
|
@@ -6,7 +22,9 @@
|
|
|
6
22
|
<p align="left">
|
|
7
23
|
<img src="https://img.shields.io/badge/python-3.10%2B-blue" />
|
|
8
24
|
<img src="https://img.shields.io/badge/status-active-success" />
|
|
25
|
+
<img src= "https://img.shields.io/pypi/v/ustrade" />
|
|
9
26
|
<img src="https://img.shields.io/badge/license-MIT-green" />
|
|
27
|
+
<img src="https://static.pepy.tech/personalized-badge/ustrade?period=total&units=INTERNATIONAL_SYSTEM&left_color=BLACK&right_color=GREEN&left_text=downloads" />
|
|
10
28
|
</p>
|
|
11
29
|
|
|
12
30
|
---
|
|
@@ -20,8 +38,10 @@
|
|
|
20
38
|
- `"4279"` (Census code)
|
|
21
39
|
- HS codes lookup + product descriptions
|
|
22
40
|
- Standardized DataFrame output with clean column names
|
|
23
|
-
|
|
24
|
-
-
|
|
41
|
+
|
|
42
|
+
For interactive exploration and non-Python usage, a Streamlit dashboard is available for this library at [https://ustrade.streamlit.app](https://ustrade.streamlit.app).
|
|
43
|
+
|
|
44
|
+
Source code : [https://github.com/fantinsib/ustrade_dashboard](https://github.com/fantinsib/ustrade_dashboard)
|
|
25
45
|
|
|
26
46
|
---
|
|
27
47
|
|
|
@@ -172,4 +192,4 @@ ust.get_country_by_iso2("FR")
|
|
|
172
192
|
|
|
173
193
|
- All data retrieval functions return a **pandas DataFrame** unless otherwise noted.
|
|
174
194
|
- Column names are automatically standardized (see schema section).
|
|
175
|
-
- This library is still in <1.0.0 version and can change. Contributions are always welcome !
|
|
195
|
+
- This library is still in <1.0.0 version and can change. Contributions are always welcome !
|
ustrade-0.4.0/tests/test_api.py
DELETED
|
@@ -1,28 +0,0 @@
|
|
|
1
|
-
####### Test for API calls ######
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
import ustrade as ut
|
|
5
|
-
from ustrade.client import CensusClient
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
def test_build_client():
|
|
11
|
-
c= CensusClient()
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
def test_basic_request():
|
|
15
|
-
df = ut.get_exports("Mexico", "27", "2010-01")
|
|
16
|
-
|
|
17
|
-
assert len(df) == 1
|
|
18
|
-
assert df.loc[0, "country_name"] == "MEXICO"
|
|
19
|
-
assert df.loc[0, "product_code"] == "27"
|
|
20
|
-
assert df.loc[0, "export_value"] == 773377170.0
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
def test_get_children_codes():
|
|
24
|
-
children = ut.get_children_codes("1001")
|
|
25
|
-
|
|
26
|
-
expected_keys = {"100111", "100119", "100191", "100199"}
|
|
27
|
-
assert set(children.keys()) == expected_keys
|
|
28
|
-
assert "durum wheat" in children["100111"]
|
|
@@ -1,40 +0,0 @@
|
|
|
1
|
-
######## Tests for clients core methods ########
|
|
2
|
-
|
|
3
|
-
from urllib.parse import urlparse, parse_qs
|
|
4
|
-
from ustrade import CensusClient
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
import inspect
|
|
8
|
-
from ustrade.client import CensusClient
|
|
9
|
-
|
|
10
|
-
def test_debug():
|
|
11
|
-
print("module:", CensusClient.__module__)
|
|
12
|
-
print("file:", inspect.getfile(CensusClient))
|
|
13
|
-
print("_build_params in dir?", "_build_params" in dir(CensusClient))
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
def test_build_param():
|
|
18
|
-
|
|
19
|
-
c = CensusClient()
|
|
20
|
-
print(type(c))
|
|
21
|
-
print(hasattr(c, "_build_params"))
|
|
22
|
-
print(c.__class__)
|
|
23
|
-
|
|
24
|
-
url = c._build_params(["Mexico", "Canada"], ["08", "09"], "imports", start="2013-01", end= "2014-01")
|
|
25
|
-
|
|
26
|
-
parsed = urlparse(url)
|
|
27
|
-
qs = parse_qs(parsed.query)
|
|
28
|
-
|
|
29
|
-
# URL de base
|
|
30
|
-
assert parsed.scheme == "https"
|
|
31
|
-
assert parsed.netloc == "api.census.gov"
|
|
32
|
-
assert "/intltrade/imports/hs" in parsed.path
|
|
33
|
-
|
|
34
|
-
# Paramètres critiques
|
|
35
|
-
assert "get" in qs
|
|
36
|
-
assert "CTY_CODE" in qs
|
|
37
|
-
assert "I_COMMODITY" in qs
|
|
38
|
-
assert "time" in qs
|
|
39
|
-
|
|
40
|
-
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|