wbgapi360 0.2.8__py3-none-any.whl → 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- wbgapi360/__init__.py +66 -61
- wbgapi360/_version.py +34 -0
- wbgapi360/ai/agent.py +114 -114
- wbgapi360/api.py +290 -290
- wbgapi360/cli.py +83 -83
- wbgapi360/config.py +41 -41
- wbgapi360/core/auditor.py +107 -107
- wbgapi360/core/client.py +163 -163
- wbgapi360/core/models.py +24 -24
- wbgapi360/core/transformers.py +70 -70
- wbgapi360/core/utils.py +38 -38
- wbgapi360/data/builder.py +192 -192
- wbgapi360/mcp/server.py +682 -610
- wbgapi360/metadata/builder.py +87 -87
- wbgapi360/metadata/iso_mapping.py +228 -228
- wbgapi360/metadata/resolver.py +136 -136
- wbgapi360/search/engine.py +143 -143
- wbgapi360/visual/__init__.py +1 -1
- wbgapi360/visual/charts.py +1158 -1093
- {wbgapi360-0.2.8.dist-info → wbgapi360-0.3.1.dist-info}/METADATA +238 -241
- wbgapi360-0.3.1.dist-info/RECORD +25 -0
- {wbgapi360-0.2.8.dist-info → wbgapi360-0.3.1.dist-info}/licenses/LICENSE +21 -21
- wbgapi360-0.2.8.dist-info/RECORD +0 -24
- {wbgapi360-0.2.8.dist-info → wbgapi360-0.3.1.dist-info}/WHEEL +0 -0
- {wbgapi360-0.2.8.dist-info → wbgapi360-0.3.1.dist-info}/entry_points.txt +0 -0
- {wbgapi360-0.2.8.dist-info → wbgapi360-0.3.1.dist-info}/top_level.txt +0 -0
wbgapi360/__init__.py
CHANGED
|
@@ -1,61 +1,66 @@
|
|
|
1
|
-
from .core.client import Data360Client
|
|
2
|
-
from .search.engine import SearchEngine
|
|
3
|
-
from .data.builder import DataBuilder
|
|
4
|
-
from .ai.agent import DataAgent
|
|
5
|
-
from .metadata.builder import MetadataBuilder
|
|
6
|
-
from .metadata.builder import MetadataBuilder
|
|
7
|
-
# from .visual import viz # LAZY LOADED
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
class API:
|
|
11
|
-
def __init__(self):
|
|
12
|
-
self._client = None
|
|
13
|
-
|
|
14
|
-
@property
|
|
15
|
-
def client(self):
|
|
16
|
-
if not self._client:
|
|
17
|
-
self._client = Data360Client()
|
|
18
|
-
return self._client
|
|
19
|
-
|
|
20
|
-
@property
|
|
21
|
-
def search(self):
|
|
22
|
-
return SearchEngine(self.client)
|
|
23
|
-
|
|
24
|
-
@property
|
|
25
|
-
def data(self):
|
|
26
|
-
return DataBuilder(self.client)
|
|
27
|
-
|
|
28
|
-
@property
|
|
29
|
-
def metadata(self):
|
|
30
|
-
return MetadataBuilder(self.client)
|
|
31
|
-
|
|
32
|
-
@property
|
|
33
|
-
def ai(self):
|
|
34
|
-
return DataAgent(self.client)
|
|
35
|
-
|
|
36
|
-
@property
|
|
37
|
-
def visual(self):
|
|
38
|
-
try:
|
|
39
|
-
from .visual import viz
|
|
40
|
-
return viz
|
|
41
|
-
except ImportError as e:
|
|
42
|
-
if "seaborn" in str(e) or "matplotlib" in str(e):
|
|
43
|
-
raise ImportError(
|
|
44
|
-
"Optional dependency 'seaborn' not found. "
|
|
45
|
-
"Install with: pip install wbgapi360[visual]"
|
|
46
|
-
) from e
|
|
47
|
-
raise e
|
|
48
|
-
|
|
49
|
-
async def close(self):
|
|
50
|
-
if self._client:
|
|
51
|
-
await self._client.close()
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
1
|
+
from .core.client import Data360Client
|
|
2
|
+
from .search.engine import SearchEngine
|
|
3
|
+
from .data.builder import DataBuilder
|
|
4
|
+
from .ai.agent import DataAgent
|
|
5
|
+
from .metadata.builder import MetadataBuilder
|
|
6
|
+
from .metadata.builder import MetadataBuilder
|
|
7
|
+
# from .visual import viz # LAZY LOADED
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class API:
|
|
11
|
+
def __init__(self):
|
|
12
|
+
self._client = None
|
|
13
|
+
|
|
14
|
+
@property
|
|
15
|
+
def client(self):
|
|
16
|
+
if not self._client:
|
|
17
|
+
self._client = Data360Client()
|
|
18
|
+
return self._client
|
|
19
|
+
|
|
20
|
+
@property
|
|
21
|
+
def search(self):
|
|
22
|
+
return SearchEngine(self.client)
|
|
23
|
+
|
|
24
|
+
@property
|
|
25
|
+
def data(self):
|
|
26
|
+
return DataBuilder(self.client)
|
|
27
|
+
|
|
28
|
+
@property
|
|
29
|
+
def metadata(self):
|
|
30
|
+
return MetadataBuilder(self.client)
|
|
31
|
+
|
|
32
|
+
@property
|
|
33
|
+
def ai(self):
|
|
34
|
+
return DataAgent(self.client)
|
|
35
|
+
|
|
36
|
+
@property
|
|
37
|
+
def visual(self):
|
|
38
|
+
try:
|
|
39
|
+
from .visual import viz
|
|
40
|
+
return viz
|
|
41
|
+
except ImportError as e:
|
|
42
|
+
if "seaborn" in str(e) or "matplotlib" in str(e):
|
|
43
|
+
raise ImportError(
|
|
44
|
+
"Optional dependency 'seaborn' not found. "
|
|
45
|
+
"Install with: pip install wbgapi360[visual]"
|
|
46
|
+
) from e
|
|
47
|
+
raise e
|
|
48
|
+
|
|
49
|
+
async def close(self):
|
|
50
|
+
if self._client:
|
|
51
|
+
await self._client.close()
|
|
52
|
+
|
|
53
|
+
# Dynamic version from setuptools-scm
|
|
54
|
+
try:
|
|
55
|
+
from ._version import version as __version__
|
|
56
|
+
except ImportError:
|
|
57
|
+
__version__ = "0.0.0.dev0" # Fallback for editable installs
|
|
58
|
+
|
|
59
|
+
__author__ = "Maykol Medrano"
|
|
60
|
+
__email__ = "mmedrano2@uc.cl"
|
|
61
|
+
__credits__ = ["Applied Economist Policy Data Scientist"]
|
|
62
|
+
|
|
63
|
+
# Expose the human-friendly API at top level
|
|
64
|
+
from wbgapi360.api import search, get_data, plot
|
|
65
|
+
|
|
66
|
+
__all__ = ["search", "get_data", "plot"]
|
wbgapi360/_version.py
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# file generated by setuptools-scm
|
|
2
|
+
# don't change, don't track in version control
|
|
3
|
+
|
|
4
|
+
__all__ = [
|
|
5
|
+
"__version__",
|
|
6
|
+
"__version_tuple__",
|
|
7
|
+
"version",
|
|
8
|
+
"version_tuple",
|
|
9
|
+
"__commit_id__",
|
|
10
|
+
"commit_id",
|
|
11
|
+
]
|
|
12
|
+
|
|
13
|
+
TYPE_CHECKING = False
|
|
14
|
+
if TYPE_CHECKING:
|
|
15
|
+
from typing import Tuple
|
|
16
|
+
from typing import Union
|
|
17
|
+
|
|
18
|
+
VERSION_TUPLE = Tuple[Union[int, str], ...]
|
|
19
|
+
COMMIT_ID = Union[str, None]
|
|
20
|
+
else:
|
|
21
|
+
VERSION_TUPLE = object
|
|
22
|
+
COMMIT_ID = object
|
|
23
|
+
|
|
24
|
+
version: str
|
|
25
|
+
__version__: str
|
|
26
|
+
__version_tuple__: VERSION_TUPLE
|
|
27
|
+
version_tuple: VERSION_TUPLE
|
|
28
|
+
commit_id: COMMIT_ID
|
|
29
|
+
__commit_id__: COMMIT_ID
|
|
30
|
+
|
|
31
|
+
__version__ = version = '0.3.1'
|
|
32
|
+
__version_tuple__ = version_tuple = (0, 3, 1)
|
|
33
|
+
|
|
34
|
+
__commit_id__ = commit_id = None
|
wbgapi360/ai/agent.py
CHANGED
|
@@ -1,114 +1,114 @@
|
|
|
1
|
-
from ..core.client import Data360Client
|
|
2
|
-
from ..search.engine import SearchEngine
|
|
3
|
-
from ..data.builder import DataBuilder
|
|
4
|
-
from typing import Dict, Any, List
|
|
5
|
-
import logging
|
|
6
|
-
|
|
7
|
-
logger = logging.getLogger("wbgapi360")
|
|
8
|
-
|
|
9
|
-
class DataAgent:
|
|
10
|
-
"""
|
|
11
|
-
The 'Smart' interface. Relies on the API's vector search
|
|
12
|
-
to resolve natural language to IDs.
|
|
13
|
-
"""
|
|
14
|
-
def __init__(self, client: Data360Client):
|
|
15
|
-
self.client = client
|
|
16
|
-
self.search = SearchEngine(client)
|
|
17
|
-
|
|
18
|
-
async def get_context(self, natural_query: str) -> Dict[str, Any]:
|
|
19
|
-
"""
|
|
20
|
-
Understands the query using vector search and returns a DataContext.
|
|
21
|
-
"""
|
|
22
|
-
# 1. Search for the indicator using semantic search
|
|
23
|
-
# 1. Search for the indicator using semantic search, preferring WDI
|
|
24
|
-
logger.info(f"[AI] Thinking about '{natural_query}'...")
|
|
25
|
-
results = await self.search.semantic_explore(natural_query, database_id="WB_WDI")
|
|
26
|
-
|
|
27
|
-
if not results:
|
|
28
|
-
logger.info(f"[AI] No results found for '{natural_query}'.")
|
|
29
|
-
return {"error": f"I couldn't find any relevant data for '{natural_query}' in the World Bank 360 API."}
|
|
30
|
-
|
|
31
|
-
# 2. Pick the top result, but verify it has minimal checks
|
|
32
|
-
# In a real agent, we might present the top 3 to the user if confidence is low.
|
|
33
|
-
best_match = results[0]
|
|
34
|
-
series_desc = best_match.get('series_description', {})
|
|
35
|
-
indicator_id = series_desc.get('idno')
|
|
36
|
-
name = series_desc.get('name')
|
|
37
|
-
database_id = series_desc.get('database_id')
|
|
38
|
-
|
|
39
|
-
if not indicator_id:
|
|
40
|
-
return {"error": "Found a match but it lacked a valid Indicator ID."}
|
|
41
|
-
|
|
42
|
-
logger.info(f"[AI] I found: {name} (ID: {indicator_id}, DB: {database_id})")
|
|
43
|
-
|
|
44
|
-
return {
|
|
45
|
-
"indicator": indicator_id,
|
|
46
|
-
"database_id": database_id or "WB_WDI",
|
|
47
|
-
"name": name,
|
|
48
|
-
"raw_match": best_match
|
|
49
|
-
}
|
|
50
|
-
|
|
51
|
-
async def get_available_dimensions(self, indicator_id: str) -> Dict[str, List[str]]:
|
|
52
|
-
"""
|
|
53
|
-
Queries /disaggregation to see what dims are valid.
|
|
54
|
-
Returns a dict of dim_name -> list of valid codes.
|
|
55
|
-
"""
|
|
56
|
-
try:
|
|
57
|
-
# The disaggregation endpoint returns metadata about valid filters
|
|
58
|
-
# We use the generic 'get_data' since disaggregation is a GET endpoint
|
|
59
|
-
response = await self.client.get_data("/disaggregation", params={"indicatorId": indicator_id})
|
|
60
|
-
|
|
61
|
-
# Response handling logic (simplified for prototype)
|
|
62
|
-
# Assuming response structure is list of objects with dimension info
|
|
63
|
-
dims = {}
|
|
64
|
-
if isinstance(response, dict) and "value" in response:
|
|
65
|
-
vals = response["value"]
|
|
66
|
-
# Heuristic parsing of dimension metadata
|
|
67
|
-
# Assuming structure might be list of dicts with 'id', 'name', or 'code'
|
|
68
|
-
if isinstance(vals, list):
|
|
69
|
-
for v in vals:
|
|
70
|
-
# Try to find the dimension name and its valid codes
|
|
71
|
-
# This is speculative without the specific API contract for /disaggregation
|
|
72
|
-
# But we look for common keys.
|
|
73
|
-
dim_id = v.get('id') or v.get('code')
|
|
74
|
-
if dim_id:
|
|
75
|
-
# If the API returns valid values for this dimension, store them
|
|
76
|
-
# For now, we just map the dimension ID to a placeholder or count
|
|
77
|
-
dims[dim_id] = []
|
|
78
|
-
# If there's a nested 'values' list, capture it
|
|
79
|
-
if 'values' in v and isinstance(v['values'], list):
|
|
80
|
-
dims[dim_id] = [sub.get('id') for sub in v['values'] if 'id' in sub]
|
|
81
|
-
|
|
82
|
-
return dims
|
|
83
|
-
except Exception as e:
|
|
84
|
-
logger.warning(f"[AI] Warning: Could not introspect dimensions: {e}")
|
|
85
|
-
return {}
|
|
86
|
-
|
|
87
|
-
async def ask(self, natural_query: str, economy: str = "WLD", years: int = 5):
|
|
88
|
-
"""
|
|
89
|
-
End-to-end flow: Question -> Data.
|
|
90
|
-
"""
|
|
91
|
-
ctx = await self.get_context(natural_query)
|
|
92
|
-
if "error" in ctx:
|
|
93
|
-
return ctx
|
|
94
|
-
|
|
95
|
-
indicator_id = ctx["indicator"]
|
|
96
|
-
database_id = ctx.get("database_id", "WB_WDI")
|
|
97
|
-
|
|
98
|
-
# 3. Introspect (Smart Step)
|
|
99
|
-
logger.info(f"[AI] Inspecting dimensions for {indicator_id}...")
|
|
100
|
-
# For this prototype we just log that we are doing it.
|
|
101
|
-
# In a full version, we would check if 'economy' or 'years' is valid,
|
|
102
|
-
# or if we need to add specific filters based on the query text (e.g. 'rural').
|
|
103
|
-
|
|
104
|
-
# 4. Fetch data via Builder
|
|
105
|
-
logger.info(f"[AI] Fetching data for {economy} from {database_id}...")
|
|
106
|
-
builder = DataBuilder(self.client, dataset_id=database_id)
|
|
107
|
-
data = await builder.indicator(indicator_id).economy(economy).limit(years).get()
|
|
108
|
-
|
|
109
|
-
return {
|
|
110
|
-
"answer": f"Here is the data for '{ctx['name']}'",
|
|
111
|
-
"data": data,
|
|
112
|
-
"source_indicator": indicator_id,
|
|
113
|
-
"name": ctx['name']
|
|
114
|
-
}
|
|
1
|
+
from ..core.client import Data360Client
|
|
2
|
+
from ..search.engine import SearchEngine
|
|
3
|
+
from ..data.builder import DataBuilder
|
|
4
|
+
from typing import Dict, Any, List
|
|
5
|
+
import logging
|
|
6
|
+
|
|
7
|
+
logger = logging.getLogger("wbgapi360")
|
|
8
|
+
|
|
9
|
+
class DataAgent:
|
|
10
|
+
"""
|
|
11
|
+
The 'Smart' interface. Relies on the API's vector search
|
|
12
|
+
to resolve natural language to IDs.
|
|
13
|
+
"""
|
|
14
|
+
def __init__(self, client: Data360Client):
|
|
15
|
+
self.client = client
|
|
16
|
+
self.search = SearchEngine(client)
|
|
17
|
+
|
|
18
|
+
async def get_context(self, natural_query: str) -> Dict[str, Any]:
|
|
19
|
+
"""
|
|
20
|
+
Understands the query using vector search and returns a DataContext.
|
|
21
|
+
"""
|
|
22
|
+
# 1. Search for the indicator using semantic search
|
|
23
|
+
# 1. Search for the indicator using semantic search, preferring WDI
|
|
24
|
+
logger.info(f"[AI] Thinking about '{natural_query}'...")
|
|
25
|
+
results = await self.search.semantic_explore(natural_query, database_id="WB_WDI")
|
|
26
|
+
|
|
27
|
+
if not results:
|
|
28
|
+
logger.info(f"[AI] No results found for '{natural_query}'.")
|
|
29
|
+
return {"error": f"I couldn't find any relevant data for '{natural_query}' in the World Bank 360 API."}
|
|
30
|
+
|
|
31
|
+
# 2. Pick the top result, but verify it has minimal checks
|
|
32
|
+
# In a real agent, we might present the top 3 to the user if confidence is low.
|
|
33
|
+
best_match = results[0]
|
|
34
|
+
series_desc = best_match.get('series_description', {})
|
|
35
|
+
indicator_id = series_desc.get('idno')
|
|
36
|
+
name = series_desc.get('name')
|
|
37
|
+
database_id = series_desc.get('database_id')
|
|
38
|
+
|
|
39
|
+
if not indicator_id:
|
|
40
|
+
return {"error": "Found a match but it lacked a valid Indicator ID."}
|
|
41
|
+
|
|
42
|
+
logger.info(f"[AI] I found: {name} (ID: {indicator_id}, DB: {database_id})")
|
|
43
|
+
|
|
44
|
+
return {
|
|
45
|
+
"indicator": indicator_id,
|
|
46
|
+
"database_id": database_id or "WB_WDI",
|
|
47
|
+
"name": name,
|
|
48
|
+
"raw_match": best_match
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
async def get_available_dimensions(self, indicator_id: str) -> Dict[str, List[str]]:
|
|
52
|
+
"""
|
|
53
|
+
Queries /disaggregation to see what dims are valid.
|
|
54
|
+
Returns a dict of dim_name -> list of valid codes.
|
|
55
|
+
"""
|
|
56
|
+
try:
|
|
57
|
+
# The disaggregation endpoint returns metadata about valid filters
|
|
58
|
+
# We use the generic 'get_data' since disaggregation is a GET endpoint
|
|
59
|
+
response = await self.client.get_data("/disaggregation", params={"indicatorId": indicator_id})
|
|
60
|
+
|
|
61
|
+
# Response handling logic (simplified for prototype)
|
|
62
|
+
# Assuming response structure is list of objects with dimension info
|
|
63
|
+
dims = {}
|
|
64
|
+
if isinstance(response, dict) and "value" in response:
|
|
65
|
+
vals = response["value"]
|
|
66
|
+
# Heuristic parsing of dimension metadata
|
|
67
|
+
# Assuming structure might be list of dicts with 'id', 'name', or 'code'
|
|
68
|
+
if isinstance(vals, list):
|
|
69
|
+
for v in vals:
|
|
70
|
+
# Try to find the dimension name and its valid codes
|
|
71
|
+
# This is speculative without the specific API contract for /disaggregation
|
|
72
|
+
# But we look for common keys.
|
|
73
|
+
dim_id = v.get('id') or v.get('code')
|
|
74
|
+
if dim_id:
|
|
75
|
+
# If the API returns valid values for this dimension, store them
|
|
76
|
+
# For now, we just map the dimension ID to a placeholder or count
|
|
77
|
+
dims[dim_id] = []
|
|
78
|
+
# If there's a nested 'values' list, capture it
|
|
79
|
+
if 'values' in v and isinstance(v['values'], list):
|
|
80
|
+
dims[dim_id] = [sub.get('id') for sub in v['values'] if 'id' in sub]
|
|
81
|
+
|
|
82
|
+
return dims
|
|
83
|
+
except Exception as e:
|
|
84
|
+
logger.warning(f"[AI] Warning: Could not introspect dimensions: {e}")
|
|
85
|
+
return {}
|
|
86
|
+
|
|
87
|
+
async def ask(self, natural_query: str, economy: str = "WLD", years: int = 5):
|
|
88
|
+
"""
|
|
89
|
+
End-to-end flow: Question -> Data.
|
|
90
|
+
"""
|
|
91
|
+
ctx = await self.get_context(natural_query)
|
|
92
|
+
if "error" in ctx:
|
|
93
|
+
return ctx
|
|
94
|
+
|
|
95
|
+
indicator_id = ctx["indicator"]
|
|
96
|
+
database_id = ctx.get("database_id", "WB_WDI")
|
|
97
|
+
|
|
98
|
+
# 3. Introspect (Smart Step)
|
|
99
|
+
logger.info(f"[AI] Inspecting dimensions for {indicator_id}...")
|
|
100
|
+
# For this prototype we just log that we are doing it.
|
|
101
|
+
# In a full version, we would check if 'economy' or 'years' is valid,
|
|
102
|
+
# or if we need to add specific filters based on the query text (e.g. 'rural').
|
|
103
|
+
|
|
104
|
+
# 4. Fetch data via Builder
|
|
105
|
+
logger.info(f"[AI] Fetching data for {economy} from {database_id}...")
|
|
106
|
+
builder = DataBuilder(self.client, dataset_id=database_id)
|
|
107
|
+
data = await builder.indicator(indicator_id).economy(economy).limit(years).get()
|
|
108
|
+
|
|
109
|
+
return {
|
|
110
|
+
"answer": f"Here is the data for '{ctx['name']}'",
|
|
111
|
+
"data": data,
|
|
112
|
+
"source_indicator": indicator_id,
|
|
113
|
+
"name": ctx['name']
|
|
114
|
+
}
|