wbgapi360 0.2.1__py3-none-any.whl

wbgapi360/__init__.py ADDED
@@ -0,0 +1,51 @@
+ from .core.client import Data360Client
+ from .search.engine import SearchEngine
+ from .data.builder import DataBuilder
+ from .ai.agent import DataAgent
+ from .metadata.builder import MetadataBuilder
+ from .visual import viz
+
+
+ class API:
+     def __init__(self):
+         self._client = None
+
+     @property
+     def client(self):
+         if not self._client:
+             self._client = Data360Client()
+         return self._client
+
+     @property
+     def search(self):
+         return SearchEngine(self.client)
+
+     @property
+     def data(self):
+         return DataBuilder(self.client)
+
+     @property
+     def metadata(self):
+         return MetadataBuilder(self.client)
+
+     @property
+     def ai(self):
+         return DataAgent(self.client)
+
+     @property
+     def visual(self):
+         return viz
+
+     async def close(self):
+         if self._client:
+             await self._client.close()
+
+ __version__ = "0.2.1"
+ __author__ = "Maykol Medrano"
+ __email__ = "mmedrano2@uc.cl"
+ __credits__ = ["Applied Economist Policy Data Scientist"]
+
+ # Expose the human-friendly API at top level
+ from wbgapi360.api import search, get_data, plot
+
+ __all__ = ["search", "get_data", "plot"]
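
A minimal usage sketch of the top-level facade exposed above (the indicator code is taken from the get_data docstring and is illustrative):

import wbgapi360 as wb

hits = wb.search("inflation", limit=5)                      # list of {code, name, source} dicts
df = wb.get_data("NY.GDP.MKTP.KD", ["CHL", "PER"], years=10)  # tidy DataFrame
png = wb.plot("trend", df, title="GDP (constant USD)")        # path to the rendered chart
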
wbgapi360/ai/agent.py ADDED
@@ -0,0 +1,114 @@
+ from ..core.client import Data360Client
+ from ..search.engine import SearchEngine
+ from ..data.builder import DataBuilder
+ from typing import Dict, Any, List
+ import logging
+
+ logger = logging.getLogger("wbgapi360")
+
+ class DataAgent:
+     """
+     The 'Smart' interface. Relies on the API's vector search
+     to resolve natural language to IDs.
+     """
+     def __init__(self, client: Data360Client):
+         self.client = client
+         self.search = SearchEngine(client)
+
+     async def get_context(self, natural_query: str) -> Dict[str, Any]:
+         """
+         Understands the query using vector search and returns a DataContext.
+         """
+         # 1. Search for the indicator using semantic search, preferring WDI
+         logger.info(f"[AI] Thinking about '{natural_query}'...")
+         results = await self.search.semantic_explore(natural_query, database_id="WB_WDI")
+
+         if not results:
+             logger.info(f"[AI] No results found for '{natural_query}'.")
+             return {"error": f"I couldn't find any relevant data for '{natural_query}' in the World Bank 360 API."}
+
+         # 2. Pick the top result after minimal sanity checks.
+         # In a real agent, we might present the top 3 to the user if confidence is low.
+         best_match = results[0]
+         series_desc = best_match.get('series_description', {})
+         indicator_id = series_desc.get('idno')
+         name = series_desc.get('name')
+         database_id = series_desc.get('database_id')
+
+         if not indicator_id:
+             return {"error": "Found a match but it lacked a valid Indicator ID."}
+
+         logger.info(f"[AI] I found: {name} (ID: {indicator_id}, DB: {database_id})")
+
+         return {
+             "indicator": indicator_id,
+             "database_id": database_id or "WB_WDI",
+             "name": name,
+             "raw_match": best_match
+         }
+
+     async def get_available_dimensions(self, indicator_id: str) -> Dict[str, List[str]]:
+         """
+         Queries /disaggregation to see which dimensions are valid.
+         Returns a dict of dim_name -> list of valid codes.
+         """
+         try:
+             # The disaggregation endpoint returns metadata about valid filters.
+             # We use the generic 'get_data' since disaggregation is a GET endpoint.
+             response = await self.client.get_data("/disaggregation", params={"indicatorId": indicator_id})
+
+             # Response handling logic (simplified for prototype).
+             # We assume the response is a list of objects with dimension info.
+             dims = {}
+             if isinstance(response, dict) and "value" in response:
+                 vals = response["value"]
+                 # Heuristic parsing of dimension metadata: the structure is assumed
+                 # to be a list of dicts with 'id', 'name', or 'code'. This is
+                 # speculative without the specific API contract for /disaggregation,
+                 # so we look for common keys.
+                 if isinstance(vals, list):
+                     for v in vals:
+                         dim_id = v.get('id') or v.get('code')
+                         if dim_id:
+                             # Map the dimension ID to its valid codes (empty if unknown)
+                             dims[dim_id] = []
+                             # If there's a nested 'values' list, capture it
+                             if 'values' in v and isinstance(v['values'], list):
+                                 dims[dim_id] = [sub.get('id') for sub in v['values'] if 'id' in sub]
+
+             return dims
+         except Exception as e:
+             logger.warning(f"[AI] Warning: Could not introspect dimensions: {e}")
+             return {}
+
+     async def ask(self, natural_query: str, economy: str = "WLD", years: int = 5):
+         """
+         End-to-end flow: Question -> Data.
+         """
+         ctx = await self.get_context(natural_query)
+         if "error" in ctx:
+             return ctx
+
+         indicator_id = ctx["indicator"]
+         database_id = ctx.get("database_id", "WB_WDI")
+
+         # 3. Introspect (Smart Step)
+         logger.info(f"[AI] Inspecting dimensions for {indicator_id}...")
+         # For this prototype we just log that we are doing it.
+         # In a full version, we would check whether 'economy' or 'years' is valid,
+         # or whether we need extra filters based on the query text (e.g. 'rural').
+
+         # 4. Fetch data via Builder
+         logger.info(f"[AI] Fetching data for {economy} from {database_id}...")
+         builder = DataBuilder(self.client, dataset_id=database_id)
+         data = await builder.indicator(indicator_id).economy(economy).limit(years).get()
+
+         return {
+             "answer": f"Here is the data for '{ctx['name']}'",
+             "data": data,
+             "source_indicator": indicator_id,
+             "name": ctx['name']
+         }
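
A sketch of the end-to-end ask() flow, assuming Data360Client supports async context management (as api.py uses it):

import asyncio
from wbgapi360.core.client import Data360Client
from wbgapi360.ai.agent import DataAgent

async def main():
    async with Data360Client() as client:
        agent = DataAgent(client)
        # Natural-language question -> semantic search -> DataBuilder fetch
        result = await agent.ask("gdp per capita", economy="CHL", years=10)
        print(result.get("answer") or result.get("error"))

asyncio.run(main())
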
wbgapi360/api.py ADDED
@@ -0,0 +1,257 @@
+ import logging
+ import asyncio
+ import nest_asyncio
+ import pandas as pd
+ import datetime
+ import os
+ from typing import List, Dict, Any, Union, Optional
+
+ # --- CORE IMPORTS (Decoupled Architecture) ---
+ from wbgapi360.core.client import Data360Client
+ from wbgapi360.search.engine import SearchEngine
+ from wbgapi360.data.builder import DataBuilder
+ from wbgapi360.visual.charts import Visualizer
+ from wbgapi360.core.utils import normalize_codes, resolve_economies
+ from wbgapi360.core.transformers import DataStandardizer
+ from wbgapi360.core.auditor import DataAuditor
+
+ # --- SETUP ---
+ logger = logging.getLogger("wbgapi360")
+ logger.addHandler(logging.NullHandler())
+
+ # Singletons for Sync API
+ # _client = Data360Client() -> previously global; now instantiated per call to fix the asyncio lifecycle
+ _viz = Visualizer()
+
+ def _run_sync(coro):
+     """
+     Private helper: runs a coroutine synchronously.
+     Applies nest_asyncio only when a loop is already running (Jupyter).
+     """
+     try:
+         loop = asyncio.get_event_loop()
+     except RuntimeError:
+         loop = asyncio.new_event_loop()
+         asyncio.set_event_loop(loop)
+
+     if loop.is_running():
+         # Scoped side effect: applied only on demand.
+         nest_asyncio.apply()
+         return loop.run_until_complete(coro)
+     else:
+         return asyncio.run(coro)
+
+ # --- INTERNAL ASYNC LOGIC (Replicated from server to decouple) ---
+
+ async def _async_get_data(
+     indicator: Union[str, List[str]],
+     economies: Union[str, List[str]],
+     years: int = 5,
+     database_id: str = "WB_WDI",
+     labels: bool = False,  # Convert codes to human names
+     as_frame: bool = True  # Default to True for Python API
+ ) -> Union[pd.DataFrame, str]:
+
+     async with Data360Client() as client:
+         # 1. Normalize inputs
+         codes = normalize_codes(indicator, database_id)
+         eco_list = resolve_economies(economies)
+
+         # 2. Builder
+         builder = DataBuilder(client, dataset_id=database_id)
+         builder.indicator(codes).economy(eco_list)
+
+         # 3. Time logic (MRV vs Trend)
+         current_year = datetime.datetime.now().year
+
+         if years == 1:
+             # MRV logic (simplified): true MRV filtering is complex, so for v0.3
+             # we request a simple 5-year range and keep only the most recent
+             # valid observation in the post-processing step below.
+             start_year = current_year - 5
+             builder.time(f"{start_year}:{current_year}")
+             mrv_mode = True
+         else:
+             mrv_mode = False
+             start_year = current_year - years
+             builder.time(f"{start_year}:{current_year}")
+
+         # 4. Fetch
+         try:
+             # pivot=True is standard for human analysis;
+             # labels=True converts codes to names (preserves codes for maps)
+             df = await builder.to_dataframe(pivot=True, labels=labels)
+
+             if df.empty:
+                 return pd.DataFrame() if as_frame else "No data found."
+
+             # MRV post-processing (keep only the latest valid value)
+             if mrv_mode and not df.empty:
+                 def _is_year(c):
+                     s = str(c)
+                     if s.isdigit(): return True
+                     try:
+                         f = float(s)
+                         return f.is_integer() and 1900 < f < 2100
+                     except ValueError:
+                         return False
+
+                 year_cols = [c for c in df.columns if _is_year(c)]
+                 if year_cols:
+                     year_cols.sort()
+                     # ffill to get the last valid observation
+                     last_vals = df[year_cols].ffill(axis=1).iloc[:, -1]
+                     # Clean up structure
+                     df = df.drop(columns=year_cols)
+                     df[f'mrv_{current_year}'] = pd.to_numeric(last_vals, errors='coerce')
+                     df = df.dropna()
+
+             if as_frame:
+                 return df
+
+             return df.reset_index().to_json(orient='records')
+
+         except Exception as e:
+             logger.error(f"Data retrieval failed: {e}")
+             return pd.DataFrame() if as_frame else f"Error: {e}"
+
+ # --- PUBLIC API (USER FACING) ---
+
+ def search(query: str, limit: int = 10) -> List[Dict[str, Any]]:
+     """
+     Search for World Bank indicators using natural language and Smart Ranking.
+
+     Args:
+         query: Search term (e.g., "Inflation", "education spending")
+         limit: Maximum number of results.
+     """
+     logger.info(f"Searching for: '{query}'")
+
+     async def _do_search():
+         async with Data360Client() as client:
+             search_engine = SearchEngine(client)
+             raw = await search_engine.semantic_explore(query, database_id="WB_WDI")
+             # Format for humans
+             return [
+                 {
+                     "code": item.get('series_description', {}).get('idno'),
+                     "name": item.get('series_description', {}).get('name'),
+                     "source": item.get('series_description', {}).get('database_id')
+                 }
+                 for item in raw[:limit]
+             ]
+
+     return _run_sync(_do_search())
+
+ def get_data(
+     indicator: Union[str, List[str]],
+     economies: Union[str, List[str]],
+     years: int = 5,
+     labels: bool = False,  # Convert codes to names
+     as_json: bool = False
+ ) -> Union[pd.DataFrame, str]:
+     """
+     Downloads cleaned, ready-to-use data (DataFrame by default).
+
+     Args:
+         indicator: Indicator code(s) (e.g., "NY.GDP.MKTP.KD" or "GDP")
+         economies: Country code(s) (e.g., ["CHL", "PER"] or "USA")
+         years: Number of years back (default: 5). If 1, fetches the most recent value (MRV).
+         labels: If True, converts ISO codes (USA) to readable names (United States).
+             Preserves the original codes in a REF_AREA_CODE column for map compatibility.
+         as_json: If True, returns a JSON string instead of a DataFrame.
+
+     Returns:
+         pd.DataFrame (index=[REF_AREA, INDICATOR], columns=[Years...]),
+         or a JSON string if as_json=True.
+     """
+     logger.info(f"Fetching data via Senior Analyst Engine for {economies}...")
+
+     # 1. Fetch raw data using the internal async logic.
+     # Force as_frame=True so we can standardize; if the user wants JSON, we convert at the end.
+     df = _run_sync(_async_get_data(indicator, economies, years, labels=labels, as_frame=True))
+
+     if isinstance(df, str):  # Error message
+         return df
+
+     # --- SENIOR ANALYST LAYER ---
+     try:
+         # 2. Standardize
+         df = DataStandardizer.ensure_tidy(df)
+
+         # 3. Audit
+         report = DataAuditor.audit(df)
+         DataAuditor.print_report(report)
+     except Exception as e:
+         # Fail safe: if the analyst layer crashes, return raw data but log a warning
+         logger.warning(f"Senior Analyst Layer error: {e}")
+
+     if as_json:
+         return df.to_json(orient='records')
+
+     return df
+
+ def plot(chart_type: str, data: Union[str, pd.DataFrame], title: str = "", subtitle: str = "", **kwargs) -> str:
+     """
+     Generate a Financial Times-style chart with editorial aesthetics.
+
+     Args:
+         chart_type: Chart type (trend, bar, scatter, map, map_bubble, map_diverging, map_categorical, etc.)
+         data: DataFrame or JSON string with data
+         title: Chart title
+         subtitle: Chart subtitle
+         **kwargs: Additional chart-specific arguments
+             (e.g., bins, labels for map_categorical)
+
+     Returns:
+         Absolute path to the generated image.
+     """
+     logger.info(f"Plotting chart type: {chart_type}")
+
+     # Logic adapted for local dispatch
+     dispatch_table = {
+         'trend': _viz.plot_trend,
+         'line': _viz.plot_trend,
+         'bar': _viz.plot_bar,
+         'column': _viz.plot_column,
+         'scatter': _viz.plot_scatter,
+         'map': _viz.plot_map,
+         'map_bubble': _viz.plot_map_bubble,
+         'map_diverging': _viz.plot_map_diverging,
+         'map_categorical': _viz.plot_map_categorical,
+         'dumbbell': _viz.plot_dumbbell,
+         'stacked': _viz.plot_stacked_bar,
+         'stacked_bar': _viz.plot_stacked_bar,
+         'area': _viz.plot_area,
+         'heatmap': _viz.plot_heatmap,
+         'bump': _viz.plot_bump,
+         'treemap': _viz.plot_treemap,
+         'donut': _viz.plot_donut,
+         'pie': _viz.plot_donut
+     }
+
+     func = dispatch_table.get(chart_type)
+     if not func:
+         raise ValueError(f"Unknown chart type: {chart_type}")
+
+     # Handle JSON input (pandas is already imported at module level)
+     if isinstance(data, str):
+         try:
+             import json
+             data = pd.DataFrame(json.loads(data))
+         except Exception as e:
+             raise ValueError(f"Invalid JSON data: {e}")
+
+     # Generate a unique path
+     import tempfile, uuid
+     chart_id = str(uuid.uuid4())[:8]
+     path = os.path.join(tempfile.gettempdir(), f"wbg_plot_{chart_id}_{chart_type}.png")
+
+     # Execute the plot function with all kwargs
+     func(data, title=title, subtitle=subtitle, save_path=path, **kwargs)
+
+     return path
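
To illustrate the years=1 (MRV) branch above, a sketch (the indicator code is illustrative, not prescribed by this module):

from wbgapi360.api import get_data, plot

# years=1 triggers MRV mode: fetch the last 5 years, ffill across the year
# columns, and keep a single 'mrv_<current_year>' column per row.
latest = get_data("SP.DYN.LE00.IN", ["CHL", "PER", "COL"], years=1, labels=True)

# plot() resolves 'bar' through the dispatch table and returns the image path.
image_path = plot("bar", latest, title="Life expectancy", subtitle="Most recent value")
print(image_path)
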
wbgapi360/cli.py ADDED
@@ -0,0 +1,83 @@
+ import argparse
+ import asyncio
+ import sys
+ import json
+ from . import API
+ from .config import settings
+
+ # Facade instance exposing .search, .data, and .close()
+ api = API()
+
+ async def run_search(args):
+     """Execute search command."""
+     print(f"Searching for: {args.query}...")
+     try:
+         results = await api.search.semantic_explore(args.query)
+         if args.json:
+             print(json.dumps(results, indent=2))
+         else:
+             print(f"Found {len(results)} results:")
+             for r in results[:10]:
+                 desc = r.get('series_description', {})
+                 print(f"[{desc.get('idno')}] {desc.get('name')}")
+     except Exception as e:
+         print(f"Error: {e}")
+
+ async def run_data(args):
+     """Execute data fetch command."""
+     print(f"Fetching data: {args.indicator} for {args.economy}...")
+     try:
+         data = await api.data.indicator(args.indicator).economy(args.economy).limit(args.limit).get()
+         if args.json:
+             print(json.dumps(data, indent=2))
+         else:
+             print(f"Returned {len(data)} rows.")
+             # Simple table print for the CLI
+             print(f"{'Economy':<10} {'Year':<10} {'Value':<15}")
+             print("-" * 35)
+             for row in data:
+                 # Based on the OAS, fields might be uppercase like OBS_VALUE
+                 eco = row.get('REF_AREA') or row.get('economy') or 'N/A'
+                 time = row.get('TIME_PERIOD') or row.get('time') or 'N/A'
+                 val = row.get('OBS_VALUE')
+                 if val is None:  # avoid treating a legitimate 0 as missing
+                     val = row.get('value', 'N/A')
+                 print(f"{eco:<10} {time:<10} {val:<15}")
+
+     except Exception as e:
+         print(f"Error: {e}")
+
+ async def async_main():
+     parser = argparse.ArgumentParser(description="wbgapi360 Enterprise CLI")
+     subparsers = parser.add_subparsers(dest="command", required=True)
+
+     # Search command
+     search_parser = subparsers.add_parser("search", help="Semantic search for indicators")
+     search_parser.add_argument("query", help="Search term or natural language query")
+     search_parser.add_argument("--json", action="store_true", help="Output raw JSON")
+
+     # Data command
+     data_parser = subparsers.add_parser("data", help="Fetch data")
+     data_parser.add_argument("--indicator", required=True, help="Indicator ID")
+     data_parser.add_argument("--economy", default="WLD", help="Economy code (default: WLD)")
+     data_parser.add_argument("--limit", type=int, default=5, help="Limit results")
+     data_parser.add_argument("--json", action="store_true", help="Output raw JSON")
+
+     # Config command
+     config_parser = subparsers.add_parser("config", help="Show current configuration")
+
+     args = parser.parse_args()
+
+     if args.command == "search":
+         await run_search(args)
+     elif args.command == "data":
+         await run_data(args)
+     elif args.command == "config":
+         print("Current Configuration:")
+         print(settings.model_dump_json(indent=2))
+
+     await api.close()
+
+ def main():
+     try:
+         asyncio.run(async_main())
+     except KeyboardInterrupt:
+         print("\nAborted.")
+
+ if __name__ == "__main__":
+     main()
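
The wheel's console-script entry point isn't shown in this diff, so a sketch that drives the CLI in-process by simulating argv:

import sys
from wbgapi360.cli import main

# Equivalent to: <entry-point> data --indicator NY.GDP.MKTP.KD --economy CHL --limit 5
sys.argv = ["wbgapi360", "data", "--indicator", "NY.GDP.MKTP.KD",
            "--economy", "CHL", "--limit", "5"]
main()
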
wbgapi360/config.py ADDED
@@ -0,0 +1,41 @@
+ import os
+ from pydantic import BaseModel, Field
+
+ class Settings(BaseModel):
+     """
+     Enterprise Configuration Management.
+     Loads from environment variables, tailored for Docker/K8s.
+     """
+     # Allow population by field name, since load() passes field names while
+     # the fields themselves declare environment-variable aliases.
+     model_config = {"populate_by_name": True}
+
+     API_URL: str = Field(default="https://data360api.worldbank.org/data360", alias="WBG360_API_URL")
+     TIMEOUT: int = Field(default=30, alias="WBG360_TIMEOUT")
+     MAX_RETRIES: int = Field(default=3, alias="WBG360_MAX_RETRIES")
+     ENABLE_CACHE: bool = Field(default=True, alias="WBG360_ENABLE_CACHE")
+     CACHE_TTL: int = Field(default=300, alias="WBG360_CACHE_TTL")  # 5 minutes default
+     CACHE_DIR: str = Field(default=os.path.join(os.path.expanduser("~"), ".wbgapi360"), alias="WBG360_CACHE_DIR")
+
+     # Logging
+     LOG_LEVEL: str = Field(default="INFO", alias="WBG360_LOG_LEVEL")
+
+     @classmethod
+     def load(cls):
+         # Initial load from os.environ.
+         # Pydantic v2 usually uses pydantic-settings, but to avoid an extra
+         # dependency at this prototype stage we manually populate from
+         # os.environ using the aliases when present; anything not set falls
+         # back to the field defaults.
+         data = {}
+
+         for name, field in cls.model_fields.items():
+             alias = field.alias or name
+             if alias in os.environ:
+                 val = os.environ[alias]
+                 # Simple boolean conversion
+                 if field.annotation is bool:
+                     val = val.lower() in ('true', '1', 'yes')
+                 # Simple int conversion
+                 elif field.annotation is int:
+                     val = int(val)
+                 data[name] = val
+
+         return cls(**data)
+
+ settings = Settings.load()
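
A sketch of how the alias-based environment loading above behaves:

import os

# The aliases, not the field names, are read from the environment
os.environ["WBG360_TIMEOUT"] = "60"
os.environ["WBG360_ENABLE_CACHE"] = "false"

from wbgapi360.config import Settings

settings = Settings.load()
print(settings.TIMEOUT)        # 60 (coerced to int)
print(settings.ENABLE_CACHE)   # False (coerced via the 'true'/'1'/'yes' check)
print(settings.API_URL)        # default: https://data360api.worldbank.org/data360
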
wbgapi360/core/auditor.py ADDED
@@ -0,0 +1,107 @@
+ import pandas as pd
+ import logging
+
+ logger = logging.getLogger(__name__)
+
+ class DataAuditor:
+     """
+     The 'Eyes' of the Senior Analyst.
+     Checks data before it reaches the user.
+     """
+
+     @staticmethod
+     def audit(df: pd.DataFrame) -> dict:
+         """
+         Performs a full audit on the dataframe.
+         Returns a report dictionary.
+         """
+         if df.empty:
+             return {"status": "CRITICAL", "message": "No data returned."}
+
+         report = {"status": "OK", "warnings": []}
+
+         # 1. Gaps audit (the "Venezuela Check")
+         # Ensure we have an 'OBS_VALUE' column
+         val_col = 'OBS_VALUE'
+         if val_col not in df.columns:
+             # Fall back to the first numeric column
+             nums = df.select_dtypes(include=['number'])
+             if not nums.empty:
+                 val_col = nums.columns[0]
+
+         if val_col in df.columns:
+             missing = df[val_col].isna().sum()
+             total = len(df)
+             completeness = (1.0 - missing / total) if total > 0 else 0
+
+             if completeness < 0.8:
+                 report['warnings'].append(f"High data gap detected. Completeness: {completeness:.1%}. Check for missing countries/years.")
+                 report['status'] = "WARNING"
+
+             if completeness == 0:
+                 report['status'] = "CRITICAL"
+                 report['message'] = "Data is completely empty (Black Hole)."
+
+         # 2. Scale audit (the "Japan vs Zimbabwe Check")
+         # Check variance / magnitude
+         if val_col in df.columns and pd.api.types.is_numeric_dtype(df[val_col]):
+             vmin = df[val_col].min()
+             vmax = df[val_col].max()
+
+             if vmax > 0 and vmin > 0:
+                 ratio = vmax / vmin
+                 if ratio > 10_000:  # 4 orders of magnitude
+                     report['warnings'].append(f"Extreme scale differences detected (Max/Min ratio: {ratio:,.0f}). Consider a log scale.")
+
+         return report
+
+     @staticmethod
+     def detect_scale_conflict(df: pd.DataFrame) -> dict:
+         """
+         Analyzes whether the dataframe mixes series with incompatible scales
+         (e.g., billions vs percentages).
+         Returns a dict with 'has_conflict', 'micro_cols', 'macro_cols'.
+         """
+         conflict_info = {'has_conflict': False, 'micro_cols': [], 'macro_cols': []}
+
+         # Identify numeric columns (excluding Year if it's a column);
+         # str() guards against non-string (e.g., integer year) column labels
+         numeric_cols = df.select_dtypes(include=['number']).columns.tolist()
+         numeric_cols = [c for c in numeric_cols if str(c).lower() not in ['year', 'time_period']]
+
+         if len(numeric_cols) < 2:
+             return conflict_info
+
+         micro = []
+         macro = []
+
+         for col in numeric_cols:
+             # Check magnitude (using the median to be robust against outliers)
+             median_val = df[col].abs().median()
+
+             # Logic: micro < 500 (covers %, indices 0-100, etc.);
+             # macro > 1,000,000 (millions, billions)
+             if median_val < 500:
+                 micro.append(col)
+             elif median_val > 1_000_000:
+                 macro.append(col)
+
+         # A conflict exists only when we have BOTH types
+         if micro and macro:
+             conflict_info['has_conflict'] = True
+             conflict_info['micro_cols'] = micro
+             conflict_info['macro_cols'] = macro
+
+         return conflict_info
+
+     @staticmethod
+     def print_report(report):
+         """Prints a user-friendly audit report to the console."""
+         if report['status'] == 'OK' and not report['warnings']:
+             # Stay silent when the data is clean
+             return
+
+         print(f"\n[SENIOR ANALYST AUDIT] Status: {report['status']}")
+         for w in report.get('warnings', []):
+             print(f"  ⚠ {w}")
+         if 'message' in report:
+             print(f"  ! {report['message']}")
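
A sketch of what the audit flags, using a synthetic frame:

import pandas as pd
from wbgapi360.core.auditor import DataAuditor

df = pd.DataFrame({
    "REF_AREA": ["JPN", "ZWE", "CHL", "PER"],
    "OBS_VALUE": [4.2e12, 2.1e7, None, 9.0e8],
})

report = DataAuditor.audit(df)
# Completeness is 75% (< 80%) -> status WARNING, and the max/min ratio is
# ~200,000 (> 10,000) -> the "Consider a log scale" hint is appended.
DataAuditor.print_report(report)
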