xelytics-core 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. xelytics_core-0.1.0/PKG-INFO +104 -0
  2. xelytics_core-0.1.0/README.md +70 -0
  3. xelytics_core-0.1.0/pyproject.toml +64 -0
  4. xelytics_core-0.1.0/setup.cfg +4 -0
  5. xelytics_core-0.1.0/tests/test_core.py +167 -0
  6. xelytics_core-0.1.0/tests/test_golden.py +178 -0
  7. xelytics_core-0.1.0/tests/test_llm.py +151 -0
  8. xelytics_core-0.1.0/tests/test_stats.py +225 -0
  9. xelytics_core-0.1.0/tests/test_viz_insights.py +181 -0
  10. xelytics_core-0.1.0/xelytics/__init__.py +16 -0
  11. xelytics_core-0.1.0/xelytics/__version__.py +3 -0
  12. xelytics_core-0.1.0/xelytics/cli/__init__.py +5 -0
  13. xelytics_core-0.1.0/xelytics/cli/main.py +149 -0
  14. xelytics_core-0.1.0/xelytics/core/__init__.py +14 -0
  15. xelytics_core-0.1.0/xelytics/core/features.py +359 -0
  16. xelytics_core-0.1.0/xelytics/core/ingestion.py +135 -0
  17. xelytics_core-0.1.0/xelytics/core/profiler.py +174 -0
  18. xelytics_core-0.1.0/xelytics/engine.py +139 -0
  19. xelytics_core-0.1.0/xelytics/exceptions.py +78 -0
  20. xelytics_core-0.1.0/xelytics/insights/__init__.py +10 -0
  21. xelytics_core-0.1.0/xelytics/insights/rules.py +203 -0
  22. xelytics_core-0.1.0/xelytics/llm/__init__.py +14 -0
  23. xelytics_core-0.1.0/xelytics/llm/narrator.py +135 -0
  24. xelytics_core-0.1.0/xelytics/llm/provider.py +125 -0
  25. xelytics_core-0.1.0/xelytics/llm/providers/__init__.py +8 -0
  26. xelytics_core-0.1.0/xelytics/llm/providers/openai.py +160 -0
  27. xelytics_core-0.1.0/xelytics/schemas/__init__.py +26 -0
  28. xelytics_core-0.1.0/xelytics/schemas/config.py +51 -0
  29. xelytics_core-0.1.0/xelytics/schemas/inputs.py +47 -0
  30. xelytics_core-0.1.0/xelytics/schemas/metadata.py +121 -0
  31. xelytics_core-0.1.0/xelytics/schemas/outputs.py +368 -0
  32. xelytics_core-0.1.0/xelytics/stats/__init__.py +24 -0
  33. xelytics_core-0.1.0/xelytics/stats/engine.py +213 -0
  34. xelytics_core-0.1.0/xelytics/stats/planner.py +309 -0
  35. xelytics_core-0.1.0/xelytics/stats/tests.py +378 -0
  36. xelytics_core-0.1.0/xelytics/viz/__init__.py +12 -0
  37. xelytics_core-0.1.0/xelytics/viz/generator.py +249 -0
  38. xelytics_core-0.1.0/xelytics/viz/selector.py +147 -0
  39. xelytics_core-0.1.0/xelytics_core.egg-info/PKG-INFO +104 -0
  40. xelytics_core-0.1.0/xelytics_core.egg-info/SOURCES.txt +42 -0
  41. xelytics_core-0.1.0/xelytics_core.egg-info/dependency_links.txt +1 -0
  42. xelytics_core-0.1.0/xelytics_core.egg-info/entry_points.txt +2 -0
  43. xelytics_core-0.1.0/xelytics_core.egg-info/requires.txt +19 -0
  44. xelytics_core-0.1.0/xelytics_core.egg-info/top_level.txt +1 -0
@@ -0,0 +1,104 @@
1
+ Metadata-Version: 2.4
2
+ Name: xelytics-core
3
+ Version: 0.1.0
4
+ Summary: Pure analytics engine for statistical analysis and insight generation
5
+ Author: Xelytics Team
6
+ License: MIT
7
+ Classifier: Development Status :: 3 - Alpha
8
+ Classifier: Intended Audience :: Developers
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: Python :: 3.9
12
+ Classifier: Programming Language :: Python :: 3.10
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Requires-Python: >=3.9
16
+ Description-Content-Type: text/markdown
17
+ Requires-Dist: pandas>=2.1.0
18
+ Requires-Dist: numpy>=1.24.0
19
+ Requires-Dist: scipy>=1.11.0
20
+ Requires-Dist: scikit-learn>=1.3.0
21
+ Requires-Dist: statsmodels>=0.14.0
22
+ Requires-Dist: pingouin>=0.5.3
23
+ Requires-Dist: plotly>=5.17.0
24
+ Provides-Extra: llm
25
+ Requires-Dist: openai>=1.6.0; extra == "llm"
26
+ Requires-Dist: groq>=0.4.0; extra == "llm"
27
+ Requires-Dist: httpx>=0.25.0; extra == "llm"
28
+ Provides-Extra: dev
29
+ Requires-Dist: pytest>=7.4.0; extra == "dev"
30
+ Requires-Dist: pytest-asyncio>=0.21.0; extra == "dev"
31
+ Requires-Dist: pytest-cov>=4.1.0; extra == "dev"
32
+ Requires-Dist: black>=23.11.0; extra == "dev"
33
+ Requires-Dist: mypy>=1.7.0; extra == "dev"
34
+
35
+ # Xelytics-Core
36
+
37
+ **Pure analytics engine for statistical analysis and insight generation.**
38
+
39
+ ## Installation
40
+
41
+ ```bash
42
+ pip install -e .
43
+ ```
44
+
45
+ ## Quick Start
46
+
47
+ ```python
48
+ from xelytics import analyze, AnalysisConfig
49
+ import pandas as pd
50
+
51
+ # Load your data
52
+ df = pd.read_csv("data.csv")
53
+
54
+ # Run automated analysis
55
+ result = analyze(df, mode="automated")
56
+
57
+ # Access results
58
+ print(f"Analyzed {result.metadata.row_count} rows")
59
+ print(f"Found {len(result.statistics)} statistical tests")
60
+ print(f"Generated {len(result.visualizations)} visualizations")
61
+ print(f"Produced {len(result.insights)} insights")
62
+
63
+ # Export to JSON
64
+ json_output = result.to_json()
65
+ ```
66
+
67
+ ## API Contract
68
+
69
+ ```python
70
+ from xelytics import analyze, AnalysisConfig, AnalysisResult
71
+
72
+ result = analyze(
73
+ data=df,
74
+ mode="automated", # or "semi-automated"
75
+ config=AnalysisConfig(
76
+ significance_level=0.05,
77
+ enable_llm_insights=True,
78
+ max_visualizations=10,
79
+ )
80
+ )
81
+ ```
82
+
83
+ ## Output Schema
84
+
85
+ ```python
86
+ AnalysisResult(
87
+ summary=DatasetSummary(...),
88
+ statistics=[StatisticalTestResult(...), ...],
89
+ visualizations=[VisualizationSpec(...), ...],
90
+ insights=[Insight(...), ...],
91
+ metadata=RunMetadata(...),
92
+ )
93
+ ```
94
+
95
+ ## Design Principles
96
+
97
+ 1. **Pure analytics engine** - No HTTP, no database, no auth
98
+ 2. **Deterministic** - Same input = same output
99
+ 3. **LLM is optional** - Rule-based insights work without LLM
100
+ 4. **Type-safe** - All inputs/outputs are typed dataclasses
101
+
102
+ ## License
103
+
104
+ MIT
@@ -0,0 +1,70 @@
1
+ # Xelytics-Core
2
+
3
+ **Pure analytics engine for statistical analysis and insight generation.**
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ pip install -e .
9
+ ```
10
+
11
+ ## Quick Start
12
+
13
+ ```python
14
+ from xelytics import analyze, AnalysisConfig
15
+ import pandas as pd
16
+
17
+ # Load your data
18
+ df = pd.read_csv("data.csv")
19
+
20
+ # Run automated analysis
21
+ result = analyze(df, mode="automated")
22
+
23
+ # Access results
24
+ print(f"Analyzed {result.metadata.row_count} rows")
25
+ print(f"Found {len(result.statistics)} statistical tests")
26
+ print(f"Generated {len(result.visualizations)} visualizations")
27
+ print(f"Produced {len(result.insights)} insights")
28
+
29
+ # Export to JSON
30
+ json_output = result.to_json()
31
+ ```
32
+
33
+ ## API Contract
34
+
35
+ ```python
36
+ from xelytics import analyze, AnalysisConfig, AnalysisResult
37
+
38
+ result = analyze(
39
+ data=df,
40
+ mode="automated", # or "semi-automated"
41
+ config=AnalysisConfig(
42
+ significance_level=0.05,
43
+ enable_llm_insights=True,
44
+ max_visualizations=10,
45
+ )
46
+ )
47
+ ```
48
+
49
+ ## Output Schema
50
+
51
+ ```python
52
+ AnalysisResult(
53
+ summary=DatasetSummary(...),
54
+ statistics=[StatisticalTestResult(...), ...],
55
+ visualizations=[VisualizationSpec(...), ...],
56
+ insights=[Insight(...), ...],
57
+ metadata=RunMetadata(...),
58
+ )
59
+ ```
60
+
61
+ ## Design Principles
62
+
63
+ 1. **Pure analytics engine** - No HTTP, no database, no auth
64
+ 2. **Deterministic** - Same input = same output
65
+ 3. **LLM is optional** - Rule-based insights work without LLM
66
+ 4. **Type-safe** - All inputs/outputs are typed dataclasses
67
+
68
+ ## License
69
+
70
+ MIT
@@ -0,0 +1,64 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61.0", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "xelytics-core"
7
+ version = "0.1.0"
8
+ description = "Pure analytics engine for statistical analysis and insight generation"
9
+ readme = "README.md"
10
+ license = {text = "MIT"}
11
+ requires-python = ">=3.9"
12
+ authors = [
13
+ {name = "Xelytics Team"}
14
+ ]
15
+ classifiers = [
16
+ "Development Status :: 3 - Alpha",
17
+ "Intended Audience :: Developers",
18
+ "License :: OSI Approved :: MIT License",
19
+ "Programming Language :: Python :: 3",
20
+ "Programming Language :: Python :: 3.9",
21
+ "Programming Language :: Python :: 3.10",
22
+ "Programming Language :: Python :: 3.11",
23
+ "Programming Language :: Python :: 3.12",
24
+ ]
25
+
26
+ dependencies = [
27
+ "pandas>=2.1.0",
28
+ "numpy>=1.24.0",
29
+ "scipy>=1.11.0",
30
+ "scikit-learn>=1.3.0",
31
+ "statsmodels>=0.14.0",
32
+ "pingouin>=0.5.3",
33
+ "plotly>=5.17.0",
34
+ ]
35
+
36
+ [project.optional-dependencies]
37
+ llm = [
38
+ "openai>=1.6.0",
39
+ "groq>=0.4.0",
40
+ "httpx>=0.25.0",
41
+ ]
42
+ dev = [
43
+ "pytest>=7.4.0",
44
+ "pytest-asyncio>=0.21.0",
45
+ "pytest-cov>=4.1.0",
46
+ "black>=23.11.0",
47
+ "mypy>=1.7.0",
48
+ ]
49
+
50
+ [project.scripts]
51
+ xelytics = "xelytics.cli.main:main"
52
+
53
+ [tool.setuptools.packages.find]
54
+ where = ["."]
55
+ include = ["xelytics*"]
56
+
57
+ [tool.black]
58
+ line-length = 100
59
+ target-version = ['py39', 'py310', 'py311', 'py312']
60
+
61
+ [tool.mypy]
62
+ python_version = "3.9"
63
+ warn_return_any = true
64
+ warn_unused_configs = true
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,167 @@
1
+ """Unit tests for core modules.
2
+
3
+ Tests ingestion, profiling, and feature detection.
4
+ """
5
+
6
+ import pytest
7
+ import pandas as pd
8
+ import numpy as np
9
+
10
+ from xelytics.core.ingestion import DataIngestion, IngestionResult
11
+ from xelytics.core.profiler import DataProfiler, ProfileResult
12
+ from xelytics.core.features import FeatureDetector, FeatureDetectionResult
13
+
14
+
15
class TestDataIngestion:
    """Exercise DataIngestion.ingest() with valid and invalid inputs."""

    def test_ingest_valid_dataframe(self, sample_mixed_df):
        """A well-formed DataFrame produces an IngestionResult with the expected shape."""
        outcome = DataIngestion().ingest(sample_mixed_df)

        assert isinstance(outcome, IngestionResult)
        # Expected shape of the sample_mixed_df fixture, which is supplied
        # from outside this file.
        assert outcome.row_count == 100
        assert outcome.column_count == 5
        assert len(outcome.column_dtypes) == 5

    def test_ingest_empty_dataframe_raises(self):
        """An empty DataFrame is rejected with a ValueError."""
        loader = DataIngestion()
        with pytest.raises(ValueError, match="cannot be empty"):
            loader.ingest(pd.DataFrame())

    def test_ingest_invalid_type_raises(self):
        """A non-DataFrame input is rejected with a ValueError."""
        loader = DataIngestion()
        with pytest.raises(ValueError, match="must be a pandas DataFrame"):
            loader.ingest([1, 2, 3])  # type: ignore

    def test_type_normalization(self):
        """String-typed numeric and date columns are ingested without losing rows."""
        raw_columns = {
            'numeric_str': ['1', '2', '3', '4', '5'],
            'date_str': ['2024-01-01', '2024-01-02', '2024-01-03', '2024-01-04', '2024-01-05'],
        }
        outcome = DataIngestion().ingest(pd.DataFrame(raw_columns))

        # Only the row count is checked here; the normalized dtypes themselves
        # are not asserted by this test.
        assert outcome.row_count == 5
51
+
52
+
53
class TestDataProfiler:
    """Verify per-column and dataset-level output of DataProfiler.profile()."""

    @staticmethod
    def _profile_named(profile_result, column_name):
        """Return the first column profile whose ``column_name`` matches.

        Raises StopIteration when no profile matches, just like a bare ``next()``.
        """
        return next(
            entry
            for entry in profile_result.column_profiles
            if entry.column_name == column_name
        )

    def test_profile_numeric_columns(self, sample_numeric_df):
        """Numeric columns are profiled with mean/std/min/max populated."""
        profiled = DataProfiler().profile(sample_numeric_df)

        assert isinstance(profiled, ProfileResult)
        assert len(profiled.column_profiles) == 4

        # Every summary statistic must be present on the 'sales' profile.
        sales = self._profile_named(profiled, 'sales')
        for stat_name in ('mean', 'std', 'min', 'max'):
            assert getattr(sales, stat_name) is not None

    def test_profile_categorical_columns(self, sample_categorical_df):
        """Categorical columns report their unique count and data type."""
        profiled = DataProfiler().profile(sample_categorical_df)

        assert len(profiled.column_profiles) == 4

        region = self._profile_named(profiled, 'region')
        assert region.unique_count == 4
        assert region.data_type == 'categorical'

    def test_profile_missing_values(self, sample_with_missing_df):
        """Missing cells are counted both in total and per column."""
        profiled = DataProfiler().profile(sample_with_missing_df)

        assert profiled.total_missing_cells > 0

        # The fixture is expected to carry exactly 10 missing entries in 'value1'.
        value1 = self._profile_named(profiled, 'value1')
        assert value1.missing_count == 10
93
+
94
+
95
class TestFeatureDetector:
    """Tests for the FeatureDetector module."""

    def test_detect_numeric_columns(self, sample_numeric_df):
        """Numeric columns are reported in ``numeric_columns``."""
        detector = FeatureDetector()
        result = detector.detect(sample_numeric_df)

        assert isinstance(result, FeatureDetectionResult)
        assert len(result.numeric_columns) == 4
        assert 'sales' in result.numeric_columns

    def test_detect_categorical_columns(self, sample_categorical_df):
        """Categorical columns are reported in ``categorical_columns``."""
        detector = FeatureDetector()
        result = detector.detect(sample_categorical_df)

        assert len(result.categorical_columns) == 4
        assert 'region' in result.categorical_columns

    def test_detect_datetime_columns(self, sample_mixed_df):
        """The 'date' column of the mixed fixture is detected as datetime."""
        detector = FeatureDetector()
        result = detector.detect(sample_mixed_df)

        assert 'date' in result.datetime_columns

    def test_detect_groupable_columns(self, sample_mixed_df):
        """At least two groupable columns are found in the mixed fixture."""
        detector = FeatureDetector()
        result = detector.detect(sample_mixed_df)

        # Should detect region and category as groupable
        assert len(result.groupable_columns) >= 2

    def test_no_name_heuristics(self):
        """Feature detection uses data only, not column names.

        Per plan constraint: Feature detection must rely only on data.
        """
        detector = FeatureDetector()

        # Seeded generator so the random column is identical on every run;
        # an unseeded draw would make this test's input non-reproducible.
        rng = np.random.default_rng(0)

        # Create DataFrame with misleading column names
        df = pd.DataFrame({
            'id_column': ['A', 'B', 'C'] * 10,  # Named like ID but low cardinality
            'date_field': [1.5, 2.5, 3.5] * 10,  # Named like date but numeric
            'target_var': rng.choice(['X', 'Y'], 30),  # Named like target but categorical
        })

        result = detector.detect(df)

        # 'id_column' should NOT be classified as identifier (low cardinality)
        assert 'id_column' not in result.identifier_columns
        assert 'id_column' in result.categorical_columns

        # 'date_field' should NOT be classified as datetime (it's numeric)
        assert 'date_field' not in result.datetime_columns
        assert 'date_field' in result.numeric_columns

    def test_deterministic_detection(self, sample_mixed_df, assert_deterministic):
        """Repeated detection on the same frame yields identical column roles.

        NOTE(review): the ``assert_deterministic`` fixture is requested but not
        used by this test; its definition is not visible in this file, so the
        parameter is kept to leave fixture setup unchanged.
        """
        detector = FeatureDetector()

        # Run 5 times; each run is reduced to a hashable, order-independent
        # snapshot of column_roles so that identical runs collapse to one entry.
        observed_roles = {
            tuple(sorted(detector.detect(sample_mixed_df).column_roles.items()))
            for _ in range(5)
        }

        assert len(observed_roles) == 1, "Feature detection is not deterministic"
@@ -0,0 +1,178 @@
1
+ """Golden output and determinism tests.
2
+
3
+ Tests to ensure:
4
+ 1. Same input → same output (determinism)
5
+ 2. Output matches expected golden values (regression)
6
+ """
7
+
8
+ import pytest
9
+ import pandas as pd
10
+ import numpy as np
11
+ import json
12
+ import hashlib
13
+
14
+ from xelytics import analyze, AnalysisConfig
15
+
16
+
17
class TestDeterminism:
    """Tests that the same input produces the same output."""

    def test_analyze_deterministic(self, golden_dataset):
        """analyze() produces identical core output across repeated runs."""
        config = AnalysisConfig(
            mode="automated",
            enable_llm_insights=False,  # Disable LLM for determinism
        )

        hashes = []
        for _ in range(3):
            result = analyze(golden_dataset, mode="automated", config=config)
            # Hash a canonical JSON form of the core output so runs can be
            # compared with a single equality check.
            output = {
                "row_count": result.summary.row_count,
                "column_count": result.summary.column_count,
                "tests_executed": result.metadata.tests_executed,
                "numeric_columns": sorted(result.summary.numeric_columns),
                "categorical_columns": sorted(result.summary.categorical_columns),
            }
            hash_val = hashlib.md5(json.dumps(output, sort_keys=True).encode()).hexdigest()
            hashes.append(hash_val)

        assert len(set(hashes)) == 1, f"Non-deterministic output: {hashes}"

    def test_statistical_results_deterministic(self, golden_dataset):
        """Statistical test p-values are identical across repeated runs."""
        config = AnalysisConfig(mode="automated", enable_llm_insights=False)

        p_values_runs = []
        for _ in range(3):
            result = analyze(golden_dataset, mode="automated", config=config)
            # NaN never compares equal to NaN, so a NaN p-value would make two
            # identical runs look different. Missing values are mapped to None
            # before comparison. Assumes p_value is a scalar -- TODO confirm.
            p_values = tuple(
                None if pd.isna(r.p_value) else r.p_value
                for r in result.statistics
            )
            p_values_runs.append(p_values)

        assert len(set(p_values_runs)) == 1, "P-values differ across runs"

    def test_insight_generation_deterministic(self, golden_dataset):
        """Insight titles are identical across repeated runs."""
        config = AnalysisConfig(mode="automated", enable_llm_insights=False)

        insight_titles_runs = []
        for _ in range(3):
            result = analyze(golden_dataset, mode="automated", config=config)
            # Titles are sorted, so only the set of insights is compared,
            # not the order in which they were emitted.
            titles = sorted(i.title for i in result.insights)
            insight_titles_runs.append(tuple(titles))

        assert len(set(insight_titles_runs)) == 1, "Insights differ across runs"
66
+
67
+
68
class TestGoldenOutput:
    """Regression checks against known-good values for the golden dataset.

    A failure here means a key output drifted from its expected value;
    find out why before changing the expectation.
    """

    def test_golden_dataset_row_count(self, golden_dataset):
        """The summary reports the expected number of rows."""
        summary = analyze(golden_dataset, mode="automated").summary
        assert summary.row_count == 10

    def test_golden_dataset_column_count(self, golden_dataset):
        """The summary reports the expected number of columns."""
        summary = analyze(golden_dataset, mode="automated").summary
        assert summary.column_count == 4

    def test_golden_dataset_numeric_columns(self, golden_dataset):
        """Exactly 'age' and 'income' are detected as numeric."""
        detected = analyze(golden_dataset, mode="automated").summary.numeric_columns

        # Order-insensitive comparison: both sides are sorted first.
        assert sorted(detected) == sorted(['age', 'income'])

    def test_golden_dataset_categorical_columns(self, golden_dataset):
        """Exactly 'education' and 'region' are detected as categorical."""
        detected = analyze(golden_dataset, mode="automated").summary.categorical_columns

        assert sorted(detected) == sorted(['education', 'region'])

    def test_golden_dataset_no_missing(self, golden_dataset):
        """The golden dataset contains no missing cells."""
        summary = analyze(golden_dataset, mode="automated").summary
        assert summary.total_missing_cells == 0

    def test_json_serialization_roundtrip(self, golden_dataset):
        """to_json() followed by from_json() preserves the key fields."""
        from xelytics.schemas.outputs import AnalysisResult

        original = analyze(golden_dataset, mode="automated")

        # Serialize, then rebuild a result object from the JSON text.
        rebuilt = AnalysisResult.from_json(original.to_json())

        # Only the fields below are compared, not the full object.
        assert rebuilt.summary.row_count == original.summary.row_count
        assert rebuilt.summary.column_count == original.summary.column_count
        assert len(rebuilt.statistics) == len(original.statistics)
        assert len(rebuilt.insights) == len(original.insights)
121
+
122
+
123
class TestBackwardCompatibility:
    """Guard the public result schema against breaking changes.

    Existing integrations rely on these attributes and keys being present.
    """

    def test_analysis_result_has_required_fields(self, sample_mixed_df):
        """AnalysisResult exposes every required top-level attribute."""
        analysis = analyze(sample_mixed_df, mode="automated")

        # Required top-level fields (per API_CONTRACT.md)
        for field_name in ('summary', 'statistics', 'visualizations', 'insights', 'metadata'):
            assert hasattr(analysis, field_name)

    def test_dataset_summary_fields(self, sample_mixed_df):
        """DatasetSummary exposes the expected attributes."""
        dataset_summary = analyze(sample_mixed_df, mode="automated").summary

        expected_fields = (
            'row_count',
            'column_count',
            'numeric_columns',
            'categorical_columns',
            'column_profiles',
        )
        for field_name in expected_fields:
            assert hasattr(dataset_summary, field_name)

    def test_statistical_result_fields(self, sample_mixed_df):
        """StatisticalTestResult exposes the expected attributes.

        Nothing is checked when the analysis produced no statistics.
        """
        analysis = analyze(sample_mixed_df, mode="automated")

        if not analysis.statistics:
            return

        first_stat = analysis.statistics[0]
        expected_fields = (
            'test_name',
            'test_type',
            'statistic',
            'p_value',
            'significant',
            'interpretation',
        )
        for field_name in expected_fields:
            assert hasattr(first_stat, field_name)

    def test_json_output_structure(self, sample_mixed_df):
        """to_dict() output carries the expected top-level and summary keys."""
        payload = analyze(sample_mixed_df, mode="automated").to_dict()

        # Top-level keys
        for key in ('summary', 'statistics', 'visualizations', 'insights', 'metadata'):
            assert key in payload

        # Summary keys
        for key in ('row_count', 'column_count'):
            assert key in payload['summary']