PyPI - viewx - Versions diffs - 0.2.3__tar.gz → 0.2.4__tar.gz - Mend

viewx 0.2.3__tar.gz → 0.2.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (51) hide show

{viewx-0.2.3 → viewx-0.2.4}/PKG-INFO +2 -3
{viewx-0.2.3 → viewx-0.2.4}/README.md +1 -1
{viewx-0.2.3 → viewx-0.2.4}/setup.py +2 -3
{viewx-0.2.3 → viewx-0.2.4}/tests/test1.py +14 -16
viewx-0.2.4/tests/test8_slides_auto.py +15 -0
viewx-0.2.4/tests/test9_report_auto.py +22 -0
viewx-0.2.4/viewx/DataMatrix/__init__.py +30 -0
viewx-0.2.4/viewx/DataMatrix/analyzers.py +325 -0
viewx-0.2.4/viewx/DataMatrix/bibliometrics.py +143 -0
viewx-0.2.4/viewx/DataMatrix/datamatrix_engine.py +1348 -0
viewx-0.2.4/viewx/DataMatrix/explorer.py +87 -0
viewx-0.2.4/viewx/DataMatrix/visualizer.py +511 -0
{viewx-0.2.3 → viewx-0.2.4}/viewx/HTML/html_engine.py +408 -167
viewx-0.2.4/viewx/Report/auto_builder.py +187 -0
{viewx-0.2.3 → viewx-0.2.4}/viewx/Report/report_engine.py +30 -1
{viewx-0.2.3 → viewx-0.2.4}/viewx/Slides/__init__.py +1 -0
viewx-0.2.4/viewx/Slides/auto_builder.py +171 -0
{viewx-0.2.3 → viewx-0.2.4}/viewx/Slides/slides_engine.py +125 -49
{viewx-0.2.3 → viewx-0.2.4}/viewx/__init__.py +4 -5
{viewx-0.2.3 → viewx-0.2.4}/viewx/datasets/__init__.py +20 -14
viewx-0.2.4/viewx/shared/__init__.py +20 -0
viewx-0.2.4/viewx/shared/a11y.py +24 -0
viewx-0.2.4/viewx/shared/explorer_runtime.py +523 -0
viewx-0.2.4/viewx/shared/insights.py +121 -0
viewx-0.2.4/viewx/shared/plotly_bundle.py +28 -0
viewx-0.2.4/viewx/shared/runtime.py +237 -0
{viewx-0.2.3 → viewx-0.2.4}/viewx.egg-info/PKG-INFO +2 -3
{viewx-0.2.3 → viewx-0.2.4}/viewx.egg-info/SOURCES.txt +13 -2
{viewx-0.2.3 → viewx-0.2.4}/viewx.egg-info/requires.txt +0 -1
viewx-0.2.3/tests/test2.py +0 -39
viewx-0.2.3/viewx/DataMatrix/__init__.py +0 -3
viewx-0.2.3/viewx/DataMatrix/bibliometrics.py +0 -52
viewx-0.2.3/viewx/DataMatrix/datamatrix_engine.py +0 -184
viewx-0.2.3/viewx/DataMatrix/visualizer.py +0 -75
{viewx-0.2.3 → viewx-0.2.4}/setup.cfg +0 -0
{viewx-0.2.3 → viewx-0.2.4}/tests/test3.py +0 -0
{viewx-0.2.3 → viewx-0.2.4}/tests/test4.py +0 -0
{viewx-0.2.3 → viewx-0.2.4}/tests/test5.py +0 -0
{viewx-0.2.3 → viewx-0.2.4}/tests/test6.py +0 -0
{viewx-0.2.3 → viewx-0.2.4}/tests/test7.py +0 -0
{viewx-0.2.3 → viewx-0.2.4}/viewx/HTML/__init__.py +0 -0
{viewx-0.2.3 → viewx-0.2.4}/viewx/Report/__init__.py +0 -0
{viewx-0.2.3 → viewx-0.2.4}/viewx/Slides/charts.py +0 -0
{viewx-0.2.3 → viewx-0.2.4}/viewx/Slides/components.py +0 -0
{viewx-0.2.3 → viewx-0.2.4}/viewx/datasets/course_completion.csv +0 -0
{viewx-0.2.3 → viewx-0.2.4}/viewx/datasets/iris.csv +0 -0
{viewx-0.2.3 → viewx-0.2.4}/viewx/datasets/penguins.csv +0 -0
{viewx-0.2.3 → viewx-0.2.4}/viewx/datasets/sp500_companies.csv +0 -0
{viewx-0.2.3 → viewx-0.2.4}/viewx/datasets/titanic.csv +0 -0
{viewx-0.2.3 → viewx-0.2.4}/viewx.egg-info/dependency_links.txt +0 -0
{viewx-0.2.3 → viewx-0.2.4}/viewx.egg-info/top_level.txt +0 -0

{viewx-0.2.3 → viewx-0.2.4}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: viewx
-Version: 0.2.3
+Version: 0.2.4
 Summary: Librería de visualización adaptable para HTML, Dashboards y PDFs en Python
 Home-page: https://github.com/GhostAnalyst30/ViewX
 Author: Emmanuel Ascendra Perez
@@ -25,7 +25,6 @@ Requires-Dist: matplotlib>=3.8.0
 Requires-Dist: pylatex>=1.4.2
 Requires-Dist: seaborn>=0.12.2
 Requires-Dist: plotly>=6.0.0
-Requires-Dist: streamlit>=1.32.0
 Provides-Extra: streamlit
 Requires-Dist: streamlit>=1.32.0; extra == "streamlit"
 Provides-Extra: dash
@@ -52,7 +51,7 @@ Dynamic: requires-dist
 Dynamic: requires-python
 Dynamic: summary
-# ViewX — v2.3
+# ViewX — v2.4
 **ViewX** es un paquete moderno de Python diseñado para generar **páginas HTML interactivas**, **dashboards dinámicos** y **visualizaciones inteligentes** que se adaptan automáticamente a los objetos agregados por el usuario.

{viewx-0.2.3 → viewx-0.2.4}/README.md RENAMED Viewed

@@ -1,4 +1,4 @@
-# ViewX — v2.3
+# ViewX — v2.4
 **ViewX** es un paquete moderno de Python diseñado para generar **páginas HTML interactivas**, **dashboards dinámicos** y **visualizaciones inteligentes** que se adaptan automáticamente a los objetos agregados por el usuario.

{viewx-0.2.3 → viewx-0.2.4}/setup.py RENAMED Viewed

@@ -9,7 +9,7 @@ except FileNotFoundError:
 setup(
     name="viewx",
-    version="0.2.3",
+    version="0.2.4",
     author="Emmanuel Ascendra Perez",
     author_email="ascendraemmanuel@gmail.com",
     description="Librería de visualización adaptable para HTML, Dashboards y PDFs en Python",
@@ -47,8 +47,7 @@ setup(
         "matplotlib>=3.8.0",
         "pylatex>=1.4.2",  # Para PDFs
         "seaborn>=0.12.2",
-        "plotly>=6.0.0",
-        "streamlit>=1.32.0"
+        "plotly>=6.0.0"
     ],
     # Dependencias opcionales

{viewx-0.2.3 → viewx-0.2.4}/tests/test1.py RENAMED Viewed

@@ -35,6 +35,7 @@ HTML.auto_generate(
     df,
     title    = "Demo 1 · Auto Layout",
     filename = "demo1_auto.html",
+    show     = False,
 )
 # ════════════════════════════════════════════════════════════════════════════
@@ -49,7 +50,8 @@ HTML.auto_generate(
     authors  = [
         {"name": "Ana García",  "email": "ana@empresa.com"},
         {"name": "Luis Torres", "email": "luis@empresa.com"},
-    ]
+    ],
+    show     = False,
 )
 # ════════════════════════════════════════════════════════════════════════════
@@ -63,6 +65,7 @@ HTML.auto_generate(
     filename = "demo3_kpi_focus.html",
     layout   = "kpi_focus",
     authors  = "Carlos Méndez",
+    show     = False,
 )
 # ════════════════════════════════════════════════════════════════════════════
@@ -75,6 +78,7 @@ HTML.auto_generate(
     title    = "Demo 4 · Chart Focus",
     filename = "demo4_chart_focus.html",
     layout   = "chart_focus",
+    show     = False,
 )
 # ════════════════════════════════════════════════════════════════════════════
@@ -87,14 +91,11 @@ HTML.auto_generate(
     title    = "Demo 5 · Table First",
     filename = "demo5_table_first.html",
     layout   = "table_first",
+    show     = False,
 )
 # ════════════════════════════════════════════════════════════════════════════
 # DEMO 6 — Layout 100% personalizado
-#   Diseño:
-#   [KPI ventas] [KPI utilidad] [KPI unidades] | [Chart barras región]
-#   [Chart línea temporal (ventas)            ] | [Chart scatter        ]
-#   [Tabla completa                                                      ]
 # ════════════════════════════════════════════════════════════════════════════
 HTML.auto_generate(
     df,
@@ -104,19 +105,15 @@ HTML.auto_generate(
     filename = "demo6_custom.html",
     authors  = [{"name": "Equipo BI", "email": "bi@empresa.com"}],
     layout   = [
-        # Fila 1: 3 KPIs a la izquierda + 1 chart a la derecha
         {"type": "kpi",   "index": 0, "row": 1, "col": 1,  "height": 2, "width": 3},
         {"type": "kpi",   "index": 1, "row": 1, "col": 4,  "height": 2, "width": 3},
         {"type": "kpi",   "index": 2, "row": 1, "col": 7,  "height": 2, "width": 3},
-        {"type": "chart", "index": 1, "row": 1, "col": 10, "height": 7, "width": 3},  # barras región
-        # Fila 2: línea temporal grande + scatter
-        {"type": "chart", "index": 0, "row": 3, "col": 1,  "height": 5, "width": 6},  # línea tiempo
-        {"type": "chart", "index": 2, "row": 3, "col": 7,  "height": 5, "width": 3},  # scatter
-        # Fila 3: tabla completa
+        {"type": "chart", "index": 1, "row": 1, "col": 10, "height": 7, "width": 3},
+        {"type": "chart", "index": 0, "row": 3, "col": 1,  "height": 5, "width": 6},
+        {"type": "chart", "index": 2, "row": 3, "col": 7,  "height": 5, "width": 3},
         {"type": "table",             "row": 8, "col": 1,  "height": 4, "width": 12},
-    ]
+    ],
+    show     = False,
 )
 # ════════════════════════════════════════════════════════════════════════════
@@ -135,9 +132,10 @@ HTML.auto_generate(
     template = "corporate_blue",
     title    = "Demo 7 · Parseo Automático de Strings",
     filename = "demo7_parseo.html",
+    show     = False,
 )
-print("\n✅ Todos los dashboards generados:")
+print("\nTodos los dashboards generados:")
 for i, name in enumerate([
     "demo1_auto.html",
     "demo2_cols.html",
@@ -147,4 +145,4 @@ for i, name in enumerate([
     "demo6_custom.html",
     "demo7_parseo.html",
 ], 1):
-    print(f"   {i}. {name}")
+    print(f"   {i}. {name}")

viewx-0.2.4/tests/test8_slides_auto.py ADDED Viewed

@@ -0,0 +1,15 @@
+"""Auto-generated slides from a DataFrame."""
+from viewx.datasets import load_iris
+from viewx.Slides import Presentation
+# Call Presentation.auto_generate on the bundled iris dataset and print
+# whatever the call returns.
+df = load_iris()
+auto_options = {
+    "title": "Iris Dataset Overview",
+    "theme": "ocean",
+    "filename": "output/test8_auto_slides.html",
+    "show": False,
+}
+path = Presentation.auto_generate(df, **auto_options)
+print(f"Presentation.auto_generate -> {path}")

viewx-0.2.4/tests/test9_report_auto.py ADDED Viewed

@@ -0,0 +1,22 @@
+"""Auto-generated PDF quality report from a DataFrame."""
+import shutil
+from viewx.datasets import load_iris
+from viewx import Report
+# Exit with status 0 when no pdflatex executable is found on PATH.
+if shutil.which("pdflatex") is None:
+    print("SKIP: pdflatex not found — install a LaTeX distribution to run this test.")
+    raise SystemExit(0)
+report_options = {
+    "title": "Iris Dataset Quality Report",
+    "author": "ViewX Test",
+    "filename": "test9_auto_report",
+    "outdir": "output",
+    "include_plots": True,
+}
+df = load_iris()
+# Call Report.auto_generate on the iris data and print whatever it returns.
+path = Report.auto_generate(df, **report_options)
+print(f"Report.auto_generate -> {path}")

viewx-0.2.4/viewx/DataMatrix/__init__.py ADDED Viewed

@@ -0,0 +1,30 @@
+from .analyzers import (
+    AnalyzerEngine,
+    BooleanStrategy,
+    CategoricalStrategy,
+    ColumnProfile,
+    ColumnTypeStrategy,
+    DatasetReport,
+    DateTimeStrategy,
+    NumericStrategy,
+)
+from .bibliometrics import BibliometricsAnalyzer
+from .datamatrix_engine import DataMatrix, ReportTheme
+from .explorer import build_explorer_payload
+from .visualizer import Visualizer
+# Public names of the viewx.DataMatrix package: the list below is what
+# `from viewx.DataMatrix import *` exposes. Every entry is imported above
+# from one of the package's submodules.
+__all__ = [
+    "DataMatrix",
+    "ReportTheme",
+    "AnalyzerEngine",
+    "Visualizer",
+    "BibliometricsAnalyzer",
+    "DatasetReport",
+    "ColumnProfile",
+    "ColumnTypeStrategy",
+    "NumericStrategy",
+    "CategoricalStrategy",
+    "DateTimeStrategy",
+    "BooleanStrategy",
+    "build_explorer_payload",
+]

viewx-0.2.4/viewx/DataMatrix/analyzers.py ADDED Viewed

@@ -0,0 +1,325 @@
+from __future__ import annotations
+from abc import ABC, abstractmethod
+from dataclasses import dataclass, field
+from typing import Dict, List, Optional, Tuple
+import numpy as np
+import pandas as pd
+@dataclass
+class ColumnProfile:
+    """Summary of a single column, built by a ColumnTypeStrategy.analyze() call.
+
+    The first eight fields are filled in by every strategy in this module.
+    The optional statistics default to None and are only set by the
+    strategies that compute them.
+    """
+    # Identification: column label, str(series.dtype) and the strategy's type
+    # label ("numeric", "categorical", "datetime" or "boolean").
+    name: str
+    dtype: str
+    inferred_type: str
+    # Counts taken from series.nunique() and series.isna().sum().
+    n_unique: int
+    n_missing: int
+    # Missing share expressed as a percentage of the n_total passed to analyze().
+    p_missing: float
+    # Derived from n_unique by each strategy.
+    is_constant: bool
+    # n_unique / n_total, or 0 when n_total is 0.
+    cardinality_ratio: float
+    # Shape statistics; only NumericStrategy sets these.
+    skewness: Optional[float] = None
+    kurtosis: Optional[float] = None
+    # Set by NumericStrategy from the non-missing values. DateTimeStrategy stores
+    # the mean/min/max of the *year* component here, and BooleanStrategy stores
+    # the share of truthy values in `mean`.
+    mean: Optional[float] = None
+    std: Optional[float] = None
+    min: Optional[float] = None
+    max: Optional[float] = None
+    median: Optional[float] = None
+    # Quartiles and interquartile range (q3 - q1); only NumericStrategy sets these.
+    q1: Optional[float] = None
+    q3: Optional[float] = None
+    iqr: Optional[float] = None
+    # Value -> count. CategoricalStrategy stores the five most frequent values
+    # (keys converted with str()); BooleanStrategy stores "True"/"False" counts.
+    top_values: Dict = field(default_factory=dict)
+    # Number of values outside [q1 - 1.5*iqr, q3 + 1.5*iqr]; NumericStrategy only.
+    outliers: int = 0
+    # Human-readable warnings about this column.
+    alerts: List[str] = field(default_factory=list)
+@dataclass
+class DatasetReport:
+    """Dataset-level summary returned by AnalyzerEngine.analyze_dataset()."""
+    # Shape of the analysed DataFrame.
+    n_rows: int
+    n_cols: int
+    # Count of rows flagged by DataFrame.duplicated().
+    n_duplicates: int
+    # Sum of the per-column missing counts, and that sum as a percentage of
+    # n_rows * n_cols (0 when the frame has no cells).
+    n_missing_total: int
+    p_missing_total: float
+    # Deep memory usage formatted with a KB / MB / GB suffix.
+    memory_usage: str
+    # Row count as text, abbreviated with a K or M suffix above 1,000 / 1,000,000.
+    estimated_rows: str
+    # Column name -> profile, one entry per DataFrame column.
+    column_profiles: Dict[str, ColumnProfile]
+    # (column_a, column_b, r) tuples for numeric column pairs, strongest first.
+    correlation_pairs: List[Tuple[str, str, float]]
+    # Every column alert plus the dataset-level duplicate-row alert.
+    alerts: List[str]
+    # Column names grouped by the inferred type of their profile.
+    categorical_columns: List[str]
+    numeric_columns: List[str]
+    datetime_columns: List[str]
+    boolean_columns: List[str]
+class ColumnTypeStrategy(ABC):
+    """Abstract interface implemented by the per-type column profilers."""
+    @abstractmethod
+    def infer(self, series: pd.Series) -> str:
+        """Return the type label this strategy assigns to *series*."""
+        ...
+    @abstractmethod
+    def analyze(self, series: pd.Series, col: str, n_total: int) -> ColumnProfile:
+        """Build a ColumnProfile for one column.
+
+        Args:
+            series: The column's values.
+            col: Column name, used in the profile and in alert messages.
+            n_total: Row count used as the denominator for percentages and ratios.
+        """
+        ...
+class NumericStrategy(ColumnTypeStrategy):
+    """Profile numeric columns: descriptive statistics, IQR outliers and alerts."""
+    def infer(self, series: pd.Series) -> str:
+        """Return the fixed label "numeric"; *series* is not inspected."""
+        return "numeric"
+    def analyze(self, series: pd.Series, col: str, n_total: int) -> ColumnProfile:
+        """Summarise a numeric column.
+
+        Args:
+            series: Column values; missing entries are excluded from the statistics.
+            col: Column name, used in the profile and in alert messages.
+            n_total: Row count used as the denominator for percentages and ratios.
+
+        Returns:
+            A ColumnProfile whose inferred_type is "numeric".
+        """
+        s = series.dropna()
+        # Plain ints keep numpy scalars out of the profile.
+        n_missing = int(series.isna().sum())
+        n_unique = int(series.nunique())
+        # Same zero guard as cardinality_ratio: an empty dataset has no rows to
+        # be missing from, and dividing by zero would yield NaN or raise.
+        p_missing = (n_missing / n_total) * 100 if n_total > 0 else 0.0
+        profile = ColumnProfile(
+            name=col,
+            dtype=str(series.dtype),
+            inferred_type="numeric",
+            n_unique=n_unique,
+            n_missing=n_missing,
+            p_missing=p_missing,
+            # "<= 1" matches the other strategies, so an all-missing column is
+            # treated as constant and reported with the "N/A" placeholder below.
+            is_constant=n_unique <= 1,
+            cardinality_ratio=n_unique / n_total if n_total > 0 else 0,
+        )
+        if len(s) > 0:
+            profile.mean = float(s.mean())
+            profile.std = float(s.std())
+            profile.min = float(s.min())
+            profile.max = float(s.max())
+            profile.median = float(s.median())
+            profile.q1 = float(s.quantile(0.25))
+            profile.q3 = float(s.quantile(0.75))
+            profile.iqr = profile.q3 - profile.q1
+            # Skewness and kurtosis are reported as 0.0 for fewer than three values.
+            profile.skewness = float(s.skew()) if len(s) > 2 else 0.0
+            profile.kurtosis = float(s.kurtosis()) if len(s) > 2 else 0.0
+            # Outliers are values beyond 1.5 * IQR from the quartiles; a zero
+            # (or NaN) IQR skips the check and leaves the count at 0.
+            if profile.iqr > 0:
+                lower = profile.q1 - 1.5 * profile.iqr
+                upper = profile.q3 + 1.5 * profile.iqr
+                profile.outliers = int(((s < lower) | (s > upper)).sum())
+        if p_missing > 50:
+            profile.alerts.append(f"Column '{col}': {p_missing:.1f}% missing values")
+        if profile.is_constant:
+            profile.alerts.append(f"Column '{col}': constant value ({s.iloc[0] if len(s) > 0 else 'N/A'})")
+        if profile.skewness is not None and abs(profile.skewness) > 2:
+            profile.alerts.append(f"Column '{col}': high skewness ({profile.skewness:.2f})")
+        if profile.outliers > 0:
+            profile.alerts.append(f"Column '{col}': {profile.outliers} outliers detected")
+        return profile
+class CategoricalStrategy(ColumnTypeStrategy):
+    """Profile categorical columns: cardinality, most frequent values and alerts."""
+    def infer(self, series: pd.Series) -> str:
+        """Return the fixed label "categorical"; *series* is not inspected."""
+        return "categorical"
+    def analyze(self, series: pd.Series, col: str, n_total: int) -> ColumnProfile:
+        """Summarise a categorical column.
+
+        Args:
+            series: Column values; missing entries are excluded from the value counts.
+            col: Column name, used in the profile and in alert messages.
+            n_total: Row count used as the denominator for percentages and ratios.
+
+        Returns:
+            A ColumnProfile whose inferred_type is "categorical" and whose
+            top_values holds the five most frequent values.
+        """
+        s = series.dropna()
+        # Plain ints keep numpy scalars out of the profile.
+        n_missing = int(series.isna().sum())
+        n_unique = int(series.nunique())
+        # Same zero guard as cardinality_ratio: avoid dividing by an empty row count.
+        p_missing = (n_missing / n_total) * 100 if n_total > 0 else 0.0
+        top_values = {}
+        if len(s) > 0:
+            # Keys are stringified so the mapping is uniform whatever the value type.
+            top_values = {str(k): int(v) for k, v in s.value_counts().head(5).items()}
+        profile = ColumnProfile(
+            name=col,
+            dtype=str(series.dtype),
+            inferred_type="categorical",
+            n_unique=n_unique,
+            n_missing=n_missing,
+            p_missing=p_missing,
+            is_constant=n_unique <= 1,
+            cardinality_ratio=n_unique / n_total if n_total > 0 else 0,
+            top_values=top_values,
+        )
+        if p_missing > 50:
+            profile.alerts.append(f"Column '{col}': {p_missing:.1f}% missing values")
+        if profile.is_constant:
+            profile.alerts.append(f"Column '{col}': constant value")
+        # Without the n_total guard an empty dataset (0 == 0) would be flagged
+        # as an ID column.
+        if n_total > 0 and n_unique == n_total:
+            profile.alerts.append(f"Column '{col}': all values unique (possible ID)")
+        return profile
+class DateTimeStrategy(ColumnTypeStrategy):
+    """Profile datetime columns: counts plus the range of the year component."""
+    def infer(self, series: pd.Series) -> str:
+        """Return the fixed label "datetime"; *series* is not inspected."""
+        return "datetime"
+    def analyze(self, series: pd.Series, col: str, n_total: int) -> ColumnProfile:
+        """Summarise a datetime column.
+
+        Args:
+            series: Column values; missing entries are excluded from the year statistics.
+            col: Column name, used in the profile and in alert messages.
+            n_total: Row count used as the denominator for percentages and ratios.
+
+        Returns:
+            A ColumnProfile whose inferred_type is "datetime". Its min, max and
+            mean hold statistics of the *year* component, not timestamps.
+        """
+        s = series.dropna()
+        # Plain ints keep numpy scalars out of the profile.
+        n_missing = int(series.isna().sum())
+        n_unique = int(series.nunique())
+        # Same zero guard as cardinality_ratio: avoid dividing by an empty row count.
+        p_missing = (n_missing / n_total) * 100 if n_total > 0 else 0.0
+        profile = ColumnProfile(
+            name=col,
+            dtype=str(series.dtype),
+            inferred_type="datetime",
+            n_unique=n_unique,
+            n_missing=n_missing,
+            p_missing=p_missing,
+            is_constant=n_unique <= 1,
+            cardinality_ratio=n_unique / n_total if n_total > 0 else 0,
+        )
+        if len(s) > 0:
+            try:
+                years = s.dt.year
+                profile.min = float(years.min())
+                profile.max = float(years.max())
+                profile.mean = float(years.mean())
+            except (AttributeError, TypeError, ValueError):
+                # Best effort: a series without a usable .dt accessor simply
+                # keeps the year statistics at None. Unrelated errors are no
+                # longer swallowed.
+                pass
+        if p_missing > 50:
+            profile.alerts.append(f"Column '{col}': {p_missing:.1f}% missing values")
+        return profile
+class BooleanStrategy(ColumnTypeStrategy):
+    """Profile boolean columns: True/False counts and the share of True values."""
+    def infer(self, series: pd.Series) -> str:
+        """Return the fixed label "boolean"; *series* is not inspected."""
+        return "boolean"
+    def analyze(self, series: pd.Series, col: str, n_total: int) -> ColumnProfile:
+        """Summarise a boolean column.
+
+        Args:
+            series: Column values; missing entries are excluded from the counts.
+            col: Column name, used in the profile and in alert messages.
+            n_total: Row count used as the denominator for percentages and ratios.
+
+        Returns:
+            A ColumnProfile whose inferred_type is "boolean". Its mean is the
+            share of truthy values among the non-missing entries, and its
+            top_values holds the "True" and "False" counts.
+        """
+        s = series.dropna()
+        n_present = len(s)
+        # Plain ints keep numpy scalars out of the profile.
+        n_missing = int(series.isna().sum())
+        n_unique = int(series.nunique())
+        # Same zero guard as cardinality_ratio: avoid dividing by an empty row count.
+        p_missing = (n_missing / n_total) * 100 if n_total > 0 else 0.0
+        true_count = int(s.astype(bool).sum()) if n_present > 0 else 0
+        profile = ColumnProfile(
+            name=col,
+            dtype=str(series.dtype),
+            inferred_type="boolean",
+            n_unique=n_unique,
+            n_missing=n_missing,
+            p_missing=p_missing,
+            is_constant=n_unique <= 1,
+            cardinality_ratio=n_unique / n_total if n_total > 0 else 0,
+            mean=float(true_count / n_present) if n_present > 0 else 0,
+            top_values={"True": true_count, "False": n_present - true_count} if n_present > 0 else {},
+        )
+        if p_missing > 50:
+            profile.alerts.append(f"Column '{col}': {p_missing:.1f}% missing values")
+        return profile
+class AnalyzerEngine:
+    """Profile every column of a DataFrame and assemble a DatasetReport."""
+    def __init__(self):
+        """Register one strategy instance per supported type label."""
+        self.strategies: Dict[str, ColumnTypeStrategy] = {
+            "numeric": NumericStrategy(),
+            "categorical": CategoricalStrategy(),
+            "datetime": DateTimeStrategy(),
+            "boolean": BooleanStrategy(),
+        }
+    def infer_column_type(self, series: pd.Series) -> str:
+        """Map the dtype of *series* to one of the strategy keys.
+
+        Anything that is not datetime, boolean or numeric is treated as categorical.
+        """
+        if pd.api.types.is_datetime64_any_dtype(series):
+            return "datetime"
+        # Tested before the numeric check because pandas reports bool as numeric.
+        if pd.api.types.is_bool_dtype(series):
+            return "boolean"
+        if pd.api.types.is_numeric_dtype(series):
+            return "numeric"
+        return "categorical"
+    def analyze_column(self, series: pd.Series, col: str, n_total: int) -> ColumnProfile:
+        """Profile one column with the strategy registered for its inferred type."""
+        strategy = self.strategies[self.infer_column_type(series)]
+        return strategy.analyze(series, col, n_total)
+    def analyze_dataset(self, df: pd.DataFrame) -> DatasetReport:
+        """Profile all columns of *df* and collect dataset-level statistics.
+
+        Returns:
+            A DatasetReport with the per-column profiles, duplicate and missing
+            counts, formatted memory and row figures, correlated numeric pairs
+            and the combined alert list.
+        """
+        n_rows = len(df)
+        profiles: Dict[str, ColumnProfile] = {}
+        all_alerts: List[str] = []
+        # One bucket per type label; a profile with any other label is not listed.
+        columns_by_type: Dict[str, List[str]] = {
+            "numeric": [],
+            "categorical": [],
+            "datetime": [],
+            "boolean": [],
+        }
+        for col in df.columns:
+            profile = self.analyze_column(df[col], col, n_rows)
+            profiles[col] = profile
+            all_alerts.extend(profile.alerts)
+            bucket = columns_by_type.get(profile.inferred_type)
+            if bucket is not None:
+                bucket.append(col)
+        n_duplicates = int(df.duplicated().sum())
+        if n_duplicates > 0:
+            all_alerts.append(f"Found {n_duplicates} duplicate rows")
+        correlation_pairs = self._find_correlations(df, columns_by_type["numeric"])
+        mem_bytes = df.memory_usage(deep=True).sum()
+        if mem_bytes > 1e9:
+            memory_usage = f"{mem_bytes / 1e9:.2f} GB"
+        elif mem_bytes > 1e6:
+            memory_usage = f"{mem_bytes / 1e6:.2f} MB"
+        else:
+            memory_usage = f"{mem_bytes / 1e3:.1f} KB"
+        if n_rows > 1_000_000:
+            estimated_rows = f"{n_rows / 1_000_000:.1f}M"
+        elif n_rows > 1_000:
+            estimated_rows = f"{n_rows / 1_000:.1f}K"
+        else:
+            estimated_rows = str(n_rows)
+        # int() keeps the total a plain int even if a profile carries a numpy count.
+        n_missing_total = int(sum(p.n_missing for p in profiles.values()))
+        total_cells = n_rows * len(df.columns)
+        p_missing_total = (n_missing_total / total_cells) * 100 if total_cells > 0 else 0
+        return DatasetReport(
+            n_rows=n_rows,
+            n_cols=len(df.columns),
+            n_duplicates=n_duplicates,
+            n_missing_total=n_missing_total,
+            p_missing_total=p_missing_total,
+            memory_usage=memory_usage,
+            estimated_rows=estimated_rows,
+            column_profiles=profiles,
+            correlation_pairs=correlation_pairs,
+            alerts=all_alerts,
+            categorical_columns=columns_by_type["categorical"],
+            numeric_columns=columns_by_type["numeric"],
+            datetime_columns=columns_by_type["datetime"],
+            boolean_columns=columns_by_type["boolean"],
+        )
+    def _find_correlations(
+        self, df: pd.DataFrame, numeric_cols: List[str], threshold: float = 0.3
+    ) -> List[Tuple[str, str, float]]:
+        """Return the strongest correlated pairs among *numeric_cols*.
+
+        Args:
+            df: Source data.
+            numeric_cols: Columns to correlate; fewer than two yields an empty list.
+            threshold: Minimum absolute correlation for a pair to be kept.
+
+        Returns:
+            Up to ten (column_a, column_b, r) tuples with signed r, ordered by
+            decreasing absolute correlation.
+        """
+        if len(numeric_cols) < 2:
+            return []
+        # Compute the signed matrix once and read every pair from it, rather
+        # than re-running corr() on a two-column frame for each hit.
+        corr_matrix = df[numeric_cols].corr()
+        pairs = []
+        for i, col_a in enumerate(numeric_cols):
+            for j in range(i + 1, len(numeric_cols)):
+                r = corr_matrix.iloc[i, j]
+                # A NaN correlation fails this comparison and is skipped.
+                if abs(r) >= threshold:
+                    pairs.append((col_a, numeric_cols[j], float(r)))
+        pairs.sort(key=lambda x: abs(x[2]), reverse=True)
+        return pairs[:10]

viewx-0.2.4/viewx/DataMatrix/bibliometrics.py ADDED Viewed

@@ -0,0 +1,143 @@
+from __future__ import annotations
+from collections import Counter
+from typing import Dict, List, Optional, Tuple
+import pandas as pd
+class BibliometricsAnalyzer:
+    """Compute publication, author, source, keyword and citation summaries.
+
+    Columns are located through `column_map`, which lists the accepted header
+    spellings for each two-letter field tag.
+    """
+    def __init__(self):
+        """Set up the field-tag -> accepted column headers mapping."""
+        self.column_map = {
+            "AU": ["Authors", "AU", "Author", "Autores", "AUTHOR", "authors"],
+            "PY": ["Year", "PY", "Publication Year", "Año", "YEAR", "year"],
+            "SO": [
+                "Source title", "SO", "Journal", "Source", "Revista",
+                "SOURCE", "source", "Publication Name",
+            ],
+            "DE": [
+                "Author Keywords", "DE", "Keywords", "Palabras Clave",
+                "KEYWORDS", "keywords", "Index Keywords",
+            ],
+            "TC": [
+                "Cited by", "TC", "Times Cited", "Citas",
+                "CITING", "citations", "Citations",
+            ],
+            "TI": ["Title", "TI", "TITLE", "Article Title", "title"],
+            "DI": ["DOI", "DI", "DOI Number", "doi"],
+            "AF": [
+                "Affiliation", "AF", "AFFILIATION", "affiliation",
+                "Author Affiliation", "Author Affiliations",
+            ],
+            "AB": ["Abstract", "AB", "ABSTRACT", "abstract"],
+            "DT": ["Document Type", "DT", "document_type"],
+        }
+    @staticmethod
+    def _normalize_label(label: object) -> str:
+        """Return *label* as trimmed upper-case text with spaces turned into underscores."""
+        return str(label).strip().upper().replace(" ", "_")
+    def _find_column(self, df: pd.DataFrame, key: str) -> Optional[str]:
+        """Return the first column of *df* matching an alias of field tag *key*.
+
+        Matching ignores case and surrounding whitespace and treats spaces and
+        underscores as the same character. Labels are compared through str(),
+        so frames with non-string column labels do not raise.
+
+        Raises:
+            KeyError: If *key* is not a tag in `column_map`.
+        """
+        wanted = {self._normalize_label(alias) for alias in self.column_map[key]}
+        for col in df.columns:
+            if self._normalize_label(col) in wanted:
+                return col
+        return None
+    def _split_multi(self, raw: str, separators: str = ";,", strip_parens: bool = True) -> List[str]:
+        """Split a delimited field into cleaned items.
+
+        Args:
+            raw: Field text; anything that is not a non-blank string yields [].
+            separators: Characters that each act as a delimiter.
+            strip_parens: Drop everything from the first "(" of an item onward.
+
+        Returns:
+            Items with surrounding whitespace and dots removed; items of one
+            character or less are discarded. Whitespace inside an item is kept,
+            so multi-word entries stay whole.
+        """
+        if not isinstance(raw, str) or not raw.strip():
+            return []
+        if separators:
+            # Map every separator onto the first one so a single split suffices.
+            primary = separators[0]
+            unified = raw.translate(str.maketrans({ch: primary for ch in separators}))
+            pieces = unified.split(primary)
+        else:
+            pieces = [raw]
+        result = []
+        for item in pieces:
+            item = item.strip().strip(".").strip()
+            if strip_parens:
+                item = item.split("(")[0].strip()
+            if len(item) > 1:
+                result.append(item)
+        return result
+    def _split_list_field(self, raw: str) -> List[str]:
+        """Split *raw* on ";" and "," and return the non-empty, trimmed items."""
+        if not isinstance(raw, str) or not raw.strip():
+            return []
+        items = [x.strip() for x in raw.replace(";", ",").split(",")]
+        return [x for x in items if x]
+    def _count_list_items(self, df: pd.DataFrame, column: str) -> Counter:
+        """Count the items of a delimited text column over all non-missing rows."""
+        counts: Counter = Counter()
+        for entry in df[column].dropna():
+            counts.update(self._split_list_field(str(entry)))
+        return counts
+    @staticmethod
+    def _ranking(counts: Counter, label: str, limit: int = 20) -> pd.DataFrame:
+        """Return the *limit* most common items as a two-column frame (label, "Count")."""
+        # most_common() already orders by decreasing count, so no re-sort is needed.
+        return pd.DataFrame(counts.most_common(limit), columns=[label, "Count"])
+    def analyze(self, df: pd.DataFrame) -> Optional[dict]:
+        """Build every summary whose source column is present in *df*.
+
+        Returns:
+            A dict that may contain the DataFrames "annual_production",
+            "top_authors", "top_sources" and "top_keywords" and the plain dicts
+            "author_summary" and "citation_summary"; None when nothing could
+            be computed.
+        """
+        results: Dict[str, object] = {}
+        py_col = self._find_column(df, "PY")
+        if py_col is not None:
+            # Non-numeric year cells are coerced to NaN and dropped.
+            year_numeric = pd.to_numeric(df[py_col].dropna(), errors="coerce").dropna()
+            if len(year_numeric) > 0:
+                prod = year_numeric.value_counts().sort_index().reset_index()
+                prod.columns = ["Year", "Count"]
+                prod["Year"] = prod["Year"].astype(int)
+                results["annual_production"] = prod
+        au_col = self._find_column(df, "AU")
+        if au_col is not None:
+            au_counts = self._count_list_items(df, au_col)
+            if au_counts:
+                results["top_authors"] = self._ranking(au_counts, "Author")
+                total = sum(au_counts.values())
+                results["author_summary"] = {
+                    "total_authors": total,
+                    "unique_authors": len(au_counts),
+                    # max(..., 1) avoids dividing by zero.
+                    "avg_per_publication": round(total / max(len(df), 1), 2),
+                }
+        so_col = self._find_column(df, "SO")
+        if so_col is not None:
+            so_counts = df[so_col].dropna().value_counts().head(15)
+            if len(so_counts) > 0:
+                so_df = so_counts.reset_index()
+                so_df.columns = ["Source", "Count"]
+                results["top_sources"] = so_df
+        de_col = self._find_column(df, "DE")
+        if de_col is not None:
+            kw_counts = self._count_list_items(df, de_col)
+            if kw_counts:
+                results["top_keywords"] = self._ranking(kw_counts, "Keyword")
+        tc_col = self._find_column(df, "TC")
+        if tc_col is not None:
+            tc_series = pd.to_numeric(df[tc_col], errors="coerce").dropna()
+            if len(tc_series) > 0:
+                results["citation_summary"] = {
+                    "total_citations": int(tc_series.sum()),
+                    "mean_citations": round(float(tc_series.mean()), 2),
+                    "median_citations": round(float(tc_series.median()), 2),
+                    "max_citations": int(tc_series.max()),
+                    "min_citations": int(tc_series.min()),
+                }
+        return results if results else None

viewx 0.2.3__tar.gz → 0.2.4__tar.gz

viewx 0.2.3__tar.gz → 0.2.4__tar.gz