streamlit-launcher 2.2.5__py3-none-any.whl → 2.3.8__py3-none-any.whl
This diff compares the content of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in their respective public registries.
- streamlit_launcher/dashboard.py +3017 -704
- {streamlit_launcher-2.2.5.dist-info → streamlit_launcher-2.3.8.dist-info}/METADATA +2 -1
- streamlit_launcher-2.3.8.dist-info/RECORD +11 -0
- streamlit_launcher-2.3.8.dist-info/licenses/license.txt +18 -0
- streamlit_launcher-2.2.5.dist-info/RECORD +0 -10
- {streamlit_launcher-2.2.5.dist-info → streamlit_launcher-2.3.8.dist-info}/WHEEL +0 -0
- {streamlit_launcher-2.2.5.dist-info → streamlit_launcher-2.3.8.dist-info}/entry_points.txt +0 -0
- {streamlit_launcher-2.2.5.dist-info → streamlit_launcher-2.3.8.dist-info}/licenses/LICENSE +0 -0
- {streamlit_launcher-2.2.5.dist-info → streamlit_launcher-2.3.8.dist-info}/top_level.txt +0 -0
streamlit_launcher/dashboard.py
CHANGED
@@ -34,6 +34,39 @@ from sklearn.preprocessing import StandardScaler, LabelEncoder
 from sklearn.metrics import mean_squared_error, r2_score, accuracy_score, classification_report, confusion_matrix
 from sklearn.inspection import permutation_importance
 from scipy.stats import gaussian_kde
+import streamlit.components.v1 as components
+import tensorflow as tf
+import numpy as np
+import pandas as pd
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import StandardScaler, LabelEncoder
+from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
+from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
+from sklearn.metrics import confusion_matrix, classification_report
+import plotly.express as px
+import plotly.graph_objects as go
+from plotly.subplots import make_subplots
+import xgboost as xgb
+from sklearn.ensemble import GradientBoostingRegressor, GradientBoostingClassifier
+from sklearn.feature_selection import mutual_info_regression, mutual_info_classif
+import plotly.graph_objects as go
+from plotly.subplots import make_subplots
+import time
+import warnings
+warnings.filterwarnings('ignore')
+from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
+from sklearn.ensemble import GradientBoostingRegressor, GradientBoostingClassifier
+from xgboost import XGBRegressor, XGBClassifier
+from sklearn.ensemble import VotingRegressor, VotingClassifier
+from sklearn.ensemble import StackingRegressor, StackingClassifier
+from sklearn.model_selection import cross_validate, GridSearchCV
+from sklearn.metrics import get_scorer
+from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
+from sklearn.compose import ColumnTransformer
+from sklearn.impute import SimpleImputer
+from sklearn.pipeline import Pipeline
+import keras
+
 
 # Konfigurasi untuk performa
 plt.style.use('default')
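
Note: several entries in the new import block are repeated (plotly.graph_objects, plotly.subplots.make_subplots, the GradientBoosting estimators, StandardScaler/LabelEncoder), and some duplicate imports that already exist earlier in the module. Python caches modules in sys.modules, so the duplicates are harmless at runtime, but the block could be consolidated. A minimal deduplicated sketch (illustrative only, not part of the package):

    import time
    import warnings

    import numpy as np
    import pandas as pd
    import plotly.express as px
    import plotly.graph_objects as go
    import tensorflow as tf
    import xgboost as xgb
    from plotly.subplots import make_subplots
    from sklearn.ensemble import (GradientBoostingClassifier, GradientBoostingRegressor,
                                  RandomForestClassifier, RandomForestRegressor)
    from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler

    warnings.filterwarnings('ignore')  # the diff silences all warnings globally
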
@@ -6434,8 +6467,13 @@ def create_ml_dl_analysis_dashboard(df, numeric_cols, non_numeric_cols):
     """
     Dashboard komprehensif untuk analisis Machine Learning dan Deep Learning
     """
-
-
+    st.markdown("""
+    <div style='text-align: center; padding: 10px; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+                border-radius: 10px; margin: 10px 0;'>
+        <h3 style='color: white; margin: 0;'>🧠 dwibaktindev AI</h3>
+        <p style='color: white; margin: 0;'>Sasha • Alisa • dwibaktindev Models</p>
+    </div>
+    """, unsafe_allow_html=True)
 
     # Deteksi tipe data
     data_size = len(df)
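
Note: the banner above is injected with unsafe_allow_html=True, which renders arbitrary HTML directly in the page. The same release adds `import streamlit.components.v1 as components`, which can render such blocks inside a sandboxed iframe instead; a sketch of that alternative (same markup, hypothetical height value):

    import streamlit.components.v1 as components

    banner_html = """
    <div style='text-align: center; padding: 10px;
                background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
                border-radius: 10px;'>
        <h3 style='color: white; margin: 0;'>🧠 dwibaktindev AI</h3>
    </div>
    """
    components.html(banner_html, height=90)  # rendered inside a sandboxed iframe
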
@@ -6873,10 +6911,25 @@ def create_outlier_analysis(df, numeric_cols):
     st.plotly_chart(fig, use_container_width=True)
 
 def machine_learning_analysis(df, numeric_cols, non_numeric_cols):
-    """Analisis Machine Learning"""
+    """Analisis Machine Learning dengan Optimasi untuk Dataset Besar"""
 
     st.header("🤖 Machine Learning Analysis")
 
+    # Informasi dataset
+    st.subheader("📊 Dataset Info")
+    col1, col2, col3 = st.columns(3)
+    with col1:
+        st.metric("Total Rows", f"{len(df):,}")
+    with col2:
+        st.metric("Total Columns", f"{len(df.columns):,}")
+    with col3:
+        st.metric("Memory Usage", f"{df.memory_usage(deep=True).sum() / 1024**2:.2f} MB")
+
+    # Optimasi memory usage
+    if st.checkbox("Optimize Memory Usage", value=True):
+        df = optimize_memory_usage(df)
+        st.success("Memory usage optimized!")
+
     # Preprocessing
     st.subheader("🔧 Data Preprocessing")
 
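
Note: the "Memory Usage" metric and the optimize_memory_usage helper (defined later in this diff) rely on pandas dtype downcasting. A self-contained sketch of the measurement and its effect, using pandas' built-in downcast option rather than the package's manual np.iinfo/np.finfo checks:

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({"a": np.arange(1_000_000, dtype="int64"),
                       "b": np.random.rand(1_000_000)})
    before = df.memory_usage(deep=True).sum() / 1024**2  # MB, as in the dashboard metric
    df["a"] = pd.to_numeric(df["a"], downcast="integer")  # int64 -> int32 here
    df["b"] = pd.to_numeric(df["b"], downcast="float")    # float64 -> float32
    after = df.memory_usage(deep=True).sum() / 1024**2
    print(f"{before:.2f} MB -> {after:.2f} MB")
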
@@ -6892,162 +6945,407 @@ def machine_learning_analysis(df, numeric_cols, non_numeric_cols):
 
         problem_type = st.selectbox(
             "Jenis Problem",
-            ["Regression", "Classification"],
+            ["Regression", "Classification", "Auto Detect"],
             key="ml_problem_type"
         )
+
+        # Auto detect problem type
+        if problem_type == "Auto Detect":
+            if target_variable in numeric_cols:
+                problem_type = "Regression"
+            else:
+                problem_type = "Classification"
+            st.info(f"Auto-detected: {problem_type}")
 
     with col2:
         test_size = st.slider("Test Size Ratio", 0.1, 0.5, 0.2, 0.05, key="ml_test_size")
         random_state = st.number_input("Random State", value=42, key="ml_random_state")
+
+        # Sampling untuk dataset besar
+        sample_size = st.slider("Sample Size (untuk dataset besar)",
+                                min_value=1000,
+                                max_value=min(50000, len(df)),
+                                value=min(10000, len(df)),
+                                step=1000,
+                                key="ml_sample_size")
 
-    # Feature selection
+    # Feature selection dengan advanced options
     st.subheader("🎯 Feature Selection")
+
     available_features = [f for f in numeric_cols + non_numeric_cols if f != target_variable]
-    selected_features = st.multiselect(
-        "Pilih Features untuk Model",
-        available_features,
-        default=available_features[:min(10, len(available_features))],
-        key="ml_features_select"
-    )
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    if problem_type == "Classification" and y.dtype == 'object':
-        le_target = LabelEncoder()
-        y = le_target.fit_transform(y.astype(str))
-
-    # Handle missing values
-    X = X.fillna(X.mean(numeric_only=True))
-
-    # Split data
-    X_train, X_test, y_train, y_test = train_test_split(
-        X, y, test_size=test_size, random_state=random_state, stratify=y if problem_type == "Classification" else None
+    col1, col2 = st.columns([2, 1])
+
+    with col1:
+        feature_selection_method = st.radio(
+            "Feature Selection Method",
+            ["Manual Selection", "Auto Select Top Features"],
+            key="feature_selection_method"
+        )
+
+        if feature_selection_method == "Manual Selection":
+            selected_features = st.multiselect(
+                "Pilih Features untuk Model",
+                available_features,
+                default=available_features[:min(10, len(available_features))],
+                key="ml_features_select"
             )
+        else:
+            top_k = st.slider("Number of Top Features", 5, 50, 15, key="top_k_features")
+            selected_features = available_features[:top_k]
+            st.info(f"Auto-selected top {top_k} features")
+
+    with col2:
+        # Advanced options
+        st.write("**Advanced Options:**")
+        use_feature_engineering = st.checkbox("Feature Engineering", value=False)
+        remove_high_correlation = st.checkbox("Remove High Correlation", value=True)
+        correlation_threshold = st.slider("Correlation Threshold", 0.7, 0.99, 0.9, 0.01)
+
+    if not target_variable or not selected_features:
+        st.warning("Pilih target variable dan features terlebih dahulu")
+        return
+
+    try:
+        # Sampling untuk dataset besar
+        if len(df) > sample_size:
+            st.info(f"Using sample of {sample_size} records for faster processing")
+            df_sampled = df.sample(n=sample_size, random_state=random_state)
+        else:
+            df_sampled = df
+
+        # Progress tracking
+        progress_bar = st.progress(0)
+        status_text = st.empty()
+
+        # Prepare data
+        status_text.text("Preparing data...")
+        X = df_sampled[selected_features].copy()
+        y = df_sampled[target_variable]
+        progress_bar.progress(20)
+
+        # Handle large dataset - incremental processing
+        chunk_size = min(1000, len(X))
+
+        # Encode categorical features
+        status_text.text("Encoding categorical features...")
+        le_dict = {}
+        categorical_columns = [col for col in selected_features if col in non_numeric_cols]
+
+        for col in categorical_columns:
+            # Untuk dataset besar, gunakan categorical encoding yang lebih efisien
+            if X[col].nunique() > 100:  # Jika terlalu banyak kategori, gunakan frequency encoding
+                freq_encoding = X[col].value_counts().to_dict()
+                X[col] = X[col].map(freq_encoding)
+                X[col].fillna(0, inplace=True)
+            else:
+                le = LabelEncoder()
+                X[col] = le.fit_transform(X[col].astype(str))
+                le_dict[col] = le
+        progress_bar.progress(40)
+
+        # Encode target variable
+        status_text.text("Encoding target variable...")
+        le_target = None
+        if problem_type == "Classification" and y.dtype == 'object':
+            le_target = LabelEncoder()
+            y = le_target.fit_transform(y.astype(str))
+
+        # Remove high correlation features
+        if remove_high_correlation and len(selected_features) > 1:
+            status_text.text("Removing highly correlated features...")
+            X = remove_correlated_features(X, correlation_threshold)
+
+        progress_bar.progress(60)
+
+        # Handle missing values dengan metode yang lebih robust
+        status_text.text("Handling missing values...")
+        for col in X.columns:
+            if X[col].isnull().sum() > 0:
+                if X[col].dtype in ['int64', 'float64']:
+                    X[col].fillna(X[col].median(), inplace=True)
+                else:
+                    X[col].fillna(X[col].mode()[0] if len(X[col].mode()) > 0 else 0, inplace=True)
+
+        progress_bar.progress(80)
+
+        # Split data
+        status_text.text("Splitting data...")
+        X_train, X_test, y_train, y_test = train_test_split(
+            X, y,
+            test_size=test_size,
+            random_state=random_state,
+            stratify=y if problem_type == "Classification" else None
+        )
+
+        # Scale features - gunakan StandardScaler yang lebih efisien
+        scaler = StandardScaler()
+        X_train_scaled = scaler.fit_transform(X_train)
+        X_test_scaled = scaler.transform(X_test)
+        progress_bar.progress(100)
+
+        # Model selection dengan progress tracking
+        st.subheader("🚀 Model Training & Evaluation")
+
+        # Pilihan model berdasarkan problem type dan dataset size
+        if problem_type == "Regression":
+            models = {
+                "Linear Regression": LinearRegression(),
+                "Ridge Regression": Ridge(random_state=random_state),
+                "Random Forest": RandomForestRegressor(
+                    n_estimators=50,  # Kurangi untuk dataset besar
+                    random_state=random_state,
+                    n_jobs=-1  # Gunakan semua core CPU
+                ),
+                "Gradient Boosting": GradientBoostingRegressor(
+                    n_estimators=50,
+                    random_state=random_state
+                )
+            }
+        elif problem_type == "Classification":
+            models = {
+                "Logistic Regression": LogisticRegression(
+                    random_state=random_state,
+                    n_jobs=-1,
+                    max_iter=1000
+                ),
+                "Random Forest": RandomForestClassifier(
+                    n_estimators=50,
+                    random_state=random_state,
+                    n_jobs=-1
+                ),
+                "Gradient Boosting": GradientBoostingClassifier(
+                    n_estimators=50,
+                    random_state=random_state
+                ),
+                "XGBoost": xgb.XGBClassifier(
+                    n_estimators=50,
+                    random_state=random_state,
+                    n_jobs=-1,
+                    verbosity=0
+                ) if 'xgb' in globals() else None
+            }
+            # Remove None models
+            models = {k: v for k, v in models.items() if v is not None}
+
+        # Train and evaluate models dengan progress bar
+        results = {}
+        model_progress = st.progress(0)
+        total_models = len(models)
+
+        for i, (name, model) in enumerate(models.items()):
+            status_text.text(f"Training {name}...")
 
-
-
-
-
-
-    # Model selection berdasarkan problem type
-    st.subheader("🚀 Model Training & Evaluation")
-
-    if problem_type == "Regression":
-        models = {
-            "Linear Regression": LinearRegression(),
-            "Ridge Regression": Ridge(random_state=random_state),
-            "Random Forest": RandomForestRegressor(n_estimators=100, random_state=random_state)
-        }
-
-    elif problem_type == "Classification":
-        models = {
-            "Logistic Regression": LogisticRegression(random_state=random_state),
-            "Random Forest": RandomForestClassifier(n_estimators=100, random_state=random_state),
-            "SVM": SVC(random_state=random_state)
-        }
-
-    # Train and evaluate models
-    results = {}
-
-    for name, model in models.items():
-        with st.spinner(f"Training {name}..."):
-            try:
-                # Train model
-                model.fit(X_train_scaled, y_train)
-                y_pred = model.predict(X_test_scaled)
-
-                # Calculate metrics
-                if problem_type == "Regression":
-                    mse = mean_squared_error(y_test, y_pred)
-                    r2 = r2_score(y_test, y_pred)
-
-                    results[name] = {
-                        'MSE': mse,
-                        'R2 Score': r2,
-                        'predictions': y_pred,
-                        'model': model
-                    }
-
-                elif problem_type == "Classification":
-                    accuracy = accuracy_score(y_test, y_pred)
-
-                    results[name] = {
-                        'Accuracy': accuracy,
-                        'predictions': y_pred,
-                        'model': model
-                    }
-            except Exception as model_error:
-                st.warning(f"Error training {name}: {str(model_error)}")
-
-    # Display results
-    if results:
-        st.subheader("📊 Model Performance Comparison")
+            try:
+                # Train model
+                model.fit(X_train_scaled, y_train)
+                y_pred = model.predict(X_test_scaled)
 
+                # Calculate metrics
                 if problem_type == "Regression":
-
-
-
-
-
-
-
-                    '
-                    '
-
+                    mse = mean_squared_error(y_test, y_pred)
+                    rmse = np.sqrt(mse)
+                    mae = mean_absolute_error(y_test, y_pred)
+                    r2 = r2_score(y_test, y_pred)
+
+                    results[name] = {
+                        'MSE': mse,
+                        'RMSE': rmse,
+                        'MAE': mae,
+                        'R2 Score': r2,
+                        'predictions': y_pred,
+                        'model': model
+                    }
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                else:
-                    st.warning("Tidak ada model yang berhasil di-training")
+                elif problem_type == "Classification":
+                    accuracy = accuracy_score(y_test, y_pred)
+                    precision = precision_score(y_test, y_pred, average='weighted', zero_division=0)
+                    recall = recall_score(y_test, y_pred, average='weighted', zero_division=0)
+                    f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)
+
+                    results[name] = {
+                        'Accuracy': accuracy,
+                        'Precision': precision,
+                        'Recall': recall,
+                        'F1-Score': f1,
+                        'predictions': y_pred,
+                        'model': model
+                    }
+
+                st.success(f"✅ {name} trained successfully")
+
+            except Exception as model_error:
+                st.warning(f"⚠️ Error training {name}: {str(model_error)}")
+
+            model_progress.progress((i + 1) / total_models)
+
+        status_text.text("Completed!")
 
-
-
+        # Display results
+        if results:
+            display_ml_results(results, problem_type, X_test, y_test, selected_features, le_target)
+        else:
+            st.error("❌ Tidak ada model yang berhasil di-training")
+
+    except Exception as e:
+        st.error(f"❌ Error dalam ML analysis: {str(e)}")
+        st.info("💡 Tips: Coba kurangi jumlah features atau gunakan sample size yang lebih kecil")
+
+def optimize_memory_usage(df):
+    """Optimize memory usage of dataframe"""
+    for col in df.columns:
+        if df[col].dtype == 'object':
+            df[col] = df[col].astype('category')
+        elif df[col].dtype in ['int64', 'int32']:
+            c_min = df[col].min()
+            c_max = df[col].max()
+            if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(np.int8).max:
+                df[col] = df[col].astype(np.int8)
+            elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(np.int16).max:
+                df[col] = df[col].astype(np.int16)
+            elif c_min > np.iinfo(np.int32).min and c_max < np.iinfo(np.int32).max:
+                df[col] = df[col].astype(np.int32)
+        elif df[col].dtype in ['float64', 'float32']:
+            c_min = df[col].min()
+            c_max = df[col].max()
+            if c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max:
+                df[col] = df[col].astype(np.float32)
+    return df
+
+def remove_correlated_features(X, threshold=0.9):
+    """Remove highly correlated features"""
+    corr_matrix = X.corr().abs()
+    upper = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(bool))
+    to_drop = [column for column in upper.columns if any(upper[column] > threshold)]
+    return X.drop(columns=to_drop)
+
+def display_ml_results(results, problem_type, X_test, y_test, selected_features, le_target):
+    """Display ML results with comprehensive visualizations"""
+
+    st.subheader("📊 Model Performance Comparison")
+
+    # Create results dataframe
+    if problem_type == "Regression":
+        metrics_df = pd.DataFrame({
+            'Model': list(results.keys()),
+            'MSE': [results[name]['MSE'] for name in results.keys()],
+            'RMSE': [results[name]['RMSE'] for name in results.keys()],
+            'MAE': [results[name]['MAE'] for name in results.keys()],
+            'R2 Score': [results[name]['R2 Score'] for name in results.keys()]
+        })
+        sort_metric = 'R2 Score'
+    else:
+        metrics_df = pd.DataFrame({
+            'Model': list(results.keys()),
+            'Accuracy': [results[name]['Accuracy'] for name in results.keys()],
+            'Precision': [results[name]['Precision'] for name in results.keys()],
+            'Recall': [results[name]['Recall'] for name in results.keys()],
+            'F1-Score': [results[name]['F1-Score'] for name in results.keys()]
+        })
+        sort_metric = 'Accuracy'
+
+    # Display metrics table
+    st.dataframe(metrics_df.sort_values(sort_metric, ascending=False), use_container_width=True)
+
+    # Visualization
+    col1, col2 = st.columns(2)
+
+    with col1:
+        # Performance comparison chart
+        if problem_type == "Regression":
+            fig = px.bar(metrics_df, x='Model', y='R2 Score', title="R2 Score Comparison")
+        else:
+            fig = px.bar(metrics_df, x='Model', y='Accuracy', title="Accuracy Comparison")
+        st.plotly_chart(fig, use_container_width=True)
+
+    with col2:
+        # Actual vs Predicted untuk model terbaik
+        best_model_name = metrics_df.loc[metrics_df[sort_metric].idxmax(), 'Model']
+        best_result = results[best_model_name]
+
+        if problem_type == "Regression":
+            fig = px.scatter(
+                x=y_test,
+                y=best_result['predictions'],
+                labels={'x': 'Actual', 'y': 'Predicted'},
+                title=f"Actual vs Predicted - {best_model_name}"
+            )
+            fig.add_trace(px.line(x=[y_test.min(), y_test.max()], y=[y_test.min(), y_test.max()]).data[0])
+        else:
+            # Confusion matrix
+            cm = confusion_matrix(y_test, best_result['predictions'])
+            fig = px.imshow(
+                cm,
+                labels=dict(x="Predicted", y="Actual", color="Count"),
+                title=f"Confusion Matrix - {best_model_name}"
+            )
+        st.plotly_chart(fig, use_container_width=True)
+
+    # Feature importance
+    st.subheader("🔍 Feature Importance")
+    for name, result in results.items():
+        model = result['model']
+        if hasattr(model, 'feature_importances_'):
+            feature_importance = pd.DataFrame({
+                'feature': selected_features[:len(model.feature_importances_)],
+                'importance': model.feature_importances_
+            }).sort_values('importance', ascending=False)
+
+            fig = px.bar(
+                feature_importance.head(10),
+                x='importance',
+                y='feature',
+                title=f"Top 10 Feature Importance - {name}",
+                orientation='h'
+            )
+            st.plotly_chart(fig, use_container_width=True)
 
 def deep_learning_analysis(df, numeric_cols, non_numeric_cols):
-    """Analisis Deep Learning"""
+    """Analisis Deep Learning Lengkap - Optimized for Large Datasets"""
 
-    st.header("🧠 Deep Learning Analysis")
+    st.header("🧠 Deep Learning Analysis - High Performance")
 
-
+    # Validasi dataset
+    if df.empty:
+        st.error("❌ Dataset kosong! Silakan upload data terlebih dahulu.")
+        return
+
+    if len(numeric_cols) < 2:
+        st.error("❌ Diperuhkan minimal 2 kolom numerik untuk analisis Deep Learning")
+        return
 
-    #
-
+    # Configuration untuk kecepatan
+    st.subheader("⚡ Konfigurasi Kecepatan & Performa")
+
+    col1, col2, col3 = st.columns(3)
 
     with col1:
+        processing_speed = st.selectbox(
+            "Kecepatan Processing",
+            ["🚀 Very Fast", "⚡ Fast", "✅ Balanced", "🐢 Comprehensive"],
+            index=0,
+            key="processing_speed"
+        )
+
+        # Set parameters berdasarkan kecepatan
+        if processing_speed == "🚀 Very Fast":
+            sample_size = 0.3
+            epochs = 20
+            batch_size = 128
+        elif processing_speed == "⚡ Fast":
+            sample_size = 0.5
+            epochs = 30
+            batch_size = 64
+        elif processing_speed == "✅ Balanced":
+            sample_size = 0.7
+            epochs = 50
+            batch_size = 32
+        else:
+            sample_size = 1.0
+            epochs = 80
+            batch_size = 16
+
+    with col2:
         dl_target = st.selectbox(
             "Pilih Target Variable",
             numeric_cols,
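
Note: for categorical columns with more than 100 distinct values, the rewritten preprocessing switches from LabelEncoder to frequency encoding, mapping each category to its occurrence count so that cardinality no longer blows up the feature space. A self-contained sketch of the technique on toy data:

    import pandas as pd

    s = pd.Series(["a", "b", "a", "c", "a", "b"])
    freq = s.value_counts().to_dict()   # {"a": 3, "b": 2, "c": 1}
    encoded = s.map(freq).fillna(0)     # each category becomes its frequency
    print(encoded.tolist())             # [3, 2, 3, 1, 3, 2]

One caveat worth flagging in review: the encoding above is fit on the full sampled frame before the train/test split, so the counts leak a small amount of information from the test rows into the features.
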
@@ -7060,30 +7358,76 @@ def deep_learning_analysis(df, numeric_cols, non_numeric_cols):
             key="dl_problem_type"
         )
 
-    with
-        epochs = st.slider("Epochs", 10, 200,
-        batch_size = st.slider("Batch Size", 16, 256,
-        learning_rate = st.selectbox("Learning Rate", [0.001, 0.01, 0.
+    with col3:
+        epochs = st.slider("Epochs", 10, 200, epochs, key="dl_epochs")
+        batch_size = st.slider("Batch Size", 16, 256, batch_size, key="dl_batch_size")
+        learning_rate = st.selectbox("Learning Rate", [0.001, 0.01, 0.0001, 0.00001],
+                                     index=0, key="dl_learning_rate")
+
+    # Optimasi dataset besar
+    st.info(f"**Mode {processing_speed}** - Sample size: {sample_size*100}% - Dataset: {len(df):,} rows")
 
-    # Feature selection
+    # Feature selection dengan optimasi
+    available_features = [f for f in numeric_cols if f != dl_target]
     dl_features = st.multiselect(
         "Pilih Features untuk Deep Learning",
-
-        default=[
+        available_features,
+        default=available_features[:min(6, len(available_features))],
         key="dl_features_select"
     )
 
-    if dl_target
-
-
+    if not dl_target or not dl_features:
+        st.info("📝 Pilih target variable dan features untuk memulai analisis DL")
+        return
+
+    try:
+
+        # Check GPU availability
+        gpu_available = len(tf.config.experimental.list_physical_devices('GPU')) > 0
+        if gpu_available:
+            st.success("🎯 GPU tersedia - Training akan dipercepat!")
+        else:
+            st.info("💡 GPU tidak tersedia - Training menggunakan CPU")
+
+        # Optimasi memory untuk dataset besar
+        @st.cache_data(show_spinner=False)
+        def prepare_data_optimized(_df, features, target, sample_frac=1.0, problem_type="Regression"):
+            """Prepare data dengan optimasi memory"""
+            # Sampling untuk dataset besar
+            if sample_frac < 1.0:
+                _df = _df.sample(frac=sample_frac, random_state=42)
+
+            X = _df[features].fillna(_df[features].mean())
+            y = _df[target]
+
+            # Preprocessing target untuk classification
+            if problem_type != "Regression":
+                if problem_type == "Binary Classification":
+                    # Pastikan binary classification
+                    unique_vals = y.unique()
+                    if len(unique_vals) > 2:
+                        st.warning(f"⚠️ Target memiliki {len(unique_vals)} kelas. Menggunakan 2 kelas terbanyak.")
+                        top_2_classes = y.value_counts().head(2).index
+                        mask = y.isin(top_2_classes)
+                        X = X[mask]
+                        y = y[mask]
+                        y = LabelEncoder().fit_transform(y)
+                    else:
+                        y = LabelEncoder().fit_transform(y)
+                else:
+                    # Multi-class classification
+                    y = LabelEncoder().fit_transform(y)
 
-
-
-
+            return X, y
+
+        # Prepare data dengan optimasi
+        with st.spinner("🔄 Memproses data dengan optimasi kecepatan..."):
+            X, y = prepare_data_optimized(df, dl_features, dl_target, sample_size, dl_problem_type)
 
         # Split data
         X_train, X_test, y_train, y_test = train_test_split(
-            X, y, test_size=0.2, random_state=42
+            X, y, test_size=0.2, random_state=42,
+            stratify=y if dl_problem_type != "Regression" else None
         )
 
         # Scale features
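
Note: in prepare_data_optimized, the leading underscore in `_df` follows the documented st.cache_data convention: parameters whose names start with an underscore are excluded from the cache key, so Streamlit does not hash the (potentially large) DataFrame on every rerun. The trade-off is that changes to the DataFrame alone will not invalidate the cache. A minimal sketch of the pattern:

    import pandas as pd
    import streamlit as st

    @st.cache_data(show_spinner=False)
    def expensive_transform(_df: pd.DataFrame, frac: float) -> pd.DataFrame:
        # _df is left out of the cache key because of the underscore prefix;
        # only `frac` (plus the function body) determines cache hits
        return _df.sample(frac=frac, random_state=42)
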
@@ -7091,198 +7435,1532 @@ def deep_learning_analysis(df, numeric_cols, non_numeric_cols):
|
|
7091
7435
|
X_train_scaled = scaler.fit_transform(X_train)
|
7092
7436
|
X_test_scaled = scaler.transform(X_test)
|
7093
7437
|
|
7094
|
-
#
|
7095
|
-
|
7438
|
+
# Convert to TensorFlow datasets untuk performa tinggi
|
7439
|
+
train_dataset = tf.data.Dataset.from_tensor_slices((X_train_scaled, y_train))
|
7440
|
+
train_dataset = train_dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)
|
7096
7441
|
|
7442
|
+
val_dataset = tf.data.Dataset.from_tensor_slices((X_test_scaled, y_test))
|
7443
|
+
val_dataset = val_dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)
|
7444
|
+
|
7445
|
+
# Tampilkan info dataset
|
7446
|
+
st.success(f"✅ Data siap: {len(X_train):,} training samples, {len(X_test):,} test samples")
|
7447
|
+
|
7448
|
+
# Model architecture dengan optimasi
|
7449
|
+
st.subheader("🏗️ Neural Network Architecture - Optimized")
|
7450
|
+
|
7451
|
+
col1, col2 = st.columns(2)
|
7452
|
+
|
7453
|
+
with col1:
|
7454
|
+
hidden_layers = st.slider("Jumlah Hidden Layers", 1, 5, 2, key="dl_hidden_layers")
|
7455
|
+
units_per_layer = st.slider("Units per Layer", 32, 512, 64, key="dl_units")
|
7456
|
+
activation = st.selectbox("Activation Function", ["relu", "elu", "tanh", "selu"],
|
7457
|
+
index=0, key="dl_activation")
|
7458
|
+
|
7459
|
+
with col2:
|
7460
|
+
dropout_rate = st.slider("Dropout Rate", 0.0, 0.5, 0.2, 0.1, key="dl_dropout")
|
7461
|
+
optimizer = st.selectbox("Optimizer", ["adam", "rmsprop", "nadam", "sgd"],
|
7462
|
+
index=0, key="dl_optimizer")
|
7463
|
+
use_batch_norm = st.checkbox("Gunakan Batch Normalization", value=True, key="dl_batchnorm")
|
7464
|
+
use_early_stopping = st.checkbox("Gunakan Early Stopping", value=True, key="dl_earlystop")
|
7465
|
+
|
7466
|
+
# Advanced configuration
|
7467
|
+
with st.expander("⚙️ Konfigurasi Lanjutan"):
|
7097
7468
|
col1, col2 = st.columns(2)
|
7098
|
-
|
7099
7469
|
with col1:
|
7100
|
-
|
7101
|
-
|
7102
|
-
|
7103
|
-
|
7470
|
+
weight_initializer = st.selectbox(
|
7471
|
+
"Weight Initializer",
|
7472
|
+
["glorot_uniform", "he_normal", "lecun_uniform"],
|
7473
|
+
index=0
|
7474
|
+
)
|
7475
|
+
use_l2_reg = st.checkbox("Gunakan L2 Regularization", value=False)
|
7476
|
+
l2_rate = st.slider("L2 Rate", 0.0001, 0.01, 0.001, 0.0001) if use_l2_reg else 0.0
|
7477
|
+
|
7104
7478
|
with col2:
|
7105
|
-
|
7106
|
-
|
7107
|
-
|
7108
|
-
|
7479
|
+
learning_rate_schedule = st.selectbox(
|
7480
|
+
"Learning Rate Schedule",
|
7481
|
+
["Constant", "ExponentialDecay", "CosineDecay"],
|
7482
|
+
index=0
|
7483
|
+
)
|
7484
|
+
|
7485
|
+
# Build optimized model
|
7486
|
+
with st.spinner("🔄 Membangun model neural network..."):
|
7109
7487
|
model = tf.keras.Sequential()
|
7110
7488
|
|
7111
7489
|
# Input layer
|
7112
|
-
|
7490
|
+
if use_l2_reg:
|
7491
|
+
model.add(tf.keras.layers.Dense(
|
7492
|
+
units_per_layer,
|
7493
|
+
activation=activation,
|
7494
|
+
input_shape=(len(dl_features),),
|
7495
|
+
kernel_initializer=weight_initializer,
|
7496
|
+
kernel_regularizer=tf.keras.regularizers.l2(l2_rate)
|
7497
|
+
))
|
7498
|
+
else:
|
7499
|
+
model.add(tf.keras.layers.Dense(
|
7500
|
+
units_per_layer,
|
7501
|
+
activation=activation,
|
7502
|
+
input_shape=(len(dl_features),),
|
7503
|
+
kernel_initializer=weight_initializer
|
7504
|
+
))
|
7505
|
+
|
7506
|
+
if use_batch_norm:
|
7507
|
+
model.add(tf.keras.layers.BatchNormalization())
|
7113
7508
|
model.add(tf.keras.layers.Dropout(dropout_rate))
|
7114
7509
|
|
7115
|
-
# Hidden layers
|
7510
|
+
# Hidden layers dengan optimasi
|
7116
7511
|
for i in range(hidden_layers - 1):
|
7117
|
-
|
7512
|
+
# Reduce units in deeper layers untuk efisiensi
|
7513
|
+
units = max(32, units_per_layer // (2 ** (i + 1)))
|
7514
|
+
|
7515
|
+
if use_l2_reg:
|
7516
|
+
model.add(tf.keras.layers.Dense(
|
7517
|
+
units,
|
7518
|
+
activation=activation,
|
7519
|
+
kernel_regularizer=tf.keras.regularizers.l2(l2_rate)
|
7520
|
+
))
|
7521
|
+
else:
|
7522
|
+
model.add(tf.keras.layers.Dense(units, activation=activation))
|
7523
|
+
|
7524
|
+
if use_batch_norm:
|
7525
|
+
model.add(tf.keras.layers.BatchNormalization())
|
7118
7526
|
model.add(tf.keras.layers.Dropout(dropout_rate))
|
7119
7527
|
|
7120
7528
|
# Output layer
|
7121
7529
|
if dl_problem_type == "Regression":
|
7122
7530
|
model.add(tf.keras.layers.Dense(1, activation='linear'))
|
7123
7531
|
loss = 'mse'
|
7124
|
-
metrics = ['mae']
|
7532
|
+
metrics = ['mae', 'mse']
|
7533
|
+
monitor_metric = 'val_loss'
|
7125
7534
|
else:
|
7126
|
-
num_classes = len(
|
7535
|
+
num_classes = len(np.unique(y)) if dl_problem_type == "Multi-class Classification" else 1
|
7127
7536
|
activation_output = 'softmax' if dl_problem_type == "Multi-class Classification" else 'sigmoid'
|
7128
|
-
|
7537
|
+
output_units = num_classes if dl_problem_type == "Multi-class Classification" else 1
|
7538
|
+
model.add(tf.keras.layers.Dense(output_units, activation=activation_output))
|
7129
7539
|
loss = 'sparse_categorical_crossentropy' if dl_problem_type == "Multi-class Classification" else 'binary_crossentropy'
|
7130
7540
|
metrics = ['accuracy']
|
7131
|
-
|
7132
|
-
|
7133
|
-
|
7134
|
-
|
7135
|
-
|
7136
|
-
|
7541
|
+
monitor_metric = 'val_accuracy'
|
7542
|
+
|
7543
|
+
# Learning rate schedule
|
7544
|
+
if learning_rate_schedule == "ExponentialDecay":
|
7545
|
+
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
|
7546
|
+
initial_learning_rate=learning_rate,
|
7547
|
+
decay_steps=1000,
|
7548
|
+
decay_rate=0.9
|
7137
7549
|
)
|
7138
|
-
|
7139
|
-
|
7140
|
-
|
7141
|
-
|
7142
|
-
|
7143
|
-
|
7144
|
-
|
7145
|
-
|
7146
|
-
|
7147
|
-
|
7148
|
-
|
7149
|
-
|
7150
|
-
|
7151
|
-
|
7152
|
-
|
7153
|
-
|
7154
|
-
|
7155
|
-
X_train_scaled, y_train,
|
7156
|
-
epochs=epochs,
|
7157
|
-
batch_size=batch_size,
|
7158
|
-
validation_split=0.2,
|
7159
|
-
callbacks=[early_stopping],
|
7160
|
-
verbose=0
|
7161
|
-
)
|
7162
|
-
|
7163
|
-
# Plot training history
|
7164
|
-
fig = go.Figure()
|
7165
|
-
fig.add_trace(go.Scatter(
|
7166
|
-
y=history.history['loss'],
|
7167
|
-
mode='lines',
|
7168
|
-
name='Training Loss'
|
7169
|
-
))
|
7170
|
-
if 'val_loss' in history.history:
|
7171
|
-
fig.add_trace(go.Scatter(
|
7172
|
-
y=history.history['val_loss'],
|
7173
|
-
mode='lines',
|
7174
|
-
name='Validation Loss'
|
7175
|
-
))
|
7176
|
-
fig.update_layout(
|
7177
|
-
title="Training History - Loss",
|
7178
|
-
xaxis_title="Epoch",
|
7179
|
-
yaxis_title="Loss",
|
7180
|
-
height=400
|
7181
|
-
)
|
7182
|
-
st.plotly_chart(fig, use_container_width=True)
|
7183
|
-
|
7184
|
-
# Evaluate model
|
7185
|
-
test_results = model.evaluate(X_test_scaled, y_test, verbose=0)
|
7186
|
-
st.success(f"✅ Model Training Complete!")
|
7187
|
-
st.metric("Test Loss", f"{test_results[0]:.4f}")
|
7188
|
-
if len(test_results) > 1:
|
7189
|
-
st.metric("Test Metric", f"{test_results[1]:.4f}")
|
7550
|
+
elif learning_rate_schedule == "CosineDecay":
|
7551
|
+
lr_schedule = tf.keras.optimizers.schedules.CosineDecay(
|
7552
|
+
initial_learning_rate=learning_rate,
|
7553
|
+
decay_steps=epochs * len(X_train) // batch_size
|
7554
|
+
)
|
7555
|
+
else:
|
7556
|
+
lr_schedule = learning_rate
|
7557
|
+
|
7558
|
+
# Compile model dengan learning rate
|
7559
|
+
if optimizer == "adam":
|
7560
|
+
optimizer_obj = tf.keras.optimizers.Adam(learning_rate=lr_schedule)
|
7561
|
+
elif optimizer == "rmsprop":
|
7562
|
+
optimizer_obj = tf.keras.optimizers.RMSprop(learning_rate=lr_schedule)
|
7563
|
+
elif optimizer == "nadam":
|
7564
|
+
optimizer_obj = tf.keras.optimizers.Nadam(learning_rate=lr_schedule)
|
7565
|
+
else:
|
7566
|
+
optimizer_obj = tf.keras.optimizers.SGD(learning_rate=lr_schedule, momentum=0.9)
|
7190
7567
|
|
7191
|
-
|
7192
|
-
|
7193
|
-
|
7194
|
-
|
7568
|
+
model.compile(optimizer=optimizer_obj, loss=loss, metrics=metrics)
|
7569
|
+
|
7570
|
+
# Display model summary
|
7571
|
+
st.subheader("📊 Model Summary")
|
7195
7572
|
|
7196
|
-
|
7197
|
-
|
7198
|
-
|
7199
|
-
|
7200
|
-
|
7201
|
-
st.info("🔄 Fitur Model Comparison - Pilih model dari tab Machine Learning dan Deep Learning untuk perbandingan")
|
7202
|
-
|
7203
|
-
# Placeholder untuk implementasi lengkap
|
7204
|
-
col1, col2, col3 = st.columns(3)
|
7205
|
-
|
7206
|
-
with col1:
|
7207
|
-
st.metric("ML Models", "3")
|
7208
|
-
with col2:
|
7209
|
-
st.metric("Evaluation Metrics", "5+")
|
7210
|
-
with col3:
|
7211
|
-
st.metric("Feature Importance", "✓")
|
7573
|
+
# Tangkap output summary dari model
|
7574
|
+
model_summary = []
|
7575
|
+
model.summary(print_fn=lambda x: model_summary.append(x))
|
7576
|
+
summary_text = "\n".join(model_summary)
|
7212
7577
|
|
7213
|
-
|
7214
|
-
|
7215
|
-
|
7216
|
-
|
7217
|
-
|
7218
|
-
|
7219
|
-
|
7220
|
-
|
7221
|
-
|
7222
|
-
|
7223
|
-
|
7224
|
-
|
7225
|
-
|
7226
|
-
|
7227
|
-
|
7228
|
-
|
7229
|
-
|
7230
|
-
|
7231
|
-
|
7232
|
-
|
7233
|
-
|
7234
|
-
|
7235
|
-
|
7236
|
-
|
7237
|
-
|
7238
|
-
|
7239
|
-
|
7240
|
-
|
7241
|
-
|
7242
|
-
|
7243
|
-
|
7244
|
-
|
7245
|
-
|
7246
|
-
|
7247
|
-
|
7248
|
-
|
7249
|
-
|
7250
|
-
|
7251
|
-
|
7252
|
-
|
7253
|
-
|
7254
|
-
|
7255
|
-
|
7256
|
-
|
7257
|
-
title="Random Forest Feature Importance",
|
7258
|
-
orientation='h'
|
7578
|
+
# Tambahkan CSS styling
|
7579
|
+
st.markdown("""
|
7580
|
+
<style>
|
7581
|
+
.model-summary-box {
|
7582
|
+
background-color: #fff; /* Warna gelap seperti terminal */
|
7583
|
+
color: #000; /* Warna teks hijau neon */
|
7584
|
+
border-radius: 10px;
|
7585
|
+
padding: 15px;
|
7586
|
+
font-family: 'Courier New', monospace;
|
7587
|
+
font-size: 14px;
|
7588
|
+
line-height: 1.5;
|
7589
|
+
white-space: pre-wrap;
|
7590
|
+
box-shadow: 0 0 8px rgba(0,255,179,0.3);
|
7591
|
+
border: 1px solid rgba(0,255,179,0.4);
|
7592
|
+
overflow-x: auto;
|
7593
|
+
}
|
7594
|
+
</style>
|
7595
|
+
""", unsafe_allow_html=True)
|
7596
|
+
|
7597
|
+
# Gunakan expander untuk dropdown
|
7598
|
+
with st.expander("🧠 Lihat / Sembunyikan Model Summary"):
|
7599
|
+
st.markdown(f"<div class='model-summary-box'>{summary_text}</div>", unsafe_allow_html=True)
|
7600
|
+
|
7601
|
+
# Calculate total parameters
|
7602
|
+
total_params = model.count_params()
|
7603
|
+
st.info(f"📈 Total Parameters: {total_params:,}")
|
7604
|
+
|
7605
|
+
# Training section
|
7606
|
+
st.subheader("🚀 Pelatihan Model")
|
7607
|
+
|
7608
|
+
if st.button("🎯 Mulai Pelatihan Deep Learning", type="primary", key="dl_train_button"):
|
7609
|
+
start_time = time.time()
|
7610
|
+
|
7611
|
+
with st.spinner("🧠 Training neural network... Mohon tunggu..."):
|
7612
|
+
# Callbacks untuk training lebih cepat
|
7613
|
+
callbacks = []
|
7614
|
+
|
7615
|
+
if use_early_stopping:
|
7616
|
+
early_stopping = tf.keras.callbacks.EarlyStopping(
|
7617
|
+
monitor=monitor_metric,
|
7618
|
+
patience=10,
|
7619
|
+
restore_best_weights=True,
|
7620
|
+
mode='min' if dl_problem_type == "Regression" else 'max',
|
7621
|
+
verbose=1
|
7259
7622
|
)
|
7260
|
-
|
7623
|
+
callbacks.append(early_stopping)
|
7624
|
+
|
7625
|
+
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
|
7626
|
+
monitor='val_loss',
|
7627
|
+
factor=0.5,
|
7628
|
+
patience=5,
|
7629
|
+
min_lr=0.00001,
|
7630
|
+
verbose=1
|
7631
|
+
)
|
7632
|
+
callbacks.append(reduce_lr)
|
7633
|
+
|
7634
|
+
# TensorBoard callback (optional)
|
7635
|
+
# callbacks.append(tf.keras.callbacks.TensorBoard(log_dir='./logs'))
|
7636
|
+
|
7637
|
+
# Train model dengan progress bar
|
7638
|
+
progress_bar = st.progress(0)
|
7639
|
+
status_text = st.empty()
|
7640
|
+
time_estimator = st.empty()
|
7641
|
+
metrics_display = st.empty()
|
7261
7642
|
|
7262
|
-
|
7263
|
-
|
7264
|
-
|
7643
|
+
class TrainingCallback(tf.keras.callbacks.Callback):
|
7644
|
+
def on_epoch_begin(self, epoch, logs=None):
|
7645
|
+
self.epoch_start_time = time.time()
|
7265
7646
|
|
7266
|
-
|
7267
|
-
|
7268
|
-
|
7647
|
+
def on_epoch_end(self, epoch, logs=None):
|
7648
|
+
progress = (epoch + 1) / epochs
|
7649
|
+
progress_bar.progress(min(progress, 1.0))
|
7650
|
+
|
7651
|
+
# Metrics display
|
7652
|
+
if dl_problem_type == "Regression":
|
7653
|
+
metrics_str = f"Loss: {logs['loss']:.4f}, Val Loss: {logs['val_loss']:.4f}, MAE: {logs['mae']:.4f}"
|
7654
|
+
else:
|
7655
|
+
metrics_str = f"Loss: {logs['loss']:.4f}, Val Loss: {logs['val_loss']:.4f}, Acc: {logs['accuracy']:.4f}"
|
7656
|
+
|
7657
|
+
status_text.text(f"Epoch {epoch+1}/{epochs}")
|
7658
|
+
metrics_display.text(f"📊 {metrics_str}")
|
7659
|
+
|
7660
|
+
# Time estimation
|
7661
|
+
elapsed = time.time() - start_time
|
7662
|
+
epoch_time = time.time() - self.epoch_start_time
|
7663
|
+
remaining = epoch_time * (epochs - epoch - 1)
|
7664
|
+
|
7665
|
+
time_estimator.text(f"⏱️ Elapsed: {elapsed:.1f}s | Est. remaining: {remaining:.1f}s")
|
7666
|
+
|
7667
|
+
callbacks.append(TrainingCallback())
|
7668
|
+
|
7669
|
+
# Train model
|
7670
|
+
history = model.fit(
|
7671
|
+
train_dataset,
|
7672
|
+
epochs=epochs,
|
7673
|
+
validation_data=val_dataset,
|
7674
|
+
callbacks=callbacks,
|
7675
|
+
verbose=0
|
7676
|
+
)
|
7677
|
+
|
7678
|
+
training_time = time.time() - start_time
|
7679
|
+
progress_bar.progress(1.0)
|
7680
|
+
status_text.text(f"✅ Pelatihan Selesai! Waktu: {training_time:.1f} detik")
|
7681
|
+
time_estimator.text("")
|
7682
|
+
metrics_display.text("")
|
7683
|
+
|
7684
|
+
# ==================== EVALUASI DETAIL ====================
|
7685
|
+
st.subheader("📈 Hasil Evaluasi Detail")
|
7686
|
+
|
7687
|
+
# Predictions
|
7688
|
+
y_pred = model.predict(X_test_scaled, verbose=0)
|
7689
|
+
|
7690
|
+
# 1. PERFORMANCE METRICS COMPREHENSIVE
|
7691
|
+
st.subheader("🎯 Dashboard Performa Model")
|
7692
|
+
|
7693
|
+
if dl_problem_type == "Regression":
|
7694
|
+
# Regression metrics
|
7695
|
+
y_pred_flat = y_pred.flatten()
|
7696
|
+
mse = mean_squared_error(y_test, y_pred_flat)
|
7697
|
+
mae = mean_absolute_error(y_test, y_pred_flat)
|
7698
|
+
r2 = r2_score(y_test, y_pred_flat)
|
7699
|
+
rmse = np.sqrt(mse)
|
7269
7700
|
|
7270
|
-
|
7271
|
-
|
7272
|
-
|
7273
|
-
|
7274
|
-
|
7275
|
-
|
7276
|
-
|
7277
|
-
|
7278
|
-
|
7279
|
-
|
7280
|
-
|
7281
|
-
|
7282
|
-
|
7283
|
-
|
7284
|
-
|
7285
|
-
|
7701
|
+
# Additional metrics
|
7702
|
+
mape = np.mean(np.abs((y_test - y_pred_flat) / np.where(y_test != 0, y_test, 1))) * 100
|
7703
|
+
accuracy_percentage = max(0, min(100, (1 - mae / (y_test.max() - y_test.min())) * 100))
|
7704
|
+
|
7705
|
+
# Display metrics
|
7706
|
+
col1, col2, col3, col4 = st.columns(4)
|
7707
|
+
|
7708
|
+
with col1:
|
7709
|
+
st.metric("R² Score", f"{r2:.4f}",
|
7710
|
+
delta="Excellent" if r2 > 0.8 else "Good" if r2 > 0.6 else "Needs Improvement")
|
7711
|
+
with col2:
|
7712
|
+
st.metric("MAE", f"{mae:.4f}")
|
7713
|
+
with col3:
|
7714
|
+
st.metric("RMSE", f"{rmse:.4f}")
|
7715
|
+
with col4:
|
7716
|
+
st.metric("MAPE", f"{mape:.2f}%")
|
7717
|
+
|
7718
|
+
else:
|
7719
|
+
# Classification metrics
|
7720
|
+
if dl_problem_type == "Binary Classification":
|
7721
|
+
y_pred_class = (y_pred > 0.5).astype(int).flatten()
|
7722
|
+
else:
|
7723
|
+
y_pred_class = np.argmax(y_pred, axis=1)
|
7724
|
+
|
7725
|
+
accuracy = accuracy_score(y_test, y_pred_class)
|
7726
|
+
precision = precision_score(y_test, y_pred_class, average='weighted', zero_division=0)
|
7727
|
+
recall = recall_score(y_test, y_pred_class, average='weighted', zero_division=0)
|
7728
|
+
f1 = f1_score(y_test, y_pred_class, average='weighted', zero_division=0)
|
7729
|
+
|
7730
|
+
# Display metrics
|
7731
|
+
col1, col2, col3, col4 = st.columns(4)
|
7732
|
+
|
7733
|
+
with col1:
|
7734
|
+
st.metric("Accuracy", f"{accuracy:.4f}",
|
7735
|
+
delta="Excellent" if accuracy > 0.9 else "Good" if accuracy > 0.8 else "Needs Improvement")
|
7736
|
+
with col2:
|
7737
|
+
st.metric("Precision", f"{precision:.4f}")
|
7738
|
+
with col3:
|
7739
|
+
st.metric("Recall", f"{recall:.4f}")
|
7740
|
+
with col4:
|
7741
|
+
st.metric("F1-Score", f"{f1:.4f}")
|
7742
|
+
|
7743
|
+
# 2. VISUALISASI LENGKAP
|
7744
|
+
st.subheader("📊 Visualisasi Komprehensif")
|
7745
|
+
|
7746
|
+
# Training history visualization
|
7747
|
+
fig_history = make_subplots(
|
7748
|
+
rows=1, cols=2,
|
7749
|
+
subplot_titles=('Loss Progression', 'Metrics Progression'),
|
7750
|
+
specs=[[{"secondary_y": False}, {"secondary_y": False}]]
|
7751
|
+
)
|
7752
|
+
|
7753
|
+
# Loss plot
|
7754
|
+
fig_history.add_trace(
|
7755
|
+
go.Scatter(x=list(range(1, len(history.history['loss'])+1)),
|
7756
|
+
y=history.history['loss'],
|
7757
|
+
name='Training Loss', line=dict(color='blue')),
|
7758
|
+
row=1, col=1
|
7759
|
+
)
|
7760
|
+
fig_history.add_trace(
|
7761
|
+
go.Scatter(x=list(range(1, len(history.history['val_loss'])+1)),
|
7762
|
+
y=history.history['val_loss'],
|
7763
|
+
name='Validation Loss', line=dict(color='red')),
|
7764
|
+
row=1, col=1
|
7765
|
+
)
|
7766
|
+
|
7767
|
+
# Metrics plot
|
7768
|
+
if dl_problem_type == "Regression":
|
7769
|
+
fig_history.add_trace(
|
7770
|
+
go.Scatter(x=list(range(1, len(history.history['mae'])+1)),
|
7771
|
+
y=history.history['mae'],
|
7772
|
+
name='Training MAE', line=dict(color='green')),
|
7773
|
+
row=1, col=2
|
7774
|
+
)
|
7775
|
+
if 'val_mae' in history.history:
|
7776
|
+
fig_history.add_trace(
|
7777
|
+
go.Scatter(x=list(range(1, len(history.history['val_mae'])+1)),
|
7778
|
+
y=history.history['val_mae'],
|
7779
|
+
name='Validation MAE', line=dict(color='orange')),
|
7780
|
+
row=1, col=2
|
7781
|
+
)
|
7782
|
+
else:
|
7783
|
+
fig_history.add_trace(
|
7784
|
+
go.Scatter(x=list(range(1, len(history.history['accuracy'])+1)),
|
7785
|
+
y=history.history['accuracy'],
|
7786
|
+
name='Training Accuracy', line=dict(color='green')),
|
7787
|
+
row=1, col=2
|
7788
|
+
)
|
7789
|
+
fig_history.add_trace(
|
7790
|
+
go.Scatter(x=list(range(1, len(history.history['val_accuracy'])+1)),
|
7791
|
+
y=history.history['val_accuracy'],
|
7792
|
+
name='Validation Accuracy', line=dict(color='orange')),
|
7793
|
+
row=1, col=2
|
7794
|
+
)
|
7795
|
+
|
7796
|
+
fig_history.update_layout(height=400, title_text="Training History")
|
7797
|
+
st.plotly_chart(fig_history, use_container_width=True)
|
7798
|
+
|
7799
|
+
# 3. PREDICTION VISUALIZATION
|
7800
|
+
if dl_problem_type == "Regression":
|
7801
|
+
# Regression plots
|
7802
|
+
col1, col2 = st.columns(2)
|
7803
|
+
|
7804
|
+
with col1:
|
7805
|
+
# Actual vs Predicted
|
7806
|
+
fig_actual_pred = px.scatter(
|
7807
|
+
x=y_test, y=y_pred_flat,
|
7808
|
+
title="Actual vs Predicted",
|
7809
|
+
labels={'x': 'Actual', 'y': 'Predicted'},
|
7810
|
+
trendline="lowess"
|
7811
|
+
)
|
7812
|
+
fig_actual_pred.add_trace(
|
7813
|
+
go.Scatter(x=[y_test.min(), y_test.max()],
|
7814
|
+
y=[y_test.min(), y_test.max()],
|
7815
|
+
mode='lines', name='Perfect Prediction',
|
7816
|
+
line=dict(color='red', dash='dash'))
|
7817
|
+
)
|
7818
|
+
st.plotly_chart(fig_actual_pred, use_container_width=True)
|
7819
|
+
|
7820
|
+
with col2:
|
7821
|
+
# Residual plot
|
7822
|
+
residuals = y_test - y_pred_flat
|
7823
|
+
fig_residual = px.scatter(
|
7824
|
+
x=y_pred_flat, y=residuals,
|
7825
|
+
title="Residual Plot",
|
7826
|
+
labels={'x': 'Predicted', 'y': 'Residuals'},
|
7827
|
+
trendline="lowess"
|
7828
|
+
)
|
7829
|
+
fig_residual.add_hline(y=0, line_dash="dash", line_color="red")
|
7830
|
+
st.plotly_chart(fig_residual, use_container_width=True)
|
7831
|
+
|
7832
|
+
else:
|
7833
|
+
# Classification plots
|
7834
|
+
col1, col2 = st.columns(2)
|
7835
|
+
|
7836
|
+
with col1:
|
7837
|
+
# Confusion Matrix
|
7838
|
+
cm = confusion_matrix(y_test, y_pred_class)
|
7839
|
+
fig_cm = px.imshow(
|
7840
|
+
cm,
|
7841
|
+
text_auto=True,
|
7842
|
+
title="Confusion Matrix",
|
7843
|
+
color_continuous_scale='Blues',
|
7844
|
+
aspect="auto"
|
7845
|
+
)
|
7846
|
+
st.plotly_chart(fig_cm, use_container_width=True)
|
7847
|
+
|
7848
|
+
with col2:
|
7849
|
+
# Classification report heatmap
|
7850
|
+
report = classification_report(y_test, y_pred_class, output_dict=True)
|
7851
|
+
report_df = pd.DataFrame(report).transpose().iloc[:-1, :3]
|
7852
|
+
fig_report = px.imshow(
|
7853
|
+
report_df.values,
|
7854
|
+
x=report_df.columns,
|
7855
|
+
y=report_df.index,
|
7856
|
+
text_auto=".2f",
|
7857
|
+
title="Classification Report",
|
7858
|
+
color_continuous_scale='Viridis',
|
7859
|
+
aspect="auto"
|
7860
|
+
)
|
7861
|
+
st.plotly_chart(fig_report, use_container_width=True)
|
7862
|
+
|
7863
|
+
# 4. FEATURE IMPORTANCE ANALYSIS
|
7864
|
+
st.subheader("🔍 Analisis Feature Importance")
|
7865
|
+
|
7866
|
+
try:
|
7867
|
+
# Simplified feature importance using permutation
|
7868
|
+
@st.cache_data
|
7869
|
+
def calculate_feature_importance(model, X_test_scaled, y_test, feature_names, problem_type):
|
7870
|
+
baseline_score = model.evaluate(X_test_scaled, y_test, verbose=0)
|
7871
|
+
baseline_loss = baseline_score[0] if problem_type == "Regression" else 1 - baseline_score[1]
|
7872
|
+
|
7873
|
+
importance_scores = []
|
7874
|
+
for i in range(len(feature_names)):
|
7875
|
+
X_permuted = X_test_scaled.copy()
|
7876
|
+
np.random.shuffle(X_permuted[:, i])
|
7877
|
+
permuted_score = model.evaluate(X_permuted, y_test, verbose=0)
|
7878
|
+
permuted_loss = permuted_score[0] if problem_type == "Regression" else 1 - permuted_score[1]
|
7879
|
+
importance = max(0, baseline_loss - permuted_loss)
|
7880
|
+
importance_scores.append(importance)
|
7881
|
+
|
7882
|
+
return pd.DataFrame({
|
7883
|
+
'Feature': feature_names,
|
7884
|
+
'Importance': importance_scores
|
7885
|
+
}).sort_values('Importance', ascending=False)
|
7886
|
+
|
7887
|
+
feature_importance_df = calculate_feature_importance(
|
7888
|
+
model, X_test_scaled, y_test, dl_features, dl_problem_type
|
7889
|
+
)
|
7890
|
+
|
7891
|
+
col1, col2 = st.columns(2)
|
7892
|
+
|
7893
|
+
with col1:
|
7894
|
+
fig_importance = px.bar(
|
7895
|
+
feature_importance_df,
|
7896
|
+
x='Importance',
|
7897
|
+
y='Feature',
|
7898
|
+
orientation='h',
|
7899
|
+
title="Feature Importance",
|
7900
|
+
color='Importance',
|
7901
|
+
color_continuous_scale='Viridis'
|
7902
|
+
)
|
7903
|
+
st.plotly_chart(fig_importance, use_container_width=True)
|
7904
|
+
|
7905
|
+
with col2:
|
7906
|
+
fig_importance_pie = px.pie(
|
7907
|
+
feature_importance_df,
|
7908
|
+
values='Importance',
|
7909
|
+
names='Feature',
|
7910
|
+
title="Feature Importance Distribution"
|
7911
|
+
)
|
7912
|
+
st.plotly_chart(fig_importance_pie, use_container_width=True)
|
7913
|
+
|
7914
|
+
except Exception as e:
|
7915
|
+
st.warning(f"⚠️ Feature importance calculation skipped: {str(e)}")
|
7916
|
+
|
7917
|
+
# 5. MODEL PERFORMANCE GAUGE
|
7918
|
+
st.subheader("📈 Performance Summary")
|
7919
|
+
|
7920
|
+
if dl_problem_type == "Regression":
|
7921
|
+
performance_score = min(100, max(0, (r2 + (1 - mae/y_test.std())) * 50))
|
7922
|
+
performance_level = "Sangat Baik" if performance_score > 85 else \
|
7923
|
+
"Baik" if performance_score > 70 else \
|
7924
|
+
"Cukup" if performance_score > 60 else "Perlu Improvement"
|
7925
|
+
else:
|
7926
|
+
performance_score = accuracy * 100
|
7927
|
+
performance_level = "Sangat Baik" if performance_score > 90 else \
|
7928
|
+
"Baik" if performance_score > 80 else \
|
7929
|
+
"Cukup" if performance_score > 70 else "Perlu Improvement"
|
7930
|
+
|
7931
|
+
# Gauge chart
|
7932
|
+
fig_gauge = go.Figure(go.Indicator(
|
7933
|
+
mode = "gauge+number+delta",
|
7934
|
+
value = performance_score,
|
7935
|
+
domain = {'x': [0, 1], 'y': [0, 1]},
|
7936
|
+
title = {'text': f"Model Performance: {performance_level}"},
|
7937
|
+
gauge = {
|
7938
|
+
'axis': {'range': [None, 100]},
|
7939
|
+
'bar': {'color': "darkblue"},
|
7940
|
+
'steps': [
|
7941
|
+
{'range': [0, 60], 'color': "red"},
|
7942
|
+
{'range': [60, 75], 'color': "yellow"},
|
7943
|
+
{'range': [75, 90], 'color': "lightgreen"},
|
7944
|
+
{'range': [90, 100], 'color': "green"}],
|
7945
|
+
'threshold': {
|
7946
|
+
'line': {'color': "red", 'width': 4},
|
7947
|
+
'thickness': 0.75,
|
7948
|
+
'value': 90}}
|
7949
|
+
))
|
7950
|
+
st.plotly_chart(fig_gauge, use_container_width=True)
|
7951
|
+
|
7952
|
+
# 6. DOWNLOAD DAN EXPORT MODEL
|
7953
|
+
st.subheader("💾 Export Model")
|
7954
|
+
|
7955
|
+
col1, col2 = st.columns(2)
|
7956
|
+
|
7957
|
+
with col1:
|
7958
|
+
# Save model
|
7959
|
+
if st.button("💾 Save TensorFlow Model"):
|
7960
|
+
model.save('saved_model.h5')
|
7961
|
+
with open('saved_model.h5', 'rb') as f:
|
7962
|
+
st.download_button(
|
7963
|
+
label="📥 Download Model",
|
7964
|
+
data=f,
|
7965
|
+
file_name="deep_learning_model.h5",
|
7966
|
+
mime="application/octet-stream"
|
7967
|
+
)
|
7968
|
+
|
7969
|
+
with col2:
|
7970
|
+
# Export predictions
|
7971
|
+
predictions_df = pd.DataFrame({
|
7972
|
+
'Actual': y_test,
|
7973
|
+
'Predicted': y_pred.flatten() if dl_problem_type == "Regression" else y_pred_class
|
7974
|
+
})
|
7975
|
+
csv = predictions_df.to_csv(index=False)
|
7976
|
+
st.download_button(
|
7977
|
+
label="📥 Download Predictions",
|
7978
|
+
data=csv,
|
7979
|
+
file_name="model_predictions.csv",
|
7980
|
+
mime="text/csv"
|
7981
|
+
)
|
7982
|
+
|
7983
|
+
# 7. RECOMMENDATIONS AND INSIGHTS
|
7984
|
+
st.subheader("💡 Insights & Rekomendasi")
|
7985
|
+
|
7986
|
+
# Training insights
|
7987
|
+
final_epoch = len(history.history['loss'])
|
7988
|
+
final_loss = history.history['loss'][-1]
|
7989
|
+
final_val_loss = history.history['val_loss'][-1]
|
7990
|
+
|
7991
|
+
col1, col2, col3 = st.columns(3)
|
7992
|
+
with col1:
|
7993
|
+
st.metric("Final Training Loss", f"{final_loss:.4f}")
|
7994
|
+
with col2:
|
7995
|
+
st.metric("Final Validation Loss", f"{final_val_loss:.4f}")
|
7996
|
+
with col3:
|
7997
|
+
st.metric("Training Time", f"{training_time:.1f}s")
|
7998
|
+
|
7999
|
+
# Recommendations based on performance
|
8000
|
+
st.info("""
|
8001
|
+
**🎯 Rekomendasi Improvement:**
|
8002
|
+
- **Data Quality**: Periksa missing values dan outliers
|
8003
|
+
- **Feature Engineering**: Tambahkan feature yang lebih relevan
|
8004
|
+
- **Hyperparameter Tuning**: Eksperimen dengan architecture berbeda
|
8005
|
+
- **Regularization**: Adjust dropout dan L2 regularization
|
8006
|
+
- **Learning Rate**: Coba learning rate scheduling
|
8007
|
+
""")
|
8008
|
+
|
8009
|
+
# Performance tips
|
8010
|
+
if performance_score < 70:
|
8011
|
+
st.warning("""
|
8012
|
+
**⚠️ Area Improvement:**
|
8013
|
+
- Pertimbangkan feature selection yang lebih baik
|
8014
|
+
- Coba model architecture yang lebih dalam/lebar
|
8015
|
+
- Gunakan lebih banyak data training
|
8016
|
+
- Eksperimen dengan different optimizers
|
8017
|
+
""")
|
8018
|
+
else:
|
8019
|
+
st.success("""
|
8020
|
+
**✅ Performa Baik!**
|
8021
|
+
- Model sudah menunjukkan hasil yang promising
|
8022
|
+
- Pertimbangkan deployment untuk penggunaan real-time
|
8023
|
+
- Monitor model performance secara berkala
|
8024
|
+
""")
|
8025
|
+
|
8026
|
+
except Exception as e:
|
8027
|
+
st.error(f"❌ Error dalam DL analysis: {str(e)}")
|
8028
|
+
st.info("""
|
8029
|
+
💡 Tips Troubleshooting:
|
8030
|
+
- Pastikan dataset cukup besar (>100 samples)
|
8031
|
+
- Gunakan mode kecepatan lebih tinggi untuk dataset besar
|
8032
|
+
- Kurangi jumlah features jika memory error
|
8033
|
+
- Pastikan target variable sesuai dengan problem type
|
8034
|
+
- Coba learning rate yang lebih kecil
|
8035
|
+
""")
|
8036
|
+
|
8037
|
+
# Tambahkan fungsi utility jika diperlukan
|
8038
|
+
def validate_tensorflow_installation():
|
8039
|
+
"""Validate TensorFlow installation"""
|
8040
|
+
try:
|
8041
|
+
import tensorflow as tf
|
8042
|
+
version = tf.__version__
|
8043
|
+
gpu_available = tf.config.list_physical_devices('GPU')
|
8044
|
+
return True, version, len(gpu_available) > 0
|
8045
|
+
except ImportError:
|
8046
|
+
return False, None, False
|
8047
|
+
|
8048
|
+
+def model_comparison_analysis(df, numeric_cols, non_numeric_cols):
+    """Comprehensive comparative data analysis without machine-learning models"""
+
+    st.header("📊 Advanced Data Analysis Dashboard")
+
+    # Dataset information
+    st.subheader("📋 Dataset Overview")
+    col1, col2, col3, col4 = st.columns(4)
+    with col1:
+        st.metric("Total Samples", f"{len(df):,}")
+    with col2:
+        st.metric("Features", f"{len(numeric_cols) + len(non_numeric_cols):,}")
+    with col3:
+        st.metric("Numeric", f"{len(numeric_cols):,}")
+    with col4:
+        st.metric("Categorical", f"{len(non_numeric_cols):,}")
+
+    # Configuration section
+    st.subheader("⚙️ Analysis Configuration")
+
+    col1, col2 = st.columns(2)
+
+    with col1:
+        # Target selection for the analysis
+        target_variable = st.selectbox(
+            "dwibaktindev AI",
+            numeric_cols + non_numeric_cols,
+            key="analysis_target"
+        )
+
+        # Analysis type
+        analysis_type = st.selectbox(
+            "Alisa AI",
+            ["Descriptive Statistics", "Correlation Analysis", "Distribution Analysis",
+             "Relationship Analysis", "Comparative Analysis"],
+            key="analysis_type"
+        )
+
+    with col2:
+        # Feature selection
+        available_features = [f for f in numeric_cols + non_numeric_cols if f != target_variable]
+        selected_features = st.multiselect(
+            "Sasha AI",
+            available_features,
+            default=available_features[:min(10, len(available_features))],
+            key="analysis_features"
+        )
+
+        # Sample size for the visualizations
+        sample_size = st.slider("Sample Size for Visualization", 100, len(df),
+                                min(1000, len(df)), 100, key="sample_size")
+
+    if st.button("🚀 Start Model AI", type="primary", key="start_analysis"):
+        if not target_variable or not selected_features:
+            st.error("❌ Please select target variable and features")
+            return
+
+        try:
+            # Run the analysis for the selected type
+            with st.spinner("🔄 Performing analysis..."):
+                if analysis_type == "Descriptive Statistics":
+                    perform_descriptive_analysis(df, target_variable, selected_features)
+
+                elif analysis_type == "Correlation Analysis":
+                    perform_correlation_analysis(df, target_variable, selected_features)
+
+                elif analysis_type == "Distribution Analysis":
+                    perform_distribution_analysis(df, target_variable, selected_features, sample_size)
+
+                elif analysis_type == "Relationship Analysis":
+                    perform_relationship_analysis(df, target_variable, selected_features, sample_size)
+
+                elif analysis_type == "Comparative Analysis":
+                    perform_comparative_analysis(df, target_variable, selected_features)
+
+            st.success("✅ Analysis completed!")
+
+        except Exception as e:
+            st.error(f"❌ Error in data analysis: {str(e)}")
+
+def perform_descriptive_analysis(df, target, features):
+    """Descriptive statistical analysis"""
+    import pandas as pd
+    import numpy as np
+
+    st.subheader("📊 Descriptive Statistics")
+
+    # Statistics for the target variable
+    st.write(f"### Target Variable: `{target}`")
+
+    if pd.api.types.is_numeric_dtype(df[target]):
+        col1, col2, col3, col4 = st.columns(4)
+
+        with col1:
+            st.metric("Mean", f"{df[target].mean():.2f}")
+        with col2:
+            st.metric("Median", f"{df[target].median():.2f}")
+        with col3:
+            st.metric("Std Dev", f"{df[target].std():.2f}")
+        with col4:
+            st.metric("Missing", f"{df[target].isnull().sum()}")
+
+        # Detailed statistics
+        st.dataframe(df[target].describe(), use_container_width=True)
+
+    else:
+        col1, col2, col3 = st.columns(3)
+
+        with col1:
+            st.metric("Unique Values", df[target].nunique())
+        with col2:
+            st.metric("Most Frequent", df[target].mode().iloc[0] if not df[target].mode().empty else "N/A")
+        with col3:
+            st.metric("Missing", f"{df[target].isnull().sum()}")
+
+        # Value counts
+        value_counts = df[target].value_counts()
+        st.write("**Value Distribution:**")
+        st.dataframe(value_counts, use_container_width=True)
+
+    # Statistics for numeric features
+    numeric_features = [f for f in features if pd.api.types.is_numeric_dtype(df[f])]
+    if numeric_features:
+        st.write("### Numeric Features Summary")
+        st.dataframe(df[numeric_features].describe(), use_container_width=True)
+
+    # Statistics for categorical features
+    categorical_features = [f for f in features if not pd.api.types.is_numeric_dtype(df[f])]
+    if categorical_features:
+        st.write("### Categorical Features Summary")
+        for feature in categorical_features:
+            with st.expander(f"`{feature}`"):
+                value_counts = df[feature].value_counts()
+                st.dataframe(value_counts, use_container_width=True)
+
+def perform_correlation_analysis(df, target, features):
+    """Correlation analysis"""
+    import pandas as pd
+    import numpy as np
+    import plotly.express as px
+    import plotly.graph_objects as go
+
+    st.subheader("🔗 Correlation Analysis")
+
+    # Use only numeric features for the correlation
+    numeric_features = [f for f in features if pd.api.types.is_numeric_dtype(df[f])]
+
+    if pd.api.types.is_numeric_dtype(df[target]):
+        numeric_features.append(target)
+
+    if len(numeric_features) < 2:
+        st.warning("⚠️ Need at least 2 numeric features for correlation analysis")
+        return
+
+    correlation_df = df[numeric_features].corr()
+
+    # Correlation heatmap
+    st.write("### Correlation Heatmap")
+    fig = px.imshow(correlation_df,
+                    title="Feature Correlation Heatmap",
+                    color_continuous_scale="RdBu_r",
+                    aspect="auto")
+    st.plotly_chart(fig, use_container_width=True)
+
+    # Correlation with the target
+    if pd.api.types.is_numeric_dtype(df[target]):
+        st.write("### Correlation with Target")
+        target_corr = correlation_df[target].drop(target).sort_values(ascending=False)
+
+        col1, col2 = st.columns(2)
+
+        with col1:
+            fig = px.bar(x=target_corr.values, y=target_corr.index,
+                         orientation='h',
+                         title=f"Correlation with {target}",
+                         labels={'x': 'Correlation', 'y': 'Feature'})
+            st.plotly_chart(fig, use_container_width=True)
+
+        with col2:
+            # Correlation table
+            st.dataframe(target_corr.round(4), use_container_width=True)
+
+def perform_distribution_analysis(df, target, features, sample_size):
+    """Distribution analysis"""
+    import pandas as pd
+    import plotly.express as px
+    import plotly.graph_objects as go
+    from plotly.subplots import make_subplots
+
+    st.subheader("📈 Distribution Analysis")
+
+    # Sample the data to keep the visualizations fast
+    # (note: the plots below currently use the full df)
+    sample_df = df.sample(min(sample_size, len(df)), random_state=42)
+
+    # Target variable distribution
+    st.write(f"### Target Variable Distribution: `{target}`")
+
+    if pd.api.types.is_numeric_dtype(df[target]):
+        col1, col2 = st.columns(2)
+
+        with col1:
+            # Histogram
+            fig = px.histogram(df, x=target,
+                               title=f"Distribution of {target}",
+                               nbins=50)
+            st.plotly_chart(fig, use_container_width=True)
+
+        with col2:
+            # Box plot
+            fig = px.box(df, y=target,
+                         title=f"Box Plot of {target}")
+            st.plotly_chart(fig, use_container_width=True)
+    else:
+        # Categorical variables
+        value_counts = df[target].value_counts()
+        fig = px.pie(values=value_counts.values,
+                     names=value_counts.index,
+                     title=f"Distribution of {target}")
+        st.plotly_chart(fig, use_container_width=True)
+
+    # Numeric feature distributions
+    numeric_features = [f for f in features if pd.api.types.is_numeric_dtype(df[f])]
+    if numeric_features:
+        st.write("### Numeric Features Distribution")
+
+        # Choose the features to display
+        selected_numeric = st.multiselect(
+            "Select numeric features to visualize:",
+            numeric_features,
+            default=numeric_features[:min(3, len(numeric_features))]
+        )
+
+        if selected_numeric:
+            # One histogram per selected feature
+            fig = make_subplots(rows=len(selected_numeric), cols=1,
+                                subplot_titles=selected_numeric)
+
+            for i, feature in enumerate(selected_numeric, 1):
+                fig.add_trace(
+                    go.Histogram(x=df[feature], name=feature, nbinsx=30),
+                    row=i, col=1
+                )
+
+            fig.update_layout(height=300*len(selected_numeric),
+                              title_text="Distribution of Numeric Features")
+            st.plotly_chart(fig, use_container_width=True)
+
+    # Categorical feature distributions
+    categorical_features = [f for f in features if not pd.api.types.is_numeric_dtype(df[f])]
+    if categorical_features:
+        st.write("### Categorical Features Distribution")
+
+        selected_categorical = st.multiselect(
+            "Select categorical features to visualize:",
+            categorical_features,
+            default=categorical_features[:min(2, len(categorical_features))]
+        )
+
+        if selected_categorical:
+            for feature in selected_categorical:
+                value_counts = df[feature].value_counts().head(10)  # Top 10 only
+                fig = px.bar(x=value_counts.values, y=value_counts.index,
+                             orientation='h',
+                             title=f"Top 10 Values in {feature}")
+                st.plotly_chart(fig, use_container_width=True)
+
+def perform_relationship_analysis(df, target, features, sample_size):
+    """Analysis of relationships between variables"""
+    import pandas as pd
+    import plotly.express as px
+    import plotly.graph_objects as go
+
+    st.subheader("🔄 Relationship Analysis")
+
+    sample_df = df.sample(min(sample_size, len(df)), random_state=42)
+
+    # Numeric features for the scatter plots
+    numeric_features = [f for f in features if pd.api.types.is_numeric_dtype(df[f])]
+
+    if pd.api.types.is_numeric_dtype(df[target]) and len(numeric_features) >= 1:
+        st.write("### Scatter Plots with Target")
+
+        col1, col2 = st.columns(2)
+
+        with col1:
+            x_feature = st.selectbox("X-axis feature:", numeric_features, key="scatter_x")
+
+        with col2:
+            color_feature = st.selectbox("Color by (optional):",
+                                         [None] + [f for f in features if f != x_feature],
+                                         key="scatter_color")
+
+        if x_feature:
+            fig = px.scatter(sample_df, x=x_feature, y=target,
+                             color=color_feature if color_feature else None,
+                             title=f"{target} vs {x_feature}",
+                             opacity=0.6)
+            st.plotly_chart(fig, use_container_width=True)
+
+    # Pair plot for multiple numeric features
+    if len(numeric_features) >= 2:
+        st.write("### Pairwise Relationships")
+
+        # Build the option list once so the default can never contain an
+        # entry that is missing from the options (the target is only offered
+        # when it is numeric)
+        pairplot_options = numeric_features + ([target] if pd.api.types.is_numeric_dtype(df[target]) else [])
+        selected_for_pairplot = st.multiselect(
+            "Select features for pair plot:",
+            pairplot_options,
+            default=pairplot_options[:min(4, len(pairplot_options))]
+        )
+
+        if len(selected_for_pairplot) >= 2:
+            fig = px.scatter_matrix(sample_df[selected_for_pairplot],
+                                    dimensions=selected_for_pairplot,
+                                    height=800)
+            st.plotly_chart(fig, use_container_width=True)
+
+    # Categorical vs numerical relationships
+    categorical_features = [f for f in features if not pd.api.types.is_numeric_dtype(df[f])]
+    if categorical_features and pd.api.types.is_numeric_dtype(df[target]):
+        st.write("### Categorical vs Numerical Analysis")
+
+        cat_feature = st.selectbox("Select categorical feature:", categorical_features)
+        num_feature = st.selectbox("Select numerical feature:",
+                                   [target] + numeric_features)
+
+        if cat_feature and num_feature:
+            col1, col2 = st.columns(2)
+
+            with col1:
+                # Box plot
+                fig = px.box(df, x=cat_feature, y=num_feature,
+                             title=f"{num_feature} by {cat_feature}")
+                st.plotly_chart(fig, use_container_width=True)
+
+            with col2:
+                # Violin plot
+                fig = px.violin(df, x=cat_feature, y=num_feature,
+                                title=f"Distribution of {num_feature} by {cat_feature}")
+                st.plotly_chart(fig, use_container_width=True)
+
+def perform_comparative_analysis(df, target, features):
+    """Comparative analysis"""
+    import pandas as pd
+    import plotly.express as px
+    import plotly.graph_objects as go
+
+    st.subheader("⚖️ Comparative Analysis")
+
+    # Group-by analysis
+    st.write("### Group-wise Analysis")
+
+    group_feature = st.selectbox(
+        "Group by feature:",
+        [None] + [f for f in features if not pd.api.types.is_numeric_dtype(df[f])]
+    )
+
+    if group_feature:
+        if pd.api.types.is_numeric_dtype(df[target]):
+            # Numeric target
+            summary = df.groupby(group_feature)[target].agg(['mean', 'median', 'std', 'count']).round(2)
+            st.dataframe(summary, use_container_width=True)
+
+            # Visualization
+            col1, col2 = st.columns(2)
+
+            with col1:
+                fig = px.bar(summary.reset_index(), x=group_feature, y='mean',
+                             title=f"Average {target} by {group_feature}")
+                st.plotly_chart(fig, use_container_width=True)
+
+            with col2:
+                fig = px.box(df, x=group_feature, y=target,
+                             title=f"Distribution of {target} by {group_feature}")
+                st.plotly_chart(fig, use_container_width=True)
+
+        else:
+            # Categorical target
+            cross_tab = pd.crosstab(df[group_feature], df[target], normalize='index') * 100
+            st.write("**Percentage Distribution:**")
+            st.dataframe(cross_tab.round(2), use_container_width=True)
+
+            # Stacked bar chart
+            fig = px.bar(cross_tab.reset_index(),
+                         x=group_feature,
+                         y=cross_tab.columns.tolist(),
+                         title=f"Distribution of {target} by {group_feature}",
+                         barmode='stack')
+            st.plotly_chart(fig, use_container_width=True)
+
+    # Time-series analysis (when a datetime column exists)
+    datetime_columns = df.select_dtypes(include=['datetime64']).columns.tolist()
+    if datetime_columns and pd.api.types.is_numeric_dtype(df[target]):
+        st.write("### Time Series Analysis")
+
+        date_col = st.selectbox("Select date column:", datetime_columns)
+
+        if date_col:
+            # Sort by time before aggregating
+            df_sorted = df.sort_values(date_col)
+
+            # Choose the aggregation frequency
+            freq = st.selectbox("Aggregation frequency:",
+                                ['D', 'W', 'M', 'Q'],
+                                format_func=lambda x: {'D': 'Daily', 'W': 'Weekly',
+                                                       'M': 'Monthly', 'Q': 'Quarterly'}[x])
+
+            time_series = df_sorted.set_index(date_col)[target].resample(freq).mean()
+
+            fig = px.line(time_series.reset_index(),
+                          x=date_col, y=target,
+                          title=f"{target} Over Time")
+            st.plotly_chart(fig, use_container_width=True)
+
+def feature_analysis_dashboard(df, numeric_cols, non_numeric_cols):
+    """Comprehensive feature-analysis dashboard with optimizations for large datasets"""
+
+    st.header("🔍 Advanced Feature Analysis")
+
+    # Dataset information
+    st.subheader("📊 Dataset Overview")
+    col1, col2, col3 = st.columns(3)
+    with col1:
+        st.metric("Total Features", f"{len(numeric_cols) + len(non_numeric_cols):,}")
+    with col2:
+        st.metric("Numeric Features", f"{len(numeric_cols):,}")
+    with col3:
+        st.metric("Categorical Features", f"{len(non_numeric_cols):,}")
+
+    # Memory optimization
+    if st.checkbox("Optimize Memory Usage", value=True, key="feature_optimize_mem"):
+        df = optimize_memory_usage_feature(df)
+        st.success("✅ Memory usage optimized!")
+
+    # Performance configuration
+    st.subheader("⚡ Performance Configuration")
+
+    col1, col2 = st.columns(2)
+
+    with col1:
+        # Sampling options for large datasets
+        use_sampling = st.checkbox("Use Sampling for Large Dataset", value=len(df) > 10000,
+                                   key="feature_use_sampling")
+
+        if use_sampling:
+            sample_size = st.slider(
+                "Sample Size",
+                min_value=1000,
+                max_value=min(50000, len(df)),
+                value=min(20000, len(df)),
+                step=1000,
+                key="feature_sample_size"
+            )
+            st.info(f"🎯 Using {sample_size} samples from {len(df):,} total records")
+
+        # Processing speed control
+        processing_speed = st.select_slider(
+            "Processing Speed",
+            options=["Fast", "Balanced", "Comprehensive"],
+            value="Balanced",
+            key="feature_processing_speed"
+        )
+
+        # Configure parameters based on the speed selection
+        speed_config = {
+            "Fast": {"n_estimators": 50, "n_repeats": 3, "max_features": 20},
+            "Balanced": {"n_estimators": 100, "n_repeats": 5, "max_features": 30},
+            "Comprehensive": {"n_estimators": 200, "n_repeats": 10, "max_features": 50}
+        }
+        config = speed_config[processing_speed]
+
+    with col2:
+        # Advanced options
+        st.write("**Advanced Options:**")
+
+        max_features_display = st.slider(
+            "Max Features to Display",
+            5, 50, 15,
+            key="max_features_display"
+        )
+
+        remove_high_corr = st.checkbox(
+            "Remove Highly Correlated Features",
+            value=True,
+            key="feature_remove_corr"
+        )
+
+        correlation_threshold = st.slider(
+            "Correlation Threshold",
+            0.7, 0.99, 0.9, 0.01,
+            key="feature_corr_threshold"
+        )
+
+        random_state = st.number_input(
+            "Random State",
+            value=42,
+            key="feature_random_state"
+        )
+
+    # Feature importance analysis
+    st.subheader("🎯 Feature Importance Analysis")
+
+    col1, col2 = st.columns(2)
+
+    with col1:
+        # Multiple feature-importance methods
+        importance_method = st.selectbox(
+            "Choose a Feature Importance Method",
+            ["Random Forest", "Permutation Importance", "Mutual Information", "All Methods"],
+            key="feature_importance_method"
+        )
+
+        # Problem type selection
+        problem_type = st.radio(
+            "Problem Type",
+            ["Regression", "Classification", "Auto Detect"],
+            key="feature_problem_type"
+        )
+
+    with col2:
+        target_feature = st.selectbox(
+            "Choose a Target for Feature Importance",
+            numeric_cols + non_numeric_cols,
+            key="feature_importance_target"
+        )
+
+        # Feature selection
+        available_features = [f for f in numeric_cols + non_numeric_cols if f != target_feature]
+
+        if len(available_features) > config["max_features"]:
+            st.warning(f"⚠️ Showing first {config['max_features']} features. Use comprehensive mode for more.")
+            available_features = available_features[:config["max_features"]]
+
+        selected_features = st.multiselect(
+            "Choose Features for the Analysis",
+            available_features,
+            default=available_features[:min(10, len(available_features))],
+            key="feature_analysis_features"
+        )
+
+    if not target_feature or not selected_features:
+        st.warning("📝 Select a target feature and features for the analysis")
+        return
+
+    # Progress tracking
+    progress_bar = st.progress(0)
+    status_text = st.empty()
+
+    if st.button("🚀 Compute Feature Importance", key="feature_importance_button"):
+        try:
+            # Apply sampling if needed
+            if use_sampling and len(df) > sample_size:
+                df_analysis = df.sample(n=sample_size, random_state=random_state)
+                st.info(f"🔬 Analyzing {sample_size:,} sampled records")
+            else:
+                df_analysis = df
+
+            status_text.text("🔄 Preparing data...")
+            progress_bar.progress(10)
+
+            # Prepare features and target
+            X = df_analysis[selected_features].copy()
+            y = df_analysis[target_feature]
+
+            # Auto-detect the problem type
+            if problem_type == "Auto Detect":
+                if target_feature in numeric_cols:
+                    problem_type_detected = "Regression"
+                else:
+                    problem_type_detected = "Classification"
+                st.info(f"🔍 Auto-detected: {problem_type_detected}")
+            else:
+                problem_type_detected = problem_type
+
+            progress_bar.progress(20)
+
+            # Optimized preprocessing
+            status_text.text("🔧 Preprocessing features...")
+            X_processed, feature_names = preprocess_features_optimized(
+                X, numeric_cols, non_numeric_cols, remove_high_corr, correlation_threshold
+            )
+
+            progress_bar.progress(40)
+
+            # Encode the target variable for classification
+            le_target = None
+            if problem_type_detected == "Classification" and y.dtype == 'object':
+                le_target = LabelEncoder()
+                y = le_target.fit_transform(y.astype(str))
+                st.info(f"🎯 Target encoded: {len(le_target.classes_)} classes")
+
+            progress_bar.progress(50)
+
+            # Handle missing values
+            X_processed = handle_missing_values_optimized(X_processed)
+
+            progress_bar.progress(60)
+
+            # Calculate feature importance with the selected method(s)
+            status_text.text("📊 Calculating feature importance...")
+
+            results = {}
+
+            if importance_method in ["Random Forest", "All Methods"]:
+                results["Random Forest"] = calculate_rf_importance(
+                    X_processed, y, problem_type_detected, config, random_state
+                )
+                progress_bar.progress(70)
+
+            if importance_method in ["Permutation Importance", "All Methods"]:
+                results["Permutation"] = calculate_permutation_importance(
+                    X_processed, y, problem_type_detected, config, random_state
+                )
+                progress_bar.progress(80)
+
+            if importance_method in ["Mutual Information", "All Methods"]:
+                results["Mutual Info"] = calculate_mutual_info(
+                    X_processed, y, problem_type_detected
+                )
+                progress_bar.progress(90)
+
+            progress_bar.progress(95)
+
+            # Display results
+            status_text.text("📈 Displaying results...")
+            display_feature_importance_results(
+                results, feature_names, max_features_display, problem_type_detected
+            )
+
+            progress_bar.progress(100)
+            status_text.text("✅ Analysis completed!")
+
+            # Additional insights
+            show_feature_analysis_insights(results, X_processed, y, problem_type_detected)
+
+        except Exception as e:
+            st.error(f"❌ Error in feature importance analysis: {str(e)}")
+            st.info("💡 Tip: try reducing the number of features, using sampling, or selecting 'Fast' mode")
+
+def optimize_memory_usage_feature(df):
+    """Optimize memory usage for feature analysis"""
+    start_mem = df.memory_usage(deep=True).sum() / 1024**2
+
+    for col in df.columns:
+        col_type = df[col].dtype
+
+        if col_type == 'object':
+            if df[col].nunique() / len(df) < 0.5:  # Only when cardinality is not too high
+                df[col] = df[col].astype('category')
+        elif col_type in ['int64', 'int32']:
+            c_min = df[col].min()
+            c_max = df[col].max()
+            if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(np.int8).max:
+                df[col] = df[col].astype(np.int8)
+            elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(np.int16).max:
+                df[col] = df[col].astype(np.int16)
+            elif c_min > np.iinfo(np.int32).min and c_max < np.iinfo(np.int32).max:
+                df[col] = df[col].astype(np.int32)
+        elif col_type in ['float64', 'float32']:
+            c_min = df[col].min()
+            c_max = df[col].max()
+            if c_min > np.finfo(np.float16).min and c_max < np.finfo(np.float16).max:
+                df[col] = df[col].astype(np.float16)
+            elif c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max:
+                df[col] = df[col].astype(np.float32)
+
+    end_mem = df.memory_usage(deep=True).sum() / 1024**2
+    st.success(f"💾 Memory reduced: {start_mem:.2f}MB → {end_mem:.2f}MB ({((start_mem - end_mem) / start_mem * 100):.1f}% reduction)")
+
+    return df
+
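One caveat with the downcasting above: range checks alone do not guarantee safety, since float16 carries only about three significant decimal digits. A small self-contained check (illustrative only; the column name is made up) that measures the round-trip error before committing to the narrower dtype:

    import numpy as np
    import pandas as pd

    s = pd.Series(np.random.uniform(0, 1, 1000), name="example_feature")
    # Downcast and come back up to measure the precision that was lost
    max_err = (s - s.astype(np.float16).astype(np.float64)).abs().max()
    print(f"max round-trip error after float16 downcast: {max_err:.2e}")
    # If this error matters for the analysis, cap the downcast at float32 instead.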
+def preprocess_features_optimized(X, numeric_cols, non_numeric_cols, remove_high_corr, threshold):
+    """Preprocess features with optimizations for large datasets"""
+
+    X_processed = X.copy()
+    feature_names = list(X.columns)
+
+    # Encode categorical features efficiently
+    categorical_columns = [col for col in X.columns if col in non_numeric_cols]
+
+    for col in categorical_columns:
+        if X_processed[col].nunique() > 50:  # Categorical columns with many unique values
+            # Use frequency encoding
+            freq_map = X_processed[col].value_counts().to_dict()
+            X_processed[col] = X_processed[col].map(freq_map)
+            X_processed[col].fillna(0, inplace=True)
+        else:
+            # Use label encoding
+            le = LabelEncoder()
+            X_processed[col] = le.fit_transform(X_processed[col].astype(str))
+
+    # Remove highly correlated features
+    if remove_high_corr and len(X_processed.columns) > 1:
+        numeric_features = [col for col in X_processed.columns if col in numeric_cols or col in categorical_columns]
+        if len(numeric_features) > 1:
+            X_numeric = X_processed[numeric_features]
+            corr_matrix = X_numeric.corr().abs()
+
+            # Drop any feature that is highly correlated with an earlier one
+            upper_triangle = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(bool))
+            to_drop = [column for column in upper_triangle.columns if any(upper_triangle[column] > threshold)]
+
+            if to_drop:
+                X_processed = X_processed.drop(columns=to_drop)
+                feature_names = [f for f in feature_names if f not in to_drop]
+                st.info(f"🗑️ Removed {len(to_drop)} highly correlated features")
+
+    return X_processed, feature_names
+
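For high-cardinality columns the function above falls back to frequency encoding, replacing each category with its occurrence count. A tiny self-contained illustration of that mapping, with made-up data:

    import pandas as pd

    s = pd.Series(["a", "b", "a", "c", "a", "b"])
    freq_map = s.value_counts().to_dict()   # {'a': 3, 'b': 2, 'c': 1}
    print(s.map(freq_map).tolist())         # [3, 2, 3, 1, 3, 2]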
+def handle_missing_values_optimized(X):
+    """Handle missing values with a reasonable default strategy"""
+    X_processed = X.copy()
+
+    for col in X_processed.columns:
+        if X_processed[col].isnull().sum() > 0:
+            if X_processed[col].dtype in ['int8', 'int16', 'int32', 'int64', 'float16', 'float32', 'float64']:
+                # For numeric columns use the median (more robust to outliers)
+                X_processed[col].fillna(X_processed[col].median(), inplace=True)
+            else:
+                # For categorical columns use the mode
+                if len(X_processed[col].mode()) > 0:
+                    X_processed[col].fillna(X_processed[col].mode()[0], inplace=True)
+                else:
+                    X_processed[col].fillna(0, inplace=True)
+
+    return X_processed
+
+def calculate_rf_importance(X, y, problem_type, config, random_state):
+    """Calculate Random Forest feature importance"""
+    if problem_type == "Regression":
+        model = RandomForestRegressor(
+            n_estimators=config["n_estimators"],
+            random_state=random_state,
+            n_jobs=-1  # Parallel processing
+        )
+    else:
+        model = RandomForestClassifier(
+            n_estimators=config["n_estimators"],
+            random_state=random_state,
+            n_jobs=-1
+        )
+
+    model.fit(X, y)
+    importances = model.feature_importances_
+
+    return {
+        'importances': importances,
+        'model': model
+    }
+
+def calculate_permutation_importance(X, y, problem_type, config, random_state):
+    """Calculate permutation importance"""
+    # y may arrive as a plain ndarray (e.g. after label encoding); align it
+    # with X's index so the .loc subsampling below works in either case
+    if not isinstance(y, pd.Series):
+        y = pd.Series(np.asarray(y), index=X.index)
+
+    if problem_type == "Regression":
+        model = RandomForestRegressor(
+            n_estimators=config["n_estimators"],
+            random_state=random_state,
+            n_jobs=-1
+        )
+    else:
+        model = RandomForestClassifier(
+            n_estimators=config["n_estimators"],
+            random_state=random_state,
+            n_jobs=-1
+        )
+
+    model.fit(X, y)
+
+    # For large datasets, score on a subsample
+    if len(X) > 10000:
+        X_subsample = X.sample(n=10000, random_state=random_state)
+        y_subsample = y.loc[X_subsample.index]
+    else:
+        X_subsample = X
+        y_subsample = y
+
+    perm_importance = permutation_importance(
+        model, X_subsample, y_subsample,
+        n_repeats=config["n_repeats"],
+        random_state=random_state,
+        n_jobs=-1  # Parallel processing
+    )
+
+    return {
+        'importances': perm_importance.importances_mean,
+        'std': perm_importance.importances_std
+    }
+
+def calculate_mutual_info(X, y, problem_type):
+    """Calculate mutual information"""
+    if problem_type == "Regression":
+        mi = mutual_info_regression(X, y, random_state=42, n_jobs=-1)
+    else:
+        mi = mutual_info_classif(X, y, random_state=42, n_jobs=-1)
+
+    return {
+        'importances': mi
+    }
+
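As a sanity check on the scores returned above, a feature that fully determines the target should score high while pure noise scores near zero. A small self-contained sketch with synthetic data (illustrative only, not taken from the diff):

    import numpy as np
    from sklearn.feature_selection import mutual_info_classif

    rng = np.random.default_rng(42)
    y = rng.integers(0, 2, 500)
    X = np.column_stack([y + rng.normal(0, 0.1, 500),   # informative feature
                         rng.normal(0, 1, 500)])        # noise feature
    print(mutual_info_classif(X, y, random_state=42))   # first score >> second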
+def display_feature_importance_results(results, feature_names, max_display, problem_type):
+    """Display feature-importance results with comprehensive visualizations"""
+
+    st.subheader("📊 Feature Importance Results")
+
+    # Show every method in its own tab
+    tabs = st.tabs(list(results.keys()))
+
+    for tab, (method_name, result) in zip(tabs, results.items()):
+        with tab:
+            importances = result['importances']
+
+            # Create the importance dataframe
+            importance_df = pd.DataFrame({
+                'feature': feature_names,
+                'importance': importances
+            }).sort_values('importance', ascending=False)
+
+            # Display top features
+            st.write(f"**Top {min(max_display, len(importance_df))} Features - {method_name}**")
+
+            col1, col2 = st.columns([2, 1])
+
+            with col1:
+                # Bar chart
+                fig = px.bar(
+                    importance_df.head(max_display),
+                    x='importance',
+                    y='feature',
+                    title=f"{method_name} Feature Importance",
+                    orientation='h',
+                    color='importance',
+                    color_continuous_scale='viridis'
+                )
+                fig.update_layout(showlegend=False)
+                st.plotly_chart(fig, use_container_width=True)
+
+            with col2:
+                # Table view
+                st.dataframe(
+                    importance_df.head(10)[['feature', 'importance']].round(4),
+                    use_container_width=True
+                )
+
+            # Additional info for permutation importance
+            if method_name == "Permutation" and 'std' in result:
+                st.write("**Permutation Importance with Std Dev:**")
+                perm_df = pd.DataFrame({
+                    'feature': feature_names,
+                    'importance': importances,
+                    'std': result['std']
+                }).sort_values('importance', ascending=False)
+
+                fig = px.bar(
+                    perm_df.head(max_display),
+                    x='importance',
+                    y='feature',
+                    error_x='std',
+                    title="Permutation Importance ± Std Dev",
+                    orientation='h'
+                )
+                st.plotly_chart(fig, use_container_width=True)
+
+def show_feature_analysis_insights(results, X, y, problem_type):
+    """Show additional insights from the feature analysis"""
+
+    st.subheader("💡 Analysis Insights")
+
+    col1, col2 = st.columns(2)
+
+    with col1:
+        st.write("**Dataset Characteristics:**")
+        st.write(f"- Total samples: {len(X):,}")
+        st.write(f"- Total features: {len(X.columns)}")
+        st.write(f"- Problem type: {problem_type}")
+
+        if problem_type == "Classification":
+            st.write(f"- Number of classes: {len(np.unique(y))}")
+        else:
+            st.write(f"- Target range: {y.min():.2f} to {y.max():.2f}")
+
+    with col2:
+        st.write("**Feature Importance Consensus:**")
+
+        # Compute a consensus score across all methods
+        consensus_scores = {}
+        for method_name, result in results.items():
+            importances = result['importances']
+            for i, feature in enumerate(X.columns):
+                if feature not in consensus_scores:
+                    consensus_scores[feature] = []
+                consensus_scores[feature].append(importances[i])
+
+        # Average the scores across methods
+        avg_scores = {feature: np.mean(scores) for feature, scores in consensus_scores.items()}
+        top_features = sorted(avg_scores.items(), key=lambda x: x[1], reverse=True)[:5]
+
+        for feature, score in top_features:
+            st.write(f"- {feature}: {score:.4f}")
+
+    # Correlation analysis for the top features
+    if len(results) > 0:
+        st.write("**Top Features Correlation Matrix:**")
+
+        # Take the top 8 features from the first method
+        first_method = list(results.values())[0]
+        top_indices = np.argsort(first_method['importances'])[-8:][::-1]
+        top_features_corr = [X.columns[i] for i in top_indices if i < len(X.columns)]
+
+        if len(top_features_corr) > 1:
+            corr_matrix = X[top_features_corr].corr()
+
+            fig = px.imshow(
+                corr_matrix,
+                text_auto=True,
+                aspect="auto",
+                color_continuous_scale="RdBu_r",
+                title="Correlation Matrix of Top Features"
+            )
+            st.plotly_chart(fig, use_container_width=True)

 # Function to load the data
 def load_data(uploaded_file):
@@ -7958,6 +9636,29 @@ uploaded_files = st.sidebar.file_uploader(
     accept_multiple_files=True
 )
 
+# Website selection
+website_option = st.sidebar.selectbox(
+    "Choose a website:",
+    ["https://streamlit-launcher.vercel.app/", "Custom URL"]
+)
+
+if website_option == "Custom URL":
+    custom_url = st.sidebar.text_input("Enter a custom URL:")
+    if custom_url:
+        website_url = custom_url
+    else:
+        website_url = "https://streamlit-launcher.vercel.app/"
+else:
+    website_url = website_option
+
+# Render the website in an iframe
+if st.sidebar.button("🌐 Show Website"):
+    st.markdown(f"""
+    <div style="border: 2px solid #e0e0e0; border-radius: 10px; padding: 10px; margin: 10px 0;">
+        <iframe src="{website_url}" width="100%" height="600" style="border: none; border-radius: 8px;"></iframe>
+    </div>
+    """, unsafe_allow_html=True)
+
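Since the import block at the top of this diff already adds streamlit.components.v1 as components, the raw-HTML iframe above could equivalently use the built-in helper. A minimal sketch of that alternative, reusing the same website_url variable:

    if st.sidebar.button("🌐 Show Website"):
        components.iframe(website_url, height=600)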
 merge_method = "concat"
 if uploaded_files and len(uploaded_files) > 1:
     merge_method = st.sidebar.selectbox(
@@ -7982,439 +9683,1033 @@ if uploaded_files:
     else:
         df = merge_datasets(datasets, merge_method)
 
+try:
+    from stl import mesh
+    import trimesh
+    import os
+except ImportError:
+    st.warning("Some of the 3D libraries are not installed. Install them with: pip install numpy-stl trimesh plotly")
 REMOVE_BG_API_KEY = "xQH5KznYiupRrywK5yPcjeyi"
 PIXELS_API_KEY = "LH59shPdj1xO0lolnHPsClH23qsnHE4NjkCFBhKEXvR0CbqwkrXbqBnw"
 if df is not None:
-    tab1, tab2, tab3, tab4, tab5, tab6, tab7, tab8, tab9 = st.tabs([
+    tab1, tab2, tab3, tab4, tab5, tab6, tab7, tab8, tab9, tab10 = st.tabs([
         "📊 Statistics",
         "📈 Visualization",
         "💾 Data",
         "ℹ️ Information",
         "🧮 Calculator",
         "🖼️ Vitures",
-        "📍 Flowchart",
+        "📍 Flowchart",
         "📊 Stock Charts",
-        "🗃️ SQL Style"
+        "🗃️ SQL Style",
+        "🔄 3D Model & Analysis"
     ])
+
+    with tab10:
+        st.header("🔄 Convert an Image to a 3D Model with Analysis")
+
+        # Image upload
+        uploaded_file = st.file_uploader("Upload an image to convert to 3D",
+                                         type=['png', 'jpg', 'jpeg'],
+                                         key="3d_converter")
+
+        col1, col2 = st.columns(2)
+
+        with col1:
+            if uploaded_file is not None:
+                # Display the original image
+                st.subheader("🖼️ Original Image")
+                st.image(uploaded_file, use_column_width=True)
+
+                # Image analysis
+                st.subheader("📊 Image Analysis")
+
+                # Convert to a numpy array for analysis
+                import numpy as np
+                from PIL import Image
+
+                image = Image.open(uploaded_file)
+                img_array = np.array(image)
+
+                # Basic image statistics
+                st.write(f"**Image Dimensions:** {img_array.shape}")
+                st.write(f"**Data Type:** {img_array.dtype}")
+                st.write(f"**Value Range:** {img_array.min()} - {img_array.max()}")
+
+                # Color distribution
+                if len(img_array.shape) == 3:  # Color image
+                    st.write("**RGB Color Distribution:**")
+                    colors = ['Red', 'Green', 'Blue']
+                    for i, color in enumerate(colors):
+                        channel_data = img_array[:, :, i]
+                        st.write(f"{color}: Mean={channel_data.mean():.2f}, Std={channel_data.std():.2f}")
+
+        with col2:
+            if uploaded_file is not None:
+                st.subheader("📈 Analysis Charts")
+
+                # Derive some 3D data from the image
+                height, width = img_array.shape[0], img_array.shape[1]
+
+                # Generate 3D surface data from the image intensity
+                if len(img_array.shape) == 3:
+                    gray_img = np.mean(img_array, axis=2)  # Convert to grayscale
+                else:
+                    gray_img = img_array
+
+                # Downsample for performance
+                downsample_factor = max(1, gray_img.shape[0] // 50)
+                gray_img_small = gray_img[::downsample_factor, ::downsample_factor]
+
+                # Create the 3D surface plot
+                fig_3d = go.Figure(data=[go.Surface(z=gray_img_small)])
+                fig_3d.update_layout(
+                    title='3D Surface from the Image',
+                    scene=dict(
+                        xaxis_title='X',
+                        yaxis_title='Y',
+                        zaxis_title='Intensity'
+                    )
+                )
+                st.plotly_chart(fig_3d, use_container_width=True)
+
+                # 2D histogram of intensities
+                fig_hist = px.histogram(x=gray_img.flatten(),
+                                        title='Pixel Intensity Distribution',
+                                        labels={'x': 'Intensity', 'y': 'Frequency'})
+                st.plotly_chart(fig_hist, use_container_width=True)
+
+        # Additional analysis section
+        if uploaded_file is not None:
+            st.subheader("🔍 Detailed Analysis")
+
+            col3, col4 = st.columns(2)
+
+            with col3:
+                # Edge detection simulation
+                st.write("**Edge Detection (Simulated):**")
+
+                # Simple edge detection using gradients
+                from scipy import ndimage
+
+                # Calculate gradients
+                grad_x = ndimage.sobel(gray_img, axis=0)
+                grad_y = ndimage.sobel(gray_img, axis=1)
+                gradient_magnitude = np.hypot(grad_x, grad_y)
+
+                # Display the edge map
+                fig_edges = px.imshow(gradient_magnitude,
+                                      title='Edge Map',
+                                      color_continuous_scale='gray')
+                st.plotly_chart(fig_edges, use_container_width=True)
+
+            with col4:
+                # Statistical summary
+                st.write("**Statistical Summary:**")
+
+                # Shannon entropy of the intensity histogram (the counts are
+                # normalized to probabilities that sum to 1 before taking logs)
+                hist_counts, _ = np.histogram(gray_img, bins=256)
+                probs = hist_counts / hist_counts.sum()
+                probs = probs[probs > 0]
+                entropy = -np.sum(probs * np.log2(probs))
+
+                stats_data = {
+                    'Metric': ['Mean', 'Median', 'Std Dev', 'Variance', 'Entropy'],
+                    'Value': [
+                        f"{gray_img.mean():.2f}",
+                        f"{np.median(gray_img):.2f}",
+                        f"{gray_img.std():.2f}",
+                        f"{gray_img.var():.2f}",
+                        f"{entropy:.2f}"
+                    ]
+                }
+
+                st.dataframe(stats_data, use_container_width=True)
+
+        # Date selection for the analysis
+        analysis_date = st.date_input("Select Analysis Date",
+                                      value=datetime.now().date(),
+                                      key="3d_analysis_date")
+
+        st.write(f"**Analysis for date:** {analysis_date}")
+
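To sanity-check the histogram-based entropy used above: a constant image has entropy 0, and an image whose 256 intensity levels are equally likely has entropy log2(256) = 8 bits. A self-contained check with synthetic arrays (illustrative, not part of the package):

    import numpy as np

    def hist_entropy(img, bins=256, value_range=(0, 256)):
        # Normalize bin counts to probabilities, drop empty bins
        p = np.histogram(img, bins=bins, range=value_range)[0] / img.size
        p = p[p > 0]
        return -np.sum(p * np.log2(p))

    print(hist_entropy(np.zeros((64, 64))))                         # 0.0
    print(hist_entropy(np.arange(256).repeat(16).reshape(64, 64)))  # 8.0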
+        # Model conversion options
+        if uploaded_file is not None:
+            st.subheader("⚙️ 3D Conversion Options")
+
+            conversion_type = st.selectbox(
+                "Choose a 3D model type:",
+                ["Surface Mesh", "Point Cloud", "Voxel Grid", "Height Map"]
+            )
+
+            resolution = st.slider("3D Model Resolution", 10, 100, 50)
+            height_scale = st.slider("3D Height Scale", 0.1, 5.0, 1.0)
+
+            if st.button("🚀 Generate Model 3D", type="primary"):
+                with st.spinner("Building the 3D model..."):
+                    try:
+                        # Progress bar
+                        progress_bar = st.progress(0)
+
+                        # Convert the image to grayscale and normalize
+                        if len(img_array.shape) == 3:
+                            gray_img = np.mean(img_array, axis=2)
+                        else:
+                            gray_img = img_array
+
+                        # Normalize to 0-1
+                        gray_img_normalized = gray_img.astype(np.float32) / 255.0
+
+                        progress_bar.progress(25)
+
+                        # Downsample the image based on the chosen resolution
+                        downsample = max(1, gray_img_normalized.shape[0] // resolution)
+                        height_map = gray_img_normalized[::downsample, ::downsample]
+
+                        progress_bar.progress(50)
+
+                        # Generate the 3D mesh from the height map
+                        x, y = np.mgrid[0:height_map.shape[0], 0:height_map.shape[1]]
+                        z = height_map * height_scale
+
+                        progress_bar.progress(75)
+
+                        # Create vertices and faces for the mesh
+                        vertices = []
+                        faces = []
+
+                        # Create vertices
+                        for i in range(z.shape[0]):
+                            for j in range(z.shape[1]):
+                                vertices.append([i, j, z[i, j]])
+
+                        # Create faces
+                        for i in range(z.shape[0]-1):
+                            for j in range(z.shape[1]-1):
+                                # Two triangles per quad
+                                v1 = i * z.shape[1] + j
+                                v2 = v1 + 1
+                                v3 = (i + 1) * z.shape[1] + j
+                                v4 = v3 + 1
+
+                                # First triangle
+                                faces.append([v1, v2, v3])
+                                # Second triangle
+                                faces.append([v2, v4, v3])
+
+                        progress_bar.progress(90)
+
+                        # Convert to numpy arrays
+                        vertices = np.array(vertices)
+                        faces = np.array(faces)
+
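The vertex and face loops above run in O(H·W) Python iterations; for larger resolutions the same arrays can be built vectorized. A sketch of an equivalent construction (same quad-to-two-triangles layout; face order differs from the loop version but the triangle set is identical; assumed drop-in, not taken from the package):

    import numpy as np

    def height_map_mesh(z):
        h, w = z.shape
        ii, jj = np.mgrid[0:h, 0:w]
        # One vertex per grid point: (row, col, height)
        vertices = np.column_stack([ii.ravel(), jj.ravel(), z.ravel()])
        idx = np.arange(h * w).reshape(h, w)
        v1 = idx[:-1, :-1].ravel(); v2 = v1 + 1
        v3 = idx[1:, :-1].ravel();  v4 = v3 + 1
        faces = np.concatenate([np.column_stack([v1, v2, v3]),
                                np.column_stack([v2, v4, v3])])
        return vertices, faces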
+                        # Create the STL mesh
+                        from stl import mesh
+
+                        # Create the mesh object
+                        stl_mesh = mesh.Mesh(np.zeros(faces.shape[0], dtype=mesh.Mesh.dtype))
+
+                        # Assign vertices to the mesh
+                        for i, face in enumerate(faces):
+                            for j in range(3):
+                                stl_mesh.vectors[i][j] = vertices[face[j]]
+
+                        progress_bar.progress(100)
+
+                        # Save the STL data to a temporary file
+                        import tempfile
+                        import os
+
+                        with tempfile.NamedTemporaryFile(delete=False, suffix='.stl') as tmp_file:
+                            stl_mesh.save(tmp_file.name)
+
+                        # Read the file data back for download
+                        with open(tmp_file.name, 'rb') as f:
+                            stl_data = f.read()
+
+                        # Clean up the temporary file
+                        os.unlink(tmp_file.name)
+
+                        st.success("✅ 3D model created successfully!")
+
+                        # Display results
+                        st.info(f"**3D model type:** {conversion_type}")
+                        st.info(f"**Resolution:** {resolution}")
+                        st.info(f"**Mesh dimensions:** {len(vertices)} vertices, {len(faces)} faces")
+                        st.info(f"**Height scale:** {height_scale}")
+
+                        # Download button for the 3D model
+                        st.download_button(
+                            label="📥 Download Model 3D (STL)",
+                            data=stl_data,
+                            file_name=f"3d_model_{uploaded_file.name.split('.')[0]}.stl",
+                            mime="application/octet-stream"
+                        )
+
+                        # Display mesh information
+                        col5, col6 = st.columns(2)
+
+                        with col5:
+                            st.write("**Mesh Information:**")
+                            mesh_info = {
+                                'Parameter': ['Vertex Count', 'Face Count', 'X Dimension', 'Y Dimension', 'Max Height'],
+                                'Value': [
+                                    len(vertices),
+                                    len(faces),
+                                    f"{z.shape[0]} points",
+                                    f"{z.shape[1]} points",
+                                    f"{z.max():.3f}"
+                                ]
+                            }
+                            st.dataframe(mesh_info)
+
+                        with col6:
+                            # Display a 3D preview using plotly
+                            st.write("**3D Preview:**")
+
+                            # Create a simplified point set for the preview
+                            preview_downsample = max(1, len(vertices) // 1000)
+                            preview_vertices = vertices[::preview_downsample]
+
+                            fig_3d_preview = go.Figure(data=[go.Mesh3d(
+                                x=preview_vertices[:, 0],
+                                y=preview_vertices[:, 1],
+                                z=preview_vertices[:, 2],
+                                opacity=0.7,
+                                color='lightblue'
+                            )])
+
+                            fig_3d_preview.update_layout(
+                                title='3D Model Preview',
+                                scene=dict(
+                                    xaxis_title='X',
+                                    yaxis_title='Y',
+                                    zaxis_title='Z'
+                                )
+                            )
+
+                            st.plotly_chart(fig_3d_preview, use_container_width=True)
+
+                    except Exception as e:
+                        st.error(f"❌ Error while building the 3D model: {str(e)}")
+                        st.info("Make sure numpy-stl and trimesh are installed: `pip install numpy-stl trimesh`")
+
7999
9991
|
|
8000
9992
|
with tab9:
|
8001
|
-
|
8002
|
-
|
8003
|
-
|
8004
|
-
|
9993
|
+
st.header("📁 Upload File & Analisis Lengkap Database SQL")
|
9994
|
+
with st.expander("📜 Keterangan Dalam Statistik Dan Analisis", expanded=False):
|
9995
|
+
st.markdown(
|
9996
|
+
"""
|
9997
|
+
<img src="https://media.finebi.com/strapi/Annual_Sales_Summary_59110fda60.jpg" class="responsive-img">
|
9998
|
+
""",
|
9999
|
+
unsafe_allow_html=True
|
10000
|
+
)
|
10001
|
+
st.markdown("""
|
10002
|
+
|
10003
|
+
### 🚀 Keterangan Lengkap Dalam Analisis Dan Statistik Pada SQL Style
|
10004
|
+
- Akankah Hal Gila Dapat Terjadi Dan Ini lah yang Mungkin Menjadi Kenyataan Pada SQL Style?
|
10005
|
+
- Dengan adanya fitur analisis data pada SQL Style, kini Anda dapat dengan mudah mengunggah file CSV atau Excel berisi data dari database SQL Anda untuk dianalisis secara menyeluruh.
|
10006
|
+
- Fitur ini dirancang untuk memberikan wawasan mendalam tentang struktur data Anda, termasuk deteksi kolom tanggal, analisis statistik dasar, dan visualisasi data yang informatif.
|
10007
|
+
- Setelah mengunggah file, SQL Style akan secara otomatis mendeteksi kolom tanggal dan melakukan analisis mendalam terhadap data tersebut.
|
10008
|
+
- Anda akan mendapatkan statistik dasar seperti jumlah baris dan kolom, nilai unik, serta informasi tentang missing values.
|
10009
|
+
- Selain itu, fitur visualisasi data akan membantu Anda memahami distribusi data, tren waktu, dan pola musiman dengan grafik yang mudah dipahami.
|
10010
|
+
- Fitur ini sangat berguna bagi para analis data, pengembang database, dan siapa saja yang ingin mendapatkan pemahaman lebih baik tentang data mereka.
|
10011
|
+
- Kami terus berupaya untuk meningkatkan fitur ini agar dapat memberikan pengalaman analisis data yang lebih baik dan lebih komprehensif.
|
10012
|
+
- dan kami akan segera update SQL Style ini agar lebih baik lagi kedepannya.
|
10013
|
+
- Terima kasih atas pengertian dan dukungannya.
|
10014
|
+
""")
|
10015
|
+
|
10016
|
+
+        # File upload
+        uploaded_file = st.file_uploader(
+            "Choose a CSV or Excel file",
+            type=['csv', 'xlsx', 'xls'],
+            help="Upload a data file to analyze"
+        )
+
+        if uploaded_file is not None:
+            try:
+                # Read the file based on its type
+                if uploaded_file.name.endswith('.csv'):
+                    df = pd.read_csv(uploaded_file)
+                else:
+                    df = pd.read_excel(uploaded_file)
+
+                # Clean the dataframe - handle mixed types and object dtypes
+                def clean_dataframe(df):
+                    df_clean = df.copy()
+
+                    # Convert object columns to appropriate types
+                    for col in df_clean.columns:
+                        # Skip if the column is already numeric or datetime
+                        if pd.api.types.is_numeric_dtype(df_clean[col]):
+                            continue
+                        if pd.api.types.is_datetime64_any_dtype(df_clean[col]):
+                            continue
+
+                        # Try a numeric conversion first; this is deliberately
+                        # all-or-nothing - on failure the column stays unchanged
+                        try:
+                            df_clean[col] = pd.to_numeric(df_clean[col])
+                        except (ValueError, TypeError):
+                            pass
+
+                        # If still object, try a datetime conversion
+                        if df_clean[col].dtype == 'object':
+                            try:
+                                df_clean[col] = pd.to_datetime(df_clean[col])
+                            except (ValueError, TypeError):
+                                pass
+
+                        # Handle ObjectDType specifically
+                        if hasattr(df_clean[col].dtype, 'name') and df_clean[col].dtype.name == 'object':
+                            # Convert to string to avoid ObjectDType issues
+                            df_clean[col] = df_clean[col].astype(str)
+
+                    return df_clean
 
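The conversion above is intentionally all-or-nothing: pd.to_numeric raises on a mixed column, so a column is only retyped when every value parses. A tiny illustration with made-up values:

    import pandas as pd

    print(pd.to_numeric(pd.Series(["1", "2", "3"])).dtype)  # int64
    try:
        pd.to_numeric(pd.Series(["1", "x"]))
    except ValueError:
        print("mixed column left unchanged")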
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+                df = clean_dataframe(df)
+
+                st.success(f"File uploaded successfully! Shape: {df.shape}")
+
+                # Show a data preview
+                st.subheader("📋 Data Preview")
+                st.dataframe(df.head())
+
+                # Basic dataset information
+                st.subheader("📊 Dataset Information")
+                col1, col2, col3, col4 = st.columns(4)
+
+                with col1:
+                    st.metric("Row Count", df.shape[0])
+                with col2:
+                    st.metric("Column Count", df.shape[1])
+                with col3:
+                    st.metric("Missing Values", df.isnull().sum().sum())
+                with col4:
+                    st.metric("Duplicates", df.duplicated().sum())
+
+                # --- DATA STRUCTURE ANALYSIS FOR A DYNAMIC ERD ---
+                st.subheader("🔍 Data Structure Analysis for the ERD")
+
+                # Safe dtype-detection helper
+                def safe_dtype_detection(df):
+                    numeric_cols = []
+                    categorical_cols = []
+                    date_cols = []
+                    bool_cols = []
+                    other_cols = []
 
-
+                    for col in df.columns:
+                        col_dtype = str(df[col].dtype)
+
+                        # Check numeric
+                        if pd.api.types.is_numeric_dtype(df[col]):
+                            numeric_cols.append(col)
+                        # Check datetime
+                        elif pd.api.types.is_datetime64_any_dtype(df[col]):
+                            date_cols.append(col)
+                        # Check boolean
+                        elif pd.api.types.is_bool_dtype(df[col]):
+                            bool_cols.append(col)
+                        # Check categorical (object but with limited unique values)
+                        elif df[col].dtype == 'object':
+                            if df[col].nunique() <= 50:  # Treat as categorical if <= 50 unique values
+                                categorical_cols.append(col)
+                            else:
+                                other_cols.append(col)
+                        else:
+                            other_cols.append(col)
 
-
-
-
+                    return numeric_cols, categorical_cols, date_cols, bool_cols, other_cols
+
+                numeric_cols, categorical_cols, date_cols, bool_cols, other_cols = safe_dtype_detection(df)
+
+                # A more robust per-column analysis helper
+                def robust_column_analysis(df):
+                    column_analysis = {}
 
-
-
-
+                    for col in df.columns:
+                        try:
+                            col_data = df[col]
+
+                            # Handle ObjectDType and other problematic types
+                            if hasattr(col_data.dtype, 'name') and col_data.dtype.name == 'object':
+                                # Convert to string for analysis
+                                col_data = col_data.astype(str)
+
+                            analysis = {
+                                'dtype': str(col_data.dtype),
+                                'unique_count': col_data.nunique(),
+                                'null_count': col_data.isnull().sum(),
+                                'null_percentage': (col_data.isnull().sum() / len(col_data)) * 100,
+                                'sample_values': col_data.dropna().head(3).tolist() if not col_data.empty else []
+                            }
+
+                            # Convert sample values to strings safely
+                            safe_samples = []
+                            for val in analysis['sample_values']:
+                                try:
+                                    safe_samples.append(str(val))
+                                except Exception:
+                                    safe_samples.append('N/A')
+                            analysis['sample_values'] = safe_samples
+
+                            # Detect the column's role for the ERD
+                            col_lower = str(col).lower()
+
+                            # Primary-key detection
+                            if (analysis['unique_count'] == len(col_data) and
+                                analysis['null_count'] == 0 and
+                                any(keyword in col_lower for keyword in ['id', 'pk', 'key', 'code'])):
+                                analysis['role'] = 'PRIMARY_KEY'
+                                analysis['icon'] = '🔑'
+
+                            # Foreign-key detection
+                            elif (any(keyword in col_lower for keyword in ['id', 'fk', 'ref', 'code']) and
+                                  analysis['unique_count'] < len(col_data) * 0.8):
+                                analysis['role'] = 'FOREIGN_KEY'
+                                analysis['icon'] = '🔗'
+
+                            # Measurement columns
+                            elif any(keyword in col_lower for keyword in ['amount', 'price', 'value', 'total', 'sum', 'avg', 'quantity']):
+                                analysis['role'] = 'MEASUREMENT'
+                                analysis['icon'] = '💰'
+
+                            # Date/time columns
+                            elif any(keyword in col_lower for keyword in ['date', 'time', 'year', 'month', 'day']):
+                                analysis['role'] = 'TEMPORAL'
+                                analysis['icon'] = '📅'
+
+                            # Category columns
+                            elif (analysis['unique_count'] <= 20 and
+                                  analysis['unique_count'] > 1 and
+                                  str(col_data.dtype) == 'object'):
+                                analysis['role'] = 'CATEGORY'
+                                analysis['icon'] = '🏷️'
+
+                            # Description columns
+                            elif (str(col_data.dtype) == 'object' and
+                                  col_data.astype(str).str.len().mean() > 20):
+                                analysis['role'] = 'DESCRIPTION'
+                                analysis['icon'] = '📝'
+
+                            # Numeric metrics
+                            elif pd.api.types.is_numeric_dtype(col_data):
+                                analysis['role'] = 'METRIC'
+                                analysis['icon'] = '📊'
+
+                            else:
+                                analysis['role'] = 'ATTRIBUTE'
+                                analysis['icon'] = '📄'
+
+                            column_analysis[col] = analysis
+
+                        except Exception:
+                            # Fallback analysis for problematic columns
+                            column_analysis[col] = {
+                                'dtype': 'unknown',
+                                'role': 'ATTRIBUTE',
+                                'icon': '❓',
+                                'unique_count': 0,
+                                'null_count': len(df[col]),
+                                'null_percentage': 100.0,
+                                'sample_values': ['Error in analysis']
+                            }
 
-
-
-
-
-
-
+                    return column_analysis
|
10213
|
+
|
10214
|
+
# Analisis kolom
|
10215
|
+
column_analysis = robust_column_analysis(df)
|
10216
|
+
|
10217
|
+
# Tampilkan analisis kolom
|
10218
|
+
st.write("**Analisis Detail Kolom:**")
|
10219
|
+
analysis_data = []
|
10220
|
+
for col, analysis in column_analysis.items():
|
10221
|
+
analysis_data.append({
|
10222
|
+
'Kolom': col,
|
10223
|
+
'Tipe': analysis['dtype'],
|
10224
|
+
'Role': analysis['role'],
|
10225
|
+
'Icon': analysis['icon'],
|
10226
|
+
'Unique': analysis['unique_count'],
|
10227
|
+
'Null %': f"{analysis['null_percentage']:.1f}%"
|
10228
|
+
})
|
10229
|
+
|
10230
|
+
analysis_df = pd.DataFrame(analysis_data)
|
10231
|
+
st.dataframe(analysis_df, use_container_width=True)
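The `role` assigned above is a name-and-cardinality heuristic: PRIMARY_KEY needs full uniqueness, zero nulls, and a key-like token in the column name, while FOREIGN_KEY only needs the token plus repeated values (under 80% unique). A condensed sketch of that decision rule with the same thresholds (function name and example data are illustrative):

```python
def guess_key_role(name: str, unique_count: int, null_count: int, n_rows: int) -> str:
    """Heuristic key detection mirroring the dashboard's thresholds."""
    name = name.lower()
    if (unique_count == n_rows and null_count == 0
            and any(t in name for t in ("id", "pk", "key", "code"))):
        return "PRIMARY_KEY"
    if any(t in name for t in ("id", "fk", "ref", "code")) and unique_count < n_rows * 0.8:
        return "FOREIGN_KEY"
    return "ATTRIBUTE"

print(guess_key_role("customer_id", 1000, 0, 1000))  # PRIMARY_KEY
print(guess_key_role("product_id", 40, 0, 1000))     # FOREIGN_KEY
```

Because the match is substring-based, unrelated names can also qualify (for example "holiday" contains "id"), so the detected roles are best treated as suggestions to review rather than schema facts.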
+
+            # --- ERD DINAMIS YANG LEBIH AKURAT ---
+            st.subheader("🗄️ Entity Relationship Diagram (ERD) Dinamis")
+
+            # Konfigurasi ERD
+            col1, col2, col3 = st.columns(3)
+
+            with col1:
+                erd_style = st.selectbox(
+                    "Style ERD:",
+                    ['Vertical', 'Horizontal', 'Circular'],
+                    index=0
+                )
+
+            with col2:
+                show_relationships = st.checkbox("Tampilkan Relasi", value=True)
+
+            with col3:
+                max_tables = st.slider("Max Tabel", 3, 15, 8)
+
+            try:
+                import graphviz
+
+                # Buat graph ERD
+                dot = graphviz.Digraph(comment='Dynamic Database ERD')
+
+                # Atur layout
+                if erd_style == 'Vertical':
+                    dot.attr(rankdir='TB', size='12,16')
+                elif erd_style == 'Horizontal':
+                    dot.attr(rankdir='LR', size='16,12')
+                else:  # Circular
+                    dot.attr(rankdir='LR', size='14,14', layout='circo')
+
+                # Kelompokkan kolom berdasarkan role untuk membuat tabel
+                main_table_cols = []
+                reference_tables = {}
+
+                for col, analysis in column_analysis.items():
+                    if analysis['role'] == 'FOREIGN_KEY':
+                        # Buat tabel referensi untuk foreign key
+                        ref_table_name = f"ref_{col}"
+                        if ref_table_name not in reference_tables:
+                            ref_display_name = col.replace('_id', '').replace('ID', '').replace('_', ' ').title()
+                            reference_tables[ref_table_name] = {
+                                'name': ref_display_name,
+                                'columns': []
+                            }
+                        reference_tables[ref_table_name]['columns'].append(col)
+                    else:
+                        main_table_cols.append((col, analysis))
 
-    #
-
-    #
-
-    numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
-    categorical_cols = df.select_dtypes(include=['object']).columns.tolist()
-
-    col1, col2 = st.columns(2)
-
-    with col1:
-        st.write("**Kolom Numerik:**")
-        df_numeric = pd.DataFrame({"Nama Kolom": numeric_cols})
-        st.table(df_numeric)
-
-    with col2:
-        st.write("**Kolom Kategorikal:**")
-        df_categorical = pd.DataFrame({"Nama Kolom": categorical_cols})
-        st.table(df_categorical)
-
-    # --- Visualisasi ERD yang Sesungguhnya ---
-    st.write("---")
-    st.subheader("🗄️ Entity Relationship Diagram Visualization")
-
-    # Buat struktur entitas dan relasi
-    st.write("**Struktur Tabel Database:**")
-
-    # Generate SQL CREATE TABLE statements
-    st.markdown("### 📝 SQL Schema Definition")
-
-    # Buat diagram ERD menggunakan graphviz
-    try:
-        import graphviz
-
-        # Buat graph untuk ERD
-        dot = graphviz.Digraph(comment='Database ERD')
-        dot.attr(rankdir='TB', size='8,8')
-
-        # Buat node untuk tabel utama
+                # Batasi jumlah tabel yang ditampilkan
+                tables_to_show = min(max_tables, len(reference_tables) + 1)
+
+                # Buat tabel utama
+                if main_table_cols and tables_to_show > 0:
                    with dot.subgraph(name='cluster_main') as c:
-
-
-        c.node('table_header', f'📊 dataset_table', shape='plaintext', fontsize='14', fontname='Arial bold')
+                        table_name = uploaded_file.name.split('.')[0]  # Remove extension
+                        c.attr(label=f'📊 {table_name}', style='filled',
+                               color='lightblue', fontsize='14', fontname='Arial Bold')
 
-        # Field-field dalam tabel
                        fields = []
+                        for col, analysis in main_table_cols[:12]:  # Batasi kolom per tabel
+                            field_type = ""
+                            if pd.api.types.is_numeric_dtype(df[col]):
+                                field_type = "NUMERIC"
+                            elif pd.api.types.is_datetime64_any_dtype(df[col]):
+                                field_type = "DATETIME"
+                            elif df[col].dtype == 'object':
+                                try:
+                                    max_len = df[col].astype(str).str.len().max()
+                                    field_type = f"VARCHAR({min(255, max(50, int(max_len)))})"
+                                except:
+                                    field_type = "TEXT"
+                            elif df[col].dtype == 'bool':
+                                field_type = "BOOLEAN"
+                            else:
+                                field_type = "TEXT"
+
+                            constraint = ""
+                            if analysis['role'] == 'PRIMARY_KEY':
+                                constraint = " [PK]"
+                            elif analysis['role'] == 'FOREIGN_KEY':
+                                constraint = " [FK]"
+
+                            fields.append(f"<TR><TD ALIGN='LEFT'>{analysis['icon']} {col}</TD><TD ALIGN='LEFT'>{field_type}{constraint}</TD></TR>")
+
+                        # Tambahkan indicator jika ada kolom yang tidak ditampilkan
+                        if len(main_table_cols) > 12:
+                            fields.append(f"<TR><TD ALIGN='LEFT'>...</TD><TD ALIGN='LEFT'>+{len(main_table_cols)-12} more</TD></TR>")
 
-
-
+                        table_html = f'''<
+                        <TABLE BORDER="1" CELLBORDER="0" CELLSPACING="0" CELLPADDING="4">
+                        <TR><TD ALIGN="CENTER" BGCOLOR="#e6f3ff"><B>COLUMN</B></TD><TD ALIGN="CENTER" BGCOLOR="#e6f3ff"><B>TYPE</B></TD></TR>
+                        {''.join(fields)}
+                        </TABLE>
+                        >'''
 
-
-
+                        c.node('main_table', table_html, shape='none', fontname='Arial')
+
+                # Buat tabel referensi
+                colors = ['#e6ffe6', '#fff0e6', '#e6f9ff', '#ffe6ff', '#ffffe6', '#f0e6ff']
+                for i, (ref_name, ref_info) in enumerate(list(reference_tables.items())[:tables_to_show-1]):
+                    color = colors[i % len(colors)]
+                    with dot.subgraph(name=f'cluster_{ref_name}') as c:
+                        c.attr(label=f'📁 {ref_info["name"]}', style='filled',
+                               color=color, fontsize='12', fontname='Arial')
 
-
-
+                        fields = []
+                        # Primary key untuk tabel referensi
+                        for fk_col in ref_info['columns']:
+                            fields.append(f"<TR><TD ALIGN='LEFT'><B>🔑 {fk_col}</B></TD><TD ALIGN='LEFT'>[PK]</TD></TR>")
 
-    #
-
+                        # Tambahkan kolom umum untuk tabel referensi
+                        fields.append(f"<TR><TD ALIGN='LEFT'>📝 Name</TD><TD ALIGN='LEFT'>VARCHAR(100)</TD></TR>")
+                        fields.append(f"<TR><TD ALIGN='LEFT'>📝 Description</TD><TD ALIGN='LEFT'>VARCHAR(255)</TD></TR>")
+                        fields.append(f"<TR><TD ALIGN='LEFT'>📅 Created_Date</TD><TD ALIGN='LEFT'>DATETIME</TD></TR>")
+                        fields.append(f"<TR><TD ALIGN='LEFT'>✅ Is_Active</TD><TD ALIGN='LEFT'>BOOLEAN</TD></TR>")
 
                        table_html = f'''<
-                        <TABLE BORDER="1" CELLBORDER="0" CELLSPACING="0" CELLPADDING="
-                        <TR><TD ALIGN="CENTER"><B>COLUMN</B></TD><TD ALIGN="CENTER"><B>TYPE</B></TD></TR>
+                        <TABLE BORDER="1" CELLBORDER="0" CELLSPACING="0" CELLPADDING="3">
+                        <TR><TD ALIGN="CENTER" BGCOLOR="{color}"><B>COLUMN</B></TD><TD ALIGN="CENTER" BGCOLOR="{color}"><B>TYPE</B></TD></TR>
                        {''.join(fields)}
                        </TABLE>
                        >'''
 
-        c.node(
-
-        # Tampilkan graph
-        st.graphviz_chart(dot)
-
-    except ImportError:
-        st.warning("Graphviz tidak terinstall. Menggunakan visualisasi alternatif...")
+                        c.node(ref_name, table_html, shape='none', fontname='Arial')
 
-    #
-
-
+                        # Tambahkan relasi
+                        if show_relationships:
+                            for fk_col in ref_info['columns']:
+                                dot.edge(ref_name, 'main_table', label='1:N', style='dashed', color='#666666')
+
+                # Tampilkan ERD
+                st.graphviz_chart(dot)
+
+                # Legenda
+                st.markdown("""
+                **📋 Legenda ERD:**
+                - 🔑 Primary Key | 🔗 Foreign Key | 📊 Metric | 💰 Measurement
+                - 📅 Temporal | 🏷️ Category | 📝 Description | 📄 Attribute
+                - **Warna berbeda**: Tabel yang berbeda domain
+                """)
+
+            except ImportError:
+                st.warning("Graphviz tidak terinstall. Menggunakan visualisasi alternatif...")
+
+                # Visualisasi alternatif yang lebih sederhana
+                import plotly.graph_objects as go
+
+                # Hitung posisi node secara dinamis
+                num_tables = min(8, len(reference_tables) + 1)
+                angles = np.linspace(0, 2*np.pi, num_tables, endpoint=False)
+                radius = 0.4
+
+                fig = go.Figure()
+
+                # Node positions
+                node_x = [0.5]  # Main table di center
+                node_y = [0.5]
+                node_text = ["MAIN"]
+                node_colors = ['#3366CC']
+
+                # Reference tables di sekeliling
+                for i, (ref_name, ref_info) in enumerate(list(reference_tables.items())[:num_tables-1]):
+                    angle = angles[i]
+                    x = 0.5 + radius * np.cos(angle)
+                    y = 0.5 + radius * np.sin(angle)
 
-
+                    node_x.append(x)
+                    node_y.append(y)
+                    node_text.append(ref_info['name'][:10])
+                    node_colors.append(colors[i % len(colors)])
+
+                # Add nodes
+                fig.add_trace(go.Scatter(
+                    x=node_x, y=node_y,
+                    mode='markers+text',
+                    marker=dict(size=80, color=node_colors),
+                    text=node_text,
+                    textposition="middle center",
+                    textfont=dict(size=12, color='white'),
+                    name="Tables"
+                ))
+
+                # Add relationships
+                if show_relationships and len(node_x) > 1:
                    for i in range(1, len(node_x)):
                        fig.add_trace(go.Scatter(
-                            x=[node_x[0], node_x[i]],
-                            y=[node_y[0], node_y[i]],
+                            x=[node_x[i], node_x[0]], y=[node_y[i], node_y[0]],
                            mode='lines',
-                            line=dict(width=2, color='gray'),
-                            hoverinfo='none'
+                            line=dict(width=2, color='gray', dash='dash'),
+                            hoverinfo='none',
+                            showlegend=False
                        ))
 
-    # --- Bagian Penyambung SQL ---
-    st.write("---")
-    st.subheader("🧩 Format SQL (Comma Separated)")
-
-    numeric_sql = ", ".join(numeric_cols)
-    categorical_sql = ", ".join(categorical_cols)
-
-    st.code(f"SELECT {numeric_sql}, {categorical_sql} FROM dataset_table;", language="sql")
-
-    # Generate CREATE TABLE statement
-    st.markdown("### 🗃️ SQL CREATE TABLE Statement")
-
-    # Deteksi tipe data untuk SQL
-    def infer_sql_type(dtype, sample_data):
-        if np.issubdtype(dtype, np.number):
-            return "DECIMAL(10,2)"
-        elif np.issubdtype(dtype, np.datetime64):
-            return "DATETIME"
-        else:
-            # Cek panjang string maksimum
-            max_len = sample_data.astype(str).str.len().max()
-            return f"VARCHAR({min(255, max(100, int(max_len * 1.5)))})"
-
-    create_table_sql = "CREATE TABLE dataset_table (\n"
-    for i, col in enumerate(df.columns):
-        sql_type = infer_sql_type(df[col].dtype, df[col])
-        if i == 0:
-            create_table_sql += f"    {col} {sql_type} PRIMARY KEY,\n"
-        else:
-            create_table_sql += f"    {col} {sql_type},\n"
-
-    create_table_sql = create_table_sql.rstrip(',\n') + "\n);"
-
-    st.code(create_table_sql, language="sql")
-
-    # Jika ingin lihat hanya daftar kolom
-    col3, col4 = st.columns(2)
-    with col3:
-        st.write("**Kolom Numerik (SQL String):**")
-        st.code(numeric_sql, language="sql")
-
-    with col4:
-        st.write("**Kolom Kategorikal (SQL String):**")
-        st.code(categorical_sql, language="sql")
-
-    # Visualisasi korelasi sebagai ERD sederhana
-    if len(numeric_cols) > 1:
-        st.write("---")
-        st.subheader("📊 Matriks Korelasi (Hubungan Numerik)")
-        corr_matrix = df[numeric_cols].corr()
-
-        # Plot menggunakan Plotly
-        fig = px.imshow(
-            corr_matrix,
-            text_auto=".2f",
-            color_continuous_scale='RdBu_r',
-            zmin=-1,
-            zmax=1,
-            aspect="auto",
-            labels=dict(color="Korelasi")
-        )
-        fig.update_layout(
-            title="Matriks Korelasi Numerik",
-            xaxis_title="Fitur",
-            yaxis_title="Fitur",
-            autosize=True,
-            margin=dict(l=40, r=40, t=60, b=40),
-            height=600
-        )
-        st.plotly_chart(fig, use_container_width=True)
-
-    # --- Linear Regression Analysis ---
-    st.write("---")
-    st.subheader("🧮 Linear Regression Analysis (SQL-Style LRS)")
+
+                fig.update_layout(
+                    title="Database Table Relationships",
+                    showlegend=False,
+                    height=500,
+                    xaxis=dict(showgrid=False, zeroline=False, showticklabels=False, range=[0, 1]),
+                    yaxis=dict(showgrid=False, zeroline=False, showticklabels=False, range=[0, 1]),
+                    margin=dict(l=20, r=20, t=60, b=20)
+                )
+
+                st.plotly_chart(fig, use_container_width=True)
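The fallback layout places the main table at the centre of the unit square and spreads the reference tables evenly on a circle of radius 0.4, i.e. node i of n sits at (0.5 + r·cos(2πi/n), 0.5 + r·sin(2πi/n)). The same arithmetic in isolation:

```python
import numpy as np

n, r = 4, 0.4
angles = np.linspace(0, 2 * np.pi, n, endpoint=False)  # 0, pi/2, pi, 3*pi/2
for x, y in zip(0.5 + r * np.cos(angles), 0.5 + r * np.sin(angles)):
    print(f"({x:.2f}, {y:.2f})")
# (0.90, 0.50)  (0.50, 0.90)  (0.10, 0.50)  (0.50, 0.10)
```

One thing to watch in this `except ImportError:` branch: `reference_tables` and `colors` are first assigned inside the `try:` block after `import graphviz`, so if that import genuinely fails they are undefined here and the fallback would raise a NameError; assigning them before the `try:` would make the fallback self-sufficient.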
 
-
-
+            # --- VISUALISASI DATA YANG AMAN ---
+            st.subheader("📈 Visualisasi Data")
+
+            # Warna konsisten untuk chart
+            color_palette = px.colors.qualitative.Set3
+
+            # Fungsi safe plotting
+            def safe_plotting(plot_function, *args, **kwargs):
+                try:
+                    return plot_function(*args, **kwargs)
+                except Exception as e:
+                    st.error(f"Error dalam membuat chart: {str(e)}")
+                    return None
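`safe_plotting` wraps any chart constructor so a failure surfaces as an in-app error and a `None` return instead of an exception, which is why every call site below guards with `if fig: ...`. A self-contained usage sketch (toy data; run with `streamlit run`):

```python
import pandas as pd
import plotly.express as px
import streamlit as st

def safe_plotting(plot_function, *args, **kwargs):
    # Same wrapper as above: report the error, return None instead of raising.
    try:
        return plot_function(*args, **kwargs)
    except Exception as e:
        st.error(f"Error dalam membuat chart: {e}")
        return None

df = pd.DataFrame({"amount": [100.5, 75.25, 200.0, 150.75]})
fig = safe_plotting(px.histogram, df, x="amount", nbins=10)
if fig is not None:  # None signals that plotting failed
    st.plotly_chart(fig, use_container_width=True)
```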
+
+            # Tab untuk organisasi chart yang lebih baik
+            tab111, tab222, tab333 = st.tabs(["📊 Distribusi Numerik", "🏷️ Analisis Kategorikal", "📋 Data Quality"])
+
+            with tab111:
+                st.subheader("Analisis Distribusi Numerik")
+
+                if numeric_cols:
+                    col1, col2 = st.columns(2)
+
+                    with col1:
+                        # Histogram dengan pengelompokan yang baik
+                        selected_num_hist = st.selectbox(
+                            "Pilih variabel untuk histogram:",
+                            numeric_cols,
+                            key="hist_num"
+                        )
 
-
-
-
+                        if selected_num_hist:
+                            fig_hist = safe_plotting(px.histogram,
+                                df,
+                                x=selected_num_hist,
+                                title=f"Distribusi {selected_num_hist}",
+                                nbins=30,
+                                color_discrete_sequence=['#3366CC'],
+                                opacity=0.8
+                            )
+                            if fig_hist:
+                                fig_hist.update_layout(
+                                    bargap=0.1,
+                                    xaxis_title=selected_num_hist,
+                                    yaxis_title="Frekuensi"
+                                )
+                                st.plotly_chart(fig_hist, use_container_width=True)
+
+                    with col2:
+                        # Box plot
+                        selected_num_box = st.selectbox(
+                            "Pilih variabel untuk box plot:",
+                            numeric_cols,
+                            key="box_num"
+                        )
 
-        if
-
-            slope, intercept, r_value, p_value, std_err = stats.linregress(df[x_axis], df[y_axis])
-            correlation = df[x_axis].corr(df[y_axis])
-            r_squared = r_value**2
-
-            # --- Tampilan SQL Query ---
-            st.markdown("### 🧩 SQL Query Representation")
-            st.code(f"""
-            SELECT
-                {x_axis} AS X,
-                {y_axis} AS Y,
-                ROUND(REGR_SLOPE({y_axis}, {x_axis}), 4) AS slope,
-                ROUND(REGR_INTERCEPT({y_axis}, {x_axis}), 4) AS intercept,
-                ROUND(CORR({y_axis}, {x_axis}), 4) AS correlation,
-                ROUND(POWER(CORR({y_axis}, {x_axis}), 2), 4) AS r_squared
-            FROM dataset_table;
-            """, language="sql")
-
-            # --- Plot hubungan ---
-            fig = px.scatter(
+                        if selected_num_box:
+                            fig_box = safe_plotting(px.box,
                                df,
-
-
-
-                title=f"📊 SQL Visualization: {y_axis} vs {x_axis}",
-                labels={x_axis: f"{x_axis}", y_axis: f"{y_axis}"}
-            )
-            fig.update_layout(
-                autosize=True,
-                margin=dict(l=40, r=40, t=60, b=40),
-                height=500,
-                title_x=0.5
+                                y=selected_num_box,
+                                title=f"Box Plot {selected_num_box}",
+                                color_discrete_sequence=['#FF6B6B']
                            )
-
-                textposition="middle center",
-                textfont=dict(size=14)
-            ))
-
-            # Add relationship line dengan annotation korelasi
-            rel_fig.add_trace(go.Scatter(
-                x=[0.3, 0.7], y=[0.5, 0.5],
-                mode='lines+text',
-                line=dict(width=4, color='red'),
-                text=[f"r = {correlation:.3f}"],
-                textposition="middle center",
-                textfont=dict(size=12, color='red')
-            ))
-
-            rel_fig.update_layout(
-                title=f"Relationship Diagram: {x_axis} → {y_axis}",
-                showlegend=False,
-                height=300,
-                xaxis=dict(showgrid=False, zeroline=False, showticklabels=False, range=[0, 1]),
-                yaxis=dict(showgrid=False, zeroline=False, showticklabels=False, range=[0, 1]),
-                margin=dict(l=20, r=20, t=60, b=20)
+                            if fig_box:
+                                st.plotly_chart(fig_box, use_container_width=True)
+
+                # Matriks korelasi
+                if len(numeric_cols) >= 2:
+                    st.write("**Matriks Korelasi:**")
+                    try:
+                        corr_matrix = df[numeric_cols].corr()
+                        fig_corr = px.imshow(
+                            corr_matrix,
+                            text_auto=".2f",
+                            color_continuous_scale='RdBu_r',
+                            aspect="auto",
+                            title="Matriks Korelasi Numerik"
                        )
-
-
-        "Metric": ["X (Independent)", "Y (Dependent)", "Slope (β1)", "Intercept (β0)",
-                   "R-Value", "R² (R-squared)", "P-Value", "Std Error", "Correlation"],
-        "Value": [x_axis, y_axis, f"{slope:.4f}", f"{intercept:.4f}",
-                  f"{r_value:.4f}", f"{r_squared:.4f}", f"{p_value:.4f}",
-                  f"{std_err:.4f}", f"{correlation:.4f}"]
-    })
-
-    st.dataframe(result_df, use_container_width=True, hide_index=True)
-
-    # Analisis statistik lengkap
-    st.subheader("📊 Analisis Statistik Lengkap")
-
-    # Statistik deskriptif
-    st.write("**Statistik Deskriptif:**")
-    st.dataframe(df.describe())
-
-    # Analisis missing values
-    st.write("**Analisis Missing Values:**")
-    missing_data = df.isnull().sum()
-    if missing_data.sum() > 0:
-        fig_missing = px.bar(x=missing_data.index, y=missing_data.values,
-                             title="Missing Values per Kolom")
-        st.plotly_chart(fig_missing)
-    else:
-        st.success("Tidak ada missing values dalam dataset!")
+                        st.plotly_chart(fig_corr, use_container_width=True)
+                    except Exception as e:
+                        st.warning(f"Tidak dapat menghitung matriks korelasi: {str(e)}")
+
+            with tab222:
+                st.subheader("Analisis Data Kategorikal")
 
-
-
+                if categorical_cols:
+                    col1, col2 = st.columns(2)
+
+                    with col1:
+                        # Pie chart yang terorganisir
+                        selected_cat_pie = st.selectbox(
+                            "Pilih variabel kategorikal:",
+                            categorical_cols,
+                            key="pie_cat"
+                        )
+
+                        if selected_cat_pie:
+                            try:
+                                value_counts = df[selected_cat_pie].value_counts().head(8)
+                                fig_pie = safe_plotting(px.pie,
+                                    values=value_counts.values,
+                                    names=value_counts.index,
+                                    title=f"Distribusi {selected_cat_pie} (Top 8)",
+                                    color_discrete_sequence=color_palette
+                                )
+                                if fig_pie:
+                                    st.plotly_chart(fig_pie, use_container_width=True)
+                            except Exception as e:
+                                st.warning(f"Tidak dapat membuat pie chart: {str(e)}")
+
+                    with col2:
+                        # Bar chart horizontal
+                        if selected_cat_pie:
+                            try:
+                                value_counts = df[selected_cat_pie].value_counts().head(10)
+                                fig_bar = safe_plotting(px.bar,
+                                    x=value_counts.values,
+                                    y=value_counts.index,
+                                    orientation='h',
+                                    title=f"Top 10 {selected_cat_pie}",
+                                    color=value_counts.values,
+                                    color_continuous_scale='Blues'
+                                )
+                                if fig_bar:
+                                    fig_bar.update_layout(
+                                        xaxis_title="Count",
+                                        yaxis_title=selected_cat_pie,
+                                        showlegend=False
+                                    )
+                                    st.plotly_chart(fig_bar, use_container_width=True)
+                            except Exception as e:
+                                st.warning(f"Tidak dapat membuat bar chart: {str(e)}")
+
+            with tab333:
+                st.subheader("Data Quality Report")
 
-
+                # Buat laporan kualitas data yang komprehensif
+                quality_report = []
                for col in df.columns:
-
+                    analysis = column_analysis[col]
+                    quality_report.append({
                        'Kolom': col,
-                        'Tipe':
-                        '
-                        '
-                        '
-                        '
+                        'Tipe Data': analysis['dtype'],
+                        'Role': analysis['role'],
+                        'Unique Values': analysis['unique_count'],
+                        'Null Values': analysis['null_count'],
+                        'Null %': f"{analysis['null_percentage']:.2f}%",
+                        'Sample': analysis['sample_values'][0] if analysis['sample_values'] else 'N/A'
                    })
 
-    quality_df = pd.DataFrame(
-    st.dataframe(quality_df)
+                quality_df = pd.DataFrame(quality_report)
+                st.dataframe(quality_df, use_container_width=True)
 
-    #
-    st.
+                # Visualisasi kualitas data sederhana
+                col1, col2 = st.columns(2)
+
+                with col1:
+                    # Missing values bar chart
+                    missing_data = quality_df[['Kolom', 'Null Values']].set_index('Kolom')
+                    fig_missing = safe_plotting(px.bar,
+                        missing_data,
+                        y='Null Values',
+                        title="Missing Values per Kolom",
+                        color='Null Values',
+                        color_continuous_scale='Reds'
+                    )
+                    if fig_missing:
+                        st.plotly_chart(fig_missing, use_container_width=True)
 
-
-
+                with col2:
+                    # Data types distribution
+                    type_dist = quality_df['Tipe Data'].value_counts()
+                    fig_types = safe_plotting(px.pie,
+                        values=type_dist.values,
+                        names=type_dist.index,
+                        title="Distribusi Tipe Data",
+                        color_discrete_sequence=color_palette
+                    )
+                    if fig_types:
+                        st.plotly_chart(fig_types, use_container_width=True)
+
+            # --- DOWNLOAD SECTION ---
+            st.subheader("💾 Download Hasil Analisis")
+
+            col1, col2, col3 = st.columns(3)
+
+            with col1:
                st.download_button(
-
-
-
-
+                    "📊 Download Quality Report",
+                    quality_df.to_csv(index=False),
+                    "data_quality_report.csv",
+                    "text/csv"
                )
+
+            with col2:
+                # Buat summary report
+                summary_report = {
+                    'file_name': uploaded_file.name,
+                    'file_size': f"{uploaded_file.size / 1024:.2f} KB",
+                    'rows': df.shape[0],
+                    'columns': df.shape[1],
+                    'analysis_date': pd.Timestamp.now().strftime("%Y-%m-%d %H:%M:%S"),
+                    'numeric_columns': numeric_cols,
+                    'categorical_columns': categorical_cols,
+                    'date_columns': date_cols,
+                    'primary_keys': [col for col, analysis in column_analysis.items()
+                                     if analysis['role'] == 'PRIMARY_KEY'],
+                    'foreign_keys': [col for col, analysis in column_analysis.items()
+                                     if analysis['role'] == 'FOREIGN_KEY']
+                }
 
-
-    st.
-
-
-
-
-    example_data = {
-        'ID': [1, 2, 3, 4, 5],
-        'Nama': ['Alice', 'Bob', 'Charlie', 'Diana', 'Eve'],
-        'Usia': [25, 30, 35, 28, 32],
-        'Gaji': [50000, 60000, 70000, 55000, 65000],
-        'Departemen': ['IT', 'HR', 'IT', 'Finance', 'HR']
-    }
-    example_df = pd.DataFrame(example_data)
-    st.dataframe(example_df)
+                import json
+                st.download_button(
+                    "📋 Download Summary Report",
+                    json.dumps(summary_report, indent=2, ensure_ascii=False),
+                    "analysis_summary.json",
+                    "application/json"
+                )
 
-
-
-
-
+            with col3:
+                # Download processed data
+                st.download_button(
+                    "💾 Download Processed Data",
+                    df.to_csv(index=False),
+                    "processed_data.csv",
+                    "text/csv"
+                )
+
+        except Exception as e:
+            st.error(f"Error dalam analisis data: {str(e)}")
+            st.info("Pastikan file yang diupload berformat CSV atau Excel yang valid")
+            st.code(f"Error details: {str(e)}", language='python')
+    else:
+        st.info("📤 Silakan upload file CSV atau Excel untuk memulai analisis")
+
+        # Template dan panduan
+        st.subheader("🎯 Panduan Format Data")
+
+        col1, col2 = st.columns(2)
+
+        with col1:
+            st.write("**Format yang Disarankan:**")
+            sample_data = {
+                'customer_id': [1, 2, 3, 4, 5],
+                'order_id': [101, 102, 103, 104, 105],
+                'product_id': [201, 202, 203, 204, 205],
+                'order_date': pd.date_range('2024-01-01', periods=5),
+                'amount': [100.50, 75.25, 200.00, 150.75, 90.99],
+                'category': ['Electronics', 'Books', 'Electronics', 'Clothing', 'Books'],
+                'status': ['Completed', 'Pending', 'Completed', 'Shipped', 'Pending']
+            }
+            sample_df = pd.DataFrame(sample_data)
+            st.dataframe(sample_df)
+
+        with col2:
+            st.write("**Keterangan Fitur:**")
+            st.markdown("""
+            - **🔑 Primary Key**: Kolom dengan nilai unik (ID, code)
+            - **🔗 Foreign Key**: Kolom referensi ke tabel lain
+            - **📊 ERD Dinamis**: Diagram relasi otomatis
+            - **📈 Visualisasi Aman**: Error handling untuk semua chart
+            - **🎨 Warna Konsisten**: Skema warna yang harmonis
+            - **📋 Analisis Komprehensif**: Statistik detail dan laporan
+            """)
+
+        # Download template
+        csv_template = sample_df.to_csv(index=False)
+        st.download_button(
+            "📥 Download Template CSV",
+            csv_template,
+            "analysis_template.csv",
+            "text/csv"
+        )
 
 
    with tab8:
@@ -8426,12 +10721,29 @@ if df is not None:
         type=['csv', 'xlsx', 'xls'],
         key="stock_uploader"
     )
-    with st.expander("📜 Ketarangan Lengkap Tentang
+    with st.expander("📜 Keterangan Lengkap Tentang Analisis Saham", expanded=False):
+        st.markdown(
+            """
+            <img src="https://s3-ap-southeast-1.amazonaws.com/membership-media/public/uploads/posts/1653502344_Memahami_Apa_Itu_Saham_Dan_Cara_Kerjanya_1170x658.jpg" class="responsive-img">
+            """,
+            unsafe_allow_html=True
+        )
         st.markdown("""
 
         ### 🧾 Pengembangan Saham
-        - Saham
+        - Saham merupakan salah satu instrumen investasi yang populer di kalangan investor. Dengan membeli saham, investor memiliki sebagian kepemilikan dalam sebuah perusahaan dan berhak atas sebagian keuntungan perusahaan tersebut.
+        - Analisis saham melibatkan evaluasi berbagai faktor seperti kinerja keuangan perusahaan, kondisi pasar, tren industri, dan faktor ekonomi makro untuk membuat keputusan investasi yang lebih baik.
+        - Analisis saham dapat dilakukan dengan perangkat yang umum seperti Excel atau Google Sheets.
+
+        ### 📈 Analisis Grafik Saham
+        - Analisis grafik saham adalah proses menganalisis data saham untuk membuat grafik yang menampilkan informasi tentang saham secara visual.
+        - Grafik saham dapat digunakan untuk membuat perbandingan antara saham yang berbeda, menampilkan trend, dan menentukan kemungkinan investasi yang lebih baik.
+        - Dengan begitu, grafik saham juga dapat membantu meningkatkan keuntungan investasi.
        """)
    if uploaded_file is not None:
        try:
@@ -9545,7 +11857,7 @@ if df is not None:
     # Sidebar untuk memilih jenis kalkulator
     calc_type = st.sidebar.selectbox(
         "Pilih Jenis Kalkulator",
-        ["Kalkulator Dasar", "Kalkulator Ilmiah", "Kalkulator Keuangan", "Konverter Satuan", "Kalkulator BMI", "Kalkulator Waktu"]
+        ["🔢 Kalkulator Dasar", "🔬 Kalkulator Ilmiah", "💰 Kalkulator Keuangan", "📐 Konverter Satuan", "⚖️ Kalkulator BMI", "⏰ Kalkulator Waktu"]
     )
 
     # Initialize session state for history
@@ -9558,7 +11870,7 @@ if df is not None:
         if len(st.session_state.calc_history) > 10:  # Batasi hanya 10 riwayat terakhir
             st.session_state.calc_history.pop(0)
 
-    if calc_type == "Kalkulator Dasar":
+    if calc_type == "🔢 Kalkulator Dasar":
         st.subheader("🔢 Kalkulator Dasar")
 
         # Layout dengan columns untuk tampilan kalkulator
@@ -9635,7 +11947,7 @@ if df is not None:
         if st.button("🗑️ Reset", use_container_width=True):
             st.rerun()
 
-    elif calc_type == "Kalkulator Ilmiah":
+    elif calc_type == "🔬 Kalkulator Ilmiah":
         st.subheader("🔬 Kalkulator Ilmiah")
 
         col1, col2 = st.columns(2)
@@ -9754,7 +12066,7 @@ if df is not None:
         except Exception as e:
             st.error(f"❌ Error: {str(e)}")
 
-    elif calc_type == "Kalkulator Keuangan":
+    elif calc_type == "💰 Kalkulator Keuangan":
         st.subheader("💰 Kalkulator Keuangan")
 
         finance_option = st.selectbox(
@@ -9836,7 +12148,7 @@ if df is not None:
         """)
         add_to_history(f"Cicilan: Rp {loan_amount:,.0f} → Rp {monthly_payment:,.0f}/bulan")
 
-    elif calc_type == "Konverter Satuan":
+    elif calc_type == "📐 Konverter Satuan":
         st.subheader("📐 Konverter Satuan")
 
         conversion_type = st.selectbox(
@@ -9921,8 +12233,8 @@ if df is not None:
         st.success(f"**Hasil:** {calc_str}")
         add_to_history(calc_str)
 
-    elif calc_type == "Kalkulator BMI":
-        st.subheader("
+    elif calc_type == "⚖️ Kalkulator BMI":
+        st.subheader("⚖️ Kalkulator BMI (Body Mass Index)")
 
         col1, col2 = st.columns(2)
 
@@ -9959,7 +12271,7 @@ if df is not None:
         """)
         add_to_history(f"BMI: {bmi:.1f} ({category})")
 
-    elif calc_type == "Kalkulator Waktu":
+    elif calc_type == "⏰ Kalkulator Waktu":
         st.subheader("⏰ Kalkulator Waktu")
 
         time_option = st.selectbox("Pilih jenis perhitungan", [
@@ -10095,7 +12407,7 @@ if df is not None:
         st.error("**🧹 Pembersihan Data**\n\nAuto-clean missing values")
 
     # Video Tutorial (placeholder)
-    st.markdown("### 🎥 Video Tutorial Penggunaan V2.
+    st.markdown("### 🎥 Video Tutorial Penggunaan V2.3.8")
     import streamlit.components.v1 as components
     google_drive_id = "1obx6q2jQS1fRrNi1E4VpAPlyI_rR9nO5"
 
@@ -10464,7 +12776,8 @@ if df is not None:
     with col3:
         st.markdown("""
         ### 🔄 Update
-        - Versi terbaru: 2.
+        - Versi terbaru: 2.3.8
+        - Rilis: Oktober 2025
         - Last updated: 2025
         - Compatibility: Python 3.8+
         """)
|