streamlit-launcher 2.2.7__py3-none-any.whl → 2.3.9__py3-none-any.whl
This diff compares the contents of two package versions as published to a supported public registry. It is provided for informational purposes only and reflects the packages exactly as they appear in that registry.
- streamlit_launcher/dashboard.py +2993 -704
- {streamlit_launcher-2.2.7.dist-info → streamlit_launcher-2.3.9.dist-info}/METADATA +1 -1
- streamlit_launcher-2.3.9.dist-info/RECORD +11 -0
- streamlit_launcher-2.2.7.dist-info/RECORD +0 -11
- {streamlit_launcher-2.2.7.dist-info → streamlit_launcher-2.3.9.dist-info}/WHEEL +0 -0
- {streamlit_launcher-2.2.7.dist-info → streamlit_launcher-2.3.9.dist-info}/entry_points.txt +0 -0
- {streamlit_launcher-2.2.7.dist-info → streamlit_launcher-2.3.9.dist-info}/licenses/LICENSE +0 -0
- {streamlit_launcher-2.2.7.dist-info → streamlit_launcher-2.3.9.dist-info}/licenses/license.txt +0 -0
- {streamlit_launcher-2.2.7.dist-info → streamlit_launcher-2.3.9.dist-info}/top_level.txt +0 -0
streamlit_launcher/dashboard.py
CHANGED
@@ -35,6 +35,38 @@ from sklearn.metrics import mean_squared_error, r2_score, accuracy_score, classi
 from sklearn.inspection import permutation_importance
 from scipy.stats import gaussian_kde
 import streamlit.components.v1 as components
+import tensorflow as tf
+import numpy as np
+import pandas as pd
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import StandardScaler, LabelEncoder
+from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
+from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
+from sklearn.metrics import confusion_matrix, classification_report
+import plotly.express as px
+import plotly.graph_objects as go
+from plotly.subplots import make_subplots
+import xgboost as xgb
+from sklearn.ensemble import GradientBoostingRegressor, GradientBoostingClassifier
+from sklearn.feature_selection import mutual_info_regression, mutual_info_classif
+import plotly.graph_objects as go
+from plotly.subplots import make_subplots
+import time
+import warnings
+warnings.filterwarnings('ignore')
+from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
+from sklearn.ensemble import GradientBoostingRegressor, GradientBoostingClassifier
+from xgboost import XGBRegressor, XGBClassifier
+from sklearn.ensemble import VotingRegressor, VotingClassifier
+from sklearn.ensemble import StackingRegressor, StackingClassifier
+from sklearn.model_selection import cross_validate, GridSearchCV
+from sklearn.metrics import get_scorer
+from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
+from sklearn.compose import ColumnTransformer
+from sklearn.impute import SimpleImputer
+from sklearn.pipeline import Pipeline
+import keras
+

 # Konfigurasi untuk performa
 plt.style.use('default')
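The new import block pulls in xgboost unconditionally (twice, in fact: `import xgboost as xgb` and `from xgboost import XGBRegressor, XGBClassifier`), yet the model registry added later in this diff still guards with `if 'xgb' in globals() else None`. A more conventional way to express that optional dependency is a guarded import; a minimal sketch, not part of the package (the `HAS_XGB` flag and `build_classifiers` helper are illustrative):

```python
# Sketch: optional-dependency import, so the dashboard could degrade
# gracefully when xgboost is not installed. HAS_XGB is a hypothetical flag.
try:
    import xgboost as xgb
    HAS_XGB = True
except ImportError:
    xgb = None
    HAS_XGB = False

from sklearn.ensemble import RandomForestClassifier

def build_classifiers(random_state=42):
    """Model registry that includes XGBoost only when the import succeeded."""
    models = {
        "Random Forest": RandomForestClassifier(n_estimators=50, random_state=random_state),
    }
    if HAS_XGB:
        models["XGBoost"] = xgb.XGBClassifier(
            n_estimators=50, random_state=random_state, n_jobs=-1, verbosity=0
        )
    return models
```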
@@ -6435,8 +6467,13 @@ def create_ml_dl_analysis_dashboard(df, numeric_cols, non_numeric_cols):
     """
     Dashboard komprehensif untuk analisis Machine Learning dan Deep Learning
     """
-
-
+    st.markdown("""
+    <div style='text-align: center; padding: 10px; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+                border-radius: 10px; margin: 10px 0;'>
+        <h3 style='color: white; margin: 0;'>🧠 dwibaktindev AI</h3>
+        <p style='color: white; margin: 0;'>Sasha • Alisa • dwibaktindev Models</p>
+    </div>
+    """, unsafe_allow_html=True)

     # Deteksi tipe data
     data_size = len(df)
@@ -6874,10 +6911,25 @@ def create_outlier_analysis(df, numeric_cols):
     st.plotly_chart(fig, use_container_width=True)

 def machine_learning_analysis(df, numeric_cols, non_numeric_cols):
-    """Analisis Machine Learning"""
+    """Analisis Machine Learning dengan Optimasi untuk Dataset Besar"""

     st.header("🤖 Machine Learning Analysis")

+    # Informasi dataset
+    st.subheader("📊 Dataset Info")
+    col1, col2, col3 = st.columns(3)
+    with col1:
+        st.metric("Total Rows", f"{len(df):,}")
+    with col2:
+        st.metric("Total Columns", f"{len(df.columns):,}")
+    with col3:
+        st.metric("Memory Usage", f"{df.memory_usage(deep=True).sum() / 1024**2:.2f} MB")
+
+    # Optimasi memory usage
+    if st.checkbox("Optimize Memory Usage", value=True):
+        df = optimize_memory_usage(df)
+        st.success("Memory usage optimized!")
+

     # Preprocessing
     st.subheader("🔧 Data Preprocessing")
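The new Dataset Info panel computes its memory figure with `df.memory_usage(deep=True)`. The `deep=True` flag matters for `object` columns: without it pandas counts only the 8-byte pointers, not the strings they reference. A standalone sketch of the difference (the example frame is illustrative, not from the package):

```python
import numpy as np
import pandas as pd

# Illustrative frame: one numeric column, one string (object) column.
df = pd.DataFrame({
    "price": np.random.rand(10_000),
    "city": np.random.choice(["Jakarta", "Bandung", "Surabaya"], size=10_000),
})

shallow_mb = df.memory_usage().sum() / 1024**2        # pointers only for 'city'
deep_mb = df.memory_usage(deep=True).sum() / 1024**2  # includes the string payloads
print(f"shallow: {shallow_mb:.2f} MB, deep: {deep_mb:.2f} MB")
```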
@@ -6893,162 +6945,407 @@ def machine_learning_analysis(df, numeric_cols, non_numeric_cols):

         problem_type = st.selectbox(
             "Jenis Problem",
-            ["Regression", "Classification"],
+            ["Regression", "Classification", "Auto Detect"],
             key="ml_problem_type"
         )
+
+        # Auto detect problem type
+        if problem_type == "Auto Detect":
+            if target_variable in numeric_cols:
+                problem_type = "Regression"
+            else:
+                problem_type = "Classification"
+            st.info(f"Auto-detected: {problem_type}")

     with col2:
         test_size = st.slider("Test Size Ratio", 0.1, 0.5, 0.2, 0.05, key="ml_test_size")
         random_state = st.number_input("Random State", value=42, key="ml_random_state")
+
+        # Sampling untuk dataset besar
+        sample_size = st.slider("Sample Size (untuk dataset besar)",
+                                min_value=1000,
+                                max_value=min(50000, len(df)),
+                                value=min(10000, len(df)),
+                                step=1000,
+                                key="ml_sample_size")

-    # Feature selection
+    # Feature selection dengan advanced options
     st.subheader("🎯 Feature Selection")
+
     available_features = [f for f in numeric_cols + non_numeric_cols if f != target_variable]
-    selected_features = st.multiselect(
-        "Pilih Features untuk Model",
-        available_features,
-        default=available_features[:min(10, len(available_features))],
-        key="ml_features_select"
-    )

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    if problem_type == "Classification" and y.dtype == 'object':
-        le_target = LabelEncoder()
-        y = le_target.fit_transform(y.astype(str))
-
-    # Handle missing values
-    X = X.fillna(X.mean(numeric_only=True))
-
-    # Split data
-    X_train, X_test, y_train, y_test = train_test_split(
-        X, y, test_size=test_size, random_state=random_state, stratify=y if problem_type == "Classification" else None
-    )
+    col1, col2 = st.columns([2, 1])
+
+    with col1:
+        feature_selection_method = st.radio(
+            "Feature Selection Method",
+            ["Manual Selection", "Auto Select Top Features"],
+            key="feature_selection_method"
+        )
+
+        if feature_selection_method == "Manual Selection":
+            selected_features = st.multiselect(
+                "Pilih Features untuk Model",
+                available_features,
+                default=available_features[:min(10, len(available_features))],
+                key="ml_features_select"
+            )
+        else:
+            top_k = st.slider("Number of Top Features", 5, 50, 15, key="top_k_features")
+            selected_features = available_features[:top_k]
+            st.info(f"Auto-selected top {top_k} features")
+
+    with col2:
+        # Advanced options
+        st.write("**Advanced Options:**")
+        use_feature_engineering = st.checkbox("Feature Engineering", value=False)
+        remove_high_correlation = st.checkbox("Remove High Correlation", value=True)
+        correlation_threshold = st.slider("Correlation Threshold", 0.7, 0.99, 0.9, 0.01)
+
+    if not target_variable or not selected_features:
+        st.warning("Pilih target variable dan features terlebih dahulu")
+        return
+
+    try:
+        # Sampling untuk dataset besar
+        if len(df) > sample_size:
+            st.info(f"Using sample of {sample_size} records for faster processing")
+            df_sampled = df.sample(n=sample_size, random_state=random_state)
+        else:
+            df_sampled = df
+
+        # Progress tracking
+        progress_bar = st.progress(0)
+        status_text = st.empty()
+
+        # Prepare data
+        status_text.text("Preparing data...")
+        X = df_sampled[selected_features].copy()
+        y = df_sampled[target_variable]
+        progress_bar.progress(20)
+
+        # Handle large dataset - incremental processing
+        chunk_size = min(1000, len(X))
+
+        # Encode categorical features
+        status_text.text("Encoding categorical features...")
+        le_dict = {}
+        categorical_columns = [col for col in selected_features if col in non_numeric_cols]
+
+        for col in categorical_columns:
+            # Untuk dataset besar, gunakan categorical encoding yang lebih efisien
+            if X[col].nunique() > 100:  # Jika terlalu banyak kategori, gunakan frequency encoding
+                freq_encoding = X[col].value_counts().to_dict()
+                X[col] = X[col].map(freq_encoding)
+                X[col].fillna(0, inplace=True)
+            else:
+                le = LabelEncoder()
+                X[col] = le.fit_transform(X[col].astype(str))
+                le_dict[col] = le
+        progress_bar.progress(40)
+
+        # Encode target variable
+        status_text.text("Encoding target variable...")
+        le_target = None
+        if problem_type == "Classification" and y.dtype == 'object':
+            le_target = LabelEncoder()
+            y = le_target.fit_transform(y.astype(str))
+
+        # Remove high correlation features
+        if remove_high_correlation and len(selected_features) > 1:
+            status_text.text("Removing highly correlated features...")
+            X = remove_correlated_features(X, correlation_threshold)
+
+        progress_bar.progress(60)
+
+        # Handle missing values dengan metode yang lebih robust
+        status_text.text("Handling missing values...")
+        for col in X.columns:
+            if X[col].isnull().sum() > 0:
+                if X[col].dtype in ['int64', 'float64']:
+                    X[col].fillna(X[col].median(), inplace=True)
+                else:
+                    X[col].fillna(X[col].mode()[0] if len(X[col].mode()) > 0 else 0, inplace=True)
+
+        progress_bar.progress(80)
+
+        # Split data
+        status_text.text("Splitting data...")
+        X_train, X_test, y_train, y_test = train_test_split(
+            X, y,
+            test_size=test_size,
+            random_state=random_state,
+            stratify=y if problem_type == "Classification" else None
+        )
+
+        # Scale features - gunakan StandardScaler yang lebih efisien
+        scaler = StandardScaler()
+        X_train_scaled = scaler.fit_transform(X_train)
+        X_test_scaled = scaler.transform(X_test)
+        progress_bar.progress(100)
+
+        # Model selection dengan progress tracking
+        st.subheader("🚀 Model Training & Evaluation")
+
+        # Pilihan model berdasarkan problem type dan dataset size
+        if problem_type == "Regression":
+            models = {
+                "Linear Regression": LinearRegression(),
+                "Ridge Regression": Ridge(random_state=random_state),
+                "Random Forest": RandomForestRegressor(
+                    n_estimators=50,  # Kurangi untuk dataset besar
+                    random_state=random_state,
+                    n_jobs=-1  # Gunakan semua core CPU
+                ),
+                "Gradient Boosting": GradientBoostingRegressor(
+                    n_estimators=50,
+                    random_state=random_state
+                )
+            }
+        elif problem_type == "Classification":
+            models = {
+                "Logistic Regression": LogisticRegression(
+                    random_state=random_state,
+                    n_jobs=-1,
+                    max_iter=1000
+                ),
+                "Random Forest": RandomForestClassifier(
+                    n_estimators=50,
+                    random_state=random_state,
+                    n_jobs=-1
+                ),
+                "Gradient Boosting": GradientBoostingClassifier(
+                    n_estimators=50,
+                    random_state=random_state
+                ),
+                "XGBoost": xgb.XGBClassifier(
+                    n_estimators=50,
+                    random_state=random_state,
+                    n_jobs=-1,
+                    verbosity=0
+                ) if 'xgb' in globals() else None
+            }
+            # Remove None models
+            models = {k: v for k, v in models.items() if v is not None}
+
+        # Train and evaluate models dengan progress bar
+        results = {}
+        model_progress = st.progress(0)
+        total_models = len(models)
+
+        for i, (name, model) in enumerate(models.items()):
+            status_text.text(f"Training {name}...")

-
-
-
-
-
-    # Model selection berdasarkan problem type
-    st.subheader("🚀 Model Training & Evaluation")
-
-    if problem_type == "Regression":
-        models = {
-            "Linear Regression": LinearRegression(),
-            "Ridge Regression": Ridge(random_state=random_state),
-            "Random Forest": RandomForestRegressor(n_estimators=100, random_state=random_state)
-        }
-
-    elif problem_type == "Classification":
-        models = {
-            "Logistic Regression": LogisticRegression(random_state=random_state),
-            "Random Forest": RandomForestClassifier(n_estimators=100, random_state=random_state),
-            "SVM": SVC(random_state=random_state)
-        }
-
-    # Train and evaluate models
-    results = {}
-
-    for name, model in models.items():
-        with st.spinner(f"Training {name}..."):
-            try:
-                # Train model
-                model.fit(X_train_scaled, y_train)
-                y_pred = model.predict(X_test_scaled)
-
-                # Calculate metrics
-                if problem_type == "Regression":
-                    mse = mean_squared_error(y_test, y_pred)
-                    r2 = r2_score(y_test, y_pred)
-
-                    results[name] = {
-                        'MSE': mse,
-                        'R2 Score': r2,
-                        'predictions': y_pred,
-                        'model': model
-                    }
-
-                elif problem_type == "Classification":
-                    accuracy = accuracy_score(y_test, y_pred)
-
-                    results[name] = {
-                        'Accuracy': accuracy,
-                        'predictions': y_pred,
-                        'model': model
-                    }
-            except Exception as model_error:
-                st.warning(f"Error training {name}: {str(model_error)}")
-
-    # Display results
-    if results:
-        st.subheader("📊 Model Performance Comparison")
+            try:
+                # Train model
+                model.fit(X_train_scaled, y_train)
+                y_pred = model.predict(X_test_scaled)

+                # Calculate metrics
                 if problem_type == "Regression":
-
-
-
-
-
-
-
-                    '
-                    '
-
+                    mse = mean_squared_error(y_test, y_pred)
+                    rmse = np.sqrt(mse)
+                    mae = mean_absolute_error(y_test, y_pred)
+                    r2 = r2_score(y_test, y_pred)
+
+                    results[name] = {
+                        'MSE': mse,
+                        'RMSE': rmse,
+                        'MAE': mae,
+                        'R2 Score': r2,
+                        'predictions': y_pred,
+                        'model': model
+                    }

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    else:
-        st.warning("Tidak ada model yang berhasil di-training")
+                elif problem_type == "Classification":
+                    accuracy = accuracy_score(y_test, y_pred)
+                    precision = precision_score(y_test, y_pred, average='weighted', zero_division=0)
+                    recall = recall_score(y_test, y_pred, average='weighted', zero_division=0)
+                    f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)
+
+                    results[name] = {
+                        'Accuracy': accuracy,
+                        'Precision': precision,
+                        'Recall': recall,
+                        'F1-Score': f1,
+                        'predictions': y_pred,
+                        'model': model
+                    }
+
+                st.success(f"✅ {name} trained successfully")
+
+            except Exception as model_error:
+                st.warning(f"⚠️ Error training {name}: {str(model_error)}")
+
+            model_progress.progress((i + 1) / total_models)
+
+        status_text.text("Completed!")

-
-
+        # Display results
+        if results:
+            display_ml_results(results, problem_type, X_test, y_test, selected_features, le_target)
+        else:
+            st.error("❌ Tidak ada model yang berhasil di-training")
+
+    except Exception as e:
+        st.error(f"❌ Error dalam ML analysis: {str(e)}")
+        st.info("💡 Tips: Coba kurangi jumlah features atau gunakan sample size yang lebih kecil")
+
+def optimize_memory_usage(df):
+    """Optimize memory usage of dataframe"""
+    for col in df.columns:
+        if df[col].dtype == 'object':
+            df[col] = df[col].astype('category')
+        elif df[col].dtype in ['int64', 'int32']:
+            c_min = df[col].min()
+            c_max = df[col].max()
+            if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(np.int8).max:
+                df[col] = df[col].astype(np.int8)
+            elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(np.int16).max:
+                df[col] = df[col].astype(np.int16)
+            elif c_min > np.iinfo(np.int32).min and c_max < np.iinfo(np.int32).max:
+                df[col] = df[col].astype(np.int32)
+        elif df[col].dtype in ['float64', 'float32']:
+            c_min = df[col].min()
+            c_max = df[col].max()
+            if c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max:
+                df[col] = df[col].astype(np.float32)
+    return df
+
+def remove_correlated_features(X, threshold=0.9):
+    """Remove highly correlated features"""
+    corr_matrix = X.corr().abs()
+    upper = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(bool))
+    to_drop = [column for column in upper.columns if any(upper[column] > threshold)]
+    return X.drop(columns=to_drop)
+
+def display_ml_results(results, problem_type, X_test, y_test, selected_features, le_target):
+    """Display ML results with comprehensive visualizations"""
+
+    st.subheader("📊 Model Performance Comparison")
+
+    # Create results dataframe
+    if problem_type == "Regression":
+        metrics_df = pd.DataFrame({
+            'Model': list(results.keys()),
+            'MSE': [results[name]['MSE'] for name in results.keys()],
+            'RMSE': [results[name]['RMSE'] for name in results.keys()],
+            'MAE': [results[name]['MAE'] for name in results.keys()],
+            'R2 Score': [results[name]['R2 Score'] for name in results.keys()]
+        })
+        sort_metric = 'R2 Score'
+    else:
+        metrics_df = pd.DataFrame({
+            'Model': list(results.keys()),
+            'Accuracy': [results[name]['Accuracy'] for name in results.keys()],
+            'Precision': [results[name]['Precision'] for name in results.keys()],
+            'Recall': [results[name]['Recall'] for name in results.keys()],
+            'F1-Score': [results[name]['F1-Score'] for name in results.keys()]
+        })
+        sort_metric = 'Accuracy'
+
+    # Display metrics table
+    st.dataframe(metrics_df.sort_values(sort_metric, ascending=False), use_container_width=True)
+
+    # Visualization
+    col1, col2 = st.columns(2)
+
+    with col1:
+        # Performance comparison chart
+        if problem_type == "Regression":
+            fig = px.bar(metrics_df, x='Model', y='R2 Score', title="R2 Score Comparison")
+        else:
+            fig = px.bar(metrics_df, x='Model', y='Accuracy', title="Accuracy Comparison")
+        st.plotly_chart(fig, use_container_width=True)
+
+    with col2:
+        # Actual vs Predicted untuk model terbaik
+        best_model_name = metrics_df.loc[metrics_df[sort_metric].idxmax(), 'Model']
+        best_result = results[best_model_name]
+
+        if problem_type == "Regression":
+            fig = px.scatter(
+                x=y_test,
+                y=best_result['predictions'],
+                labels={'x': 'Actual', 'y': 'Predicted'},
+                title=f"Actual vs Predicted - {best_model_name}"
+            )
+            fig.add_trace(px.line(x=[y_test.min(), y_test.max()], y=[y_test.min(), y_test.max()]).data[0])
+        else:
+            # Confusion matrix
+            cm = confusion_matrix(y_test, best_result['predictions'])
+            fig = px.imshow(
+                cm,
+                labels=dict(x="Predicted", y="Actual", color="Count"),
+                title=f"Confusion Matrix - {best_model_name}"
+            )
+        st.plotly_chart(fig, use_container_width=True)
+
+    # Feature importance
+    st.subheader("🔍 Feature Importance")
+    for name, result in results.items():
+        model = result['model']
+        if hasattr(model, 'feature_importances_'):
+            feature_importance = pd.DataFrame({
+                'feature': selected_features[:len(model.feature_importances_)],
+                'importance': model.feature_importances_
+            }).sort_values('importance', ascending=False)
+
+            fig = px.bar(
+                feature_importance.head(10),
+                x='importance',
+                y='feature',
+                title=f"Top 10 Feature Importance - {name}",
+                orientation='h'
+            )
+            st.plotly_chart(fig, use_container_width=True)

 def deep_learning_analysis(df, numeric_cols, non_numeric_cols):
-    """Analisis Deep Learning"""
+    """Analisis Deep Learning Lengkap - Optimized for Large Datasets"""

-    st.header("🧠 Deep Learning Analysis")
+    st.header("🧠 Deep Learning Analysis - High Performance")

-
+    # Validasi dataset
+    if df.empty:
+        st.error("❌ Dataset kosong! Silakan upload data terlebih dahulu.")
+        return
+
+    if len(numeric_cols) < 2:
+        st.error("❌ Diperlukan minimal 2 kolom numerik untuk analisis Deep Learning")
+        return

-    #
-
+    # Configuration untuk kecepatan
+    st.subheader("⚡ Konfigurasi Kecepatan & Performa")
+
+    col1, col2, col3 = st.columns(3)

     with col1:
+        processing_speed = st.selectbox(
+            "Kecepatan Processing",
+            ["🚀 Very Fast", "⚡ Fast", "✅ Balanced", "🐢 Comprehensive"],
+            index=0,
+            key="processing_speed"
+        )
+
+        # Set parameters berdasarkan kecepatan
+        if processing_speed == "🚀 Very Fast":
+            sample_size = 0.3
+            epochs = 20
+            batch_size = 128
+        elif processing_speed == "⚡ Fast":
+            sample_size = 0.5
+            epochs = 30
+            batch_size = 64
+        elif processing_speed == "✅ Balanced":
+            sample_size = 0.7
+            epochs = 50
+            batch_size = 32
+        else:
+            sample_size = 1.0
+            epochs = 80
+            batch_size = 16
+
+    with col2:
         dl_target = st.selectbox(
             "Pilih Target Variable",
             numeric_cols,
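For categoricals with more than 100 levels, the hunk above swaps `LabelEncoder` for frequency encoding: each category is replaced by its occurrence count, which keeps dimensionality flat and gives tree models a usable numeric signal. A minimal standalone version of that mapping (the example values are illustrative):

```python
import pandas as pd

s = pd.Series(["a", "b", "a", "c", "a", "b"])

freq = s.value_counts().to_dict()   # {'a': 3, 'b': 2, 'c': 1}
encoded = s.map(freq).fillna(0)     # categories unseen at fit time map to 0
print(encoded.tolist())             # [3, 2, 3, 1, 3, 2]
```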
@@ -7061,30 +7358,76 @@ def deep_learning_analysis(df, numeric_cols, non_numeric_cols):
             key="dl_problem_type"
         )

-    with
-        epochs = st.slider("Epochs", 10, 200,
-        batch_size = st.slider("Batch Size", 16, 256,
-        learning_rate = st.selectbox("Learning Rate", [0.001, 0.01, 0.
+    with col3:
+        epochs = st.slider("Epochs", 10, 200, epochs, key="dl_epochs")
+        batch_size = st.slider("Batch Size", 16, 256, batch_size, key="dl_batch_size")
+        learning_rate = st.selectbox("Learning Rate", [0.001, 0.01, 0.0001, 0.00001],
+                                     index=0, key="dl_learning_rate")
+
+    # Optimasi dataset besar
+    st.info(f"**Mode {processing_speed}** - Sample size: {sample_size*100}% - Dataset: {len(df):,} rows")

-    # Feature selection
+    # Feature selection dengan optimasi
+    available_features = [f for f in numeric_cols if f != dl_target]
     dl_features = st.multiselect(
         "Pilih Features untuk Deep Learning",
-
-        default=[
+        available_features,
+        default=available_features[:min(6, len(available_features))],
         key="dl_features_select"
     )

-    if dl_target
-
-
+    if not dl_target or not dl_features:
+        st.info("📝 Pilih target variable dan features untuk memulai analisis DL")
+        return
+
+    try:
+
+        # Check GPU availability
+        gpu_available = len(tf.config.experimental.list_physical_devices('GPU')) > 0
+        if gpu_available:
+            st.success("🎯 GPU tersedia - Training akan dipercepat!")
+        else:
+            st.info("💡 GPU tidak tersedia - Training menggunakan CPU")
+
+        # Optimasi memory untuk dataset besar
+        @st.cache_data(show_spinner=False)
+        def prepare_data_optimized(_df, features, target, sample_frac=1.0, problem_type="Regression"):
+            """Prepare data dengan optimasi memory"""
+            # Sampling untuk dataset besar
+            if sample_frac < 1.0:
+                _df = _df.sample(frac=sample_frac, random_state=42)
+
+            X = _df[features].fillna(_df[features].mean())
+            y = _df[target]
+
+            # Preprocessing target untuk classification
+            if problem_type != "Regression":
+                if problem_type == "Binary Classification":
+                    # Pastikan binary classification
+                    unique_vals = y.unique()
+                    if len(unique_vals) > 2:
+                        st.warning(f"⚠️ Target memiliki {len(unique_vals)} kelas. Menggunakan 2 kelas terbanyak.")
+                        top_2_classes = y.value_counts().head(2).index
+                        mask = y.isin(top_2_classes)
+                        X = X[mask]
+                        y = y[mask]
+                        y = LabelEncoder().fit_transform(y)
+                    else:
+                        y = LabelEncoder().fit_transform(y)
+                else:
+                    # Multi-class classification
+                    y = LabelEncoder().fit_transform(y)

-
-
-
+            return X, y
+
+        # Prepare data dengan optimasi
+        with st.spinner("🔄 Memproses data dengan optimasi kecepatan..."):
+            X, y = prepare_data_optimized(df, dl_features, dl_target, sample_size, dl_problem_type)

         # Split data
         X_train, X_test, y_train, y_test = train_test_split(
-        X, y, test_size=0.2, random_state=42
+            X, y, test_size=0.2, random_state=42,
+            stratify=y if dl_problem_type != "Regression" else None
         )

         # Scale features
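Note the leading underscore in `prepare_data_optimized(_df, ...)`: under `st.cache_data`, Streamlit skips hashing any parameter whose name starts with `_`, so the large DataFrame is not re-hashed on every rerun, with the trade-off that changes to that frame alone will not invalidate the cache. A minimal sketch of the convention (the `column_summary` helper is illustrative):

```python
import pandas as pd
import streamlit as st

@st.cache_data(show_spinner=False)
def column_summary(_df: pd.DataFrame, column: str) -> pd.Series:
    # _df is excluded from the cache key; only `column` (and the code) keys the cache.
    return _df[column].describe()
```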
@@ -7092,198 +7435,1532 @@ def deep_learning_analysis(df, numeric_cols, non_numeric_cols):
|
|
7092
7435
|
X_train_scaled = scaler.fit_transform(X_train)
|
7093
7436
|
X_test_scaled = scaler.transform(X_test)
|
7094
7437
|
|
7095
|
-
#
|
7096
|
-
|
7438
|
+
# Convert to TensorFlow datasets untuk performa tinggi
|
7439
|
+
train_dataset = tf.data.Dataset.from_tensor_slices((X_train_scaled, y_train))
|
7440
|
+
train_dataset = train_dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)
|
7097
7441
|
|
7442
|
+
val_dataset = tf.data.Dataset.from_tensor_slices((X_test_scaled, y_test))
|
7443
|
+
val_dataset = val_dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)
|
7444
|
+
|
7445
|
+
# Tampilkan info dataset
|
7446
|
+
st.success(f"✅ Data siap: {len(X_train):,} training samples, {len(X_test):,} test samples")
|
7447
|
+
|
7448
|
+
# Model architecture dengan optimasi
|
7449
|
+
st.subheader("🏗️ Neural Network Architecture - Optimized")
|
7450
|
+
|
7451
|
+
col1, col2 = st.columns(2)
|
7452
|
+
|
7453
|
+
with col1:
|
7454
|
+
hidden_layers = st.slider("Jumlah Hidden Layers", 1, 5, 2, key="dl_hidden_layers")
|
7455
|
+
units_per_layer = st.slider("Units per Layer", 32, 512, 64, key="dl_units")
|
7456
|
+
activation = st.selectbox("Activation Function", ["relu", "elu", "tanh", "selu"],
|
7457
|
+
index=0, key="dl_activation")
|
7458
|
+
|
7459
|
+
with col2:
|
7460
|
+
dropout_rate = st.slider("Dropout Rate", 0.0, 0.5, 0.2, 0.1, key="dl_dropout")
|
7461
|
+
optimizer = st.selectbox("Optimizer", ["adam", "rmsprop", "nadam", "sgd"],
|
7462
|
+
index=0, key="dl_optimizer")
|
7463
|
+
use_batch_norm = st.checkbox("Gunakan Batch Normalization", value=True, key="dl_batchnorm")
|
7464
|
+
use_early_stopping = st.checkbox("Gunakan Early Stopping", value=True, key="dl_earlystop")
|
7465
|
+
|
7466
|
+
# Advanced configuration
|
7467
|
+
with st.expander("⚙️ Konfigurasi Lanjutan"):
|
7098
7468
|
col1, col2 = st.columns(2)
|
7099
|
-
|
7100
7469
|
with col1:
|
7101
|
-
|
7102
|
-
|
7103
|
-
|
7104
|
-
|
7470
|
+
weight_initializer = st.selectbox(
|
7471
|
+
"Weight Initializer",
|
7472
|
+
["glorot_uniform", "he_normal", "lecun_uniform"],
|
7473
|
+
index=0
|
7474
|
+
)
|
7475
|
+
use_l2_reg = st.checkbox("Gunakan L2 Regularization", value=False)
|
7476
|
+
l2_rate = st.slider("L2 Rate", 0.0001, 0.01, 0.001, 0.0001) if use_l2_reg else 0.0
|
7477
|
+
|
7105
7478
|
with col2:
|
7106
|
-
|
7107
|
-
|
7108
|
-
|
7109
|
-
|
7479
|
+
learning_rate_schedule = st.selectbox(
|
7480
|
+
"Learning Rate Schedule",
|
7481
|
+
["Constant", "ExponentialDecay", "CosineDecay"],
|
7482
|
+
index=0
|
7483
|
+
)
|
7484
|
+
|
7485
|
+
# Build optimized model
|
7486
|
+
with st.spinner("🔄 Membangun model neural network..."):
|
7110
7487
|
model = tf.keras.Sequential()
|
7111
7488
|
|
7112
7489
|
# Input layer
|
7113
|
-
|
7490
|
+
if use_l2_reg:
|
7491
|
+
model.add(tf.keras.layers.Dense(
|
7492
|
+
units_per_layer,
|
7493
|
+
activation=activation,
|
7494
|
+
input_shape=(len(dl_features),),
|
7495
|
+
kernel_initializer=weight_initializer,
|
7496
|
+
kernel_regularizer=tf.keras.regularizers.l2(l2_rate)
|
7497
|
+
))
|
7498
|
+
else:
|
7499
|
+
model.add(tf.keras.layers.Dense(
|
7500
|
+
units_per_layer,
|
7501
|
+
activation=activation,
|
7502
|
+
input_shape=(len(dl_features),),
|
7503
|
+
kernel_initializer=weight_initializer
|
7504
|
+
))
|
7505
|
+
|
7506
|
+
if use_batch_norm:
|
7507
|
+
model.add(tf.keras.layers.BatchNormalization())
|
7114
7508
|
model.add(tf.keras.layers.Dropout(dropout_rate))
|
7115
7509
|
|
7116
|
-
# Hidden layers
|
7510
|
+
# Hidden layers dengan optimasi
|
7117
7511
|
for i in range(hidden_layers - 1):
|
7118
|
-
|
7512
|
+
# Reduce units in deeper layers untuk efisiensi
|
7513
|
+
units = max(32, units_per_layer // (2 ** (i + 1)))
|
7514
|
+
|
7515
|
+
if use_l2_reg:
|
7516
|
+
model.add(tf.keras.layers.Dense(
|
7517
|
+
units,
|
7518
|
+
activation=activation,
|
7519
|
+
kernel_regularizer=tf.keras.regularizers.l2(l2_rate)
|
7520
|
+
))
|
7521
|
+
else:
|
7522
|
+
model.add(tf.keras.layers.Dense(units, activation=activation))
|
7523
|
+
|
7524
|
+
if use_batch_norm:
|
7525
|
+
model.add(tf.keras.layers.BatchNormalization())
|
7119
7526
|
model.add(tf.keras.layers.Dropout(dropout_rate))
|
7120
7527
|
|
7121
7528
|
# Output layer
|
7122
7529
|
if dl_problem_type == "Regression":
|
7123
7530
|
model.add(tf.keras.layers.Dense(1, activation='linear'))
|
7124
7531
|
loss = 'mse'
|
7125
|
-
metrics = ['mae']
|
7532
|
+
metrics = ['mae', 'mse']
|
7533
|
+
monitor_metric = 'val_loss'
|
7126
7534
|
else:
|
7127
|
-
num_classes = len(
|
7535
|
+
num_classes = len(np.unique(y)) if dl_problem_type == "Multi-class Classification" else 1
|
7128
7536
|
activation_output = 'softmax' if dl_problem_type == "Multi-class Classification" else 'sigmoid'
|
7129
|
-
|
7537
|
+
output_units = num_classes if dl_problem_type == "Multi-class Classification" else 1
|
7538
|
+
model.add(tf.keras.layers.Dense(output_units, activation=activation_output))
|
7130
7539
|
loss = 'sparse_categorical_crossentropy' if dl_problem_type == "Multi-class Classification" else 'binary_crossentropy'
|
7131
7540
|
metrics = ['accuracy']
|
7132
|
-
|
7133
|
-
|
7134
|
-
|
7135
|
-
|
7136
|
-
|
7137
|
-
|
7541
|
+
monitor_metric = 'val_accuracy'
|
7542
|
+
|
7543
|
+
# Learning rate schedule
|
7544
|
+
if learning_rate_schedule == "ExponentialDecay":
|
7545
|
+
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
|
7546
|
+
initial_learning_rate=learning_rate,
|
7547
|
+
decay_steps=1000,
|
7548
|
+
decay_rate=0.9
|
7138
7549
|
)
|
7139
|
-
|
7140
|
-
|
7141
|
-
|
7142
|
-
|
7143
|
-
|
7144
|
-
|
7145
|
-
|
7146
|
-
|
7147
|
-
|
7148
|
-
|
7149
|
-
|
7150
|
-
|
7151
|
-
|
7152
|
-
|
7153
|
-
|
7154
|
-
|
7155
|
-
|
7156
|
-
X_train_scaled, y_train,
|
7157
|
-
epochs=epochs,
|
7158
|
-
batch_size=batch_size,
|
7159
|
-
validation_split=0.2,
|
7160
|
-
callbacks=[early_stopping],
|
7161
|
-
verbose=0
|
7162
|
-
)
|
7163
|
-
|
7164
|
-
# Plot training history
|
7165
|
-
fig = go.Figure()
|
7166
|
-
fig.add_trace(go.Scatter(
|
7167
|
-
y=history.history['loss'],
|
7168
|
-
mode='lines',
|
7169
|
-
name='Training Loss'
|
7170
|
-
))
|
7171
|
-
if 'val_loss' in history.history:
|
7172
|
-
fig.add_trace(go.Scatter(
|
7173
|
-
y=history.history['val_loss'],
|
7174
|
-
mode='lines',
|
7175
|
-
name='Validation Loss'
|
7176
|
-
))
|
7177
|
-
fig.update_layout(
|
7178
|
-
title="Training History - Loss",
|
7179
|
-
xaxis_title="Epoch",
|
7180
|
-
yaxis_title="Loss",
|
7181
|
-
height=400
|
7182
|
-
)
|
7183
|
-
st.plotly_chart(fig, use_container_width=True)
|
7184
|
-
|
7185
|
-
# Evaluate model
|
7186
|
-
test_results = model.evaluate(X_test_scaled, y_test, verbose=0)
|
7187
|
-
st.success(f"✅ Model Training Complete!")
|
7188
|
-
st.metric("Test Loss", f"{test_results[0]:.4f}")
|
7189
|
-
if len(test_results) > 1:
|
7190
|
-
st.metric("Test Metric", f"{test_results[1]:.4f}")
|
7550
|
+
elif learning_rate_schedule == "CosineDecay":
|
7551
|
+
lr_schedule = tf.keras.optimizers.schedules.CosineDecay(
|
7552
|
+
initial_learning_rate=learning_rate,
|
7553
|
+
decay_steps=epochs * len(X_train) // batch_size
|
7554
|
+
)
|
7555
|
+
else:
|
7556
|
+
lr_schedule = learning_rate
|
7557
|
+
|
7558
|
+
# Compile model dengan learning rate
|
7559
|
+
if optimizer == "adam":
|
7560
|
+
optimizer_obj = tf.keras.optimizers.Adam(learning_rate=lr_schedule)
|
7561
|
+
elif optimizer == "rmsprop":
|
7562
|
+
optimizer_obj = tf.keras.optimizers.RMSprop(learning_rate=lr_schedule)
|
7563
|
+
elif optimizer == "nadam":
|
7564
|
+
optimizer_obj = tf.keras.optimizers.Nadam(learning_rate=lr_schedule)
|
7565
|
+
else:
|
7566
|
+
optimizer_obj = tf.keras.optimizers.SGD(learning_rate=lr_schedule, momentum=0.9)
|
7191
7567
|
|
7192
|
-
|
7193
|
-
|
7194
|
-
|
7195
|
-
|
7568
|
+
model.compile(optimizer=optimizer_obj, loss=loss, metrics=metrics)
|
7569
|
+
|
7570
|
+
# Display model summary
|
7571
|
+
st.subheader("📊 Model Summary")
|
7196
7572
|
|
7197
|
-
|
7198
|
-
|
7199
|
-
|
7200
|
-
|
7201
|
-
|
7202
|
-
st.info("🔄 Fitur Model Comparison - Pilih model dari tab Machine Learning dan Deep Learning untuk perbandingan")
|
7203
|
-
|
7204
|
-
# Placeholder untuk implementasi lengkap
|
7205
|
-
col1, col2, col3 = st.columns(3)
|
7206
|
-
|
7207
|
-
with col1:
|
7208
|
-
st.metric("ML Models", "3")
|
7209
|
-
with col2:
|
7210
|
-
st.metric("Evaluation Metrics", "5+")
|
7211
|
-
with col3:
|
7212
|
-
st.metric("Feature Importance", "✓")
|
7573
|
+
# Tangkap output summary dari model
|
7574
|
+
model_summary = []
|
7575
|
+
model.summary(print_fn=lambda x: model_summary.append(x))
|
7576
|
+
summary_text = "\n".join(model_summary)
|
7213
7577
|
|
7214
|
-
|
7215
|
-
|
7216
|
-
|
7217
|
-
|
7218
|
-
|
7219
|
-
|
7220
|
-
|
7221
|
-
|
7222
|
-
|
7223
|
-
|
7224
|
-
|
7225
|
-
|
7226
|
-
|
7227
|
-
|
7228
|
-
|
7229
|
-
|
7230
|
-
|
7231
|
-
|
7232
|
-
|
7233
|
-
|
7234
|
-
|
7235
|
-
|
7236
|
-
|
7237
|
-
|
7238
|
-
|
7239
|
-
|
7240
|
-
|
7241
|
-
|
7242
|
-
|
7243
|
-
|
7244
|
-
|
7245
|
-
|
7246
|
-
|
7247
|
-
|
7248
|
-
|
7249
|
-
|
7250
|
-
|
7251
|
-
|
7252
|
-
|
7253
|
-
|
7254
|
-
|
7255
|
-
|
7256
|
-
|
7257
|
-
|
7258
|
-
title="Random Forest Feature Importance",
|
7259
|
-
orientation='h'
|
7578
|
+
# Tambahkan CSS styling
|
7579
|
+
st.markdown("""
|
7580
|
+
<style>
|
7581
|
+
.model-summary-box {
|
7582
|
+
background-color: #fff; /* Warna gelap seperti terminal */
|
7583
|
+
color: #000; /* Warna teks hijau neon */
|
7584
|
+
border-radius: 10px;
|
7585
|
+
padding: 15px;
|
7586
|
+
font-family: 'Courier New', monospace;
|
7587
|
+
font-size: 14px;
|
7588
|
+
line-height: 1.5;
|
7589
|
+
white-space: pre-wrap;
|
7590
|
+
box-shadow: 0 0 8px rgba(0,255,179,0.3);
|
7591
|
+
border: 1px solid rgba(0,255,179,0.4);
|
7592
|
+
overflow-x: auto;
|
7593
|
+
}
|
7594
|
+
</style>
|
7595
|
+
""", unsafe_allow_html=True)
|
7596
|
+
|
7597
|
+
# Gunakan expander untuk dropdown
|
7598
|
+
with st.expander("🧠 Lihat / Sembunyikan Model Summary"):
|
7599
|
+
st.markdown(f"<div class='model-summary-box'>{summary_text}</div>", unsafe_allow_html=True)
|
7600
|
+
|
7601
|
+
# Calculate total parameters
|
7602
|
+
total_params = model.count_params()
|
7603
|
+
st.info(f"📈 Total Parameters: {total_params:,}")
|
7604
|
+
|
7605
|
+
# Training section
|
7606
|
+
st.subheader("🚀 Pelatihan Model")
|
7607
|
+
|
7608
|
+
if st.button("🎯 Mulai Pelatihan Deep Learning", type="primary", key="dl_train_button"):
|
7609
|
+
start_time = time.time()
|
7610
|
+
|
7611
|
+
with st.spinner("🧠 Training neural network... Mohon tunggu..."):
|
7612
|
+
# Callbacks untuk training lebih cepat
|
7613
|
+
callbacks = []
|
7614
|
+
|
7615
|
+
if use_early_stopping:
|
7616
|
+
early_stopping = tf.keras.callbacks.EarlyStopping(
|
7617
|
+
monitor=monitor_metric,
|
7618
|
+
patience=10,
|
7619
|
+
restore_best_weights=True,
|
7620
|
+
mode='min' if dl_problem_type == "Regression" else 'max',
|
7621
|
+
verbose=1
|
7260
7622
|
)
|
7261
|
-
|
7623
|
+
callbacks.append(early_stopping)
|
7624
|
+
|
7625
|
+
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
|
7626
|
+
monitor='val_loss',
|
7627
|
+
factor=0.5,
|
7628
|
+
patience=5,
|
7629
|
+
min_lr=0.00001,
|
7630
|
+
verbose=1
|
7631
|
+
)
|
7632
|
+
callbacks.append(reduce_lr)
|
7633
|
+
|
7634
|
+
# TensorBoard callback (optional)
|
7635
|
+
# callbacks.append(tf.keras.callbacks.TensorBoard(log_dir='./logs'))
|
7636
|
+
|
7637
|
+
# Train model dengan progress bar
|
7638
|
+
progress_bar = st.progress(0)
|
7639
|
+
status_text = st.empty()
|
7640
|
+
time_estimator = st.empty()
|
7641
|
+
metrics_display = st.empty()
|
7262
7642
|
|
7263
|
-
|
7264
|
-
|
7265
|
-
|
7643
|
+
class TrainingCallback(tf.keras.callbacks.Callback):
|
7644
|
+
def on_epoch_begin(self, epoch, logs=None):
|
7645
|
+
self.epoch_start_time = time.time()
|
7266
7646
|
|
7267
|
-
|
7268
|
-
|
7269
|
-
|
7647
|
+
def on_epoch_end(self, epoch, logs=None):
|
7648
|
+
progress = (epoch + 1) / epochs
|
7649
|
+
progress_bar.progress(min(progress, 1.0))
|
7650
|
+
|
7651
|
+
# Metrics display
|
7652
|
+
if dl_problem_type == "Regression":
|
7653
|
+
metrics_str = f"Loss: {logs['loss']:.4f}, Val Loss: {logs['val_loss']:.4f}, MAE: {logs['mae']:.4f}"
|
7654
|
+
else:
|
7655
|
+
metrics_str = f"Loss: {logs['loss']:.4f}, Val Loss: {logs['val_loss']:.4f}, Acc: {logs['accuracy']:.4f}"
|
7656
|
+
|
7657
|
+
status_text.text(f"Epoch {epoch+1}/{epochs}")
|
7658
|
+
metrics_display.text(f"📊 {metrics_str}")
|
7659
|
+
|
7660
|
+
# Time estimation
|
7661
|
+
elapsed = time.time() - start_time
|
7662
|
+
epoch_time = time.time() - self.epoch_start_time
|
7663
|
+
remaining = epoch_time * (epochs - epoch - 1)
|
7664
|
+
|
7665
|
+
time_estimator.text(f"⏱️ Elapsed: {elapsed:.1f}s | Est. remaining: {remaining:.1f}s")
|
7666
|
+
|
7667
|
+
callbacks.append(TrainingCallback())
|
7668
|
+
|
7669
|
+
# Train model
|
7670
|
+
history = model.fit(
|
7671
|
+
train_dataset,
|
7672
|
+
epochs=epochs,
|
7673
|
+
validation_data=val_dataset,
|
7674
|
+
callbacks=callbacks,
|
7675
|
+
verbose=0
|
7676
|
+
)
|
7677
|
+
|
7678
|
+
training_time = time.time() - start_time
|
7679
|
+
progress_bar.progress(1.0)
|
7680
|
+
status_text.text(f"✅ Pelatihan Selesai! Waktu: {training_time:.1f} detik")
|
7681
|
+
time_estimator.text("")
|
7682
|
+
metrics_display.text("")
|
7683
|
+
|
7684
|
+
# ==================== EVALUASI DETAIL ====================
|
7685
|
+
st.subheader("📈 Hasil Evaluasi Detail")
|
7686
|
+
|
7687
|
+
# Predictions
|
7688
|
+
y_pred = model.predict(X_test_scaled, verbose=0)
|
7689
|
+
|
7690
|
+
# 1. PERFORMANCE METRICS COMPREHENSIVE
|
7691
|
+
st.subheader("🎯 Dashboard Performa Model")
|
7692
|
+
|
7693
|
+
if dl_problem_type == "Regression":
|
7694
|
+
# Regression metrics
|
7695
|
+
y_pred_flat = y_pred.flatten()
|
7696
|
+
mse = mean_squared_error(y_test, y_pred_flat)
|
7697
|
+
mae = mean_absolute_error(y_test, y_pred_flat)
|
7698
|
+
r2 = r2_score(y_test, y_pred_flat)
|
7699
|
+
rmse = np.sqrt(mse)
|
7270
7700
|
|
7271
|
-
|
7272
|
-
|
7273
|
-
|
7274
|
-
|
7275
|
-
|
7276
|
-
|
7277
|
-
|
7278
|
-
|
7279
|
-
|
7280
|
-
|
7281
|
-
|
7282
|
-
|
7283
|
-
|
7284
|
-
|
7285
|
-
|
7286
|
-
|
7701
|
+
# Additional metrics
|
7702
|
+
mape = np.mean(np.abs((y_test - y_pred_flat) / np.where(y_test != 0, y_test, 1))) * 100
|
7703
|
+
accuracy_percentage = max(0, min(100, (1 - mae / (y_test.max() - y_test.min())) * 100))
|
7704
|
+
|
7705
|
+
# Display metrics
|
7706
|
+
col1, col2, col3, col4 = st.columns(4)
|
7707
|
+
|
7708
|
+
with col1:
|
7709
|
+
st.metric("R² Score", f"{r2:.4f}",
|
7710
|
+
delta="Excellent" if r2 > 0.8 else "Good" if r2 > 0.6 else "Needs Improvement")
|
7711
|
+
with col2:
|
7712
|
+
st.metric("MAE", f"{mae:.4f}")
|
7713
|
+
with col3:
|
7714
|
+
st.metric("RMSE", f"{rmse:.4f}")
|
7715
|
+
with col4:
|
7716
|
+
st.metric("MAPE", f"{mape:.2f}%")
|
7717
|
+
|
7718
|
+
else:
|
7719
|
+
# Classification metrics
|
7720
|
+
if dl_problem_type == "Binary Classification":
|
7721
|
+
y_pred_class = (y_pred > 0.5).astype(int).flatten()
|
7722
|
+
else:
|
7723
|
+
y_pred_class = np.argmax(y_pred, axis=1)
|
7724
|
+
|
7725
|
+
accuracy = accuracy_score(y_test, y_pred_class)
|
7726
|
+
precision = precision_score(y_test, y_pred_class, average='weighted', zero_division=0)
|
7727
|
+
recall = recall_score(y_test, y_pred_class, average='weighted', zero_division=0)
|
7728
|
+
f1 = f1_score(y_test, y_pred_class, average='weighted', zero_division=0)
|
7729
|
+
|
7730
|
+
# Display metrics
|
7731
|
+
col1, col2, col3, col4 = st.columns(4)
|
7732
|
+
|
7733
|
+
with col1:
|
7734
|
+
st.metric("Accuracy", f"{accuracy:.4f}",
|
7735
|
+
delta="Excellent" if accuracy > 0.9 else "Good" if accuracy > 0.8 else "Needs Improvement")
|
7736
|
+
with col2:
|
7737
|
+
st.metric("Precision", f"{precision:.4f}")
|
7738
|
+
with col3:
|
7739
|
+
st.metric("Recall", f"{recall:.4f}")
|
7740
|
+
with col4:
|
7741
|
+
st.metric("F1-Score", f"{f1:.4f}")
|
7742
|
+
|
7743
|
+
# 2. VISUALISASI LENGKAP
|
7744
|
+
st.subheader("📊 Visualisasi Komprehensif")
|
7745
|
+
|
7746
|
+
# Training history visualization
|
7747
|
+
fig_history = make_subplots(
|
7748
|
+
rows=1, cols=2,
|
7749
|
+
subplot_titles=('Loss Progression', 'Metrics Progression'),
|
7750
|
+
specs=[[{"secondary_y": False}, {"secondary_y": False}]]
|
7751
|
+
)
|
7752
|
+
|
7753
|
+
# Loss plot
|
7754
|
+
fig_history.add_trace(
|
7755
|
+
go.Scatter(x=list(range(1, len(history.history['loss'])+1)),
|
7756
|
+
y=history.history['loss'],
|
7757
|
+
name='Training Loss', line=dict(color='blue')),
|
7758
|
+
row=1, col=1
|
7759
|
+
)
|
7760
|
+
fig_history.add_trace(
|
7761
|
+
go.Scatter(x=list(range(1, len(history.history['val_loss'])+1)),
|
7762
|
+
y=history.history['val_loss'],
|
7763
|
+
name='Validation Loss', line=dict(color='red')),
|
7764
|
+
row=1, col=1
|
7765
|
+
)
|
7766
|
+
|
7767
|
+
# Metrics plot
|
7768
|
+
if dl_problem_type == "Regression":
|
7769
|
+
fig_history.add_trace(
|
7770
|
+
go.Scatter(x=list(range(1, len(history.history['mae'])+1)),
|
7771
|
+
y=history.history['mae'],
|
7772
|
+
name='Training MAE', line=dict(color='green')),
|
7773
|
+
row=1, col=2
|
7774
|
+
)
|
7775
|
+
if 'val_mae' in history.history:
|
7776
|
+
fig_history.add_trace(
|
7777
|
+
go.Scatter(x=list(range(1, len(history.history['val_mae'])+1)),
|
7778
|
+
y=history.history['val_mae'],
|
7779
|
+
name='Validation MAE', line=dict(color='orange')),
|
7780
|
+
row=1, col=2
|
7781
|
+
)
|
7782
|
+
else:
|
7783
|
+
fig_history.add_trace(
|
7784
|
+
go.Scatter(x=list(range(1, len(history.history['accuracy'])+1)),
|
7785
|
+
y=history.history['accuracy'],
|
7786
|
+
name='Training Accuracy', line=dict(color='green')),
|
7787
|
+
row=1, col=2
|
7788
|
+
)
|
7789
|
+
fig_history.add_trace(
|
7790
|
+
go.Scatter(x=list(range(1, len(history.history['val_accuracy'])+1)),
|
7791
|
+
y=history.history['val_accuracy'],
|
7792
|
+
name='Validation Accuracy', line=dict(color='orange')),
|
7793
|
+
row=1, col=2
|
7794
|
+
)
|
7795
|
+
|
7796
|
+
fig_history.update_layout(height=400, title_text="Training History")
|
7797
|
+
st.plotly_chart(fig_history, use_container_width=True)
|
7798
|
+
|
7799
|
+
# 3. PREDICTION VISUALIZATION
|
7800
|
+
if dl_problem_type == "Regression":
|
7801
|
+
# Regression plots
|
7802
|
+
col1, col2 = st.columns(2)
|
7803
|
+
|
7804
|
+
with col1:
|
7805
|
+
# Actual vs Predicted
|
7806
|
+
fig_actual_pred = px.scatter(
|
7807
|
+
x=y_test, y=y_pred_flat,
|
7808
|
+
title="Actual vs Predicted",
|
7809
|
+
labels={'x': 'Actual', 'y': 'Predicted'},
|
7810
|
+
trendline="lowess"
|
7811
|
+
)
|
7812
|
+
fig_actual_pred.add_trace(
|
7813
|
+
go.Scatter(x=[y_test.min(), y_test.max()],
|
7814
|
+
y=[y_test.min(), y_test.max()],
|
7815
|
+
mode='lines', name='Perfect Prediction',
|
7816
|
+
line=dict(color='red', dash='dash'))
|
7817
|
+
)
|
7818
|
+
st.plotly_chart(fig_actual_pred, use_container_width=True)
|
7819
|
+
|
7820
|
+
with col2:
|
7821
|
+
# Residual plot
|
7822
|
+
residuals = y_test - y_pred_flat
|
7823
|
+
fig_residual = px.scatter(
|
7824
|
+
x=y_pred_flat, y=residuals,
|
7825
|
+
title="Residual Plot",
|
7826
|
+
labels={'x': 'Predicted', 'y': 'Residuals'},
|
7827
|
+
trendline="lowess"
|
7828
|
+
)
|
7829
|
+
fig_residual.add_hline(y=0, line_dash="dash", line_color="red")
|
7830
|
+
st.plotly_chart(fig_residual, use_container_width=True)
|
7831
|
+
|
7832
|
+
else:
|
7833
|
+
# Classification plots
|
7834
|
+
col1, col2 = st.columns(2)
|
7835
|
+
|
7836
|
+
with col1:
|
7837
|
+
# Confusion Matrix
|
7838
|
+
cm = confusion_matrix(y_test, y_pred_class)
|
7839
|
+
fig_cm = px.imshow(
|
7840
|
+
cm,
|
7841
|
+
text_auto=True,
|
7842
|
+
title="Confusion Matrix",
|
7843
|
+
color_continuous_scale='Blues',
|
7844
|
+
aspect="auto"
|
7845
|
+
)
|
7846
|
+
st.plotly_chart(fig_cm, use_container_width=True)
|
7847
|
+
|
7848
|
+
with col2:
|
7849
|
+
# Classification report heatmap
|
7850
|
+
report = classification_report(y_test, y_pred_class, output_dict=True)
|
7851
|
+
report_df = pd.DataFrame(report).transpose().iloc[:-1, :3]
|
7852
|
+
fig_report = px.imshow(
|
7853
|
+
report_df.values,
|
7854
|
+
x=report_df.columns,
|
7855
|
+
y=report_df.index,
|
7856
|
+
text_auto=".2f",
|
7857
|
+
title="Classification Report",
|
7858
|
+
color_continuous_scale='Viridis',
|
7859
|
+
aspect="auto"
|
7860
|
+
)
|
7861
|
+
st.plotly_chart(fig_report, use_container_width=True)
|
7862
|
+
|
7863
|
+
# 4. FEATURE IMPORTANCE ANALYSIS
|
7864
|
+
st.subheader("🔍 Analisis Feature Importance")
|
7865
|
+
|
7866
|
+
try:
|
7867
|
+
# Simplified feature importance using permutation
|
7868
|
+
@st.cache_data
|
7869
|
+
def calculate_feature_importance(model, X_test_scaled, y_test, feature_names, problem_type):
|
7870
|
+
baseline_score = model.evaluate(X_test_scaled, y_test, verbose=0)
|
7871
|
+
baseline_loss = baseline_score[0] if problem_type == "Regression" else 1 - baseline_score[1]
|
7872
|
+
|
7873
|
+
importance_scores = []
|
7874
|
+
for i in range(len(feature_names)):
|
7875
|
+
X_permuted = X_test_scaled.copy()
|
7876
|
+
np.random.shuffle(X_permuted[:, i])
|
7877
|
+
permuted_score = model.evaluate(X_permuted, y_test, verbose=0)
|
7878
|
+
permuted_loss = permuted_score[0] if problem_type == "Regression" else 1 - permuted_score[1]
|
7879
|
+
importance = max(0, baseline_loss - permuted_loss)
|
7880
|
+
importance_scores.append(importance)
|
7881
|
+
|
7882
|
+
return pd.DataFrame({
|
7883
|
+
'Feature': feature_names,
|
7884
|
+
'Importance': importance_scores
|
7885
|
+
}).sort_values('Importance', ascending=False)
|
7886
|
+
|
7887
|
+
feature_importance_df = calculate_feature_importance(
|
7888
|
+
model, X_test_scaled, y_test, dl_features, dl_problem_type
|
7889
|
+
)
|
7890
|
+
|
7891
|
+
col1, col2 = st.columns(2)
|
7892
|
+
|
7893
|
+
with col1:
|
7894
|
+
fig_importance = px.bar(
|
7895
|
+
feature_importance_df,
|
7896
|
+
x='Importance',
|
7897
|
+
y='Feature',
|
7898
|
+
orientation='h',
|
7899
|
+
title="Feature Importance",
|
7900
|
+
color='Importance',
|
7901
|
+
color_continuous_scale='Viridis'
|
7902
|
+
)
|
7903
|
+
st.plotly_chart(fig_importance, use_container_width=True)
|
7904
|
+
|
7905
|
+
with col2:
|
7906
|
+
fig_importance_pie = px.pie(
|
7907
|
+
feature_importance_df,
|
7908
|
+
values='Importance',
|
7909
|
+
names='Feature',
|
7910
|
+
title="Feature Importance Distribution"
|
7911
|
+
)
|
7912
|
+
st.plotly_chart(fig_importance_pie, use_container_width=True)
|
7913
|
+
|
7914
|
+
except Exception as e:
|
7915
|
+
st.warning(f"⚠️ Feature importance calculation skipped: {str(e)}")
|
7916
|
+
|
7917
|
+
# 5. MODEL PERFORMANCE GAUGE
|
7918
|
+
st.subheader("📈 Performance Summary")
|
7919
|
+
|
7920
|
+
if dl_problem_type == "Regression":
|
7921
|
+
performance_score = min(100, max(0, (r2 + (1 - mae/y_test.std())) * 50))
|
7922
|
+
performance_level = "Sangat Baik" if performance_score > 85 else \
|
7923
|
+
"Baik" if performance_score > 70 else \
|
7924
|
+
"Cukup" if performance_score > 60 else "Perlu Improvement"
|
7925
|
+
else:
|
7926
|
+
performance_score = accuracy * 100
|
7927
|
+
performance_level = "Sangat Baik" if performance_score > 90 else \
|
7928
|
+
"Baik" if performance_score > 80 else \
|
7929
|
+
"Cukup" if performance_score > 70 else "Perlu Improvement"
|
7930
|
+
|
7931
|
+
# Gauge chart
|
7932
|
+
fig_gauge = go.Figure(go.Indicator(
|
7933
|
+
mode = "gauge+number+delta",
|
7934
|
+
value = performance_score,
|
7935
|
+
domain = {'x': [0, 1], 'y': [0, 1]},
|
7936
|
+
title = {'text': f"Model Performance: {performance_level}"},
|
7937
|
+
gauge = {
|
7938
|
+
'axis': {'range': [None, 100]},
|
7939
|
+
'bar': {'color': "darkblue"},
|
7940
|
+
'steps': [
|
7941
|
+
{'range': [0, 60], 'color': "red"},
|
7942
|
+
{'range': [60, 75], 'color': "yellow"},
|
7943
|
+
{'range': [75, 90], 'color': "lightgreen"},
|
7944
|
+
{'range': [90, 100], 'color': "green"}],
|
7945
|
+
'threshold': {
|
7946
|
+
'line': {'color': "red", 'width': 4},
|
7947
|
+
'thickness': 0.75,
|
7948
|
+
'value': 90}}
|
7949
|
+
))
|
7950
|
+
st.plotly_chart(fig_gauge, use_container_width=True)
|
7951
|
+
|
7952
|
+
# 6. DOWNLOAD DAN EXPORT MODEL
|
7953
|
+
st.subheader("💾 Export Model")
|
7954
|
+
|
7955
|
+
col1, col2 = st.columns(2)
|
7956
|
+
|
7957
|
+
with col1:
|
7958
|
+
# Save model
|
7959
|
+
if st.button("💾 Save TensorFlow Model"):
|
7960
|
+
model.save('saved_model.h5')
|
7961
|
+
with open('saved_model.h5', 'rb') as f:
|
7962
|
+
st.download_button(
|
7963
|
+
label="📥 Download Model",
|
7964
|
+
data=f,
|
7965
|
+
file_name="deep_learning_model.h5",
|
7966
|
+
mime="application/octet-stream"
|
7967
|
+
)
|
7968
|
+
|
7969
|
+
with col2:
|
7970
|
+
# Export predictions
|
7971
|
+
predictions_df = pd.DataFrame({
|
7972
|
+
'Actual': y_test,
|
7973
|
+
'Predicted': y_pred.flatten() if dl_problem_type == "Regression" else y_pred_class
|
7974
|
+
})
|
7975
|
+
csv = predictions_df.to_csv(index=False)
|
7976
|
+
st.download_button(
|
7977
|
+
label="📥 Download Predictions",
|
7978
|
+
data=csv,
|
7979
|
+
file_name="model_predictions.csv",
|
7980
|
+
mime="text/csv"
|
7981
|
+
)
|
7982
|
+
|
7983
|
+
# 7. RECOMMENDATIONS AND INSIGHTS
|
7984
|
+
st.subheader("💡 Insights & Rekomendasi")
|
7985
|
+
|
7986
|
+
# Training insights
|
7987
|
+
final_epoch = len(history.history['loss'])
|
7988
|
+
final_loss = history.history['loss'][-1]
|
7989
|
+
final_val_loss = history.history['val_loss'][-1]
|
7990
|
+
|
7991
|
+
col1, col2, col3 = st.columns(3)
|
7992
|
+
with col1:
|
7993
|
+
st.metric("Final Training Loss", f"{final_loss:.4f}")
|
7994
|
+
with col2:
|
7995
|
+
st.metric("Final Validation Loss", f"{final_val_loss:.4f}")
|
7996
|
+
with col3:
|
7997
|
+
st.metric("Training Time", f"{training_time:.1f}s")
|
7998
|
+
|
7999
|
+
# Recommendations based on performance
|
8000
|
+
st.info("""
|
8001
|
+
**🎯 Rekomendasi Improvement:**
|
8002
|
+
- **Data Quality**: Periksa missing values dan outliers
|
8003
|
+
- **Feature Engineering**: Tambahkan feature yang lebih relevan
|
8004
|
+
- **Hyperparameter Tuning**: Eksperimen dengan architecture berbeda
|
8005
|
+
- **Regularization**: Adjust dropout dan L2 regularization
|
8006
|
+
- **Learning Rate**: Coba learning rate scheduling
|
8007
|
+
""")
|
8008
|
+
|
8009
|
+
# Performance tips
|
8010
|
+
if performance_score < 70:
|
8011
|
+
st.warning("""
|
8012
|
+
**⚠️ Area Improvement:**
|
8013
|
+
- Pertimbangkan feature selection yang lebih baik
|
8014
|
+
- Coba model architecture yang lebih dalam/lebar
|
8015
|
+
- Gunakan lebih banyak data training
|
8016
|
+
- Eksperimen dengan different optimizers
|
8017
|
+
""")
|
8018
|
+
else:
|
8019
|
+
st.success("""
|
8020
|
+
**✅ Performa Baik!**
|
8021
|
+
- Model sudah menunjukkan hasil yang promising
|
8022
|
+
- Pertimbangkan deployment untuk penggunaan real-time
|
8023
|
+
- Monitor model performance secara berkala
|
8024
|
+
""")
|
8025
|
+
|
8026
|
+
except Exception as e:
|
8027
|
+
st.error(f"❌ Error dalam DL analysis: {str(e)}")
|
8028
|
+
st.info("""
|
8029
|
+
💡 Tips Troubleshooting:
|
8030
|
+
- Pastikan dataset cukup besar (>100 samples)
|
8031
|
+
- Gunakan mode kecepatan lebih tinggi untuk dataset besar
|
8032
|
+
- Kurangi jumlah features jika memory error
|
8033
|
+
- Pastikan target variable sesuai dengan problem type
|
8034
|
+
- Coba learning rate yang lebih kecil
|
8035
|
+
""")
|
8036
|
+
|
8037
|
+
# Tambahkan fungsi utility jika diperlukan
|
8038
|
+
def validate_tensorflow_installation():
|
8039
|
+
"""Validate TensorFlow installation"""
|
8040
|
+
try:
|
8041
|
+
import tensorflow as tf
|
8042
|
+
version = tf.__version__
|
8043
|
+
gpu_available = tf.config.list_physical_devices('GPU')
|
8044
|
+
return True, version, len(gpu_available) > 0
|
8045
|
+
except ImportError:
|
8046
|
+
return False, None, False
|
8047
|
+
|
8048
|
+
+def model_comparison_analysis(df, numeric_cols, non_numeric_cols):
+    """Comprehensive comparative data analysis, without machine learning models"""
+
+    st.header("📊 Advanced Data Analysis Dashboard")
+
+    # Dataset information
+    st.subheader("📋 Dataset Overview")
+    col1, col2, col3, col4 = st.columns(4)
+    with col1:
+        st.metric("Total Samples", f"{len(df):,}")
+    with col2:
+        st.metric("Features", f"{len(numeric_cols) + len(non_numeric_cols):,}")
+    with col3:
+        st.metric("Numeric", f"{len(numeric_cols):,}")
+    with col4:
+        st.metric("Categorical", f"{len(non_numeric_cols):,}")
+
+    # Configuration section
+    st.subheader("⚙️ Analysis Configuration")
+
+    col1, col2 = st.columns(2)
+
+    with col1:
+        # Target selection for the analysis
+        target_variable = st.selectbox(
+            "dwibaktindev AI",
+            numeric_cols + non_numeric_cols,
+            key="analysis_target"
+        )
+
+        # Analysis type
+        analysis_type = st.selectbox(
+            "Alisa AI",
+            ["Descriptive Statistics", "Correlation Analysis", "Distribution Analysis",
+             "Relationship Analysis", "Comparative Analysis"],
+            key="analysis_type"
+        )
+
+    with col2:
+        # Feature selection
+        available_features = [f for f in numeric_cols + non_numeric_cols if f != target_variable]
+        selected_features = st.multiselect(
+            "Sasha AI",
+            available_features,
+            default=available_features[:min(10, len(available_features))],
+            key="analysis_features"
+        )
+
+        # Sample size for visualization
+        sample_size = st.slider("Sample Size for Visualization", 100, len(df),
+                                min(1000, len(df)), 100, key="sample_size")
+
+    if st.button("🚀 Start Model AI", type="primary", key="start_analysis"):
+        if not target_variable or not selected_features:
+            st.error("❌ Please select target variable and features")
+            return
+
+        try:
+            # Run the analysis that matches the selected type
+            with st.spinner("🔄 Performing analysis..."):
+                if analysis_type == "Descriptive Statistics":
+                    perform_descriptive_analysis(df, target_variable, selected_features)
+
+                elif analysis_type == "Correlation Analysis":
+                    perform_correlation_analysis(df, target_variable, selected_features)
+
+                elif analysis_type == "Distribution Analysis":
+                    perform_distribution_analysis(df, target_variable, selected_features, sample_size)
+
+                elif analysis_type == "Relationship Analysis":
+                    perform_relationship_analysis(df, target_variable, selected_features, sample_size)
+
+                elif analysis_type == "Comparative Analysis":
+                    perform_comparative_analysis(df, target_variable, selected_features)
+
+            st.success("✅ Analysis completed!")
+
+        except Exception as e:
+            st.error(f"❌ Error in data analysis: {str(e)}")
+
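The if/elif chain above maps the selected analysis type onto one handler per option. A table-driven dispatch keeps that mapping in one place if more analyses are added later; a sketch of the hypothetical refactor (all names come from the function above):

```python
# Hypothetical refactor of the dispatch above, not the package's code.
handlers = {
    "Descriptive Statistics": lambda: perform_descriptive_analysis(df, target_variable, selected_features),
    "Correlation Analysis":   lambda: perform_correlation_analysis(df, target_variable, selected_features),
    "Distribution Analysis":  lambda: perform_distribution_analysis(df, target_variable, selected_features, sample_size),
    "Relationship Analysis":  lambda: perform_relationship_analysis(df, target_variable, selected_features, sample_size),
    "Comparative Analysis":   lambda: perform_comparative_analysis(df, target_variable, selected_features),
}
handlers[analysis_type]()  # a KeyError here means a UI option has no handler
```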
+def perform_descriptive_analysis(df, target, features):
+    """Descriptive statistics analysis"""
+    import pandas as pd
+    import numpy as np
+
+    st.subheader("📊 Descriptive Statistics")
+
+    # Statistics for the target variable
+    st.write(f"### Target Variable: `{target}`")
+
+    if pd.api.types.is_numeric_dtype(df[target]):
+        col1, col2, col3, col4 = st.columns(4)
+
+        with col1:
+            st.metric("Mean", f"{df[target].mean():.2f}")
+        with col2:
+            st.metric("Median", f"{df[target].median():.2f}")
+        with col3:
+            st.metric("Std Dev", f"{df[target].std():.2f}")
+        with col4:
+            st.metric("Missing", f"{df[target].isnull().sum()}")
+
+        # Detailed statistics
+        st.dataframe(df[target].describe(), use_container_width=True)
+
+    else:
+        col1, col2, col3 = st.columns(3)
+
+        with col1:
+            st.metric("Unique Values", df[target].nunique())
+        with col2:
+            st.metric("Most Frequent", df[target].mode().iloc[0] if not df[target].mode().empty else "N/A")
+        with col3:
+            st.metric("Missing", f"{df[target].isnull().sum()}")
+
+        # Value counts
+        value_counts = df[target].value_counts()
+        st.write("**Value Distribution:**")
+        st.dataframe(value_counts, use_container_width=True)
+
+    # Statistics for numeric features
+    numeric_features = [f for f in features if pd.api.types.is_numeric_dtype(df[f])]
+    if numeric_features:
+        st.write("### Numeric Features Summary")
+        st.dataframe(df[numeric_features].describe(), use_container_width=True)
+
+    # Statistics for categorical features
+    categorical_features = [f for f in features if not pd.api.types.is_numeric_dtype(df[f])]
+    if categorical_features:
+        st.write("### Categorical Features Summary")
+        for feature in categorical_features:
+            with st.expander(f"`{feature}`"):
+                value_counts = df[feature].value_counts()
+                st.dataframe(value_counts, use_container_width=True)
+
+def perform_correlation_analysis(df, target, features):
+    """Correlation analysis"""
+    import pandas as pd
+    import numpy as np
+    import plotly.express as px
+    import plotly.graph_objects as go
+
+    st.subheader("🔗 Correlation Analysis")
+
+    # Keep only numeric features for the correlation
+    numeric_features = [f for f in features if pd.api.types.is_numeric_dtype(df[f])]
+
+    if pd.api.types.is_numeric_dtype(df[target]):
+        numeric_features.append(target)
+
+    if len(numeric_features) < 2:
+        st.warning("⚠️ Need at least 2 numeric features for correlation analysis")
+        return
+
+    correlation_df = df[numeric_features].corr()
+
+    # Correlation heatmap
+    st.write("### Correlation Heatmap")
+    fig = px.imshow(correlation_df,
+                    title="Feature Correlation Heatmap",
+                    color_continuous_scale="RdBu_r",
+                    aspect="auto")
+    st.plotly_chart(fig, use_container_width=True)
+
+    # Correlation with the target
+    if pd.api.types.is_numeric_dtype(df[target]):
+        st.write("### Correlation with Target")
+        target_corr = correlation_df[target].drop(target).sort_values(ascending=False)
+
+        col1, col2 = st.columns(2)
+
+        with col1:
+            fig = px.bar(x=target_corr.values, y=target_corr.index,
+                         orientation='h',
+                         title=f"Correlation with {target}",
+                         labels={'x': 'Correlation', 'y': 'Feature'})
+            st.plotly_chart(fig, use_container_width=True)
+
+        with col2:
+            # Correlation table
+            st.dataframe(target_corr.round(4), use_container_width=True)
+
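The target-correlation branch above reduces the full heatmap to a single ranked series; outside Streamlit the same computation is two pandas calls. A minimal sketch on synthetic data (the column names here are invented for illustration):

```python
import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
df = pd.DataFrame({
    "x1": rng.normal(size=200),
    "x2": rng.normal(size=200),
})
df["target"] = 2.0 * df["x1"] + rng.normal(scale=0.5, size=200)

# Pearson correlation of every numeric column with the target, ranked
target_corr = df.corr()["target"].drop("target").sort_values(ascending=False)
print(target_corr.round(4))  # x1 ranks far above x2
```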
+def perform_distribution_analysis(df, target, features, sample_size):
+    """Distribution analysis"""
+    import pandas as pd
+    import plotly.express as px
+    import plotly.graph_objects as go
+    from plotly.subplots import make_subplots
+
+    st.subheader("📈 Distribution Analysis")
+
+    # Sample the data for visualization performance
+    # (note: the plots below still draw from the full df)
+    sample_df = df.sample(min(sample_size, len(df)), random_state=42)
+
+    # Distribution of the target variable
+    st.write(f"### Target Variable Distribution: `{target}`")
+
+    if pd.api.types.is_numeric_dtype(df[target]):
+        col1, col2 = st.columns(2)
+
+        with col1:
+            # Histogram
+            fig = px.histogram(df, x=target,
+                               title=f"Distribution of {target}",
+                               nbins=50)
+            st.plotly_chart(fig, use_container_width=True)
+
+        with col2:
+            # Box plot
+            fig = px.box(df, y=target,
+                         title=f"Box Plot of {target}")
+            st.plotly_chart(fig, use_container_width=True)
+    else:
+        # For a categorical variable
+        value_counts = df[target].value_counts()
+        fig = px.pie(values=value_counts.values,
+                     names=value_counts.index,
+                     title=f"Distribution of {target}")
+        st.plotly_chart(fig, use_container_width=True)
+
+    # Distribution of numeric features
+    numeric_features = [f for f in features if pd.api.types.is_numeric_dtype(df[f])]
+    if numeric_features:
+        st.write("### Numeric Features Distribution")
+
+        # Choose which features to display
+        selected_numeric = st.multiselect(
+            "Select numeric features to visualize:",
+            numeric_features,
+            default=numeric_features[:min(3, len(numeric_features))]
+        )
+
+        if selected_numeric:
+            # One histogram per selected feature
+            fig = make_subplots(rows=len(selected_numeric), cols=1,
+                                subplot_titles=selected_numeric)
+
+            for i, feature in enumerate(selected_numeric, 1):
+                fig.add_trace(
+                    go.Histogram(x=df[feature], name=feature, nbinsx=30),
+                    row=i, col=1
+                )
+
+            fig.update_layout(height=300*len(selected_numeric),
+                              title_text="Distribution of Numeric Features")
+            st.plotly_chart(fig, use_container_width=True)
+
+    # Distribution of categorical features
+    categorical_features = [f for f in features if not pd.api.types.is_numeric_dtype(df[f])]
+    if categorical_features:
+        st.write("### Categorical Features Distribution")
+
+        selected_categorical = st.multiselect(
+            "Select categorical features to visualize:",
+            categorical_features,
+            default=categorical_features[:min(2, len(categorical_features))]
+        )
+
+        if selected_categorical:
+            for feature in selected_categorical:
+                value_counts = df[feature].value_counts().head(10)  # top 10 only
+                fig = px.bar(x=value_counts.values, y=value_counts.index,
+                             orientation='h',
+                             title=f"Top 10 Values in {feature}")
+                st.plotly_chart(fig, use_container_width=True)
+
+def perform_relationship_analysis(df, target, features, sample_size):
+    """Analysis of relationships between variables"""
+    import pandas as pd
+    import plotly.express as px
+    import plotly.graph_objects as go
+
+    st.subheader("🔄 Relationship Analysis")
+
+    sample_df = df.sample(min(sample_size, len(df)), random_state=42)
+
+    # Numeric features available for scatter plots
+    numeric_features = [f for f in features if pd.api.types.is_numeric_dtype(df[f])]
+
+    if pd.api.types.is_numeric_dtype(df[target]) and len(numeric_features) >= 1:
+        st.write("### Scatter Plots with Target")
+
+        col1, col2 = st.columns(2)
+
+        with col1:
+            x_feature = st.selectbox("X-axis feature:", numeric_features, key="scatter_x")
+
+        with col2:
+            color_feature = st.selectbox("Color by (optional):",
+                                         [None] + [f for f in features if f != x_feature],
+                                         key="scatter_color")
+
+        if x_feature:
+            fig = px.scatter(sample_df, x=x_feature, y=target,
+                             color=color_feature if color_feature else None,
+                             title=f"{target} vs {x_feature}",
+                             opacity=0.6)
+            st.plotly_chart(fig, use_container_width=True)
+
+    # Pair plot for multiple numeric features
+    if len(numeric_features) >= 2:
+        st.write("### Pairwise Relationships")
+
+        selected_for_pairplot = st.multiselect(
+            "Select features for pair plot:",
+            numeric_features + ([target] if pd.api.types.is_numeric_dtype(df[target]) else []),
+            default=(numeric_features + [target])[:min(4, len(numeric_features) + 1)]
+        )
+
+        if len(selected_for_pairplot) >= 2:
+            fig = px.scatter_matrix(sample_df[selected_for_pairplot],
+                                    dimensions=selected_for_pairplot,
+                                    height=800)
+            st.plotly_chart(fig, use_container_width=True)
+
+    # Categorical vs. numerical relationships
+    categorical_features = [f for f in features if not pd.api.types.is_numeric_dtype(df[f])]
+    if categorical_features and pd.api.types.is_numeric_dtype(df[target]):
+        st.write("### Categorical vs Numerical Analysis")
+
+        cat_feature = st.selectbox("Select categorical feature:", categorical_features)
+        num_feature = st.selectbox("Select numerical feature:",
+                                   [target] + numeric_features)
+
+        if cat_feature and num_feature:
+            col1, col2 = st.columns(2)
+
+            with col1:
+                # Box plot
+                fig = px.box(df, x=cat_feature, y=num_feature,
+                             title=f"{num_feature} by {cat_feature}")
+                st.plotly_chart(fig, use_container_width=True)
+
+            with col2:
+                # Violin plot
+                fig = px.violin(df, x=cat_feature, y=num_feature,
+                                title=f"Distribution of {num_feature} by {cat_feature}")
+                st.plotly_chart(fig, use_container_width=True)
+
+def perform_comparative_analysis(df, target, features):
+    """Comparative analysis"""
+    import pandas as pd
+    import plotly.express as px
+    import plotly.graph_objects as go
+
+    st.subheader("⚖️ Comparative Analysis")
+
+    # Group-by analysis
+    st.write("### Group-wise Analysis")
+
+    group_feature = st.selectbox(
+        "Group by feature:",
+        [None] + [f for f in features if not pd.api.types.is_numeric_dtype(df[f])]
+    )
+
+    if group_feature:
+        if pd.api.types.is_numeric_dtype(df[target]):
+            # Numeric target
+            summary = df.groupby(group_feature)[target].agg(['mean', 'median', 'std', 'count']).round(2)
+            st.dataframe(summary, use_container_width=True)
+
+            # Visualization
+            col1, col2 = st.columns(2)
+
+            with col1:
+                fig = px.bar(summary.reset_index(), x=group_feature, y='mean',
+                             title=f"Average {target} by {group_feature}")
+                st.plotly_chart(fig, use_container_width=True)
+
+            with col2:
+                fig = px.box(df, x=group_feature, y=target,
+                             title=f"Distribution of {target} by {group_feature}")
+                st.plotly_chart(fig, use_container_width=True)
+
+        else:
+            # Categorical target
+            cross_tab = pd.crosstab(df[group_feature], df[target], normalize='index') * 100
+            st.write("**Percentage Distribution:**")
+            st.dataframe(cross_tab.round(2), use_container_width=True)
+
+            # Stacked bar chart
+            fig = px.bar(cross_tab.reset_index(),
+                         x=group_feature,
+                         y=cross_tab.columns.tolist(),
+                         title=f"Distribution of {target} by {group_feature}",
+                         barmode='stack')
+            st.plotly_chart(fig, use_container_width=True)
+
+    # Time series analysis (if any datetime column exists)
+    datetime_columns = df.select_dtypes(include=['datetime64']).columns.tolist()
+    if datetime_columns and pd.api.types.is_numeric_dtype(df[target]):
+        st.write("### Time Series Analysis")
+
+        date_col = st.selectbox("Select date column:", datetime_columns)
+
+        if date_col:
+            # Aggregate over time
+            df_sorted = df.sort_values(date_col)
+
+            # Choose the aggregation frequency
+            freq = st.selectbox("Aggregation frequency:",
+                                ['D', 'W', 'M', 'Q'],
+                                format_func=lambda x: {'D': 'Daily', 'W': 'Weekly',
+                                                       'M': 'Monthly', 'Q': 'Quarterly'}[x])
+
+            time_series = df_sorted.set_index(date_col)[target].resample(freq).mean()
+
+            fig = px.line(time_series.reset_index(),
+                          x=date_col, y=target,
+                          title=f"{target} Over Time")
+            st.plotly_chart(fig, use_container_width=True)
+
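The group-wise branch above leans on `pd.crosstab(..., normalize='index')` and the time-series branch on `resample(freq).mean()`. A standalone sketch of both calls on synthetic data (all names invented; the 'M' alias mirrors the code above, though recent pandas prefers 'ME'):

```python
import numpy as np
import pandas as pd

rng = np.random.default_rng(1)
df = pd.DataFrame({
    "region": rng.choice(["north", "south"], size=120),
    "status": rng.choice(["ok", "late"], size=120),
    "sales":  rng.gamma(2.0, 50.0, size=120),
    "date":   pd.date_range("2024-01-01", periods=120, freq="D"),
})

# Row-normalized crosstab: each region's status mix, in percent
pct = pd.crosstab(df["region"], df["status"], normalize="index") * 100
print(pct.round(1))

# Monthly mean of a numeric column, as in the time-series branch
monthly = df.set_index("date")["sales"].resample("M").mean()
print(monthly.round(2))
```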
+def feature_analysis_dashboard(df, numeric_cols, non_numeric_cols):
+    """Comprehensive feature analysis dashboard, optimized for large datasets"""
+
+    st.header("🔍 Advanced Feature Analysis")
+
+    # Dataset information
+    st.subheader("📊 Dataset Overview")
+    col1, col2, col3 = st.columns(3)
+    with col1:
+        st.metric("Total Features", f"{len(numeric_cols) + len(non_numeric_cols):,}")
+    with col2:
+        st.metric("Numeric Features", f"{len(numeric_cols):,}")
+    with col3:
+        st.metric("Categorical Features", f"{len(non_numeric_cols):,}")
+
+    # Memory optimization
+    if st.checkbox("Optimize Memory Usage", value=True, key="feature_optimize_mem"):
+        df = optimize_memory_usage_feature(df)
+        st.success("✅ Memory usage optimized!")
+
+    # Performance configuration
+    st.subheader("⚡ Performance Configuration")
+
+    col1, col2 = st.columns(2)
+
+    with col1:
+        # Sampling options for large datasets
+        use_sampling = st.checkbox("Use Sampling for Large Dataset", value=len(df) > 10000,
+                                   key="feature_use_sampling")
+
+        if use_sampling:
+            sample_size = st.slider(
+                "Sample Size",
+                min_value=1000,
+                max_value=min(50000, len(df)),
+                value=min(20000, len(df)),
+                step=1000,
+                key="feature_sample_size"
+            )
+            st.info(f"🎯 Using {sample_size} samples from {len(df):,} total records")
+
+        # Processing speed control
+        processing_speed = st.select_slider(
+            "Processing Speed",
+            options=["Fast", "Balanced", "Comprehensive"],
+            value="Balanced",
+            key="feature_processing_speed"
+        )
+
+        # Configure parameters based on speed selection
+        speed_config = {
+            "Fast": {"n_estimators": 50, "n_repeats": 3, "max_features": 20},
+            "Balanced": {"n_estimators": 100, "n_repeats": 5, "max_features": 30},
+            "Comprehensive": {"n_estimators": 200, "n_repeats": 10, "max_features": 50}
+        }
+        config = speed_config[processing_speed]
+
+    with col2:
+        # Advanced options
+        st.write("**Advanced Options:**")
+
+        max_features_display = st.slider(
+            "Max Features to Display",
+            5, 50, 15,
+            key="max_features_display"
+        )
+
+        remove_high_corr = st.checkbox(
+            "Remove Highly Correlated Features",
+            value=True,
+            key="feature_remove_corr"
+        )
+
+        correlation_threshold = st.slider(
+            "Correlation Threshold",
+            0.7, 0.99, 0.9, 0.01,
+            key="feature_corr_threshold"
+        )
+
+        random_state = st.number_input(
+            "Random State",
+            value=42,
+            key="feature_random_state"
+        )
+
+    # Feature importance analysis
+    st.subheader("🎯 Feature Importance Analysis")
+
+    col1, col2 = st.columns(2)
+
+    with col1:
+        # Multiple methods for feature importance
+        importance_method = st.selectbox(
+            "Pilih Feature Importance Method",
+            ["Random Forest", "Permutation Importance", "Mutual Information", "All Methods"],
+            key="feature_importance_method"
+        )
+
+        # Problem type selection
+        problem_type = st.radio(
+            "Problem Type",
+            ["Regression", "Classification", "Auto Detect"],
+            key="feature_problem_type"
+        )
+
+    with col2:
+        target_feature = st.selectbox(
+            "Pilih Target untuk Feature Importance",
+            numeric_cols + non_numeric_cols,
+            key="feature_importance_target"
+        )
+
+        # Feature selection
+        available_features = [f for f in numeric_cols + non_numeric_cols if f != target_feature]
+
+        if len(available_features) > config["max_features"]:
+            st.warning(f"⚠️ Showing first {config['max_features']} features. Use comprehensive mode for more.")
+            available_features = available_features[:config["max_features"]]
+
+        selected_features = st.multiselect(
+            "Pilih Features untuk Analysis",
+            available_features,
+            default=available_features[:min(10, len(available_features))],
+            key="feature_analysis_features"
+        )
+
+    if not target_feature or not selected_features:
+        st.warning("📝 Pilih target feature dan features untuk analysis")
+        return
+
+    # Progress tracking
+    progress_bar = st.progress(0)
+    status_text = st.empty()
+
if st.button("🚀 Hitung Feature Importance", key="feature_importance_button"):
|
8595
|
+
try:
|
8596
|
+
# Apply sampling jika diperlukan
|
8597
|
+
if use_sampling and len(df) > sample_size:
|
8598
|
+
df_analysis = df.sample(n=sample_size, random_state=random_state)
|
8599
|
+
st.info(f"🔬 Analyzing {sample_size:,} sampled records")
|
8600
|
+
else:
|
8601
|
+
df_analysis = df
|
8602
|
+
|
8603
|
+
status_text.text("🔄 Preparing data...")
|
8604
|
+
progress_bar.progress(10)
|
8605
|
+
|
8606
|
+
# Prepare features and target
|
8607
|
+
X = df_analysis[selected_features].copy()
|
8608
|
+
y = df_analysis[target_feature]
|
8609
|
+
|
8610
|
+
# Auto-detect problem type
|
8611
|
+
if problem_type == "Auto Detect":
|
8612
|
+
if target_feature in numeric_cols:
|
8613
|
+
problem_type_detected = "Regression"
|
8614
|
+
else:
|
8615
|
+
problem_type_detected = "Classification"
|
8616
|
+
st.info(f"🔍 Auto-detected: {problem_type_detected}")
|
8617
|
+
else:
|
8618
|
+
problem_type_detected = problem_type
|
8619
|
+
|
8620
|
+
progress_bar.progress(20)
|
8621
|
+
|
8622
|
+
# Preprocessing dengan optimasi
|
8623
|
+
status_text.text("🔧 Preprocessing features...")
|
8624
|
+
X_processed, feature_names = preprocess_features_optimized(
|
8625
|
+
X, numeric_cols, non_numeric_cols, remove_high_corr, correlation_threshold
|
8626
|
+
)
|
8627
|
+
|
8628
|
+
progress_bar.progress(40)
|
8629
|
+
|
8630
|
+
# Encode target variable jika classification
|
8631
|
+
le_target = None
|
8632
|
+
if problem_type_detected == "Classification" and y.dtype == 'object':
|
8633
|
+
le_target = LabelEncoder()
|
8634
|
+
y = le_target.fit_transform(y.astype(str))
|
8635
|
+
st.info(f"🎯 Target encoded: {len(le_target.classes_)} classes")
|
8636
|
+
|
8637
|
+
progress_bar.progress(50)
|
8638
|
+
|
8639
|
+
# Handle missing values
|
8640
|
+
X_processed = handle_missing_values_optimized(X_processed)
|
8641
|
+
|
8642
|
+
progress_bar.progress(60)
|
8643
|
+
|
8644
|
+
# Calculate feature importance berdasarkan method yang dipilih
|
8645
|
+
status_text.text("📊 Calculating feature importance...")
|
8646
|
+
|
8647
|
+
results = {}
|
8648
|
+
|
8649
|
+
if importance_method in ["Random Forest", "All Methods"]:
|
8650
|
+
results["Random Forest"] = calculate_rf_importance(
|
8651
|
+
X_processed, y, problem_type_detected, config, random_state
|
8652
|
+
)
|
8653
|
+
progress_bar.progress(70)
|
8654
|
+
|
8655
|
+
if importance_method in ["Permutation Importance", "All Methods"]:
|
8656
|
+
results["Permutation"] = calculate_permutation_importance(
|
8657
|
+
X_processed, y, problem_type_detected, config, random_state
|
8658
|
+
)
|
8659
|
+
progress_bar.progress(80)
|
8660
|
+
|
8661
|
+
if importance_method in ["Mutual Information", "All Methods"]:
|
8662
|
+
results["Mutual Info"] = calculate_mutual_info(
|
8663
|
+
X_processed, y, problem_type_detected
|
8664
|
+
)
|
8665
|
+
progress_bar.progress(90)
|
8666
|
+
|
8667
|
+
progress_bar.progress(95)
|
8668
|
+
|
8669
|
+
# Display results
|
8670
|
+
status_text.text("📈 Displaying results...")
|
8671
|
+
display_feature_importance_results(
|
8672
|
+
results, feature_names, max_features_display, problem_type_detected
|
8673
|
+
)
|
8674
|
+
|
8675
|
+
progress_bar.progress(100)
|
8676
|
+
status_text.text("✅ Analysis completed!")
|
8677
|
+
|
8678
|
+
# Additional insights
|
8679
|
+
show_feature_analysis_insights(results, X_processed, y, problem_type_detected)
|
8680
|
+
|
8681
|
+
except Exception as e:
|
8682
|
+
st.error(f"❌ Error dalam feature importance analysis: {str(e)}")
|
8683
|
+
st.info("💡 Tips: Coba kurangi jumlah features, gunakan sampling, atau pilih mode 'Fast'")
|
8684
|
+
|
8685
|
+
+def optimize_memory_usage_feature(df):
+    """Optimize memory usage for feature analysis"""
+    start_mem = df.memory_usage(deep=True).sum() / 1024**2
+
+    for col in df.columns:
+        col_type = df[col].dtype
+
+        if col_type == 'object':
+            if df[col].nunique() / len(df) < 0.5:  # only when cardinality is not too high
+                df[col] = df[col].astype('category')
+        elif col_type in ['int64', 'int32']:
+            c_min = df[col].min()
+            c_max = df[col].max()
+            if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(np.int8).max:
+                df[col] = df[col].astype(np.int8)
+            elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(np.int16).max:
+                df[col] = df[col].astype(np.int16)
+            elif c_min > np.iinfo(np.int32).min and c_max < np.iinfo(np.int32).max:
+                df[col] = df[col].astype(np.int32)
+        elif col_type in ['float64', 'float32']:
+            c_min = df[col].min()
+            c_max = df[col].max()
+            if c_min > np.finfo(np.float16).min and c_max < np.finfo(np.float16).max:
+                df[col] = df[col].astype(np.float16)
+            elif c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max:
+                df[col] = df[col].astype(np.float32)
+
+    end_mem = df.memory_usage(deep=True).sum() / 1024**2
+    st.success(f"💾 Memory reduced: {start_mem:.2f}MB → {end_mem:.2f}MB ({((start_mem - end_mem) / start_mem * 100):.1f}% reduction)")
+
+    return df
+
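The downcasting above compares each column's observed min/max against `np.iinfo`/`np.finfo` bounds before shrinking the dtype; `pandas.to_numeric(..., downcast=...)` performs essentially the same check in one call, and it stops at float32 rather than risking the precision loss of the float16 branch above. A quick sketch of the idea and its measurement (toy data, invented names):

```python
import numpy as np
import pandas as pd

df = pd.DataFrame({"count": np.arange(0, 100_000, dtype=np.int64)})
before = df.memory_usage(deep=True).sum()

# Same idea as the manual iinfo bounds check, done by pandas itself
df["count"] = pd.to_numeric(df["count"], downcast="integer")  # -> int32 here

after = df.memory_usage(deep=True).sum()
print(df["count"].dtype, f"{before / 1024:.0f} KiB -> {after / 1024:.0f} KiB")
```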
+def preprocess_features_optimized(X, numeric_cols, non_numeric_cols, remove_high_corr, threshold):
+    """Preprocess features, optimized for large datasets"""
+
+    X_processed = X.copy()
+    feature_names = list(X.columns)
+
+    # Encode categorical features with a method that stays cheap
+    categorical_columns = [col for col in X.columns if col in non_numeric_cols]
+
+    for col in categorical_columns:
+        if X_processed[col].nunique() > 50:  # high-cardinality categoricals
+            # Use frequency encoding
+            freq_map = X_processed[col].value_counts().to_dict()
+            X_processed[col] = X_processed[col].map(freq_map)
+            X_processed[col].fillna(0, inplace=True)
+        else:
+            # Use label encoding
+            le = LabelEncoder()
+            X_processed[col] = le.fit_transform(X_processed[col].astype(str))
+
+    # Remove highly correlated features
+    if remove_high_corr and len(X_processed.columns) > 1:
+        numeric_features = [col for col in X_processed.columns if col in numeric_cols or col in categorical_columns]
+        if len(numeric_features) > 1:
+            X_numeric = X_processed[numeric_features]
+            corr_matrix = X_numeric.corr().abs()
+
+            # Drop one column out of every highly correlated pair
+            upper_triangle = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(bool))
+            to_drop = [column for column in upper_triangle.columns if any(upper_triangle[column] > threshold)]
+
+            if to_drop:
+                X_processed = X_processed.drop(columns=to_drop)
+                feature_names = [f for f in feature_names if f not in to_drop]
+                st.info(f"🗑️ Removed {len(to_drop)} highly correlated features")
+
+    return X_processed, feature_names
+
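Two techniques from the function above, shown standalone: frequency encoding replaces each category by its occurrence count, and the upper-triangle mask ensures each correlated pair is inspected exactly once, so only the second member of the pair is dropped. A toy sketch (names invented):

```python
import numpy as np
import pandas as pd

rng = np.random.default_rng(2)
X = pd.DataFrame({"a": rng.normal(size=100)})
X["b"] = X["a"] * 0.99 + rng.normal(scale=0.01, size=100)  # near-duplicate of "a"
X["city"] = rng.choice(["jakarta", "bandung", "surabaya"], size=100)

# Frequency encoding: category -> how often it occurs
X["city"] = X["city"].map(X["city"].value_counts())

# Upper-triangle trick: corr(a, b) appears only in column "b",
# so only "b" lands in the drop list, never both of the pair
corr = X.corr().abs()
upper = corr.where(np.triu(np.ones(corr.shape), k=1).astype(bool))
to_drop = [c for c in upper.columns if (upper[c] > 0.95).any()]
print(to_drop)  # ['b']
```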
+def handle_missing_values_optimized(X):
+    """Handle missing values with a simple, fast strategy"""
+    X_processed = X.copy()
+
+    for col in X_processed.columns:
+        if X_processed[col].isnull().sum() > 0:
+            if X_processed[col].dtype in ['int8', 'int16', 'int32', 'int64', 'float16', 'float32', 'float64']:
+                # Numeric: use the median (more robust to outliers)
+                X_processed[col].fillna(X_processed[col].median(), inplace=True)
+            else:
+                # Categorical: use the mode
+                if len(X_processed[col].mode()) > 0:
+                    X_processed[col].fillna(X_processed[col].mode()[0], inplace=True)
+                else:
+                    X_processed[col].fillna(0, inplace=True)
+
+    return X_processed
+
+def calculate_rf_importance(X, y, problem_type, config, random_state):
+    """Calculate Random Forest feature importance"""
+    if problem_type == "Regression":
+        model = RandomForestRegressor(
+            n_estimators=config["n_estimators"],
+            random_state=random_state,
+            n_jobs=-1  # parallel processing
+        )
+    else:
+        model = RandomForestClassifier(
+            n_estimators=config["n_estimators"],
+            random_state=random_state,
+            n_jobs=-1
+        )
+
+    model.fit(X, y)
+    importances = model.feature_importances_
+
+    return {
+        'importances': importances,
+        'model': model
+    }
+
+def calculate_permutation_importance(X, y, problem_type, config, random_state):
+    """Calculate permutation importance"""
+    if problem_type == "Regression":
+        model = RandomForestRegressor(
+            n_estimators=config["n_estimators"],
+            random_state=random_state,
+            n_jobs=-1
+        )
+    else:
+        model = RandomForestClassifier(
+            n_estimators=config["n_estimators"],
+            random_state=random_state,
+            n_jobs=-1
+        )
+
+    model.fit(X, y)
+
+    # For large datasets, score on a subsample
+    # (note: y.loc assumes y is a pandas Series; a label-encoded target is an ndarray and has no .loc)
+    if len(X) > 10000:
+        X_subsample = X.sample(n=10000, random_state=random_state)
+        y_subsample = y.loc[X_subsample.index]
+    else:
+        X_subsample = X
+        y_subsample = y
+
+    perm_importance = permutation_importance(
+        model, X_subsample, y_subsample,
+        n_repeats=config["n_repeats"],
+        random_state=random_state,
+        n_jobs=-1  # parallel processing
+    )
+
+    return {
+        'importances': perm_importance.importances_mean,
+        'std': perm_importance.importances_std
+    }
+
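`sklearn.inspection.permutation_importance` measures how much a fitted model's score drops when one column is shuffled, which is why the function above needs a trained model plus data rather than just `feature_importances_`. A minimal end-to-end sketch on synthetic data (names invented):

```python
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.inspection import permutation_importance

rng = np.random.default_rng(3)
X = rng.normal(size=(500, 3))
y = 3.0 * X[:, 0] + rng.normal(scale=0.1, size=500)  # only column 0 matters

model = RandomForestRegressor(n_estimators=50, random_state=0, n_jobs=-1).fit(X, y)
result = permutation_importance(model, X, y, n_repeats=5, random_state=0, n_jobs=-1)

# Mean score drop per shuffled column; column 0 should dominate
print(result.importances_mean.round(3))
```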
+def calculate_mutual_info(X, y, problem_type):
+    """Calculate mutual information"""
+    if problem_type == "Regression":
+        mi = mutual_info_regression(X, y, random_state=42, n_jobs=-1)
+    else:
+        mi = mutual_info_classif(X, y, random_state=42, n_jobs=-1)
+
+    return {
+        'importances': mi
+    }
+
+def display_feature_importance_results(results, feature_names, max_display, problem_type):
+    """Display feature importance results with comprehensive visualizations"""
+
+    st.subheader("📊 Feature Importance Results")
+
+    # Show every method in its own tab
+    tabs = st.tabs(list(results.keys()))
+
+    for tab, (method_name, result) in zip(tabs, results.items()):
+        with tab:
+            importances = result['importances']
+
+            # Create the importance dataframe
+            importance_df = pd.DataFrame({
+                'feature': feature_names,
+                'importance': importances
+            }).sort_values('importance', ascending=False)
+
+            # Display the top features
+            st.write(f"**Top {min(max_display, len(importance_df))} Features - {method_name}**")
+
+            col1, col2 = st.columns([2, 1])
+
+            with col1:
+                # Bar chart
+                fig = px.bar(
+                    importance_df.head(max_display),
+                    x='importance',
+                    y='feature',
+                    title=f"{method_name} Feature Importance",
+                    orientation='h',
+                    color='importance',
+                    color_continuous_scale='viridis'
+                )
+                fig.update_layout(showlegend=False)
+                st.plotly_chart(fig, use_container_width=True)
+
+            with col2:
+                # Table view
+                st.dataframe(
+                    importance_df.head(10)[['feature', 'importance']].round(4),
+                    use_container_width=True
+                )
+
+            # Additional info for permutation importance
+            if method_name == "Permutation" and 'std' in result:
+                st.write("**Permutation Importance with Std Dev:**")
+                perm_df = pd.DataFrame({
+                    'feature': feature_names,
+                    'importance': importances,
+                    'std': result['std']
+                }).sort_values('importance', ascending=False)
+
+                fig = px.bar(
+                    perm_df.head(max_display),
+                    x='importance',
+                    y='feature',
+                    error_x='std',
+                    title="Permutation Importance ± Std Dev",
+                    orientation='h'
+                )
+                st.plotly_chart(fig, use_container_width=True)
+
+def show_feature_analysis_insights(results, X, y, problem_type):
+    """Show additional insights from the feature analysis"""
+
+    st.subheader("💡 Analysis Insights")
+
+    col1, col2 = st.columns(2)
+
+    with col1:
+        st.write("**Dataset Characteristics:**")
+        st.write(f"- Total samples: {len(X):,}")
+        st.write(f"- Total features: {len(X.columns)}")
+        st.write(f"- Problem type: {problem_type}")
+
+        if problem_type == "Classification":
+            st.write(f"- Number of classes: {len(np.unique(y))}")
+        else:
+            st.write(f"- Target range: {y.min():.2f} to {y.max():.2f}")
+
+    with col2:
+        st.write("**Feature Importance Consensus:**")
+
+        # Build a consensus across all methods
+        consensus_scores = {}
+        for method_name, result in results.items():
+            importances = result['importances']
+            for i, feature in enumerate(X.columns):
+                if feature not in consensus_scores:
+                    consensus_scores[feature] = []
+                consensus_scores[feature].append(importances[i])
+
+        # Average score across methods
+        avg_scores = {feature: np.mean(scores) for feature, scores in consensus_scores.items()}
+        top_features = sorted(avg_scores.items(), key=lambda x: x[1], reverse=True)[:5]
+
+        for feature, score in top_features:
+            st.write(f"- {feature}: {score:.4f}")
+
+    # Correlation analysis of the top features
+    if len(results) > 0:
+        st.write("**Top Features Correlation Matrix:**")
+
+        # Take the top 8 features from the first method
+        first_method = list(results.values())[0]
+        top_indices = np.argsort(first_method['importances'])[-8:][::-1]
+        top_features_corr = [X.columns[i] for i in top_indices if i < len(X.columns)]
+
+        if len(top_features_corr) > 1:
+            corr_matrix = X[top_features_corr].corr()
+
+            fig = px.imshow(
+                corr_matrix,
+                text_auto=True,
+                aspect="auto",
+                color_continuous_scale="RdBu_r",
+                title="Correlation Matrix of Top Features"
+            )
+            st.plotly_chart(fig, use_container_width=True)
+

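One caveat about the consensus in `show_feature_analysis_insights`: averaging raw importances mixes incompatible scales (Gini importances sum to 1, permutation importances are score drops, mutual information is in nats). Averaging ranks instead makes the consensus scale-free; a sketch of that hypothetical alternative (not what the package does):

```python
import pandas as pd

# Raw importances from two methods on three features, on different scales
scores = pd.DataFrame({
    "random_forest": [0.70, 0.20, 0.10],  # Gini, sums to 1
    "mutual_info":   [1.90, 0.40, 0.05],  # nats, unbounded
}, index=["f1", "f2", "f3"])

# Rank within each method (1 = most important), then average the ranks
consensus = scores.rank(ascending=False).mean(axis=1).sort_values()
print(consensus)  # f1 comes first regardless of each method's units
```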
# Function to load the data
def load_data(uploaded_file):
@@ -8006,439 +9683,1033 @@ if uploaded_files:
    else:
        df = merge_datasets(datasets, merge_method)

+    try:
+        from stl import mesh
+        import trimesh
+        import os
+    except ImportError:
+        st.warning("Beberapa library 3D tidak terinstall. Install dengan: pip install numpy-stl trimesh plotly")
    REMOVE_BG_API_KEY = "xQH5KznYiupRrywK5yPcjeyi"
    PIXELS_API_KEY = "LH59shPdj1xO0lolnHPsClH23qsnHE4NjkCFBhKEXvR0CbqwkrXbqBnw"
    if df is not None:
-        tab1, tab2, tab3, tab4, tab5, tab6, tab7, tab8, tab9 = st.tabs([
+        tab1, tab2, tab3, tab4, tab5, tab6, tab7, tab8, tab9, tab10 = st.tabs([
            "📊 Statistik",
            "📈 Visualisasi",
            "💾 Data",
            "ℹ️ Informasi",
            "🧮 Kalkulator",
            "🖼️ Vitures",
-            "📍 Flowchart",
+            "📍 Flowchart",
            "📊 Grafik Saham",
-            "🗃️ SQL Style"
+            "🗃️ SQL Style",
+            "🔄 3D Model & Analisis"
        ])
+
+        with tab10:
+            st.header("🔄 Konversi Gambar ke 3D Model dengan Analisis")
+
+            # Upload an image
+            uploaded_file = st.file_uploader("Unggah gambar untuk dikonversi ke 3D",
+                                             type=['png', 'jpg', 'jpeg'],
+                                             key="3d_converter")
+
+            col1, col2 = st.columns(2)
+
+            with col1:
+                if uploaded_file is not None:
+                    # Display the original image
+                    st.subheader("🖼️ Gambar Asli")
+                    st.image(uploaded_file, use_column_width=True)
+
+                    # Image analysis
+                    st.subheader("📊 Analisis Gambar")
+
+                    # Convert to a numpy array for analysis
+                    import numpy as np
+                    from PIL import Image
+
+                    image = Image.open(uploaded_file)
+                    img_array = np.array(image)
+
+                    # Basic image statistics
+                    st.write(f"**Dimensi Gambar:** {img_array.shape}")
+                    st.write(f"**Tipe Data:** {img_array.dtype}")
+                    st.write(f"**Range Nilai:** {img_array.min()} - {img_array.max()}")
+
+                    # Color distribution
+                    if len(img_array.shape) == 3:  # color image
+                        st.write("**Distribusi Warna RGB:**")
+                        colors = ['Red', 'Green', 'Blue']
+                        for i, color in enumerate(colors):
+                            channel_data = img_array[:, :, i]
+                            st.write(f"{color}: Mean={channel_data.mean():.2f}, Std={channel_data.std():.2f}")
+
+            with col2:
+                if uploaded_file is not None:
+                    st.subheader("📈 Chart Analisis")
+
+                    # Build some 3D data from the image
+                    height, width = img_array.shape[0], img_array.shape[1]
+
+                    # Generate 3D surface data from image intensity
+                    if len(img_array.shape) == 3:
+                        gray_img = np.mean(img_array, axis=2)  # convert to grayscale
+                    else:
+                        gray_img = img_array
+
+                    # Downsample for performance
+                    downsample_factor = max(1, gray_img.shape[0] // 50)
+                    gray_img_small = gray_img[::downsample_factor, ::downsample_factor]
+
+                    # Create the 3D surface plot
+                    fig_3d = go.Figure(data=[go.Surface(z=gray_img_small)])
+                    fig_3d.update_layout(
+                        title='3D Surface dari Gambar',
+                        scene=dict(
+                            xaxis_title='X',
+                            yaxis_title='Y',
+                            zaxis_title='Intensitas'
+                        )
+                    )
+                    st.plotly_chart(fig_3d, use_container_width=True)
+
+                    # 2D histogram of intensities
+                    fig_hist = px.histogram(x=gray_img.flatten(),
+                                            title='Distribusi Intensitas Pixel',
+                                            labels={'x': 'Intensitas', 'y': 'Frekuensi'})
+                    st.plotly_chart(fig_hist, use_container_width=True)
+
+            # Additional analysis section
+            if uploaded_file is not None:
+                st.subheader("🔍 Analisis Detail")
+
+                col3, col4 = st.columns(2)
+
+                with col3:
+                    # Edge detection simulation
+                    st.write("**Deteksi Tepi (Simulasi):**")
+
+                    # Simple edge detection using gradients
+                    from scipy import ndimage
+
+                    # Calculate gradients
+                    grad_x = ndimage.sobel(gray_img, axis=0)
+                    grad_y = ndimage.sobel(gray_img, axis=1)
+                    gradient_magnitude = np.hypot(grad_x, grad_y)
+
+                    # Display the edge map
+                    fig_edges = px.imshow(gradient_magnitude,
+                                          title='Peta Tepi',
+                                          color_continuous_scale='gray')
+                    st.plotly_chart(fig_edges, use_container_width=True)
+
+                with col4:
+                    # Statistical summary
+                    st.write("**Ringkasan Statistik:**")
+
+                    stats_data = {
+                        'Metrik': ['Mean', 'Median', 'Std Dev', 'Varians', 'Entropi'],
+                        'Nilai': [
+                            f"{gray_img.mean():.2f}",
+                            f"{np.median(gray_img):.2f}",
+                            f"{gray_img.std():.2f}",
+                            f"{gray_img.var():.2f}",
+                            f"{-np.sum(gray_img * np.log2(gray_img + 1e-8)):.2f}"
+                        ]
+                    }
+
+                    st.dataframe(stats_data, use_container_width=True)
+
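One note on the 'Entropi' row in the summary table above: `-np.sum(gray_img * np.log2(gray_img + 1e-8))` is applied to raw pixel values rather than to a probability distribution, so it is not Shannon entropy in the usual sense. A sketch of histogram-based Shannon entropy for an 8-bit image, if that is what is intended (hypothetical replacement, not the package's code):

```python
import numpy as np

def shannon_entropy(gray_img: np.ndarray) -> float:
    """Shannon entropy (bits/pixel) of an 8-bit grayscale image."""
    counts = np.bincount(gray_img.astype(np.uint8).ravel(), minlength=256)
    p = counts / counts.sum()   # normalize counts into probabilities
    p = p[p > 0]                # 0 * log(0) is defined as 0
    return float(-np.sum(p * np.log2(p)))

# A uniform-noise image approaches the 8-bit maximum of 8 bits/pixel
noise = np.random.default_rng(4).integers(0, 256, size=(64, 64))
print(f"{shannon_entropy(noise):.2f} bits/pixel")
```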
+            # Date selection for analysis
+            analysis_date = st.date_input("Pilih Tanggal Analisis",
+                                          value=datetime.now().date(),
+                                          key="3d_analysis_date")
+
+            st.write(f"**Analisis untuk tanggal:** {analysis_date}")
+
+            # Model conversion options
+            if uploaded_file is not None:
+                st.subheader("⚙️ Opsi Konversi 3D")
+
+                conversion_type = st.selectbox(
+                    "Pilih tipe model 3D:",
+                    ["Surface Mesh", "Point Cloud", "Voxel Grid", "Height Map"]
+                )
+
+                resolution = st.slider("Resolusi Model 3D", 10, 100, 50)
+                height_scale = st.slider("Skala Tinggi 3D", 0.1, 5.0, 1.0)
+
+                if st.button("🚀 Generate Model 3D", type="primary"):
+                    with st.spinner("Membuat model 3D..."):
+                        try:
+                            # Progress bar
+                            progress_bar = st.progress(0)
+
+                            # Convert the image to grayscale and normalize
+                            if len(img_array.shape) == 3:
+                                gray_img = np.mean(img_array, axis=2)
+                            else:
+                                gray_img = img_array
+
+                            # Normalize to 0-1
+                            gray_img_normalized = gray_img.astype(np.float32) / 255.0
+
+                            progress_bar.progress(25)
+
+                            # Downsample the image based on the chosen resolution
+                            downsample = max(1, gray_img_normalized.shape[0] // resolution)
+                            height_map = gray_img_normalized[::downsample, ::downsample]
+
+                            progress_bar.progress(50)
+
+                            # Generate a 3D mesh from the height map
+                            x, y = np.mgrid[0:height_map.shape[0], 0:height_map.shape[1]]
+                            z = height_map * height_scale
+
+                            progress_bar.progress(75)
+
+                            # Create vertices and faces for the mesh
+                            vertices = []
+                            faces = []
+
+                            # Create vertices
+                            for i in range(z.shape[0]):
+                                for j in range(z.shape[1]):
+                                    vertices.append([i, j, z[i, j]])
+
+                            # Create faces
+                            for i in range(z.shape[0]-1):
+                                for j in range(z.shape[1]-1):
+                                    # Two triangles per quad
+                                    v1 = i * z.shape[1] + j
+                                    v2 = v1 + 1
+                                    v3 = (i + 1) * z.shape[1] + j
+                                    v4 = v3 + 1
+
+                                    # First triangle
+                                    faces.append([v1, v2, v3])
+                                    # Second triangle
+                                    faces.append([v2, v4, v3])
+
+                            progress_bar.progress(90)
+
+                            # Convert to numpy arrays
+                            vertices = np.array(vertices)
+                            faces = np.array(faces)
+
+                            # Create the STL mesh
+                            from stl import mesh
+
+                            # Create the mesh object
+                            stl_mesh = mesh.Mesh(np.zeros(faces.shape[0], dtype=mesh.Mesh.dtype))
+
+                            # Assign vertices to the mesh
+                            for i, face in enumerate(faces):
+                                for j in range(3):
+                                    stl_mesh.vectors[i][j] = vertices[face[j]]
+
+                            progress_bar.progress(100)
+
+                            # Save the STL to a temporary file
+                            import tempfile
+                            import os
+
+                            with tempfile.NamedTemporaryFile(delete=False, suffix='.stl') as tmp_file:
+                                stl_mesh.save(tmp_file.name)
+
+                            # Read the file data for download
+                            with open(tmp_file.name, 'rb') as f:
+                                stl_data = f.read()
+
+                            # Clean up the temporary file
+                            os.unlink(tmp_file.name)
+
+                            st.success("✅ Model 3D berhasil dibuat!")
+
+                            # Display results
+                            st.info(f"**Model 3D tipe:** {conversion_type}")
+                            st.info(f"**Resolusi:** {resolution}")
+                            st.info(f"**Dimensi Mesh:** {len(vertices)} vertices, {len(faces)} faces")
+                            st.info(f"**Skala Tinggi:** {height_scale}")
+
+                            # Download button for the 3D model
+                            st.download_button(
+                                label="📥 Download Model 3D (STL)",
+                                data=stl_data,
+                                file_name=f"3d_model_{uploaded_file.name.split('.')[0]}.stl",
+                                mime="application/octet-stream"
+                            )
+
+                            # Display mesh information
+                            col5, col6 = st.columns(2)
+
+                            with col5:
+                                st.write("**Informasi Mesh:**")
+                                mesh_info = {
+                                    'Parameter': ['Jumlah Vertex', 'Jumlah Face', 'Dimensi X', 'Dimensi Y', 'Tinggi Maks'],
+                                    'Nilai': [
+                                        len(vertices),
+                                        len(faces),
+                                        f"{z.shape[0]} points",
+                                        f"{z.shape[1]} points",
+                                        f"{z.max():.3f}"
+                                    ]
+                                }
+                                st.dataframe(mesh_info)
+
+                            with col6:
+                                # Display a 3D preview using plotly
+                                st.write("**Preview 3D:**")
+
+                                # Create a simplified point set for the preview
+                                preview_downsample = max(1, len(vertices) // 1000)
+                                preview_vertices = vertices[::preview_downsample]
+
+                                fig_3d_preview = go.Figure(data=[go.Mesh3d(
+                                    x=preview_vertices[:, 0],
+                                    y=preview_vertices[:, 1],
+                                    z=preview_vertices[:, 2],
+                                    opacity=0.7,
+                                    color='lightblue'
+                                )])
+
+                                fig_3d_preview.update_layout(
+                                    title='Preview Model 3D',
+                                    scene=dict(
+                                        xaxis_title='X',
+                                        yaxis_title='Y',
+                                        zaxis_title='Z'
+                                    )
+                                )
+
+                                st.plotly_chart(fig_3d_preview, use_container_width=True)
+
+                        except Exception as e:
+                            st.error(f"❌ Error dalam membuat model 3D: {str(e)}")
+                            st.info("Pastikan library numpy-stl dan trimesh terinstall: `pip install numpy-stl trimesh`")
+
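The mesh construction above is a standard height-map triangulation: one vertex per pixel, two triangles per pixel quad, with the triangle corners copied into `numpy-stl`'s `Mesh.vectors`. A condensed, self-contained sketch of the same technique (synthetic height map standing in for the normalized grayscale image; same numpy-stl API as used above):

```python
import numpy as np
from stl import mesh  # numpy-stl

# Synthetic 20x20 height map in place of the normalized image
h = np.abs(np.sin(np.linspace(0, np.pi, 20)))[:, None] * np.ones((20, 20))

rows, cols = h.shape
verts = np.array([[i, j, h[i, j]] for i in range(rows) for j in range(cols)])
faces = []
for i in range(rows - 1):
    for j in range(cols - 1):
        v1 = i * cols + j              # top-left corner of this quad
        v2, v3 = v1 + 1, v1 + cols     # top-right and bottom-left corners
        faces.extend([[v1, v2, v3], [v2, v3 + 1, v3]])  # two triangles per quad

m = mesh.Mesh(np.zeros(len(faces), dtype=mesh.Mesh.dtype))
for k, f in enumerate(faces):
    m.vectors[k] = verts[f]            # copy the 3 corner vertices of triangle k
m.save("height_map.stl")
```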

    with tab9:
|
8025
|
-
|
8026
|
-
|
8027
|
-
|
8028
|
-
|
9993
|
+
st.header("📁 Upload File & Analisis Lengkap Database SQL")
|
9994
|
+
with st.expander("📜 Keterangan Dalam Statistik Dan Analisis", expanded=False):
|
9995
|
+
st.markdown(
|
9996
|
+
"""
|
9997
|
+
<img src="https://media.finebi.com/strapi/Annual_Sales_Summary_59110fda60.jpg" class="responsive-img">
|
9998
|
+
""",
|
9999
|
+
unsafe_allow_html=True
|
10000
|
+
)
|
10001
|
+
st.markdown("""
|
10002
|
+
|
10003
|
+
### 🚀 Keterangan Lengkap Dalam Analisis Dan Statistik Pada SQL Style
|
10004
|
+
- Akankah Hal Gila Dapat Terjadi Dan Ini lah yang Mungkin Menjadi Kenyataan Pada SQL Style?
|
10005
|
+
- Dengan adanya fitur analisis data pada SQL Style, kini Anda dapat dengan mudah mengunggah file CSV atau Excel berisi data dari database SQL Anda untuk dianalisis secara menyeluruh.
|
10006
|
+
- Fitur ini dirancang untuk memberikan wawasan mendalam tentang struktur data Anda, termasuk deteksi kolom tanggal, analisis statistik dasar, dan visualisasi data yang informatif.
|
10007
|
+
- Setelah mengunggah file, SQL Style akan secara otomatis mendeteksi kolom tanggal dan melakukan analisis mendalam terhadap data tersebut.
|
10008
|
+
- Anda akan mendapatkan statistik dasar seperti jumlah baris dan kolom, nilai unik, serta informasi tentang missing values.
|
10009
|
+
- Selain itu, fitur visualisasi data akan membantu Anda memahami distribusi data, tren waktu, dan pola musiman dengan grafik yang mudah dipahami.
|
10010
|
+
- Fitur ini sangat berguna bagi para analis data, pengembang database, dan siapa saja yang ingin mendapatkan pemahaman lebih baik tentang data mereka.
|
10011
|
+
- Kami terus berupaya untuk meningkatkan fitur ini agar dapat memberikan pengalaman analisis data yang lebih baik dan lebih komprehensif.
|
10012
|
+
- dan kami akan segera update SQL Style ini agar lebih baik lagi kedepannya.
|
10013
|
+
- Terima kasih atas pengertian dan dukungannya.
|
10014
|
+
""")
|
10015
|
+
|
10016
|
+
# Upload file
|
10017
|
+
uploaded_file = st.file_uploader(
|
10018
|
+
"Pilih file CSV atau Excel",
|
10019
|
+
type=['csv', 'xlsx', 'xls'],
|
10020
|
+
help="Upload file data untuk dianalisis"
|
10021
|
+
)
|
10022
|
+
|
10023
|
+
if uploaded_file is not None:
|
10024
|
+
try:
|
10025
|
+
# Baca file berdasarkan tipe
|
10026
|
+
if uploaded_file.name.endswith('.csv'):
|
10027
|
+
df = pd.read_csv(uploaded_file)
|
10028
|
+
else:
|
10029
|
+
df = pd.read_excel(uploaded_file)
|
10030
|
+
|
10031
|
+
# Clean dataframe - handle mixed types and object dtypes
|
10032
|
+
def clean_dataframe(df):
|
10033
|
+
df_clean = df.copy()
|
10034
|
+
|
10035
|
+
# Convert object columns to appropriate types
|
10036
|
+
for col in df_clean.columns:
|
10037
|
+
# Skip if column is already numeric or datetime
|
10038
|
+
if pd.api.types.is_numeric_dtype(df_clean[col]):
|
10039
|
+
continue
|
10040
|
+
if pd.api.types.is_datetime64_any_dtype(df_clean[col]):
|
10041
|
+
continue
|
10042
|
+
|
10043
|
+
# Try to convert to numeric first
|
10044
|
+
try:
|
10045
|
+
df_clean[col] = pd.to_numeric(df_clean[col], errors='ignore')
|
10046
|
+
except:
|
10047
|
+
pass
|
10048
|
+
|
10049
|
+
# If still object, try to convert to datetime
|
10050
|
+
if df_clean[col].dtype == 'object':
|
10051
|
+
try:
|
10052
|
+
df_clean[col] = pd.to_datetime(df_clean[col], errors='ignore')
|
10053
|
+
except:
|
10054
|
+
pass
|
10055
|
+
|
10056
|
+
# Handle ObjectDType specifically
|
10057
|
+
if hasattr(df_clean[col].dtype, 'name') and df_clean[col].dtype.name == 'object':
|
10058
|
+
# Convert to string to avoid ObjectDType issues
|
10059
|
+
df_clean[col] = df_clean[col].astype(str)
|
10060
|
+
|
10061
|
+
return df_clean
|
8029
10062
|
|
8030
|
-
|
8031
|
-
|
8032
|
-
|
8033
|
-
|
8034
|
-
|
8035
|
-
|
8036
|
-
|
8037
|
-
|
8038
|
-
|
8039
|
-
|
8040
|
-
|
8041
|
-
|
8042
|
-
|
8043
|
-
|
8044
|
-
|
8045
|
-
|
8046
|
-
|
8047
|
-
|
8048
|
-
|
8049
|
-
|
10063
|
+
df = clean_dataframe(df)
|
10064
|
+
|
10065
|
+
st.success(f"File berhasil diupload! Shape: {df.shape}")
|
10066
|
+
|
10067
|
+
# Tampilkan preview data
|
10068
|
+
st.subheader("📋 Preview Data")
|
10069
|
+
st.dataframe(df.head())
|
10070
|
+
|
10071
|
+
# Informasi dasar dataset
|
10072
|
+
st.subheader("📊 Informasi Dataset")
|
10073
|
+
col1, col2, col3, col4 = st.columns(4)
|
10074
|
+
|
10075
|
+
with col1:
|
10076
|
+
st.metric("Jumlah Baris", df.shape[0])
|
10077
|
+
with col2:
|
10078
|
+
st.metric("Jumlah Kolom", df.shape[1])
|
10079
|
+
with col3:
|
10080
|
+
st.metric("Missing Values", df.isnull().sum().sum())
|
10081
|
+
with col4:
|
10082
|
+
st.metric("Duplikat", df.duplicated().sum())
|
10083
|
+
|
10084
|
+
# --- ANALISIS STRUKTUR DATA UNTUK ERD DINAMIS ---
|
10085
|
+
st.subheader("🔍 Analisis Struktur Data untuk ERD")
|
10086
|
+
|
10087
|
+
# Fungsi untuk deteksi tipe data yang aman
|
10088
|
+
def safe_dtype_detection(df):
|
10089
|
+
numeric_cols = []
|
10090
|
+
categorical_cols = []
|
10091
|
+
date_cols = []
|
10092
|
+
bool_cols = []
|
10093
|
+
other_cols = []
|
8050
10094
|
|
8051
|
-
|
10095
|
+
for col in df.columns:
|
10096
|
+
col_dtype = str(df[col].dtype)
|
10097
|
+
|
10098
|
+
# Check numeric
|
10099
|
+
if pd.api.types.is_numeric_dtype(df[col]):
|
10100
|
+
numeric_cols.append(col)
|
10101
|
+
# Check datetime
|
10102
|
+
elif pd.api.types.is_datetime64_any_dtype(df[col]):
|
10103
|
+
date_cols.append(col)
|
10104
|
+
# Check boolean
|
10105
|
+
elif pd.api.types.is_bool_dtype(df[col]):
|
10106
|
+
bool_cols.append(col)
|
10107
|
+
# Check categorical (object but limited unique values)
|
10108
|
+
elif df[col].dtype == 'object':
|
10109
|
+
if df[col].nunique() <= 50: # Consider as categorical if <= 50 unique values
|
10110
|
+
categorical_cols.append(col)
|
10111
|
+
else:
|
10112
|
+
other_cols.append(col)
|
10113
|
+
else:
|
10114
|
+
other_cols.append(col)
|
8052
10115
|
|
8053
|
-
|
8054
|
-
|
8055
|
-
|
10116
|
+
return numeric_cols, categorical_cols, date_cols, bool_cols, other_cols
|
10117
|
+
|
10118
|
+
numeric_cols, categorical_cols, date_cols, bool_cols, other_cols = safe_dtype_detection(df)
|
10119
|
+
|
+    # More robust column-analysis helper
+    def robust_column_analysis(df):
+        column_analysis = {}
 
-
-
-
+        for col in df.columns:
+            try:
+                col_data = df[col]
+
+                # Handle ObjectDType and other problematic types
+                if hasattr(col_data.dtype, 'name') and col_data.dtype.name == 'object':
+                    # Convert to string for analysis
+                    col_data = col_data.astype(str)
+
+                analysis = {
+                    'dtype': str(col_data.dtype),
+                    'unique_count': col_data.nunique(),
+                    'null_count': col_data.isnull().sum(),
+                    'null_percentage': (col_data.isnull().sum() / len(col_data)) * 100,
+                    'sample_values': col_data.dropna().head(3).tolist() if not col_data.empty else []
+                }
+
+                # Safe sample values conversion
+                safe_samples = []
+                for val in analysis['sample_values']:
+                    try:
+                        safe_samples.append(str(val))
+                    except:
+                        safe_samples.append('N/A')
+                analysis['sample_values'] = safe_samples
+
+                # Detect the column's role for the ERD
+                col_lower = str(col).lower()
+
+                # Primary Key detection
+                if (analysis['unique_count'] == len(col_data) and
+                    analysis['null_count'] == 0 and
+                    any(keyword in col_lower for keyword in ['id', 'pk', 'key', 'code'])):
+                    analysis['role'] = 'PRIMARY_KEY'
+                    analysis['icon'] = '🔑'
+
+                # Foreign Key detection
+                elif (any(keyword in col_lower for keyword in ['id', 'fk', 'ref', 'code']) and
+                      analysis['unique_count'] < len(col_data) * 0.8):
+                    analysis['role'] = 'FOREIGN_KEY'
+                    analysis['icon'] = '🔗'
+
+                # Measurement columns
+                elif any(keyword in col_lower for keyword in ['amount', 'price', 'value', 'total', 'sum', 'avg', 'quantity']):
+                    analysis['role'] = 'MEASUREMENT'
+                    analysis['icon'] = '💰'
+
+                # Date/Time columns
+                elif any(keyword in col_lower for keyword in ['date', 'time', 'year', 'month', 'day']):
+                    analysis['role'] = 'TEMPORAL'
+                    analysis['icon'] = '📅'
+
+                # Category columns
+                elif (analysis['unique_count'] <= 20 and
+                      analysis['unique_count'] > 1 and
+                      str(col_data.dtype) == 'object'):
+                    analysis['role'] = 'CATEGORY'
+                    analysis['icon'] = '🏷️'
+
+                # Description columns
+                elif (str(col_data.dtype) == 'object' and
+                      col_data.astype(str).str.len().mean() > 20):
+                    analysis['role'] = 'DESCRIPTION'
+                    analysis['icon'] = '📝'
+
+                # Numeric metrics
+                elif pd.api.types.is_numeric_dtype(col_data):
+                    analysis['role'] = 'METRIC'
+                    analysis['icon'] = '📊'
+
+                else:
+                    analysis['role'] = 'ATTRIBUTE'
+                    analysis['icon'] = '📄'
+
+                column_analysis[col] = analysis
+
+            except Exception as e:
+                # Fallback analysis for problematic columns
+                column_analysis[col] = {
+                    'dtype': 'unknown',
+                    'role': 'ATTRIBUTE',
+                    'icon': '❓',
+                    'unique_count': 0,
+                    'null_count': len(df[col]),
+                    'null_percentage': 100.0,
+                    'sample_values': ['Error in analysis']
+                }
 
-
-
-
-
-
-
+        return column_analysis
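The role detection above is purely name- and cardinality-based: a column is a primary key only if it is fully unique, has no nulls, and its name contains a key-like token. A compact runnable sketch of just those two heuristics, with hypothetical column names (keyword lists and the 0.8 threshold copied from the diff):

```python
import pandas as pd

def infer_key_role(s: pd.Series, name: str) -> str:
    low = name.lower()
    # Primary key: fully unique, no nulls, key-like name
    if (s.nunique() == len(s) and s.notna().all()
            and any(k in low for k in ('id', 'pk', 'key', 'code'))):
        return 'PRIMARY_KEY'
    # Foreign key: key-like name but repeated values
    if (any(k in low for k in ('id', 'fk', 'ref', 'code'))
            and s.nunique() < len(s) * 0.8):
        return 'FOREIGN_KEY'
    return 'ATTRIBUTE'

orders = pd.DataFrame({"order_id": [1, 2, 3, 4], "customer_id": [7, 7, 8, 8]})
print(infer_key_role(orders["order_id"], "order_id"))        # PRIMARY_KEY
print(infer_key_role(orders["customer_id"], "customer_id"))  # FOREIGN_KEY
```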
+
+    # Run the column analysis
+    column_analysis = robust_column_analysis(df)
+
+    # Display the column analysis
+    st.write("**Analisis Detail Kolom:**")
+    analysis_data = []
+    for col, analysis in column_analysis.items():
+        analysis_data.append({
+            'Kolom': col,
+            'Tipe': analysis['dtype'],
+            'Role': analysis['role'],
+            'Icon': analysis['icon'],
+            'Unique': analysis['unique_count'],
+            'Null %': f"{analysis['null_percentage']:.1f}%"
+        })
+
+    analysis_df = pd.DataFrame(analysis_data)
+    st.dataframe(analysis_df, use_container_width=True)
+
+    # --- MORE ACCURATE DYNAMIC ERD ---
+    st.subheader("🗄️ Entity Relationship Diagram (ERD) Dinamis")
+
+    # ERD configuration
+    col1, col2, col3 = st.columns(3)
+
+    with col1:
+        erd_style = st.selectbox(
+            "Style ERD:",
+            ['Vertical', 'Horizontal', 'Circular'],
+            index=0
+        )
+
+    with col2:
+        show_relationships = st.checkbox("Tampilkan Relasi", value=True)
+
+    with col3:
+        max_tables = st.slider("Max Tabel", 3, 15, 8)
+
+    try:
+        import graphviz
+
+        # Build the ERD graph
+        dot = graphviz.Digraph(comment='Dynamic Database ERD')
+
+        # Configure the layout
+        if erd_style == 'Vertical':
+            dot.attr(rankdir='TB', size='12,16')
+        elif erd_style == 'Horizontal':
+            dot.attr(rankdir='LR', size='16,12')
+        else:  # Circular
+            dot.attr(rankdir='LR', size='14,14', layout='circo')
+
+        # Group columns by role to build the tables
+        main_table_cols = []
+        reference_tables = {}
+
+        for col, analysis in column_analysis.items():
+            if analysis['role'] == 'FOREIGN_KEY':
+                # Create a reference table for the foreign key
+                ref_table_name = f"ref_{col}"
+                if ref_table_name not in reference_tables:
+                    ref_display_name = col.replace('_id', '').replace('ID', '').replace('_', ' ').title()
+                    reference_tables[ref_table_name] = {
+                        'name': ref_display_name,
+                        'columns': []
+                    }
+                reference_tables[ref_table_name]['columns'].append(col)
+            else:
+                main_table_cols.append((col, analysis))
 
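The grouping loop above turns every detected FOREIGN_KEY column into its own one-column reference table, deriving a display name from the column name. A toy sketch of that bookkeeping (hypothetical roles; the real code also strips an uppercase `ID` suffix):

```python
roles = {
    "order_id": "PRIMARY_KEY",
    "customer_id": "FOREIGN_KEY",
    "product_id": "FOREIGN_KEY",
    "amount": "MEASUREMENT",
}

reference_tables, main_table_cols = {}, []
for col, role in roles.items():
    if role == "FOREIGN_KEY":
        # 'customer_id' -> 'Customer'
        name = col.replace("_id", "").replace("_", " ").title()
        ref = reference_tables.setdefault(f"ref_{col}", {"name": name, "columns": []})
        ref["columns"].append(col)
    else:
        main_table_cols.append(col)

print(sorted(reference_tables))                      # ['ref_customer_id', 'ref_product_id']
print(reference_tables["ref_customer_id"]["name"])   # Customer
```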
-    #
-
-
-    #
-
-    numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
-    categorical_cols = df.select_dtypes(include=['object']).columns.tolist()
-
-    col1, col2 = st.columns(2)
-
-    with col1:
-        st.write("**Kolom Numerik:**")
-        df_numeric = pd.DataFrame({"Nama Kolom": numeric_cols})
-        st.table(df_numeric)
-
-    with col2:
-        st.write("**Kolom Kategorikal:**")
-        df_categorical = pd.DataFrame({"Nama Kolom": categorical_cols})
-        st.table(df_categorical)
-
-    # --- The actual ERD visualization ---
-    st.write("---")
-    st.subheader("🗄️ Entity Relationship Diagram Visualization")
-
-    # Build the entity and relationship structure
-    st.write("**Struktur Tabel Database:**")
-
-    # Generate SQL CREATE TABLE statements
-    st.markdown("### 📝 SQL Schema Definition")
-
-    # Build the ERD diagram with graphviz
-    try:
-        import graphviz
-
-        # Build the graph for the ERD
-        dot = graphviz.Digraph(comment='Database ERD')
-        dot.attr(rankdir='TB', size='8,8')
-
-        # Create the node for the main table
+        # Limit the number of tables shown
+        tables_to_show = min(max_tables, len(reference_tables) + 1)
+
+        # Build the main table
+        if main_table_cols and tables_to_show > 0:
             with dot.subgraph(name='cluster_main') as c:
-
-
-
-                c.node('table_header', f'📊 dataset_table', shape='plaintext', fontsize='14', fontname='Arial bold')
+                table_name = uploaded_file.name.split('.')[0]  # Remove extension
+                c.attr(label=f'📊 {table_name}', style='filled',
+                       color='lightblue', fontsize='14', fontname='Arial Bold')
 
-                # Fields in the table
                 fields = []
+                for col, analysis in main_table_cols[:12]:  # Limit columns per table
+                    field_type = ""
+                    if pd.api.types.is_numeric_dtype(df[col]):
+                        field_type = "NUMERIC"
+                    elif pd.api.types.is_datetime64_any_dtype(df[col]):
+                        field_type = "DATETIME"
+                    elif df[col].dtype == 'object':
+                        try:
+                            max_len = df[col].astype(str).str.len().max()
+                            field_type = f"VARCHAR({min(255, max(50, int(max_len)))})"
+                        except:
+                            field_type = "TEXT"
+                    elif df[col].dtype == 'bool':
+                        field_type = "BOOLEAN"
+                    else:
+                        field_type = "TEXT"
+
+                    constraint = ""
+                    if analysis['role'] == 'PRIMARY_KEY':
+                        constraint = " [PK]"
+                    elif analysis['role'] == 'FOREIGN_KEY':
+                        constraint = " [FK]"
+
+                    fields.append(f"<TR><TD ALIGN='LEFT'>{analysis['icon']} {col}</TD><TD ALIGN='LEFT'>{field_type}{constraint}</TD></TR>")
+
+                # Add an indicator if some columns are not shown
+                if len(main_table_cols) > 12:
+                    fields.append(f"<TR><TD ALIGN='LEFT'>...</TD><TD ALIGN='LEFT'>+{len(main_table_cols)-12} more</TD></TR>")
 
-
-
-
-
+                table_html = f'''<
+                <TABLE BORDER="1" CELLBORDER="0" CELLSPACING="0" CELLPADDING="4">
+                <TR><TD ALIGN="CENTER" BGCOLOR="#e6f3ff"><B>COLUMN</B></TD><TD ALIGN="CENTER" BGCOLOR="#e6f3ff"><B>TYPE</B></TD></TR>
+                {''.join(fields)}
+                </TABLE>
+                >'''
 
-
-
-
-
+                c.node('main_table', table_html, shape='none', fontname='Arial')
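The `f'''< ... >'''` strings above are Graphviz HTML-like labels: a label wrapped in angle brackets plus `shape='none'` makes Graphviz draw the node as a table. A minimal sketch, assuming only the `graphviz` Python package:

```python
import graphviz

dot = graphviz.Digraph()
# Angle brackets around the label switch Graphviz to HTML-like label parsing.
label = '''<
<TABLE BORDER="1" CELLBORDER="0" CELLSPACING="0" CELLPADDING="4">
<TR><TD BGCOLOR="#e6f3ff"><B>COLUMN</B></TD><TD BGCOLOR="#e6f3ff"><B>TYPE</B></TD></TR>
<TR><TD ALIGN="LEFT">order_id</TD><TD ALIGN="LEFT">NUMERIC [PK]</TD></TR>
</TABLE>
>'''
dot.node('orders', label, shape='none')
print(dot.source)  # dot.render('erd') would draw it if the Graphviz binaries are installed
```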
+
+        # Build the reference tables
+        colors = ['#e6ffe6', '#fff0e6', '#e6f9ff', '#ffe6ff', '#ffffe6', '#f0e6ff']
+        for i, (ref_name, ref_info) in enumerate(list(reference_tables.items())[:tables_to_show-1]):
+            color = colors[i % len(colors)]
+            with dot.subgraph(name=f'cluster_{ref_name}') as c:
+                c.attr(label=f'📁 {ref_info["name"]}', style='filled',
+                       color=color, fontsize='12', fontname='Arial')
 
-
-
-
+                fields = []
+                # Primary key for the reference table
+                for fk_col in ref_info['columns']:
+                    fields.append(f"<TR><TD ALIGN='LEFT'><B>🔑 {fk_col}</B></TD><TD ALIGN='LEFT'>[PK]</TD></TR>")
 
-    #
-
-
-
+                # Add generic columns to the reference table
+                fields.append(f"<TR><TD ALIGN='LEFT'>📝 Name</TD><TD ALIGN='LEFT'>VARCHAR(100)</TD></TR>")
+                fields.append(f"<TR><TD ALIGN='LEFT'>📝 Description</TD><TD ALIGN='LEFT'>VARCHAR(255)</TD></TR>")
+                fields.append(f"<TR><TD ALIGN='LEFT'>📅 Created_Date</TD><TD ALIGN='LEFT'>DATETIME</TD></TR>")
+                fields.append(f"<TR><TD ALIGN='LEFT'>✅ Is_Active</TD><TD ALIGN='LEFT'>BOOLEAN</TD></TR>")
 
                 table_html = f'''<
-                <TABLE BORDER="1" CELLBORDER="0" CELLSPACING="0" CELLPADDING="
-                <TR><TD ALIGN="CENTER"><B>COLUMN</B></TD><TD ALIGN="CENTER"><B>TYPE</B></TD></TR>
+                <TABLE BORDER="1" CELLBORDER="0" CELLSPACING="0" CELLPADDING="3">
+                <TR><TD ALIGN="CENTER" BGCOLOR="{color}"><B>COLUMN</B></TD><TD ALIGN="CENTER" BGCOLOR="{color}"><B>TYPE</B></TD></TR>
                 {''.join(fields)}
                 </TABLE>
                 >'''
 
-                c.node(
-
-                # Render the graph
-                st.graphviz_chart(dot)
-
-    except ImportError:
-        st.warning("Graphviz tidak terinstall. Menggunakan visualisasi alternatif...")
+                c.node(ref_name, table_html, shape='none', fontname='Arial')
 
-    #
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            # Add the relationships
+            if show_relationships:
+                for fk_col in ref_info['columns']:
+                    dot.edge(ref_name, 'main_table', label='1:N', style='dashed', color='#666666')
+
+        # Render the ERD
+        st.graphviz_chart(dot)
+
+        # Legend
+        st.markdown("""
+        **📋 Legenda ERD:**
+        - 🔑 Primary Key | 🔗 Foreign Key | 📊 Metric | 💰 Measurement
+        - 📅 Temporal | 🏷️ Category | 📝 Description | 📄 Attribute
+        - **Warna berbeda**: Tabel yang berbeda domain
+        """)
+
+    except ImportError:
+        st.warning("Graphviz tidak terinstall. Menggunakan visualisasi alternatif...")
+
+        # Simpler fallback visualization
+        import plotly.graph_objects as go
+
+        # Compute the node positions dynamically
+        num_tables = min(8, len(reference_tables) + 1)
+        angles = np.linspace(0, 2*np.pi, num_tables, endpoint=False)
+        radius = 0.4
+
+        fig = go.Figure()
+
+        # Node positions
+        node_x = [0.5]  # Main table at the center
+        node_y = [0.5]
+        node_text = ["MAIN"]
+        node_colors = ['#3366CC']
+
+        # Reference tables around it
+        for i, (ref_name, ref_info) in enumerate(list(reference_tables.items())[:num_tables-1]):
+            angle = angles[i]
+            x = 0.5 + radius * np.cos(angle)
+            y = 0.5 + radius * np.sin(angle)
 
-
+            node_x.append(x)
+            node_y.append(y)
+            node_text.append(ref_info['name'][:10])
+            node_colors.append(colors[i % len(colors)])
+
+        # Add nodes
+        fig.add_trace(go.Scatter(
+            x=node_x, y=node_y,
+            mode='markers+text',
+            marker=dict(size=80, color=node_colors),
+            text=node_text,
+            textposition="middle center",
+            textfont=dict(size=12, color='white'),
+            name="Tables"
+        ))
+
+        # Add relationships
+        if show_relationships and len(node_x) > 1:
            for i in range(1, len(node_x)):
                fig.add_trace(go.Scatter(
-                    x=[node_x[
-                    y=[node_y[0], node_y[i]],
+                    x=[node_x[i], node_x[0]], y=[node_y[i], node_y[0]],
                    mode='lines',
-                    line=dict(width=2, color='gray'),
-                    hoverinfo='none'
+                    line=dict(width=2, color='gray', dash='dash'),
+                    hoverinfo='none',
+                    showlegend=False
                ))
-
-
-
-
-
-
-
-
-
-
-
-
-    # --- SQL connector section ---
-    st.write("---")
-    st.subheader("🧩 Format SQL (Comma Separated)")
-
-    numeric_sql = ", ".join(numeric_cols)
-    categorical_sql = ", ".join(categorical_cols)
-
-    st.code(f"SELECT {numeric_sql}, {categorical_sql} FROM dataset_table;", language="sql")
-
-    # Generate CREATE TABLE statement
-    st.markdown("### 🗃️ SQL CREATE TABLE Statement")
-
-    # Detect data types for SQL
-    def infer_sql_type(dtype, sample_data):
-        if np.issubdtype(dtype, np.number):
-            return "DECIMAL(10,2)"
-        elif np.issubdtype(dtype, np.datetime64):
-            return "DATETIME"
-        else:
-            # Check the maximum string length
-            max_len = sample_data.astype(str).str.len().max()
-            return f"VARCHAR({min(255, max(100, int(max_len * 1.5)))})"
-
-    create_table_sql = "CREATE TABLE dataset_table (\n"
-    for i, col in enumerate(df.columns):
-        sql_type = infer_sql_type(df[col].dtype, df[col])
-        if i == 0:
-            create_table_sql += f"    {col} {sql_type} PRIMARY KEY,\n"
-        else:
-            create_table_sql += f"    {col} {sql_type},\n"
-
-    create_table_sql = create_table_sql.rstrip(',\n') + "\n);"
-
-    st.code(create_table_sql, language="sql")
-
-    # If you only want the column lists
-    col3, col4 = st.columns(2)
-    with col3:
-        st.write("**Kolom Numerik (SQL String):**")
-        st.code(numeric_sql, language="sql")
-
-    with col4:
-        st.write("**Kolom Kategorikal (SQL String):**")
-        st.code(categorical_sql, language="sql")
-
-    # Correlation visualization as a simple ERD
-    if len(numeric_cols) > 1:
-        st.write("---")
-        st.subheader("📊 Matriks Korelasi (Hubungan Numerik)")
-        corr_matrix = df[numeric_cols].corr()
-
-        # Plot with Plotly
-        fig = px.imshow(
-            corr_matrix,
-            text_auto=".2f",
-            color_continuous_scale='RdBu_r',
-            zmin=-1,
-            zmax=1,
-            aspect="auto",
-            labels=dict(color="Korelasi")
-        )
-        fig.update_layout(
-            title="Matriks Korelasi Numerik",
-            xaxis_title="Fitur",
-            yaxis_title="Fitur",
-            autosize=True,
-            margin=dict(l=40, r=40, t=60, b=40),
-            height=600
-        )
-        st.plotly_chart(fig, use_container_width=True)
-
-    # --- Linear Regression Analysis ---
-    st.write("---")
-    st.subheader("🧮 Linear Regression Analysis (SQL-Style LRS)")
+
+        fig.update_layout(
+            title="Database Table Relationships",
+            showlegend=False,
+            height=500,
+            xaxis=dict(showgrid=False, zeroline=False, showticklabels=False, range=[0, 1]),
+            yaxis=dict(showgrid=False, zeroline=False, showticklabels=False, range=[0, 1]),
+            margin=dict(l=20, r=20, t=60, b=20)
+        )
+
+        st.plotly_chart(fig, use_container_width=True)
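One caveat worth flagging: the `except ImportError:` branch reuses `reference_tables` and `colors`, which are only assigned inside the `try:` after `import graphviz` succeeds, so the fallback can hit a NameError unless the import fails later than its first line. A self-contained sketch of the circular layout the fallback aims for (numpy and plotly only):

```python
import numpy as np
import plotly.graph_objects as go

# Reference tables sit on a circle of radius 0.4 around the main table at (0.5, 0.5).
names = ["Customer", "Product", "Category"]
angles = np.linspace(0, 2 * np.pi, len(names), endpoint=False)
xs = 0.5 + 0.4 * np.cos(angles)
ys = 0.5 + 0.4 * np.sin(angles)

fig = go.Figure(go.Scatter(
    x=[0.5, *xs], y=[0.5, *ys], mode="markers+text",
    text=["MAIN", *names], textposition="middle center",
    marker=dict(size=60),
))
for x, y in zip(xs, ys):  # dashed spokes stand in for the 1:N edges
    fig.add_trace(go.Scatter(x=[x, 0.5], y=[y, 0.5], mode="lines",
                             line=dict(color="gray", dash="dash"),
                             showlegend=False))
fig.show()
```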
 
-
-
+    # --- SAFE DATA VISUALIZATION ---
+    st.subheader("📈 Visualisasi Data")
+
+    # Consistent chart colors
+    color_palette = px.colors.qualitative.Set3
+
+    # Safe plotting helper
+    def safe_plotting(plot_function, *args, **kwargs):
+        try:
+            return plot_function(*args, **kwargs)
+        except Exception as e:
+            st.error(f"Error dalam membuat chart: {str(e)}")
+            return None
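The wrapper's contract is simply "return the figure, or None if the plotting call raised", which keeps one bad column from taking down the whole page. A self-contained usage sketch (the dashboard reports the failure through `st.error` rather than `print`):

```python
import pandas as pd
import plotly.express as px

def safe_plotting(plot_function, *args, **kwargs):
    # Same contract as the dashboard helper: the figure, or None on failure.
    try:
        return plot_function(*args, **kwargs)
    except Exception as exc:
        print(f"chart failed: {exc}")
        return None

df = pd.DataFrame({"amount": [100.5, 75.25, 200.0, 150.75]})
fig = safe_plotting(px.histogram, df, x="amount", nbins=10)
if fig is not None:
    fig.show()
assert safe_plotting(px.histogram, df, x="missing_column") is None  # no crash
```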
+
+    # Tabs for better chart organization
+    tab111, tab222, tab333 = st.tabs(["📊 Distribusi Numerik", "🏷️ Analisis Kategorikal", "📋 Data Quality"])
+
+    with tab111:
+        st.subheader("Analisis Distribusi Numerik")
+
+        if numeric_cols:
+            col1, col2 = st.columns(2)
+
+            with col1:
+                # Histogram with sensible binning
+                selected_num_hist = st.selectbox(
+                    "Pilih variabel untuk histogram:",
+                    numeric_cols,
+                    key="hist_num"
+                )
 
-
-
-
-
+                if selected_num_hist:
+                    fig_hist = safe_plotting(px.histogram,
+                        df,
+                        x=selected_num_hist,
+                        title=f"Distribusi {selected_num_hist}",
+                        nbins=30,
+                        color_discrete_sequence=['#3366CC'],
+                        opacity=0.8
+                    )
+                    if fig_hist:
+                        fig_hist.update_layout(
+                            bargap=0.1,
+                            xaxis_title=selected_num_hist,
+                            yaxis_title="Frekuensi"
+                        )
+                        st.plotly_chart(fig_hist, use_container_width=True)
+
+            with col2:
+                # Box plot
+                selected_num_box = st.selectbox(
+                    "Pilih variabel untuk box plot:",
+                    numeric_cols,
+                    key="box_num"
+                )
 
-    if
-
-    slope, intercept, r_value, p_value, std_err = stats.linregress(df[x_axis], df[y_axis])
-    correlation = df[x_axis].corr(df[y_axis])
-    r_squared = r_value**2
-
-    # --- SQL query display ---
-    st.markdown("### 🧩 SQL Query Representation")
-    st.code(f"""
-    SELECT
-        {x_axis} AS X,
-        {y_axis} AS Y,
-        ROUND(REGR_SLOPE({y_axis}, {x_axis}), 4) AS slope,
-        ROUND(REGR_INTERCEPT({y_axis}, {x_axis}), 4) AS intercept,
-        ROUND(CORR({y_axis}, {x_axis}), 4) AS correlation,
-        ROUND(POWER(CORR({y_axis}, {x_axis}), 2), 4) AS r_squared
-    FROM dataset_table;
-    """, language="sql")
-
-    # --- Plot the relationship ---
-    fig = px.scatter(
+                if selected_num_box:
+                    fig_box = safe_plotting(px.box,
                        df,
-
-
-
-        title=f"📊 SQL Visualization: {y_axis} vs {x_axis}",
-        labels={x_axis: f"{x_axis}", y_axis: f"{y_axis}"}
-    )
-    fig.update_layout(
-        autosize=True,
-        margin=dict(l=40, r=40, t=60, b=40),
-        height=500,
-        title_x=0.5
+                        y=selected_num_box,
+                        title=f"Box Plot {selected_num_box}",
+                        color_discrete_sequence=['#FF6B6B']
                    )
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        textposition="middle center",
-        textfont=dict(size=14)
-    ))
-
-    # Add a relationship line with a correlation annotation
-    rel_fig.add_trace(go.Scatter(
-        x=[0.3, 0.7], y=[0.5, 0.5],
-        mode='lines+text',
-        line=dict(width=4, color='red'),
-        text=[f"r = {correlation:.3f}"],
-        textposition="middle center",
-        textfont=dict(size=12, color='red')
-    ))
-
-    rel_fig.update_layout(
-        title=f"Relationship Diagram: {x_axis} → {y_axis}",
-        showlegend=False,
-        height=300,
-        xaxis=dict(showgrid=False, zeroline=False, showticklabels=False, range=[0, 1]),
-        yaxis=dict(showgrid=False, zeroline=False, showticklabels=False, range=[0, 1]),
-        margin=dict(l=20, r=20, t=60, b=20)
+                    if fig_box:
+                        st.plotly_chart(fig_box, use_container_width=True)
+
+            # Correlation matrix
+            if len(numeric_cols) >= 2:
+                st.write("**Matriks Korelasi:**")
+                try:
+                    corr_matrix = df[numeric_cols].corr()
+                    fig_corr = px.imshow(
+                        corr_matrix,
+                        text_auto=".2f",
+                        color_continuous_scale='RdBu_r',
+                        aspect="auto",
+                        title="Matriks Korelasi Numerik"
                    )
-
-
-
-
-
-
-    "Metric": ["X (Independent)", "Y (Dependent)", "Slope (β1)", "Intercept (β0)",
-               "R-Value", "R² (R-squared)", "P-Value", "Std Error", "Correlation"],
-    "Value": [x_axis, y_axis, f"{slope:.4f}", f"{intercept:.4f}",
-              f"{r_value:.4f}", f"{r_squared:.4f}", f"{p_value:.4f}",
-              f"{std_err:.4f}", f"{correlation:.4f}"]
-    })
-
-    st.dataframe(result_df, use_container_width=True, hide_index=True)
-
-    # Full statistical analysis
-    st.subheader("📊 Analisis Statistik Lengkap")
-
-    # Descriptive statistics
-    st.write("**Statistik Deskriptif:**")
-    st.dataframe(df.describe())
-
-    # Missing-values analysis
-    st.write("**Analisis Missing Values:**")
-    missing_data = df.isnull().sum()
-    if missing_data.sum() > 0:
-        fig_missing = px.bar(x=missing_data.index, y=missing_data.values,
-                             title="Missing Values per Kolom")
-        st.plotly_chart(fig_missing)
-    else:
-        st.success("Tidak ada missing values dalam dataset!")
+                    st.plotly_chart(fig_corr, use_container_width=True)
+                except Exception as e:
+                    st.warning(f"Tidak dapat menghitung matriks korelasi: {str(e)}")
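The correlation heatmap above relies on `DataFrame.corr()`, which needs at least two numeric columns; hence the `len(numeric_cols) >= 2` guard and the earlier dtype split. A minimal standalone version:

```python
import pandas as pd
import plotly.express as px

# All-numeric frame: .corr() returns the pairwise Pearson correlation matrix.
df = pd.DataFrame({"amount": [100, 75, 200, 150], "qty": [1, 2, 4, 3]})
corr = df.corr()
fig = px.imshow(corr, text_auto=".2f", color_continuous_scale="RdBu_r",
                aspect="auto", title="Correlation matrix")
fig.show()
```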
+
+    with tab222:
+        st.subheader("Analisis Data Kategorikal")
 
-
-
+        if categorical_cols:
+            col1, col2 = st.columns(2)
+
+            with col1:
+                # Tidy pie chart
+                selected_cat_pie = st.selectbox(
+                    "Pilih variabel kategorikal:",
+                    categorical_cols,
+                    key="pie_cat"
+                )
+
+                if selected_cat_pie:
+                    try:
+                        value_counts = df[selected_cat_pie].value_counts().head(8)
+                        fig_pie = safe_plotting(px.pie,
+                            values=value_counts.values,
+                            names=value_counts.index,
+                            title=f"Distribusi {selected_cat_pie} (Top 8)",
+                            color_discrete_sequence=color_palette
+                        )
+                        if fig_pie:
+                            st.plotly_chart(fig_pie, use_container_width=True)
+                    except Exception as e:
+                        st.warning(f"Tidak dapat membuat pie chart: {str(e)}")
+
+            with col2:
+                # Horizontal bar chart
+                if selected_cat_pie:
+                    try:
+                        value_counts = df[selected_cat_pie].value_counts().head(10)
+                        fig_bar = safe_plotting(px.bar,
+                            x=value_counts.values,
+                            y=value_counts.index,
+                            orientation='h',
+                            title=f"Top 10 {selected_cat_pie}",
+                            color=value_counts.values,
+                            color_continuous_scale='Blues'
+                        )
+                        if fig_bar:
+                            fig_bar.update_layout(
+                                xaxis_title="Count",
+                                yaxis_title=selected_cat_pie,
+                                showlegend=False
+                            )
+                            st.plotly_chart(fig_bar, use_container_width=True)
+                    except Exception as e:
+                        st.warning(f"Tidak dapat membuat bar chart: {str(e)}")
+
+    with tab333:
+        st.subheader("Data Quality Report")
 
-
+        # Build a comprehensive data-quality report
+        quality_report = []
         for col in df.columns:
-
+            analysis = column_analysis[col]
+            quality_report.append({
                 'Kolom': col,
-                'Tipe':
-                '
-                '
-                '
-                '
+                'Tipe Data': analysis['dtype'],
+                'Role': analysis['role'],
+                'Unique Values': analysis['unique_count'],
+                'Null Values': analysis['null_count'],
+                'Null %': f"{analysis['null_percentage']:.2f}%",
+                'Sample': analysis['sample_values'][0] if analysis['sample_values'] else 'N/A'
             })
 
-        quality_df = pd.DataFrame(
-        st.dataframe(quality_df)
+        quality_df = pd.DataFrame(quality_report)
+        st.dataframe(quality_df, use_container_width=True)
 
-        #
-        st.
+        # Simple data-quality visualizations
+        col1, col2 = st.columns(2)
+
+        with col1:
+            # Missing values bar chart
+            missing_data = quality_df[['Kolom', 'Null Values']].set_index('Kolom')
+            fig_missing = safe_plotting(px.bar,
+                missing_data,
+                y='Null Values',
+                title="Missing Values per Kolom",
+                color='Null Values',
+                color_continuous_scale='Reds'
+            )
+            if fig_missing:
+                st.plotly_chart(fig_missing, use_container_width=True)
 
-
-
+        with col2:
+            # Data types distribution
+            type_dist = quality_df['Tipe Data'].value_counts()
+            fig_types = safe_plotting(px.pie,
+                values=type_dist.values,
+                names=type_dist.index,
+                title="Distribusi Tipe Data",
+                color_discrete_sequence=color_palette
+            )
+            if fig_types:
+                st.plotly_chart(fig_types, use_container_width=True)
+
+    # --- DOWNLOAD SECTION ---
+    st.subheader("💾 Download Hasil Analisis")
+
+    col1, col2, col3 = st.columns(3)
+
+    with col1:
         st.download_button(
-
-
-
-
+            "📊 Download Quality Report",
+            quality_df.to_csv(index=False),
+            "data_quality_report.csv",
+            "text/csv"
         )
+
+    with col2:
+        # Build the summary report
+        summary_report = {
+            'file_name': uploaded_file.name,
+            'file_size': f"{uploaded_file.size / 1024:.2f} KB",
+            'rows': df.shape[0],
+            'columns': df.shape[1],
+            'analysis_date': pd.Timestamp.now().strftime("%Y-%m-%d %H:%M:%S"),
+            'numeric_columns': numeric_cols,
+            'categorical_columns': categorical_cols,
+            'date_columns': date_cols,
+            'primary_keys': [col for col, analysis in column_analysis.items()
+                             if analysis['role'] == 'PRIMARY_KEY'],
+            'foreign_keys': [col for col, analysis in column_analysis.items()
+                             if analysis['role'] == 'FOREIGN_KEY']
+        }
 
-
-        st.
-
-
-
-
-
-        example_data = {
-            'ID': [1, 2, 3, 4, 5],
-            'Nama': ['Alice', 'Bob', 'Charlie', 'Diana', 'Eve'],
-            'Usia': [25, 30, 35, 28, 32],
-            'Gaji': [50000, 60000, 70000, 55000, 65000],
-            'Departemen': ['IT', 'HR', 'IT', 'Finance', 'HR']
-        }
-        example_df = pd.DataFrame(example_data)
-        st.dataframe(example_df)
+        import json
+        st.download_button(
+            "📋 Download Summary Report",
+            json.dumps(summary_report, indent=2, ensure_ascii=False),
+            "analysis_summary.json",
+            "application/json"
+        )
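The summary payload above sticks to JSON-serializable values (plain lists, ints, and preformatted date strings), so `json.dumps` needs no custom encoder. A standalone sketch with hypothetical values:

```python
import json
import pandas as pd

summary_report = {
    "file_name": "orders.csv",          # hypothetical upload
    "rows": 5,
    "columns": 7,
    # Timestamps are preformatted to strings, as in the diff
    "analysis_date": pd.Timestamp.now().strftime("%Y-%m-%d %H:%M:%S"),
    "numeric_columns": ["amount"],
    "primary_keys": ["order_id"],
    "foreign_keys": ["customer_id", "product_id"],
}
print(json.dumps(summary_report, indent=2, ensure_ascii=False))
```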
 
-
-
-
-
-
-
-
-
+    with col3:
+        # Download processed data
+        st.download_button(
+            "💾 Download Processed Data",
+            df.to_csv(index=False),
+            "processed_data.csv",
+            "text/csv"
+        )
+
+except Exception as e:
+    st.error(f"Error dalam analisis data: {str(e)}")
+    st.info("Pastikan file yang diupload berformat CSV atau Excel yang valid")
+    st.code(f"Error details: {str(e)}", language='python')
+else:
+    st.info("📤 Silakan upload file CSV atau Excel untuk memulai analisis")
+
+    # Template and guide
+    st.subheader("🎯 Panduan Format Data")
+
+    col1, col2 = st.columns(2)
+
+    with col1:
+        st.write("**Format yang Disarankan:**")
+        sample_data = {
+            'customer_id': [1, 2, 3, 4, 5],
+            'order_id': [101, 102, 103, 104, 105],
+            'product_id': [201, 202, 203, 204, 205],
+            'order_date': pd.date_range('2024-01-01', periods=5),
+            'amount': [100.50, 75.25, 200.00, 150.75, 90.99],
+            'category': ['Electronics', 'Books', 'Electronics', 'Clothing', 'Books'],
+            'status': ['Completed', 'Pending', 'Completed', 'Shipped', 'Pending']
+        }
+        sample_df = pd.DataFrame(sample_data)
+        st.dataframe(sample_df)
+
+    with col2:
+        st.write("**Keterangan Fitur:**")
+        st.markdown("""
+        - **🔑 Primary Key**: Kolom dengan nilai unik (ID, code)
+        - **🔗 Foreign Key**: Kolom referensi ke tabel lain
+        - **📊 ERD Dinamis**: Diagram relasi otomatis
+        - **📈 Visualisasi Aman**: Error handling untuk semua chart
+        - **🎨 Warna Konsisten**: Skema warna yang harmonis
+        - **📋 Analisis Komprehensif**: Statistik detail dan laporan
+        """)
+
+    # Download template
+    csv_template = sample_df.to_csv(index=False)
+    st.download_button(
+        "📥 Download Template CSV",
+        csv_template,
+        "analysis_template.csv",
+        "text/csv"
+    )
 
 
 with tab8:
@@ -8450,12 +10721,29 @@ if df is not None:
         type=['csv', 'xlsx', 'xls'],
         key="stock_uploader"
     )
-    with st.expander("📜 Ketarangan Lengkap Tentang
+    with st.expander("📜 Keterangan Lengkap Tentang Analisis Saham", expanded=False):
+        st.markdown(
+            """
+            <img src="https://s3-ap-southeast-1.amazonaws.com/membership-media/public/uploads/posts/1653502344_Memahami_Apa_Itu_Saham_Dan_Cara_Kerjanya_1170x658.jpg" class="responsive-img">
+            """,
+            unsafe_allow_html=True
+        )
         st.markdown("""
-
+
 
         ### 🧾 Pengembangan Saham
-        - Saham
+        - Saham merupakan salah satu instrumen investasi yang populer di kalangan investor. Dengan membeli saham, investor memiliki sebagian kepemilikan dalam sebuah perusahaan dan berhak atas sebagian keuntungan perusahaan tersebut.
+        - Analisis saham melibatkan evaluasi berbagai faktor seperti kinerja keuangan perusahaan, kondisi pasar, tren industri, dan faktor ekonomi makro untuk membuat keputusan investasi yang lebih baik.
+        - Analisis saham dapat dilakukan dengan alat yang umum digunakan seperti Microsoft Excel atau Google Sheets.
+
+        ### 📈 Analisis Grafik Saham
+        - Analisis grafik saham adalah proses menganalisis data saham untuk membuat grafik yang menampilkan informasi tentang saham secara visual.
+        - Grafik saham dapat digunakan untuk membandingkan saham yang berbeda, menampilkan tren, menentukan peluang investasi yang lebih baik, dan meningkatkan keuntungan investasi.
         """)
     if uploaded_file is not None:
         try:
@@ -9569,7 +11857,7 @@ if df is not None:
     # Sidebar for choosing the calculator type
     calc_type = st.sidebar.selectbox(
         "Pilih Jenis Kalkulator",
-        ["Kalkulator Dasar", "Kalkulator Ilmiah", "Kalkulator Keuangan", "Konverter Satuan", "Kalkulator BMI", "Kalkulator Waktu"]
+        ["🔢 Kalkulator Dasar", "🔬 Kalkulator Ilmiah", "💰 Kalkulator Keuangan", "📐 Konverter Satuan", "⚖️ Kalkulator BMI", "⏰ Kalkulator Waktu"]
     )
 
     # Initialize session state for history
@@ -9582,7 +11870,7 @@ if df is not None:
         if len(st.session_state.calc_history) > 10:  # Keep only the last 10 entries
             st.session_state.calc_history.pop(0)
 
-    if calc_type == "Kalkulator Dasar":
+    if calc_type == "🔢 Kalkulator Dasar":
         st.subheader("🔢 Kalkulator Dasar")
 
         # Layout with columns for the calculator display
@@ -9659,7 +11947,7 @@ if df is not None:
         if st.button("🗑️ Reset", use_container_width=True):
             st.rerun()
 
-    elif calc_type == "Kalkulator Ilmiah":
+    elif calc_type == "🔬 Kalkulator Ilmiah":
         st.subheader("🔬 Kalkulator Ilmiah")
 
         col1, col2 = st.columns(2)
@@ -9778,7 +12066,7 @@ if df is not None:
             except Exception as e:
                 st.error(f"❌ Error: {str(e)}")
 
-    elif calc_type == "Kalkulator Keuangan":
+    elif calc_type == "💰 Kalkulator Keuangan":
         st.subheader("💰 Kalkulator Keuangan")
 
         finance_option = st.selectbox(
@@ -9860,7 +12148,7 @@ if df is not None:
             """)
             add_to_history(f"Cicilan: Rp {loan_amount:,.0f} → Rp {monthly_payment:,.0f}/bulan")
 
-    elif calc_type == "Konverter Satuan":
+    elif calc_type == "📐 Konverter Satuan":
         st.subheader("📐 Konverter Satuan")
 
         conversion_type = st.selectbox(
@@ -9945,8 +12233,8 @@ if df is not None:
             st.success(f"**Hasil:** {calc_str}")
             add_to_history(calc_str)
 
-    elif calc_type == "Kalkulator BMI":
-        st.subheader("
+    elif calc_type == "⚖️ Kalkulator BMI":
+        st.subheader("⚖️ Kalkulator BMI (Body Mass Index)")
 
         col1, col2 = st.columns(2)
 
@@ -9983,7 +12271,7 @@ if df is not None:
             """)
             add_to_history(f"BMI: {bmi:.1f} ({category})")
 
-    elif calc_type == "Kalkulator Waktu":
+    elif calc_type == "⏰ Kalkulator Waktu":
         st.subheader("⏰ Kalkulator Waktu")
 
         time_option = st.selectbox("Pilih jenis perhitungan", [
@@ -10119,7 +12407,7 @@ if df is not None:
     st.error("**🧹 Pembersihan Data**\n\nAuto-clean missing values")
 
     # Video Tutorial (placeholder)
-    st.markdown("### 🎥 Video Tutorial Penggunaan V2.
+    st.markdown("### 🎥 Video Tutorial Penggunaan V2.3.8")
     import streamlit.components.v1 as components
     google_drive_id = "1obx6q2jQS1fRrNi1E4VpAPlyI_rR9nO5"
 
@@ -10488,7 +12776,8 @@ if df is not None:
     with col3:
         st.markdown("""
         ### 🔄 Update
-        - Versi terbaru: 2.
+        - Versi terbaru: 2.3.8
+        - Rilis: Oktober 2025
         - Last updated: 2025
         - Compatibility: Python 3.8+
         """)
|