streamlit-launcher 2.2.7__py3-none-any.whl → 2.3.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -35,6 +35,38 @@ from sklearn.metrics import mean_squared_error, r2_score, accuracy_score, classi
  from sklearn.inspection import permutation_importance
  from scipy.stats import gaussian_kde
  import streamlit.components.v1 as components
+ import tensorflow as tf
+ import numpy as np
+ import pandas as pd
+ from sklearn.model_selection import train_test_split
+ from sklearn.preprocessing import StandardScaler, LabelEncoder
+ from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
+ from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
+ from sklearn.metrics import confusion_matrix, classification_report
+ import plotly.express as px
+ import plotly.graph_objects as go
+ from plotly.subplots import make_subplots
+ import xgboost as xgb
+ from sklearn.ensemble import GradientBoostingRegressor, GradientBoostingClassifier
+ from sklearn.feature_selection import mutual_info_regression, mutual_info_classif
+ import plotly.graph_objects as go
+ from plotly.subplots import make_subplots
+ import time
+ import warnings
+ warnings.filterwarnings('ignore')
+ from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
+ from sklearn.ensemble import GradientBoostingRegressor, GradientBoostingClassifier
+ from xgboost import XGBRegressor, XGBClassifier
+ from sklearn.ensemble import VotingRegressor, VotingClassifier
+ from sklearn.ensemble import StackingRegressor, StackingClassifier
+ from sklearn.model_selection import cross_validate, GridSearchCV
+ from sklearn.metrics import get_scorer
+ from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
+ from sklearn.compose import ColumnTransformer
+ from sklearn.impute import SimpleImputer
+ from sklearn.pipeline import Pipeline
+ import keras
+

  # Configuration for performance
  plt.style.use('default')
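
Review note on the import hunk above: several imports are duplicated (`plotly.graph_objects`, `make_subplots`, `GradientBoostingRegressor`/`GradientBoostingClassifier`, and a second `StandardScaler`/`LabelEncoder`), which is harmless but noisy. More importantly, `tensorflow` and `xgboost` are now imported unconditionally at module load, yet later code still guards on `'xgb' in globals()`; as written, that guard can never be False. A sketch of how the optional dependency could be handled instead (editorial, not part of the package):

```python
# Sketch only: make the heavy dependencies optional so the module still
# imports on machines without them, and so a guard such as `xgb is not None`
# can actually take the fallback branch.
try:
    import xgboost as xgb
except ImportError:
    xgb = None  # downstream code should then check `xgb is not None`

try:
    import tensorflow as tf
except ImportError:
    tf = None  # deep-learning features can be disabled when tf is missing
```
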
@@ -6435,8 +6467,13 @@ def create_ml_dl_analysis_dashboard(df, numeric_cols, non_numeric_cols):
      """
      Comprehensive dashboard for Machine Learning and Deep Learning analysis
      """
-
-     st.title("🤖 Advanced ML/DL Analysis Dashboard")
+     st.markdown("""
+     <div style='text-align: center; padding: 10px; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+                 border-radius: 10px; margin: 10px 0;'>
+         <h3 style='color: white; margin: 0;'>🧠 dwibaktindev AI</h3>
+         <p style='color: white; margin: 0;'>Sasha • Alisa • dwibaktindev Models</p>
+     </div>
+     """, unsafe_allow_html=True)

      # Detect data types
      data_size = len(df)
@@ -6874,10 +6911,25 @@ def create_outlier_analysis(df, numeric_cols):
      st.plotly_chart(fig, use_container_width=True)

  def machine_learning_analysis(df, numeric_cols, non_numeric_cols):
-     """Machine Learning analysis"""
+     """Machine Learning analysis with optimizations for large datasets"""

      st.header("🤖 Machine Learning Analysis")

+     # Dataset information
+     st.subheader("📊 Dataset Info")
+     col1, col2, col3 = st.columns(3)
+     with col1:
+         st.metric("Total Rows", f"{len(df):,}")
+     with col2:
+         st.metric("Total Columns", f"{len(df.columns):,}")
+     with col3:
+         st.metric("Memory Usage", f"{df.memory_usage(deep=True).sum() / 1024**2:.2f} MB")
+
+     # Optimize memory usage
+     if st.checkbox("Optimize Memory Usage", value=True):
+         df = optimize_memory_usage(df)
+         st.success("Memory usage optimized!")
+
      # Preprocessing
      st.subheader("🔧 Data Preprocessing")
@@ -6893,162 +6945,407 @@ def machine_learning_analysis(df, numeric_cols, non_numeric_cols):

          problem_type = st.selectbox(
              "Problem Type",
-             ["Regression", "Classification"],
+             ["Regression", "Classification", "Auto Detect"],
              key="ml_problem_type"
          )
+
+         # Auto-detect the problem type
+         if problem_type == "Auto Detect":
+             if target_variable in numeric_cols:
+                 problem_type = "Regression"
+             else:
+                 problem_type = "Classification"
+             st.info(f"Auto-detected: {problem_type}")

      with col2:
          test_size = st.slider("Test Size Ratio", 0.1, 0.5, 0.2, 0.05, key="ml_test_size")
          random_state = st.number_input("Random State", value=42, key="ml_random_state")
+
+         # Sampling for large datasets
+         sample_size = st.slider("Sample Size (for large datasets)",
+                                 min_value=1000,
+                                 max_value=min(50000, len(df)),
+                                 value=min(10000, len(df)),
+                                 step=1000,
+                                 key="ml_sample_size")

-     # Feature selection
+     # Feature selection with advanced options
      st.subheader("🎯 Feature Selection")
+
      available_features = [f for f in numeric_cols + non_numeric_cols if f != target_variable]
-     selected_features = st.multiselect(
-         "Select Features for the Model",
-         available_features,
-         default=available_features[:min(10, len(available_features))],
-         key="ml_features_select"
-     )

-     if target_variable and selected_features:
-         try:
-             # Prepare data
-             X = df[selected_features].copy()
-             y = df[target_variable]
-
-             # Encode categorical features and the target for classification
-             le_dict = {}
-             for col in selected_features:
-                 if col in non_numeric_cols:
-                     le = LabelEncoder()
-                     X[col] = le.fit_transform(X[col].astype(str))
-                     le_dict[col] = le
-
-             # Encode the target for classification
-             if problem_type == "Classification" and y.dtype == 'object':
-                 le_target = LabelEncoder()
-                 y = le_target.fit_transform(y.astype(str))
-
-             # Handle missing values
-             X = X.fillna(X.mean(numeric_only=True))
-
-             # Split data
-             X_train, X_test, y_train, y_test = train_test_split(
-                 X, y, test_size=test_size, random_state=random_state, stratify=y if problem_type == "Classification" else None
+     col1, col2 = st.columns([2, 1])
+
+     with col1:
+         feature_selection_method = st.radio(
+             "Feature Selection Method",
+             ["Manual Selection", "Auto Select Top Features"],
+             key="feature_selection_method"
+         )
+
+         if feature_selection_method == "Manual Selection":
+             selected_features = st.multiselect(
+                 "Select Features for the Model",
+                 available_features,
+                 default=available_features[:min(10, len(available_features))],
+                 key="ml_features_select"
              )
+         else:
+             top_k = st.slider("Number of Top Features", 5, 50, 15, key="top_k_features")
+             selected_features = available_features[:top_k]
+             st.info(f"Auto-selected top {top_k} features")
+
+     with col2:
+         # Advanced options
+         st.write("**Advanced Options:**")
+         use_feature_engineering = st.checkbox("Feature Engineering", value=False)
+         remove_high_correlation = st.checkbox("Remove High Correlation", value=True)
+         correlation_threshold = st.slider("Correlation Threshold", 0.7, 0.99, 0.9, 0.01)
+
+     if not target_variable or not selected_features:
+         st.warning("Select a target variable and features first")
+         return
+
+     try:
+         # Sampling for large datasets
+         if len(df) > sample_size:
+             st.info(f"Using sample of {sample_size} records for faster processing")
+             df_sampled = df.sample(n=sample_size, random_state=random_state)
+         else:
+             df_sampled = df
+
+         # Progress tracking
+         progress_bar = st.progress(0)
+         status_text = st.empty()
+
+         # Prepare data
+         status_text.text("Preparing data...")
+         X = df_sampled[selected_features].copy()
+         y = df_sampled[target_variable]
+         progress_bar.progress(20)
+
+         # Handle large datasets - incremental processing
+         chunk_size = min(1000, len(X))
+
+         # Encode categorical features
+         status_text.text("Encoding categorical features...")
+         le_dict = {}
+         categorical_columns = [col for col in selected_features if col in non_numeric_cols]
+
+         for col in categorical_columns:
+             # For large datasets, use a more efficient categorical encoding
+             if X[col].nunique() > 100:  # too many categories: fall back to frequency encoding
+                 freq_encoding = X[col].value_counts().to_dict()
+                 X[col] = X[col].map(freq_encoding)
+                 X[col].fillna(0, inplace=True)
+             else:
+                 le = LabelEncoder()
+                 X[col] = le.fit_transform(X[col].astype(str))
+                 le_dict[col] = le
+         progress_bar.progress(40)
+
+         # Encode the target variable
+         status_text.text("Encoding target variable...")
+         le_target = None
+         if problem_type == "Classification" and y.dtype == 'object':
+             le_target = LabelEncoder()
+             y = le_target.fit_transform(y.astype(str))
+
+         # Remove highly correlated features
+         if remove_high_correlation and len(selected_features) > 1:
+             status_text.text("Removing highly correlated features...")
+             X = remove_correlated_features(X, correlation_threshold)
+
+         progress_bar.progress(60)
+
+         # Handle missing values with a more robust method
+         status_text.text("Handling missing values...")
+         for col in X.columns:
+             if X[col].isnull().sum() > 0:
+                 if X[col].dtype in ['int64', 'float64']:
+                     X[col].fillna(X[col].median(), inplace=True)
+                 else:
+                     X[col].fillna(X[col].mode()[0] if len(X[col].mode()) > 0 else 0, inplace=True)
+
+         progress_bar.progress(80)
+
+         # Split data
+         status_text.text("Splitting data...")
+         X_train, X_test, y_train, y_test = train_test_split(
+             X, y,
+             test_size=test_size,
+             random_state=random_state,
+             stratify=y if problem_type == "Classification" else None
+         )
+
+         # Scale features - StandardScaler is efficient enough here
+         scaler = StandardScaler()
+         X_train_scaled = scaler.fit_transform(X_train)
+         X_test_scaled = scaler.transform(X_test)
+         progress_bar.progress(100)
+
+         # Model selection with progress tracking
+         st.subheader("🚀 Model Training & Evaluation")
+
+         # Model choice based on the problem type and dataset size
+         if problem_type == "Regression":
+             models = {
+                 "Linear Regression": LinearRegression(),
+                 "Ridge Regression": Ridge(random_state=random_state),
+                 "Random Forest": RandomForestRegressor(
+                     n_estimators=50,  # reduced for large datasets
+                     random_state=random_state,
+                     n_jobs=-1  # use all CPU cores
+                 ),
+                 "Gradient Boosting": GradientBoostingRegressor(
+                     n_estimators=50,
+                     random_state=random_state
+                 )
+             }
+         elif problem_type == "Classification":
+             models = {
+                 "Logistic Regression": LogisticRegression(
+                     random_state=random_state,
+                     n_jobs=-1,
+                     max_iter=1000
+                 ),
+                 "Random Forest": RandomForestClassifier(
+                     n_estimators=50,
+                     random_state=random_state,
+                     n_jobs=-1
+                 ),
+                 "Gradient Boosting": GradientBoostingClassifier(
+                     n_estimators=50,
+                     random_state=random_state
+                 ),
+                 "XGBoost": xgb.XGBClassifier(
+                     n_estimators=50,
+                     random_state=random_state,
+                     n_jobs=-1,
+                     verbosity=0
+                 ) if 'xgb' in globals() else None
+             }
+             # Remove None models
+             models = {k: v for k, v in models.items() if v is not None}
+
+         # Train and evaluate the models with a progress bar
+         results = {}
+         model_progress = st.progress(0)
+         total_models = len(models)
+
+         for i, (name, model) in enumerate(models.items()):
+             status_text.text(f"Training {name}...")

-             # Scale features
-             scaler = StandardScaler()
-             X_train_scaled = scaler.fit_transform(X_train)
-             X_test_scaled = scaler.transform(X_test)
-
-             # Model selection based on the problem type
-             st.subheader("🚀 Model Training & Evaluation")
-
-             if problem_type == "Regression":
-                 models = {
-                     "Linear Regression": LinearRegression(),
-                     "Ridge Regression": Ridge(random_state=random_state),
-                     "Random Forest": RandomForestRegressor(n_estimators=100, random_state=random_state)
-                 }
-
-             elif problem_type == "Classification":
-                 models = {
-                     "Logistic Regression": LogisticRegression(random_state=random_state),
-                     "Random Forest": RandomForestClassifier(n_estimators=100, random_state=random_state),
-                     "SVM": SVC(random_state=random_state)
-                 }
-
-             # Train and evaluate models
-             results = {}
-
-             for name, model in models.items():
-                 with st.spinner(f"Training {name}..."):
-                     try:
-                         # Train the model
-                         model.fit(X_train_scaled, y_train)
-                         y_pred = model.predict(X_test_scaled)
-
-                         # Calculate metrics
-                         if problem_type == "Regression":
-                             mse = mean_squared_error(y_test, y_pred)
-                             r2 = r2_score(y_test, y_pred)
-
-                             results[name] = {
-                                 'MSE': mse,
-                                 'R2 Score': r2,
-                                 'predictions': y_pred,
-                                 'model': model
-                             }
-
-                         elif problem_type == "Classification":
-                             accuracy = accuracy_score(y_test, y_pred)
-
-                             results[name] = {
-                                 'Accuracy': accuracy,
-                                 'predictions': y_pred,
-                                 'model': model
-                             }
-                     except Exception as model_error:
-                         st.warning(f"Error training {name}: {str(model_error)}")
-
-             # Display results
-             if results:
-                 st.subheader("📊 Model Performance Comparison")
+             try:
+                 # Train the model
+                 model.fit(X_train_scaled, y_train)
+                 y_pred = model.predict(X_test_scaled)

+                 # Calculate metrics
                  if problem_type == "Regression":
-                     results_df = pd.DataFrame({
-                         'Model': list(results.keys()),
-                         'MSE': [results[name]['MSE'] for name in results.keys()],
-                         'R2 Score': [results[name]['R2 Score'] for name in results.keys()]
-                     })
-                 else:
-                     results_df = pd.DataFrame({
-                         'Model': list(results.keys()),
-                         'Accuracy': [results[name]['Accuracy'] for name in results.keys()]
-                     })
+                     mse = mean_squared_error(y_test, y_pred)
+                     rmse = np.sqrt(mse)
+                     mae = mean_absolute_error(y_test, y_pred)
+                     r2 = r2_score(y_test, y_pred)
+
+                     results[name] = {
+                         'MSE': mse,
+                         'RMSE': rmse,
+                         'MAE': mae,
+                         'R2 Score': r2,
+                         'predictions': y_pred,
+                         'model': model
+                     }

-                 st.dataframe(results_df.sort_values(
-                     'R2 Score' if problem_type == "Regression" else 'Accuracy',
-                     ascending=False
-                 ), use_container_width=True)
-
-                 # Feature importance for tree-based models
-                 st.subheader("🔍 Feature Importance")
-                 for name, result in results.items():
-                     model = result['model']
-                     if hasattr(model, 'feature_importances_'):
-                         feature_importance = pd.DataFrame({
-                             'feature': selected_features,
-                             'importance': model.feature_importances_
-                         }).sort_values('importance', ascending=False)
-
-                         fig = px.bar(
-                             feature_importance.head(10),
-                             x='importance',
-                             y='feature',
-                             title=f"Feature Importance - {name}",
-                             orientation='h'
-                         )
-                         st.plotly_chart(fig, use_container_width=True)
-             else:
-                 st.warning("No model was trained successfully")
+                 elif problem_type == "Classification":
+                     accuracy = accuracy_score(y_test, y_pred)
+                     precision = precision_score(y_test, y_pred, average='weighted', zero_division=0)
+                     recall = recall_score(y_test, y_pred, average='weighted', zero_division=0)
+                     f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)
+
+                     results[name] = {
+                         'Accuracy': accuracy,
+                         'Precision': precision,
+                         'Recall': recall,
+                         'F1-Score': f1,
+                         'predictions': y_pred,
+                         'model': model
+                     }
+
+                 st.success(f"✅ {name} trained successfully")
+
+             except Exception as model_error:
+                 st.warning(f"⚠️ Error training {name}: {str(model_error)}")
+
+             model_progress.progress((i + 1) / total_models)
+
+         status_text.text("Completed!")

-     except Exception as e:
-         st.error(f"Error in ML analysis: {str(e)}")
+         # Display results
+         if results:
+             display_ml_results(results, problem_type, X_test, y_test, selected_features, le_target)
+         else:
+             st.error("❌ No model was trained successfully")
+
+     except Exception as e:
+         st.error(f"❌ Error in ML analysis: {str(e)}")
+         st.info("💡 Tip: try reducing the number of features or using a smaller sample size")
+
+ def optimize_memory_usage(df):
+     """Optimize the memory usage of a dataframe"""
+     for col in df.columns:
+         if df[col].dtype == 'object':
+             df[col] = df[col].astype('category')
+         elif df[col].dtype in ['int64', 'int32']:
+             c_min = df[col].min()
+             c_max = df[col].max()
+             if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(np.int8).max:
+                 df[col] = df[col].astype(np.int8)
+             elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(np.int16).max:
+                 df[col] = df[col].astype(np.int16)
+             elif c_min > np.iinfo(np.int32).min and c_max < np.iinfo(np.int32).max:
+                 df[col] = df[col].astype(np.int32)
+         elif df[col].dtype in ['float64', 'float32']:
+             c_min = df[col].min()
+             c_max = df[col].max()
+             if c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max:
+                 df[col] = df[col].astype(np.float32)
+     return df
+
+ def remove_correlated_features(X, threshold=0.9):
+     """Remove highly correlated features"""
+     corr_matrix = X.corr().abs()
+     upper = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(bool))
+     to_drop = [column for column in upper.columns if any(upper[column] > threshold)]
+     return X.drop(columns=to_drop)
+
+ def display_ml_results(results, problem_type, X_test, y_test, selected_features, le_target):
+     """Display the ML results with comprehensive visualizations"""
+
+     st.subheader("📊 Model Performance Comparison")
+
+     # Create the results dataframe
+     if problem_type == "Regression":
+         metrics_df = pd.DataFrame({
+             'Model': list(results.keys()),
+             'MSE': [results[name]['MSE'] for name in results.keys()],
+             'RMSE': [results[name]['RMSE'] for name in results.keys()],
+             'MAE': [results[name]['MAE'] for name in results.keys()],
+             'R2 Score': [results[name]['R2 Score'] for name in results.keys()]
+         })
+         sort_metric = 'R2 Score'
+     else:
+         metrics_df = pd.DataFrame({
+             'Model': list(results.keys()),
+             'Accuracy': [results[name]['Accuracy'] for name in results.keys()],
+             'Precision': [results[name]['Precision'] for name in results.keys()],
+             'Recall': [results[name]['Recall'] for name in results.keys()],
+             'F1-Score': [results[name]['F1-Score'] for name in results.keys()]
+         })
+         sort_metric = 'Accuracy'
+
+     # Display the metrics table
+     st.dataframe(metrics_df.sort_values(sort_metric, ascending=False), use_container_width=True)
+
+     # Visualization
+     col1, col2 = st.columns(2)
+
+     with col1:
+         # Performance comparison chart
+         if problem_type == "Regression":
+             fig = px.bar(metrics_df, x='Model', y='R2 Score', title="R2 Score Comparison")
+         else:
+             fig = px.bar(metrics_df, x='Model', y='Accuracy', title="Accuracy Comparison")
+         st.plotly_chart(fig, use_container_width=True)
+
+     with col2:
+         # Actual vs. predicted for the best model
+         best_model_name = metrics_df.loc[metrics_df[sort_metric].idxmax(), 'Model']
+         best_result = results[best_model_name]
+
+         if problem_type == "Regression":
+             fig = px.scatter(
+                 x=y_test,
+                 y=best_result['predictions'],
+                 labels={'x': 'Actual', 'y': 'Predicted'},
+                 title=f"Actual vs Predicted - {best_model_name}"
+             )
+             fig.add_trace(px.line(x=[y_test.min(), y_test.max()], y=[y_test.min(), y_test.max()]).data[0])
+         else:
+             # Confusion matrix
+             cm = confusion_matrix(y_test, best_result['predictions'])
+             fig = px.imshow(
+                 cm,
+                 labels=dict(x="Predicted", y="Actual", color="Count"),
+                 title=f"Confusion Matrix - {best_model_name}"
+             )
+         st.plotly_chart(fig, use_container_width=True)
+
+     # Feature importance
+     st.subheader("🔍 Feature Importance")
+     for name, result in results.items():
+         model = result['model']
+         if hasattr(model, 'feature_importances_'):
+             feature_importance = pd.DataFrame({
+                 'feature': selected_features[:len(model.feature_importances_)],
+                 'importance': model.feature_importances_
+             }).sort_values('importance', ascending=False)
+
+             fig = px.bar(
+                 feature_importance.head(10),
+                 x='importance',
+                 y='feature',
+                 title=f"Top 10 Feature Importance - {name}",
+                 orientation='h'
+             )
+             st.plotly_chart(fig, use_container_width=True)

  def deep_learning_analysis(df, numeric_cols, non_numeric_cols):
-     """Deep Learning analysis"""
+     """Complete Deep Learning analysis - optimized for large datasets"""

-     st.header("🧠 Deep Learning Analysis")
+     st.header("🧠 Deep Learning Analysis - High Performance")

-     st.warning("⚠️ The Deep Learning feature requires intensive computation and a fairly large dataset")
+     # Validate the dataset
+     if df.empty:
+         st.error("❌ The dataset is empty! Please upload data first.")
+         return
+
+     if len(numeric_cols) < 2:
+         st.error("❌ At least 2 numeric columns are required for Deep Learning analysis")
+         return

-     # DL configuration
-     col1, col2 = st.columns(2)
+     # Configuration for speed and performance
+     st.subheader("⚡ Speed & Performance Configuration")
+
+     col1, col2, col3 = st.columns(3)

      with col1:
+         processing_speed = st.selectbox(
+             "Processing Speed",
+             ["🚀 Very Fast", "⚡ Fast", "✅ Balanced", "🐢 Comprehensive"],
+             index=0,
+             key="processing_speed"
+         )
+
+         # Set the parameters based on the selected speed
+         if processing_speed == "🚀 Very Fast":
+             sample_size = 0.3
+             epochs = 20
+             batch_size = 128
+         elif processing_speed == "⚡ Fast":
+             sample_size = 0.5
+             epochs = 30
+             batch_size = 64
+         elif processing_speed == "✅ Balanced":
+             sample_size = 0.7
+             epochs = 50
+             batch_size = 32
+         else:
+             sample_size = 1.0
+             epochs = 80
+             batch_size = 16
+
+     with col2:
          dl_target = st.selectbox(
              "Select Target Variable",
              numeric_cols,
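
Two review notes on the `machine_learning_analysis` hunk above, with editorial sketches (not part of the package). First, the frequency-encoding branch maps each category to its in-sample count; categories that fall outside the sample (or only appear at prediction time) map to `NaN` and are then filled with 0, silently merging them with genuinely rare categories. A toy run makes the behavior concrete:

```python
import pandas as pd

s = pd.Series(["a", "a", "b", "c"])
freq = s.value_counts().to_dict()          # {'a': 2, 'b': 1, 'c': 1}
encoded = pd.Series(["a", "d"]).map(freq)  # 'd' was never seen -> NaN
print(encoded.fillna(0).tolist())          # [2.0, 0.0]
```

Second, after `remove_correlated_features()` drops columns, the model is fit on `X_train.columns`, so labeling importances with `selected_features[:len(model.feature_importances_)]` (as `display_ml_results` does) can attach the wrong names when a dropped column was not the last one selected. Using the columns the model actually saw avoids the mismatch:

```python
# Sketch: X_test still carries the post-pruning column order.
feature_importance = pd.DataFrame({
    'feature': list(X_test.columns),
    'importance': model.feature_importances_
}).sort_values('importance', ascending=False)
```
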
@@ -7061,30 +7358,76 @@ def deep_learning_analysis(df, numeric_cols, non_numeric_cols):
              key="dl_problem_type"
          )

-     with col2:
-         epochs = st.slider("Epochs", 10, 200, 50, key="dl_epochs")
-         batch_size = st.slider("Batch Size", 16, 256, 32, key="dl_batch_size")
-         learning_rate = st.selectbox("Learning Rate", [0.001, 0.01, 0.1, 0.0001], key="dl_learning_rate")
+     with col3:
+         epochs = st.slider("Epochs", 10, 200, epochs, key="dl_epochs")
+         batch_size = st.slider("Batch Size", 16, 256, batch_size, key="dl_batch_size")
+         learning_rate = st.selectbox("Learning Rate", [0.001, 0.01, 0.0001, 0.00001],
+                                      index=0, key="dl_learning_rate")
+
+     # Large-dataset optimization info
+     st.info(f"**Mode {processing_speed}** - Sample size: {sample_size*100}% - Dataset: {len(df):,} rows")

-     # Feature selection for DL
+     # Feature selection with optimization
+     available_features = [f for f in numeric_cols if f != dl_target]
      dl_features = st.multiselect(
          "Select Features for Deep Learning",
-         [f for f in numeric_cols if f != dl_target],
-         default=[f for f in numeric_cols if f != dl_target][:5],
+         available_features,
+         default=available_features[:min(6, len(available_features))],
          key="dl_features_select"
      )

-     if dl_target and dl_features:
-         try:
-             import tensorflow as tf
+     if not dl_target or not dl_features:
+         st.info("📝 Select a target variable and features to start the DL analysis")
+         return
+
+     try:
+
+         # Check GPU availability
+         gpu_available = len(tf.config.experimental.list_physical_devices('GPU')) > 0
+         if gpu_available:
+             st.success("🎯 GPU available - training will be accelerated!")
+         else:
+             st.info("💡 No GPU available - training on the CPU")
+
+         # Memory optimization for large datasets
+         @st.cache_data(show_spinner=False)
+         def prepare_data_optimized(_df, features, target, sample_frac=1.0, problem_type="Regression"):
+             """Prepare the data with memory optimizations"""
+             # Sampling for large datasets
+             if sample_frac < 1.0:
+                 _df = _df.sample(frac=sample_frac, random_state=42)
+
+             X = _df[features].fillna(_df[features].mean())
+             y = _df[target]
+
+             # Preprocess the target for classification
+             if problem_type != "Regression":
+                 if problem_type == "Binary Classification":
+                     # Make sure the target really is binary
+                     unique_vals = y.unique()
+                     if len(unique_vals) > 2:
+                         st.warning(f"⚠️ Target has {len(unique_vals)} classes. Using the 2 most frequent.")
+                         top_2_classes = y.value_counts().head(2).index
+                         mask = y.isin(top_2_classes)
+                         X = X[mask]
+                         y = y[mask]
+                         y = LabelEncoder().fit_transform(y)
+                     else:
+                         y = LabelEncoder().fit_transform(y)
+                 else:
+                     # Multi-class classification
+                     y = LabelEncoder().fit_transform(y)

-             # Prepare data
-             X = df[dl_features].fillna(df[dl_features].mean())
-             y = df[dl_target]
+             return X, y
+
+         # Prepare the data with optimizations
+         with st.spinner("🔄 Processing data with speed optimizations..."):
+             X, y = prepare_data_optimized(df, dl_features, dl_target, sample_size, dl_problem_type)

          # Split data
          X_train, X_test, y_train, y_test = train_test_split(
-             X, y, test_size=0.2, random_state=42
+             X, y, test_size=0.2, random_state=42,
+             stratify=y if dl_problem_type != "Regression" else None
          )

          # Scale features
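
A caching caveat for the hunk above: with `st.cache_data`, the leading underscore in `_df` tells Streamlit not to hash that argument. That keeps hashing a large dataframe cheap, but it also means the cache key is built only from `features`, `target`, `sample_frac`, and `problem_type`: if the uploaded dataframe changes while those stay the same, stale cached results are returned. One workaround (editorial sketch; `df_fingerprint` is a hypothetical helper) is to pass a cheap fingerprint of the data as an extra hashed argument:

```python
import pandas as pd
import streamlit as st

def df_fingerprint(df):
    # Cheap, hashable summary of the dataframe's content and shape.
    return (df.shape, tuple(df.columns), int(pd.util.hash_pandas_object(df).sum()))

@st.cache_data(show_spinner=False)
def prepare_cached(_df, features, target, fingerprint):
    # `fingerprint` participates in the cache key; `_df` does not.
    return _df[features].fillna(_df[features].mean()), _df[target]

# usage: X, y = prepare_cached(df, dl_features, dl_target, df_fingerprint(df))
```
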
@@ -7092,198 +7435,1532 @@ def deep_learning_analysis(df, numeric_cols, non_numeric_cols):
7092
7435
  X_train_scaled = scaler.fit_transform(X_train)
7093
7436
  X_test_scaled = scaler.transform(X_test)
7094
7437
 
7095
- # Model architecture
7096
- st.subheader("🏗️ Neural Network Architecture")
7438
+ # Convert to TensorFlow datasets untuk performa tinggi
7439
+ train_dataset = tf.data.Dataset.from_tensor_slices((X_train_scaled, y_train))
7440
+ train_dataset = train_dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)
7097
7441
 
7442
+ val_dataset = tf.data.Dataset.from_tensor_slices((X_test_scaled, y_test))
7443
+ val_dataset = val_dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)
7444
+
7445
+ # Tampilkan info dataset
7446
+ st.success(f"✅ Data siap: {len(X_train):,} training samples, {len(X_test):,} test samples")
7447
+
7448
+ # Model architecture dengan optimasi
7449
+ st.subheader("🏗️ Neural Network Architecture - Optimized")
7450
+
7451
+ col1, col2 = st.columns(2)
7452
+
7453
+ with col1:
7454
+ hidden_layers = st.slider("Jumlah Hidden Layers", 1, 5, 2, key="dl_hidden_layers")
7455
+ units_per_layer = st.slider("Units per Layer", 32, 512, 64, key="dl_units")
7456
+ activation = st.selectbox("Activation Function", ["relu", "elu", "tanh", "selu"],
7457
+ index=0, key="dl_activation")
7458
+
7459
+ with col2:
7460
+ dropout_rate = st.slider("Dropout Rate", 0.0, 0.5, 0.2, 0.1, key="dl_dropout")
7461
+ optimizer = st.selectbox("Optimizer", ["adam", "rmsprop", "nadam", "sgd"],
7462
+ index=0, key="dl_optimizer")
7463
+ use_batch_norm = st.checkbox("Gunakan Batch Normalization", value=True, key="dl_batchnorm")
7464
+ use_early_stopping = st.checkbox("Gunakan Early Stopping", value=True, key="dl_earlystop")
7465
+
7466
+ # Advanced configuration
7467
+ with st.expander("⚙️ Konfigurasi Lanjutan"):
7098
7468
  col1, col2 = st.columns(2)
7099
-
7100
7469
  with col1:
7101
- hidden_layers = st.slider("Jumlah Hidden Layers", 1, 5, 2, key="dl_hidden_layers")
7102
- units_per_layer = st.slider("Units per Layer", 16, 256, 64, key="dl_units")
7103
- activation = st.selectbox("Activation Function", ["relu", "tanh", "sigmoid"], key="dl_activation")
7104
-
7470
+ weight_initializer = st.selectbox(
7471
+ "Weight Initializer",
7472
+ ["glorot_uniform", "he_normal", "lecun_uniform"],
7473
+ index=0
7474
+ )
7475
+ use_l2_reg = st.checkbox("Gunakan L2 Regularization", value=False)
7476
+ l2_rate = st.slider("L2 Rate", 0.0001, 0.01, 0.001, 0.0001) if use_l2_reg else 0.0
7477
+
7105
7478
  with col2:
7106
- dropout_rate = st.slider("Dropout Rate", 0.0, 0.5, 0.2, 0.1, key="dl_dropout")
7107
- optimizer = st.selectbox("Optimizer", ["adam", "rmsprop", "sgd"], key="dl_optimizer")
7108
-
7109
- # Build model
7479
+ learning_rate_schedule = st.selectbox(
7480
+ "Learning Rate Schedule",
7481
+ ["Constant", "ExponentialDecay", "CosineDecay"],
7482
+ index=0
7483
+ )
7484
+
7485
+ # Build optimized model
7486
+ with st.spinner("🔄 Membangun model neural network..."):
7110
7487
  model = tf.keras.Sequential()
7111
7488
 
7112
7489
  # Input layer
7113
- model.add(tf.keras.layers.Dense(units_per_layer, activation=activation, input_shape=(len(dl_features),)))
7490
+ if use_l2_reg:
7491
+ model.add(tf.keras.layers.Dense(
7492
+ units_per_layer,
7493
+ activation=activation,
7494
+ input_shape=(len(dl_features),),
7495
+ kernel_initializer=weight_initializer,
7496
+ kernel_regularizer=tf.keras.regularizers.l2(l2_rate)
7497
+ ))
7498
+ else:
7499
+ model.add(tf.keras.layers.Dense(
7500
+ units_per_layer,
7501
+ activation=activation,
7502
+ input_shape=(len(dl_features),),
7503
+ kernel_initializer=weight_initializer
7504
+ ))
7505
+
7506
+ if use_batch_norm:
7507
+ model.add(tf.keras.layers.BatchNormalization())
7114
7508
  model.add(tf.keras.layers.Dropout(dropout_rate))
7115
7509
 
7116
- # Hidden layers
7510
+ # Hidden layers dengan optimasi
7117
7511
  for i in range(hidden_layers - 1):
7118
- model.add(tf.keras.layers.Dense(units_per_layer, activation=activation))
7512
+ # Reduce units in deeper layers untuk efisiensi
7513
+ units = max(32, units_per_layer // (2 ** (i + 1)))
7514
+
7515
+ if use_l2_reg:
7516
+ model.add(tf.keras.layers.Dense(
7517
+ units,
7518
+ activation=activation,
7519
+ kernel_regularizer=tf.keras.regularizers.l2(l2_rate)
7520
+ ))
7521
+ else:
7522
+ model.add(tf.keras.layers.Dense(units, activation=activation))
7523
+
7524
+ if use_batch_norm:
7525
+ model.add(tf.keras.layers.BatchNormalization())
7119
7526
  model.add(tf.keras.layers.Dropout(dropout_rate))
7120
7527
 
7121
7528
  # Output layer
7122
7529
  if dl_problem_type == "Regression":
7123
7530
  model.add(tf.keras.layers.Dense(1, activation='linear'))
7124
7531
  loss = 'mse'
7125
- metrics = ['mae']
7532
+ metrics = ['mae', 'mse']
7533
+ monitor_metric = 'val_loss'
7126
7534
  else:
7127
- num_classes = len(y.unique()) if dl_problem_type == "Multi-class Classification" else 1
7535
+ num_classes = len(np.unique(y)) if dl_problem_type == "Multi-class Classification" else 1
7128
7536
  activation_output = 'softmax' if dl_problem_type == "Multi-class Classification" else 'sigmoid'
7129
- model.add(tf.keras.layers.Dense(num_classes, activation=activation_output))
7537
+ output_units = num_classes if dl_problem_type == "Multi-class Classification" else 1
7538
+ model.add(tf.keras.layers.Dense(output_units, activation=activation_output))
7130
7539
  loss = 'sparse_categorical_crossentropy' if dl_problem_type == "Multi-class Classification" else 'binary_crossentropy'
7131
7540
  metrics = ['accuracy']
7132
-
7133
- # Compile model
7134
- model.compile(
7135
- optimizer=optimizer,
7136
- loss=loss,
7137
- metrics=metrics
7541
+ monitor_metric = 'val_accuracy'
7542
+
7543
+ # Learning rate schedule
7544
+ if learning_rate_schedule == "ExponentialDecay":
7545
+ lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
7546
+ initial_learning_rate=learning_rate,
7547
+ decay_steps=1000,
7548
+ decay_rate=0.9
7138
7549
  )
7139
-
7140
- # Display model summary
7141
- st.text("Model Summary:")
7142
- model_summary = []
7143
- model.summary(print_fn=lambda x: model_summary.append(x))
7144
- st.text("\n".join(model_summary))
7145
-
7146
- # Train model
7147
- if st.button("🚀 Train Deep Learning Model", key="dl_train_button"):
7148
- with st.spinner("Training neural network..."):
7149
- # Callbacks
7150
- early_stopping = tf.keras.callbacks.EarlyStopping(
7151
- patience=10, restore_best_weights=True
7152
- )
7153
-
7154
- # Train model
7155
- history = model.fit(
7156
- X_train_scaled, y_train,
7157
- epochs=epochs,
7158
- batch_size=batch_size,
7159
- validation_split=0.2,
7160
- callbacks=[early_stopping],
7161
- verbose=0
7162
- )
7163
-
7164
- # Plot training history
7165
- fig = go.Figure()
7166
- fig.add_trace(go.Scatter(
7167
- y=history.history['loss'],
7168
- mode='lines',
7169
- name='Training Loss'
7170
- ))
7171
- if 'val_loss' in history.history:
7172
- fig.add_trace(go.Scatter(
7173
- y=history.history['val_loss'],
7174
- mode='lines',
7175
- name='Validation Loss'
7176
- ))
7177
- fig.update_layout(
7178
- title="Training History - Loss",
7179
- xaxis_title="Epoch",
7180
- yaxis_title="Loss",
7181
- height=400
7182
- )
7183
- st.plotly_chart(fig, use_container_width=True)
7184
-
7185
- # Evaluate model
7186
- test_results = model.evaluate(X_test_scaled, y_test, verbose=0)
7187
- st.success(f"✅ Model Training Complete!")
7188
- st.metric("Test Loss", f"{test_results[0]:.4f}")
7189
- if len(test_results) > 1:
7190
- st.metric("Test Metric", f"{test_results[1]:.4f}")
7550
+ elif learning_rate_schedule == "CosineDecay":
7551
+ lr_schedule = tf.keras.optimizers.schedules.CosineDecay(
7552
+ initial_learning_rate=learning_rate,
7553
+ decay_steps=epochs * len(X_train) // batch_size
7554
+ )
7555
+ else:
7556
+ lr_schedule = learning_rate
7557
+
7558
+ # Compile model dengan learning rate
7559
+ if optimizer == "adam":
7560
+ optimizer_obj = tf.keras.optimizers.Adam(learning_rate=lr_schedule)
7561
+ elif optimizer == "rmsprop":
7562
+ optimizer_obj = tf.keras.optimizers.RMSprop(learning_rate=lr_schedule)
7563
+ elif optimizer == "nadam":
7564
+ optimizer_obj = tf.keras.optimizers.Nadam(learning_rate=lr_schedule)
7565
+ else:
7566
+ optimizer_obj = tf.keras.optimizers.SGD(learning_rate=lr_schedule, momentum=0.9)
7191
7567
 
7192
- except ImportError:
7193
- st.error("❌ TensorFlow tidak terinstall. Install dengan: pip install tensorflow")
7194
- except Exception as e:
7195
- st.error(f"Error dalam DL analysis: {str(e)}")
7568
+ model.compile(optimizer=optimizer_obj, loss=loss, metrics=metrics)
7569
+
7570
+ # Display model summary
7571
+ st.subheader("📊 Model Summary")
7196
7572
 
7197
- def model_comparison_analysis(df, numeric_cols, non_numeric_cols):
7198
- """Perbandingan model yang komprehensif"""
7199
-
7200
- st.header("📊 Model Comparison Dashboard")
7201
-
7202
- st.info("🔄 Fitur Model Comparison - Pilih model dari tab Machine Learning dan Deep Learning untuk perbandingan")
7203
-
7204
- # Placeholder untuk implementasi lengkap
7205
- col1, col2, col3 = st.columns(3)
7206
-
7207
- with col1:
7208
- st.metric("ML Models", "3")
7209
- with col2:
7210
- st.metric("Evaluation Metrics", "5+")
7211
- with col3:
7212
- st.metric("Feature Importance", "✓")
7573
+ # Tangkap output summary dari model
7574
+ model_summary = []
7575
+ model.summary(print_fn=lambda x: model_summary.append(x))
7576
+ summary_text = "\n".join(model_summary)
7213
7577
 
7214
- def feature_analysis_dashboard(df, numeric_cols, non_numeric_cols):
7215
- """Dashboard analisis feature yang komprehensif"""
7216
-
7217
- st.header("🔍 Advanced Feature Analysis")
7218
-
7219
- # Feature importance analysis
7220
- st.subheader("🎯 Feature Importance Analysis")
7221
-
7222
- # Multiple methods untuk feature importance
7223
- importance_method = st.selectbox(
7224
- "Pilih Feature Importance Method",
7225
- ["Random Forest", "Permutation Importance"],
7226
- key="feature_importance_method"
7227
- )
7228
-
7229
- target_feature = st.selectbox(
7230
- "Pilih Target untuk Feature Importance",
7231
- numeric_cols,
7232
- key="feature_importance_target"
7233
- )
7234
-
7235
- if st.button("Hitung Feature Importance", key="feature_importance_button"):
7236
- with st.spinner("Menghitung feature importance..."):
7237
- # Implementasi feature importance calculation
7238
- try:
7239
- features = [f for f in numeric_cols if f != target_feature]
7240
-
7241
- X = df[features].fillna(df[features].mean())
7242
- y = df[target_feature]
7243
-
7244
- if importance_method == "Random Forest":
7245
- model = RandomForestRegressor(n_estimators=100, random_state=42)
7246
- model.fit(X, y)
7247
- importances = model.feature_importances_
7248
-
7249
- importance_df = pd.DataFrame({
7250
- 'feature': features,
7251
- 'importance': importances
7252
- }).sort_values('importance', ascending=False)
7253
-
7254
- fig = px.bar(
7255
- importance_df.head(15),
7256
- x='importance',
7257
- y='feature',
7258
- title="Random Forest Feature Importance",
7259
- orientation='h'
7578
+ # Tambahkan CSS styling
7579
+ st.markdown("""
7580
+ <style>
7581
+ .model-summary-box {
7582
+ background-color: #fff; /* Warna gelap seperti terminal */
7583
+ color: #000; /* Warna teks hijau neon */
7584
+ border-radius: 10px;
7585
+ padding: 15px;
7586
+ font-family: 'Courier New', monospace;
7587
+ font-size: 14px;
7588
+ line-height: 1.5;
7589
+ white-space: pre-wrap;
7590
+ box-shadow: 0 0 8px rgba(0,255,179,0.3);
7591
+ border: 1px solid rgba(0,255,179,0.4);
7592
+ overflow-x: auto;
7593
+ }
7594
+ </style>
7595
+ """, unsafe_allow_html=True)
7596
+
7597
+ # Gunakan expander untuk dropdown
7598
+ with st.expander("🧠 Lihat / Sembunyikan Model Summary"):
7599
+ st.markdown(f"<div class='model-summary-box'>{summary_text}</div>", unsafe_allow_html=True)
7600
+
7601
+ # Calculate total parameters
7602
+ total_params = model.count_params()
7603
+ st.info(f"📈 Total Parameters: {total_params:,}")
7604
+
7605
+ # Training section
7606
+ st.subheader("🚀 Pelatihan Model")
7607
+
7608
+ if st.button("🎯 Mulai Pelatihan Deep Learning", type="primary", key="dl_train_button"):
7609
+ start_time = time.time()
7610
+
7611
+ with st.spinner("🧠 Training neural network... Mohon tunggu..."):
7612
+ # Callbacks untuk training lebih cepat
7613
+ callbacks = []
7614
+
7615
+ if use_early_stopping:
7616
+ early_stopping = tf.keras.callbacks.EarlyStopping(
7617
+ monitor=monitor_metric,
7618
+ patience=10,
7619
+ restore_best_weights=True,
7620
+ mode='min' if dl_problem_type == "Regression" else 'max',
7621
+ verbose=1
7260
7622
  )
7261
- st.plotly_chart(fig, use_container_width=True)
7623
+ callbacks.append(early_stopping)
7624
+
7625
+ reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
7626
+ monitor='val_loss',
7627
+ factor=0.5,
7628
+ patience=5,
7629
+ min_lr=0.00001,
7630
+ verbose=1
7631
+ )
7632
+ callbacks.append(reduce_lr)
7633
+
7634
+ # TensorBoard callback (optional)
7635
+ # callbacks.append(tf.keras.callbacks.TensorBoard(log_dir='./logs'))
7636
+
7637
+ # Train model dengan progress bar
7638
+ progress_bar = st.progress(0)
7639
+ status_text = st.empty()
7640
+ time_estimator = st.empty()
7641
+ metrics_display = st.empty()
7262
7642
 
7263
- elif importance_method == "Permutation Importance":
7264
- model = RandomForestRegressor(n_estimators=100, random_state=42)
7265
- model.fit(X, y)
7643
+ class TrainingCallback(tf.keras.callbacks.Callback):
7644
+ def on_epoch_begin(self, epoch, logs=None):
7645
+ self.epoch_start_time = time.time()
7266
7646
 
7267
- perm_importance = permutation_importance(
7268
- model, X, y, n_repeats=5, random_state=42
7269
- )
7647
+ def on_epoch_end(self, epoch, logs=None):
7648
+ progress = (epoch + 1) / epochs
7649
+ progress_bar.progress(min(progress, 1.0))
7650
+
7651
+ # Metrics display
7652
+ if dl_problem_type == "Regression":
7653
+ metrics_str = f"Loss: {logs['loss']:.4f}, Val Loss: {logs['val_loss']:.4f}, MAE: {logs['mae']:.4f}"
7654
+ else:
7655
+ metrics_str = f"Loss: {logs['loss']:.4f}, Val Loss: {logs['val_loss']:.4f}, Acc: {logs['accuracy']:.4f}"
7656
+
7657
+ status_text.text(f"Epoch {epoch+1}/{epochs}")
7658
+ metrics_display.text(f"📊 {metrics_str}")
7659
+
7660
+ # Time estimation
7661
+ elapsed = time.time() - start_time
7662
+ epoch_time = time.time() - self.epoch_start_time
7663
+ remaining = epoch_time * (epochs - epoch - 1)
7664
+
7665
+ time_estimator.text(f"⏱️ Elapsed: {elapsed:.1f}s | Est. remaining: {remaining:.1f}s")
7666
+
7667
+ callbacks.append(TrainingCallback())
7668
+
7669
+ # Train model
7670
+ history = model.fit(
7671
+ train_dataset,
7672
+ epochs=epochs,
7673
+ validation_data=val_dataset,
7674
+ callbacks=callbacks,
7675
+ verbose=0
7676
+ )
7677
+
7678
+ training_time = time.time() - start_time
7679
+ progress_bar.progress(1.0)
7680
+ status_text.text(f"✅ Pelatihan Selesai! Waktu: {training_time:.1f} detik")
7681
+ time_estimator.text("")
7682
+ metrics_display.text("")
7683
+
7684
+ # ==================== EVALUASI DETAIL ====================
7685
+ st.subheader("📈 Hasil Evaluasi Detail")
7686
+
7687
+ # Predictions
7688
+ y_pred = model.predict(X_test_scaled, verbose=0)
7689
+
7690
+ # 1. PERFORMANCE METRICS COMPREHENSIVE
7691
+ st.subheader("🎯 Dashboard Performa Model")
7692
+
7693
+ if dl_problem_type == "Regression":
7694
+ # Regression metrics
7695
+ y_pred_flat = y_pred.flatten()
7696
+ mse = mean_squared_error(y_test, y_pred_flat)
7697
+ mae = mean_absolute_error(y_test, y_pred_flat)
7698
+ r2 = r2_score(y_test, y_pred_flat)
7699
+ rmse = np.sqrt(mse)
7270
7700
 
7271
- importance_df = pd.DataFrame({
7272
- 'feature': features,
7273
- 'importance': perm_importance.importances_mean
7274
- }).sort_values('importance', ascending=False)
7275
-
7276
- fig = px.bar(
7277
- importance_df.head(15),
7278
- x='importance',
7279
- y='feature',
7280
- title="Permutation Feature Importance",
7281
- orientation='h'
7282
- )
7283
- st.plotly_chart(fig, use_container_width=True)
7284
-
7285
- except Exception as e:
7286
- st.error(f"Error dalam feature importance analysis: {str(e)}")
7701
+ # Additional metrics
7702
+ mape = np.mean(np.abs((y_test - y_pred_flat) / np.where(y_test != 0, y_test, 1))) * 100
7703
+ accuracy_percentage = max(0, min(100, (1 - mae / (y_test.max() - y_test.min())) * 100))
7704
+
7705
+ # Display metrics
7706
+ col1, col2, col3, col4 = st.columns(4)
7707
+
7708
+ with col1:
7709
+ st.metric("R² Score", f"{r2:.4f}",
7710
+ delta="Excellent" if r2 > 0.8 else "Good" if r2 > 0.6 else "Needs Improvement")
7711
+ with col2:
7712
+ st.metric("MAE", f"{mae:.4f}")
7713
+ with col3:
7714
+ st.metric("RMSE", f"{rmse:.4f}")
7715
+ with col4:
7716
+ st.metric("MAPE", f"{mape:.2f}%")
7717
+
7718
+ else:
7719
+ # Classification metrics
7720
+ if dl_problem_type == "Binary Classification":
7721
+ y_pred_class = (y_pred > 0.5).astype(int).flatten()
7722
+ else:
7723
+ y_pred_class = np.argmax(y_pred, axis=1)
7724
+
7725
+ accuracy = accuracy_score(y_test, y_pred_class)
7726
+ precision = precision_score(y_test, y_pred_class, average='weighted', zero_division=0)
7727
+ recall = recall_score(y_test, y_pred_class, average='weighted', zero_division=0)
7728
+ f1 = f1_score(y_test, y_pred_class, average='weighted', zero_division=0)
7729
+
7730
+ # Display metrics
7731
+ col1, col2, col3, col4 = st.columns(4)
7732
+
7733
+ with col1:
7734
+ st.metric("Accuracy", f"{accuracy:.4f}",
7735
+ delta="Excellent" if accuracy > 0.9 else "Good" if accuracy > 0.8 else "Needs Improvement")
7736
+ with col2:
7737
+ st.metric("Precision", f"{precision:.4f}")
7738
+ with col3:
7739
+ st.metric("Recall", f"{recall:.4f}")
7740
+ with col4:
7741
+ st.metric("F1-Score", f"{f1:.4f}")
7742
+
7743
+ # 2. VISUALISASI LENGKAP
7744
+ st.subheader("📊 Visualisasi Komprehensif")
7745
+
7746
+ # Training history visualization
7747
+ fig_history = make_subplots(
7748
+ rows=1, cols=2,
7749
+ subplot_titles=('Loss Progression', 'Metrics Progression'),
7750
+ specs=[[{"secondary_y": False}, {"secondary_y": False}]]
7751
+ )
7752
+
7753
+ # Loss plot
7754
+ fig_history.add_trace(
7755
+ go.Scatter(x=list(range(1, len(history.history['loss'])+1)),
7756
+ y=history.history['loss'],
7757
+ name='Training Loss', line=dict(color='blue')),
7758
+ row=1, col=1
7759
+ )
7760
+ fig_history.add_trace(
7761
+ go.Scatter(x=list(range(1, len(history.history['val_loss'])+1)),
7762
+ y=history.history['val_loss'],
7763
+ name='Validation Loss', line=dict(color='red')),
7764
+ row=1, col=1
7765
+ )
7766
+
7767
+ # Metrics plot
7768
+ if dl_problem_type == "Regression":
7769
+ fig_history.add_trace(
7770
+ go.Scatter(x=list(range(1, len(history.history['mae'])+1)),
7771
+ y=history.history['mae'],
7772
+ name='Training MAE', line=dict(color='green')),
7773
+ row=1, col=2
7774
+ )
7775
+ if 'val_mae' in history.history:
7776
+ fig_history.add_trace(
7777
+ go.Scatter(x=list(range(1, len(history.history['val_mae'])+1)),
7778
+ y=history.history['val_mae'],
7779
+ name='Validation MAE', line=dict(color='orange')),
7780
+ row=1, col=2
7781
+ )
7782
+ else:
7783
+ fig_history.add_trace(
7784
+ go.Scatter(x=list(range(1, len(history.history['accuracy'])+1)),
7785
+ y=history.history['accuracy'],
7786
+ name='Training Accuracy', line=dict(color='green')),
7787
+ row=1, col=2
7788
+ )
7789
+ fig_history.add_trace(
7790
+ go.Scatter(x=list(range(1, len(history.history['val_accuracy'])+1)),
7791
+ y=history.history['val_accuracy'],
7792
+ name='Validation Accuracy', line=dict(color='orange')),
7793
+ row=1, col=2
7794
+ )
7795
+
7796
+ fig_history.update_layout(height=400, title_text="Training History")
7797
+ st.plotly_chart(fig_history, use_container_width=True)
7798
+
7799
+ # 3. PREDICTION VISUALIZATION
7800
+ if dl_problem_type == "Regression":
7801
+ # Regression plots
7802
+ col1, col2 = st.columns(2)
7803
+
7804
+ with col1:
7805
+ # Actual vs Predicted
7806
+ fig_actual_pred = px.scatter(
7807
+ x=y_test, y=y_pred_flat,
7808
+ title="Actual vs Predicted",
7809
+ labels={'x': 'Actual', 'y': 'Predicted'},
7810
+ trendline="lowess"
7811
+ )
7812
+ fig_actual_pred.add_trace(
7813
+ go.Scatter(x=[y_test.min(), y_test.max()],
7814
+ y=[y_test.min(), y_test.max()],
7815
+ mode='lines', name='Perfect Prediction',
7816
+ line=dict(color='red', dash='dash'))
7817
+ )
7818
+ st.plotly_chart(fig_actual_pred, use_container_width=True)
7819
+
7820
+ with col2:
7821
+ # Residual plot
7822
+ residuals = y_test - y_pred_flat
7823
+ fig_residual = px.scatter(
7824
+ x=y_pred_flat, y=residuals,
7825
+ title="Residual Plot",
7826
+ labels={'x': 'Predicted', 'y': 'Residuals'},
7827
+ trendline="lowess"
7828
+ )
7829
+ fig_residual.add_hline(y=0, line_dash="dash", line_color="red")
7830
+ st.plotly_chart(fig_residual, use_container_width=True)
7831
+
7832
+ else:
7833
+ # Classification plots
7834
+ col1, col2 = st.columns(2)
7835
+
7836
+ with col1:
7837
+ # Confusion Matrix
7838
+ cm = confusion_matrix(y_test, y_pred_class)
7839
+ fig_cm = px.imshow(
7840
+ cm,
7841
+ text_auto=True,
7842
+ title="Confusion Matrix",
7843
+ color_continuous_scale='Blues',
7844
+ aspect="auto"
7845
+ )
7846
+ st.plotly_chart(fig_cm, use_container_width=True)
7847
+
7848
+ with col2:
7849
+ # Classification report heatmap
7850
+ report = classification_report(y_test, y_pred_class, output_dict=True)
7851
+ report_df = pd.DataFrame(report).transpose().iloc[:-1, :3]
7852
+ fig_report = px.imshow(
7853
+ report_df.values,
7854
+ x=report_df.columns,
7855
+ y=report_df.index,
7856
+ text_auto=".2f",
7857
+ title="Classification Report",
7858
+ color_continuous_scale='Viridis',
7859
+ aspect="auto"
7860
+ )
7861
+ st.plotly_chart(fig_report, use_container_width=True)
7862
+
7863
+ # 4. FEATURE IMPORTANCE ANALYSIS
7864
+ st.subheader("🔍 Analisis Feature Importance")
7865
+
7866
+ try:
7867
+ # Simplified feature importance using permutation
7868
+ @st.cache_data
7869
+ def calculate_feature_importance(model, X_test_scaled, y_test, feature_names, problem_type):
7870
+ baseline_score = model.evaluate(X_test_scaled, y_test, verbose=0)
7871
+ baseline_loss = baseline_score[0] if problem_type == "Regression" else 1 - baseline_score[1]
7872
+
7873
+ importance_scores = []
7874
+ for i in range(len(feature_names)):
7875
+ X_permuted = X_test_scaled.copy()
7876
+ np.random.shuffle(X_permuted[:, i])
7877
+ permuted_score = model.evaluate(X_permuted, y_test, verbose=0)
7878
+ permuted_loss = permuted_score[0] if problem_type == "Regression" else 1 - permuted_score[1]
7879
+ importance = max(0, baseline_loss - permuted_loss)
7880
+ importance_scores.append(importance)
7881
+
7882
+ return pd.DataFrame({
7883
+ 'Feature': feature_names,
7884
+ 'Importance': importance_scores
7885
+ }).sort_values('Importance', ascending=False)
7886
+
7887
+ feature_importance_df = calculate_feature_importance(
7888
+ model, X_test_scaled, y_test, dl_features, dl_problem_type
7889
+ )
7890
+
7891
+ col1, col2 = st.columns(2)
7892
+
7893
+ with col1:
7894
+ fig_importance = px.bar(
7895
+ feature_importance_df,
7896
+ x='Importance',
7897
+ y='Feature',
7898
+ orientation='h',
7899
+ title="Feature Importance",
7900
+ color='Importance',
7901
+ color_continuous_scale='Viridis'
7902
+ )
7903
+ st.plotly_chart(fig_importance, use_container_width=True)
7904
+
7905
+ with col2:
7906
+ fig_importance_pie = px.pie(
7907
+ feature_importance_df,
7908
+ values='Importance',
7909
+ names='Feature',
7910
+ title="Feature Importance Distribution"
7911
+ )
7912
+ st.plotly_chart(fig_importance_pie, use_container_width=True)
7913
+
7914
+ except Exception as e:
7915
+ st.warning(f"⚠️ Feature importance calculation skipped: {str(e)}")
7916
+
7917
+ # 5. MODEL PERFORMANCE GAUGE
7918
+ st.subheader("📈 Performance Summary")
7919
+
7920
+ if dl_problem_type == "Regression":
7921
+ performance_score = min(100, max(0, (r2 + (1 - mae/y_test.std())) * 50))
7922
+ performance_level = "Sangat Baik" if performance_score > 85 else \
7923
+ "Baik" if performance_score > 70 else \
7924
+ "Cukup" if performance_score > 60 else "Perlu Improvement"
7925
+ else:
7926
+ performance_score = accuracy * 100
7927
+ performance_level = "Sangat Baik" if performance_score > 90 else \
7928
+ "Baik" if performance_score > 80 else \
7929
+ "Cukup" if performance_score > 70 else "Perlu Improvement"
7930
+
7931
+ # Gauge chart
7932
+ fig_gauge = go.Figure(go.Indicator(
7933
+ mode = "gauge+number+delta",
7934
+ value = performance_score,
7935
+ domain = {'x': [0, 1], 'y': [0, 1]},
7936
+ title = {'text': f"Model Performance: {performance_level}"},
7937
+ gauge = {
7938
+ 'axis': {'range': [None, 100]},
7939
+ 'bar': {'color': "darkblue"},
7940
+ 'steps': [
7941
+ {'range': [0, 60], 'color': "red"},
7942
+ {'range': [60, 75], 'color': "yellow"},
7943
+ {'range': [75, 90], 'color': "lightgreen"},
7944
+ {'range': [90, 100], 'color': "green"}],
7945
+ 'threshold': {
7946
+ 'line': {'color': "red", 'width': 4},
7947
+ 'thickness': 0.75,
7948
+ 'value': 90}}
7949
+ ))
7950
+ st.plotly_chart(fig_gauge, use_container_width=True)
7951
+
7952
+ # 6. DOWNLOAD DAN EXPORT MODEL
7953
+ st.subheader("💾 Export Model")
7954
+
7955
+ col1, col2 = st.columns(2)
7956
+
7957
+ with col1:
7958
+ # Save model
7959
+ if st.button("💾 Save TensorFlow Model"):
7960
+ model.save('saved_model.h5')
7961
+ with open('saved_model.h5', 'rb') as f:
7962
+ st.download_button(
7963
+ label="📥 Download Model",
7964
+ data=f,
7965
+ file_name="deep_learning_model.h5",
7966
+ mime="application/octet-stream"
7967
+ )
7968
+
7969
+ with col2:
7970
+ # Export predictions
7971
+ predictions_df = pd.DataFrame({
7972
+ 'Actual': y_test,
7973
+ 'Predicted': y_pred.flatten() if dl_problem_type == "Regression" else y_pred_class
7974
+ })
7975
+ csv = predictions_df.to_csv(index=False)
7976
+ st.download_button(
7977
+ label="📥 Download Predictions",
7978
+ data=csv,
7979
+ file_name="model_predictions.csv",
7980
+ mime="text/csv"
7981
+ )
7982
+
7983
+ # 7. RECOMMENDATIONS AND INSIGHTS
7984
+ st.subheader("💡 Insights & Rekomendasi")
7985
+
7986
+ # Training insights
7987
+ final_epoch = len(history.history['loss'])
7988
+ final_loss = history.history['loss'][-1]
7989
+ final_val_loss = history.history['val_loss'][-1]
7990
+
7991
+ col1, col2, col3 = st.columns(3)
7992
+ with col1:
7993
+ st.metric("Final Training Loss", f"{final_loss:.4f}")
7994
+ with col2:
7995
+ st.metric("Final Validation Loss", f"{final_val_loss:.4f}")
7996
+ with col3:
7997
+ st.metric("Training Time", f"{training_time:.1f}s")
7998
+
7999
+ # Recommendations based on performance
8000
+ st.info("""
8001
+ **🎯 Rekomendasi Improvement:**
8002
+ - **Data Quality**: Periksa missing values dan outliers
8003
+ - **Feature Engineering**: Tambahkan feature yang lebih relevan
8004
+ - **Hyperparameter Tuning**: Eksperimen dengan architecture berbeda
8005
+ - **Regularization**: Adjust dropout dan L2 regularization
8006
+ - **Learning Rate**: Coba learning rate scheduling
8007
+ """)
8008
+
8009
+ # Performance tips
8010
+ if performance_score < 70:
8011
+ st.warning("""
8012
+ **⚠️ Area Improvement:**
8013
+ - Pertimbangkan feature selection yang lebih baik
8014
+ - Coba model architecture yang lebih dalam/lebar
8015
+ - Gunakan lebih banyak data training
8016
+ - Eksperimen dengan different optimizers
8017
+ """)
8018
+ else:
8019
+ st.success("""
8020
+ **✅ Performa Baik!**
8021
+ - Model sudah menunjukkan hasil yang promising
8022
+ - Pertimbangkan deployment untuk penggunaan real-time
8023
+ - Monitor model performance secara berkala
8024
+ """)
8025
+
8026
+ except Exception as e:
8027
+ st.error(f"❌ Error dalam DL analysis: {str(e)}")
8028
+ st.info("""
8029
+ 💡 Tips Troubleshooting:
8030
+ - Pastikan dataset cukup besar (>100 samples)
8031
+ - Gunakan mode kecepatan lebih tinggi untuk dataset besar
8032
+ - Kurangi jumlah features jika memory error
8033
+ - Pastikan target variable sesuai dengan problem type
8034
+ - Coba learning rate yang lebih kecil
8035
+ """)
8036
+
8037
+ # Tambahkan fungsi utility jika diperlukan
8038
+ def validate_tensorflow_installation():
8039
+ """Validate TensorFlow installation"""
8040
+ try:
8041
+ import tensorflow as tf
8042
+ version = tf.__version__
8043
+ gpu_available = tf.config.list_physical_devices('GPU')
8044
+ return True, version, len(gpu_available) > 0
8045
+ except ImportError:
8046
+ return False, None, False
8047
+
8048
+ def model_comparison_analysis(df, numeric_cols, non_numeric_cols):
8049
+ """Analisis komparatif data yang komprehensif tanpa model machine learning"""
8050
+
8051
+ st.header("📊 Advanced Data Analysis Dashboard")
8052
+
8053
+ # Informasi dataset
8054
+ st.subheader("📋 Dataset Overview")
8055
+ col1, col2, col3, col4 = st.columns(4)
8056
+ with col1:
8057
+ st.metric("Total Samples", f"{len(df):,}")
8058
+ with col2:
8059
+ st.metric("Features", f"{len(numeric_cols) + len(non_numeric_cols):,}")
8060
+ with col3:
8061
+ st.metric("Numeric", f"{len(numeric_cols):,}")
8062
+ with col4:
8063
+ st.metric("Categorical", f"{len(non_numeric_cols):,}")
8064
+
8065
+ # Configuration section
8066
+ st.subheader("⚙️ Analysis Configuration")
8067
+
8068
+ col1, col2 = st.columns(2)
8069
+
8070
+ with col1:
8071
+ # Target selection untuk analisis
8072
+ target_variable = st.selectbox(
8073
+ "dwibaktindev AI",
8074
+ numeric_cols + non_numeric_cols,
8075
+ key="analysis_target"
8076
+ )
8077
+
8078
+ # Analysis type
8079
+ analysis_type = st.selectbox(
8080
+ "Alisa AI",
8081
+ ["Descriptive Statistics", "Correlation Analysis", "Distribution Analysis",
8082
+ "Relationship Analysis", "Comparative Analysis"],
8083
+ key="analysis_type"
8084
+ )
8085
+
8086
+ with col2:
8087
+ # Feature selection
8088
+ available_features = [f for f in numeric_cols + non_numeric_cols if f != target_variable]
8089
+ selected_features = st.multiselect(
8090
+ "Sasha AI",
8091
+ available_features,
8092
+ default=available_features[:min(10, len(available_features))],
8093
+ key="analysis_features"
8094
+ )
8095
+
8096
+ # Sample size for visualization
8097
+ sample_size = st.slider("Sample Size for Visualization", 100, len(df),
8098
+ min(1000, len(df)), 100, key="sample_size")
8099
+
8100
+ if st.button("🚀 Start Model AI", type="primary", key="start_analysis"):
8101
+ if not target_variable or not selected_features:
8102
+ st.error("❌ Please select target variable and features")
8103
+ return
8104
+
8105
+ try:
8106
+ # Run the analysis for the selected type
8107
+ with st.spinner("🔄 Performing analysis..."):
8108
+ if analysis_type == "Descriptive Statistics":
8109
+ perform_descriptive_analysis(df, target_variable, selected_features)
8110
+
8111
+ elif analysis_type == "Correlation Analysis":
8112
+ perform_correlation_analysis(df, target_variable, selected_features)
8113
+
8114
+ elif analysis_type == "Distribution Analysis":
8115
+ perform_distribution_analysis(df, target_variable, selected_features, sample_size)
8116
+
8117
+ elif analysis_type == "Relationship Analysis":
8118
+ perform_relationship_analysis(df, target_variable, selected_features, sample_size)
8119
+
8120
+ elif analysis_type == "Comparative Analysis":
8121
+ perform_comparative_analysis(df, target_variable, selected_features)
8122
+
8123
+ st.success("✅ Analysis completed!")
8124
+
8125
+ except Exception as e:
8126
+ st.error(f"❌ Error in data analysis: {str(e)}")
8127
+
8128
+ def perform_descriptive_analysis(df, target, features):
8129
+ """Analisis statistik deskriptif"""
8130
+ import pandas as pd
8131
+ import numpy as np
8132
+
8133
+ st.subheader("📊 Descriptive Statistics")
8134
+
8135
+ # Statistics for the target variable
8136
+ st.write(f"### Target Variable: `{target}`")
8137
+
8138
+ if pd.api.types.is_numeric_dtype(df[target]):
8139
+ col1, col2, col3, col4 = st.columns(4)
8140
+
8141
+ with col1:
8142
+ st.metric("Mean", f"{df[target].mean():.2f}")
8143
+ with col2:
8144
+ st.metric("Median", f"{df[target].median():.2f}")
8145
+ with col3:
8146
+ st.metric("Std Dev", f"{df[target].std():.2f}")
8147
+ with col4:
8148
+ st.metric("Missing", f"{df[target].isnull().sum()}")
8149
+
8150
+ # Detailed statistics
8151
+ st.dataframe(df[target].describe(), use_container_width=True)
8152
+
8153
+ else:
8154
+ col1, col2, col3 = st.columns(3)
8155
+
8156
+ with col1:
8157
+ st.metric("Unique Values", df[target].nunique())
8158
+ with col2:
8159
+ st.metric("Most Frequent", df[target].mode().iloc[0] if not df[target].mode().empty else "N/A")
8160
+ with col3:
8161
+ st.metric("Missing", f"{df[target].isnull().sum()}")
8162
+
8163
+ # Value counts
8164
+ value_counts = df[target].value_counts()
8165
+ st.write("**Value Distribution:**")
8166
+ st.dataframe(value_counts, use_container_width=True)
8167
+
8168
+ # Statistics for numeric features
8169
+ numeric_features = [f for f in features if pd.api.types.is_numeric_dtype(df[f])]
8170
+ if numeric_features:
8171
+ st.write("### Numeric Features Summary")
8172
+ st.dataframe(df[numeric_features].describe(), use_container_width=True)
8173
+
8174
+ # Statistics for categorical features
8175
+ categorical_features = [f for f in features if not pd.api.types.is_numeric_dtype(df[f])]
8176
+ if categorical_features:
8177
+ st.write("### Categorical Features Summary")
8178
+ for feature in categorical_features:
8179
+ with st.expander(f"`{feature}`"):
8180
+ value_counts = df[feature].value_counts()
8181
+ st.dataframe(value_counts, use_container_width=True)
8182
+
8183
+ def perform_correlation_analysis(df, target, features):
8184
+ """Analisis korelasi"""
8185
+ import pandas as pd
8186
+ import numpy as np
8187
+ import plotly.express as px
8188
+ import plotly.graph_objects as go
8189
+
8190
+ st.subheader("🔗 Correlation Analysis")
8191
+
8192
+ # Keep only numeric features for correlation
8193
+ numeric_features = [f for f in features if pd.api.types.is_numeric_dtype(df[f])]
8194
+
8195
+ if pd.api.types.is_numeric_dtype(df[target]):
8196
+ numeric_features.append(target)
8197
+
8198
+ if len(numeric_features) < 2:
8199
+ st.warning("⚠️ Need at least 2 numeric features for correlation analysis")
8200
+ return
8201
+
8202
+ correlation_df = df[numeric_features].corr()
8203
+
8204
+ # Correlation heatmap
8205
+ st.write("### Correlation Heatmap")
8206
+ fig = px.imshow(correlation_df,
8207
+ title="Feature Correlation Heatmap",
8208
+ color_continuous_scale="RdBu_r",
8209
+ aspect="auto")
8210
+ st.plotly_chart(fig, use_container_width=True)
8211
+
8212
+ # Correlation with the target
8213
+ if pd.api.types.is_numeric_dtype(df[target]):
8214
+ st.write("### Correlation with Target")
8215
+ target_corr = correlation_df[target].drop(target).sort_values(ascending=False)
8216
+
8217
+ col1, col2 = st.columns(2)
8218
+
8219
+ with col1:
8220
+ fig = px.bar(x=target_corr.values, y=target_corr.index,
8221
+ orientation='h',
8222
+ title=f"Correlation with {target}",
8223
+ labels={'x': 'Correlation', 'y': 'Feature'})
8224
+ st.plotly_chart(fig, use_container_width=True)
8225
+
8226
+ with col2:
8227
+ # Correlation table
8228
+ st.dataframe(target_corr.round(4), use_container_width=True)
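+ # Editor's note: DataFrame.corr() above uses Pearson (linear) correlation by
+ # default. For monotonic but non-linear relationships, a Spearman variant is
+ # a one-line change (sketch, same variables as in the function above):
+ # spearman_df = df[numeric_features].corr(method='spearman')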
8229
+
8230
+ def perform_distribution_analysis(df, target, features, sample_size):
8231
+ """Analisis distribusi"""
8232
+ import pandas as pd
8233
+ import plotly.express as px
8234
+ import plotly.graph_objects as go
8235
+ from plotly.subplots import make_subplots
8236
+
8237
+ st.subheader("📈 Distribution Analysis")
8238
+
8239
+ # Sample the data so visualizations stay responsive
8240
+ sample_df = df.sample(min(sample_size, len(df)), random_state=42)
8241
+
8242
+ # Target variable distribution
8243
+ st.write(f"### Target Variable Distribution: `{target}`")
8244
+
8245
+ if pd.api.types.is_numeric_dtype(df[target]):
8246
+ col1, col2 = st.columns(2)
8247
+
8248
+ with col1:
8249
+ # Histogram
8250
+ fig = px.histogram(sample_df, x=target,  # plot the sample, not the full frame
8251
+ title=f"Distribution of {target}",
8252
+ nbins=50)
8253
+ st.plotly_chart(fig, use_container_width=True)
8254
+
8255
+ with col2:
8256
+ # Box plot
8257
+ fig = px.box(sample_df, y=target,
8258
+ title=f"Box Plot of {target}")
8259
+ st.plotly_chart(fig, use_container_width=True)
8260
+ else:
8261
+ # For categorical variables
8262
+ value_counts = df[target].value_counts()
8263
+ fig = px.pie(values=value_counts.values,
8264
+ names=value_counts.index,
8265
+ title=f"Distribution of {target}")
8266
+ st.plotly_chart(fig, use_container_width=True)
8267
+
8268
+ # Numeric feature distributions
8269
+ numeric_features = [f for f in features if pd.api.types.is_numeric_dtype(df[f])]
8270
+ if numeric_features:
8271
+ st.write("### Numeric Features Distribution")
8272
+
8273
+ # Choose features to display
8274
+ selected_numeric = st.multiselect(
8275
+ "Select numeric features to visualize:",
8276
+ numeric_features,
8277
+ default=numeric_features[:min(3, len(numeric_features))]
8278
+ )
8279
+
8280
+ if selected_numeric:
8281
+ # One histogram subplot per selected feature
8282
+ fig = make_subplots(rows=len(selected_numeric), cols=1,
8283
+ subplot_titles=selected_numeric)
8284
+
8285
+ for i, feature in enumerate(selected_numeric, 1):
8286
+ fig.add_trace(
8287
+ go.Histogram(x=sample_df[feature], name=feature, nbinsx=30),
8288
+ row=i, col=1
8289
+ )
8290
+
8291
+ fig.update_layout(height=300*len(selected_numeric),
8292
+ title_text="Distribution of Numeric Features")
8293
+ st.plotly_chart(fig, use_container_width=True)
8294
+
8295
+ # Categorical feature distributions
8296
+ categorical_features = [f for f in features if not pd.api.types.is_numeric_dtype(df[f])]
8297
+ if categorical_features:
8298
+ st.write("### Categorical Features Distribution")
8299
+
8300
+ selected_categorical = st.multiselect(
8301
+ "Select categorical features to visualize:",
8302
+ categorical_features,
8303
+ default=categorical_features[:min(2, len(categorical_features))]
8304
+ )
8305
+
8306
+ if selected_categorical:
8307
+ for feature in selected_categorical:
8308
+ value_counts = df[feature].value_counts().head(10)  # top 10 only
8309
+ fig = px.bar(x=value_counts.values, y=value_counts.index,
8310
+ orientation='h',
8311
+ title=f"Top 10 Values in {feature}")
8312
+ st.plotly_chart(fig, use_container_width=True)
8313
+
8314
+ def perform_relationship_analysis(df, target, features, sample_size):
8315
+ """Analisis hubungan antara variabel"""
8316
+ import pandas as pd
8317
+ import plotly.express as px
8318
+ import plotly.graph_objects as go
8319
+
8320
+ st.subheader("🔄 Relationship Analysis")
8321
+
8322
+ sample_df = df.sample(min(sample_size, len(df)), random_state=42)
8323
+
8324
+ # Numeric features for scatter plots
8325
+ numeric_features = [f for f in features if pd.api.types.is_numeric_dtype(df[f])]
8326
+
8327
+ if pd.api.types.is_numeric_dtype(df[target]) and len(numeric_features) >= 1:
8328
+ st.write("### Scatter Plots with Target")
8329
+
8330
+ col1, col2 = st.columns(2)
8331
+
8332
+ with col1:
8333
+ x_feature = st.selectbox("X-axis feature:", numeric_features, key="scatter_x")
8334
+
8335
+ with col2:
8336
+ color_feature = st.selectbox("Color by (optional):",
8337
+ [None] + [f for f in features if f != x_feature],
8338
+ key="scatter_color")
8339
+
8340
+ if x_feature:
8341
+ fig = px.scatter(sample_df, x=x_feature, y=target,
8342
+ color=color_feature if color_feature else None,
8343
+ title=f"{target} vs {x_feature}",
8344
+ opacity=0.6)
8345
+ st.plotly_chart(fig, use_container_width=True)
8346
+
8347
+ # Pair plot for multiple numeric features
8348
+ if len(numeric_features) >= 2:
8349
+ st.write("### Pairwise Relationships")
8350
+
8351
+ selected_for_pairplot = st.multiselect(
8352
+ "Select features for pair plot:",
8353
+ numeric_features + ([target] if pd.api.types.is_numeric_dtype(df[target]) else []),
8354
+ default=(numeric_features + ([target] if pd.api.types.is_numeric_dtype(df[target]) else []))[:min(4, len(numeric_features) + 1)]  # keep the default inside the options list
8355
+ )
8356
+
8357
+ if len(selected_for_pairplot) >= 2:
8358
+ fig = px.scatter_matrix(sample_df[selected_for_pairplot],
8359
+ dimensions=selected_for_pairplot,
8360
+ height=800)
8361
+ st.plotly_chart(fig, use_container_width=True)
8362
+
8363
+ # Categorical vs. numeric relationship analysis
8364
+ categorical_features = [f for f in features if not pd.api.types.is_numeric_dtype(df[f])]
8365
+ if categorical_features and pd.api.types.is_numeric_dtype(df[target]):
8366
+ st.write("### Categorical vs Numerical Analysis")
8367
+
8368
+ cat_feature = st.selectbox("Select categorical feature:", categorical_features)
8369
+ num_feature = st.selectbox("Select numerical feature:",
8370
+ [target] + numeric_features)
8371
+
8372
+ if cat_feature and num_feature:
8373
+ col1, col2 = st.columns(2)
8374
+
8375
+ with col1:
8376
+ # Box plot
8377
+ fig = px.box(df, x=cat_feature, y=num_feature,
8378
+ title=f"{num_feature} by {cat_feature}")
8379
+ st.plotly_chart(fig, use_container_width=True)
8380
+
8381
+ with col2:
8382
+ # Violin plot
8383
+ fig = px.violin(df, x=cat_feature, y=num_feature,
8384
+ title=f"Distribution of {num_feature} by {cat_feature}")
8385
+ st.plotly_chart(fig, use_container_width=True)
8386
+
8387
+ def perform_comparative_analysis(df, target, features):
8388
+ """Analisis komparatif"""
8389
+ import pandas as pd
8390
+ import plotly.express as px
8391
+ import plotly.graph_objects as go
8392
+
8393
+ st.subheader("⚖️ Comparative Analysis")
8394
+
8395
+ # Group by analysis
8396
+ st.write("### Group-wise Analysis")
8397
+
8398
+ group_feature = st.selectbox(
8399
+ "Group by feature:",
8400
+ [None] + [f for f in features if not pd.api.types.is_numeric_dtype(df[f])]
8401
+ )
8402
+
8403
+ if group_feature:
8404
+ if pd.api.types.is_numeric_dtype(df[target]):
8405
+ # For a numeric target
8406
+ summary = df.groupby(group_feature)[target].agg(['mean', 'median', 'std', 'count']).round(2)
8407
+ st.dataframe(summary, use_container_width=True)
8408
+
8409
+ # Visualization
8410
+ col1, col2 = st.columns(2)
8411
+
8412
+ with col1:
8413
+ fig = px.bar(summary.reset_index(), x=group_feature, y='mean',
8414
+ title=f"Average {target} by {group_feature}")
8415
+ st.plotly_chart(fig, use_container_width=True)
8416
+
8417
+ with col2:
8418
+ fig = px.box(df, x=group_feature, y=target,
8419
+ title=f"Distribution of {target} by {group_feature}")
8420
+ st.plotly_chart(fig, use_container_width=True)
8421
+
8422
+ else:
8423
+ # For a categorical target
8424
+ cross_tab = pd.crosstab(df[group_feature], df[target], normalize='index') * 100
8425
+ st.write("**Percentage Distribution:**")
8426
+ st.dataframe(cross_tab.round(2), use_container_width=True)
8427
+
8428
+ # Stacked bar chart
8429
+ fig = px.bar(cross_tab.reset_index(),
8430
+ x=group_feature,
8431
+ y=cross_tab.columns.tolist(),
8432
+ title=f"Distribution of {target} by {group_feature}",
8433
+ barmode='stack')
8434
+ st.plotly_chart(fig, use_container_width=True)
8435
+
8436
+ # Time series analysis (if a datetime column exists)
8437
+ datetime_columns = df.select_dtypes(include=['datetime64']).columns.tolist()
8438
+ if datetime_columns and pd.api.types.is_numeric_dtype(df[target]):
8439
+ st.write("### Time Series Analysis")
8440
+
8441
+ date_col = st.selectbox("Select date column:", datetime_columns)
8442
+
8443
+ if date_col:
8444
+ # Aggregate over time
8445
+ df_sorted = df.sort_values(date_col)
8446
+
8447
+ # Choose the aggregation frequency
8448
+ freq = st.selectbox("Aggregation frequency:",
8449
+ ['D', 'W', 'M', 'Q'],
8450
+ format_func=lambda x: {'D': 'Daily', 'W': 'Weekly',
8451
+ 'M': 'Monthly', 'Q': 'Quarterly'}[x])
8452
+
8453
+ time_series = df_sorted.set_index(date_col)[target].resample(freq).mean()
8454
+
8455
+ fig = px.line(time_series.reset_index(),
8456
+ x=date_col, y=target,
8457
+ title=f"{target} Over Time")
8458
+ st.plotly_chart(fig, use_container_width=True)
8459
+
8460
+ def feature_analysis_dashboard(df, numeric_cols, non_numeric_cols):
8461
+ """Dashboard analisis feature yang komprehensif dengan optimasi dataset besar"""
8462
+
8463
+ st.header("🔍 Advanced Feature Analysis")
8464
+
8465
+ # Dataset information
8466
+ st.subheader("📊 Dataset Overview")
8467
+ col1, col2, col3 = st.columns(3)
8468
+ with col1:
8469
+ st.metric("Total Features", f"{len(numeric_cols) + len(non_numeric_cols):,}")
8470
+ with col2:
8471
+ st.metric("Numeric Features", f"{len(numeric_cols):,}")
8472
+ with col3:
8473
+ st.metric("Categorical Features", f"{len(non_numeric_cols):,}")
8474
+
8475
+ # Memory optimization
8476
+ if st.checkbox("Optimize Memory Usage", value=True, key="feature_optimize_mem"):
8477
+ df = optimize_memory_usage_feature(df)
8478
+ st.success("✅ Memory usage optimized!")
8479
+
8480
+ # Performance configuration
8481
+ st.subheader("⚡ Performance Configuration")
8482
+
8483
+ col1, col2 = st.columns(2)
8484
+
8485
+ with col1:
8486
+ # Sampling options for large datasets
8487
+ use_sampling = st.checkbox("Use Sampling for Large Dataset", value=len(df) > 10000,
8488
+ key="feature_use_sampling")
8489
+
8490
+ if use_sampling:
8491
+ sample_size = st.slider(
8492
+ "Sample Size",
8493
+ min_value=1000,
8494
+ max_value=min(50000, len(df)),
8495
+ value=min(20000, len(df)),
8496
+ step=1000,
8497
+ key="feature_sample_size"
8498
+ )
8499
+ st.info(f"🎯 Using {sample_size} samples from {len(df):,} total records")
8500
+
8501
+ # Processing speed control
8502
+ processing_speed = st.select_slider(
8503
+ "Processing Speed",
8504
+ options=["Fast", "Balanced", "Comprehensive"],
8505
+ value="Balanced",
8506
+ key="feature_processing_speed"
8507
+ )
8508
+
8509
+ # Configure parameters based on speed selection
8510
+ speed_config = {
8511
+ "Fast": {"n_estimators": 50, "n_repeats": 3, "max_features": 20},
8512
+ "Balanced": {"n_estimators": 100, "n_repeats": 5, "max_features": 30},
8513
+ "Comprehensive": {"n_estimators": 200, "n_repeats": 10, "max_features": 50}
8514
+ }
8515
+ config = speed_config[processing_speed]
8516
+
8517
+ with col2:
8518
+ # Advanced options
8519
+ st.write("**Advanced Options:**")
8520
+
8521
+ max_features_display = st.slider(
8522
+ "Max Features to Display",
8523
+ 5, 50, 15,
8524
+ key="max_features_display"
8525
+ )
8526
+
8527
+ remove_high_corr = st.checkbox(
8528
+ "Remove Highly Correlated Features",
8529
+ value=True,
8530
+ key="feature_remove_corr"
8531
+ )
8532
+
8533
+ correlation_threshold = st.slider(
8534
+ "Correlation Threshold",
8535
+ 0.7, 0.99, 0.9, 0.01,
8536
+ key="feature_corr_threshold"
8537
+ )
8538
+
8539
+ random_state = st.number_input(
8540
+ "Random State",
8541
+ value=42,
8542
+ key="feature_random_state"
8543
+ )
8544
+
8545
+ # Feature importance analysis
8546
+ st.subheader("🎯 Feature Importance Analysis")
8547
+
8548
+ col1, col2 = st.columns(2)
8549
+
8550
+ with col1:
8551
+ # Multiple feature-importance methods
8552
+ importance_method = st.selectbox(
8553
+ "Pilih Feature Importance Method",
8554
+ ["Random Forest", "Permutation Importance", "Mutual Information", "All Methods"],
8555
+ key="feature_importance_method"
8556
+ )
8557
+
8558
+ # Problem type selection
8559
+ problem_type = st.radio(
8560
+ "Problem Type",
8561
+ ["Regression", "Classification", "Auto Detect"],
8562
+ key="feature_problem_type"
8563
+ )
8564
+
8565
+ with col2:
8566
+ target_feature = st.selectbox(
8567
+ "Pilih Target untuk Feature Importance",
8568
+ numeric_cols + non_numeric_cols,
8569
+ key="feature_importance_target"
8570
+ )
8571
+
8572
+ # Feature selection
8573
+ available_features = [f for f in numeric_cols + non_numeric_cols if f != target_feature]
8574
+
8575
+ if len(available_features) > config["max_features"]:
8576
+ st.warning(f"⚠️ Showing first {config['max_features']} features. Use comprehensive mode for more.")
8577
+ available_features = available_features[:config["max_features"]]
8578
+
8579
+ selected_features = st.multiselect(
8580
+ "Pilih Features untuk Analysis",
8581
+ available_features,
8582
+ default=available_features[:min(10, len(available_features))],
8583
+ key="feature_analysis_features"
8584
+ )
8585
+
8586
+ if not target_feature or not selected_features:
8587
+ st.warning("📝 Pilih target feature dan features untuk analysis")
8588
+ return
8589
+
8590
+ # Progress tracking
8591
+ progress_bar = st.progress(0)
8592
+ status_text = st.empty()
8593
+
8594
+ if st.button("🚀 Hitung Feature Importance", key="feature_importance_button"):
8595
+ try:
8596
+ # Apply sampling if needed
8597
+ if use_sampling and len(df) > sample_size:
8598
+ df_analysis = df.sample(n=sample_size, random_state=random_state)
8599
+ st.info(f"🔬 Analyzing {sample_size:,} sampled records")
8600
+ else:
8601
+ df_analysis = df
8602
+
8603
+ status_text.text("🔄 Preparing data...")
8604
+ progress_bar.progress(10)
8605
+
8606
+ # Prepare features and target
8607
+ X = df_analysis[selected_features].copy()
8608
+ y = df_analysis[target_feature]
8609
+
8610
+ # Auto-detect problem type
8611
+ if problem_type == "Auto Detect":
8612
+ if target_feature in numeric_cols:
8613
+ problem_type_detected = "Regression"
8614
+ else:
8615
+ problem_type_detected = "Classification"
8616
+ st.info(f"🔍 Auto-detected: {problem_type_detected}")
8617
+ else:
8618
+ problem_type_detected = problem_type
8619
+
8620
+ progress_bar.progress(20)
8621
+
8622
+ # Optimized preprocessing
8623
+ status_text.text("🔧 Preprocessing features...")
8624
+ X_processed, feature_names = preprocess_features_optimized(
8625
+ X, numeric_cols, non_numeric_cols, remove_high_corr, correlation_threshold
8626
+ )
8627
+
8628
+ progress_bar.progress(40)
8629
+
8630
+ # Encode the target variable for classification
8631
+ le_target = None
8632
+ if problem_type_detected == "Classification" and y.dtype == 'object':
8633
+ le_target = LabelEncoder()
8634
+ y = le_target.fit_transform(y.astype(str))
8635
+ st.info(f"🎯 Target encoded: {len(le_target.classes_)} classes")
8636
+
8637
+ progress_bar.progress(50)
8638
+
8639
+ # Handle missing values
8640
+ X_processed = handle_missing_values_optimized(X_processed)
8641
+
8642
+ progress_bar.progress(60)
8643
+
8644
+ # Calculate feature importance with the selected method(s)
8645
+ status_text.text("📊 Calculating feature importance...")
8646
+
8647
+ results = {}
8648
+
8649
+ if importance_method in ["Random Forest", "All Methods"]:
8650
+ results["Random Forest"] = calculate_rf_importance(
8651
+ X_processed, y, problem_type_detected, config, random_state
8652
+ )
8653
+ progress_bar.progress(70)
8654
+
8655
+ if importance_method in ["Permutation Importance", "All Methods"]:
8656
+ results["Permutation"] = calculate_permutation_importance(
8657
+ X_processed, y, problem_type_detected, config, random_state
8658
+ )
8659
+ progress_bar.progress(80)
8660
+
8661
+ if importance_method in ["Mutual Information", "All Methods"]:
8662
+ results["Mutual Info"] = calculate_mutual_info(
8663
+ X_processed, y, problem_type_detected
8664
+ )
8665
+ progress_bar.progress(90)
8666
+
8667
+ progress_bar.progress(95)
8668
+
8669
+ # Display results
8670
+ status_text.text("📈 Displaying results...")
8671
+ display_feature_importance_results(
8672
+ results, feature_names, max_features_display, problem_type_detected
8673
+ )
8674
+
8675
+ progress_bar.progress(100)
8676
+ status_text.text("✅ Analysis completed!")
8677
+
8678
+ # Additional insights
8679
+ show_feature_analysis_insights(results, X_processed, y, problem_type_detected)
8680
+
8681
+ except Exception as e:
8682
+ st.error(f"❌ Error dalam feature importance analysis: {str(e)}")
8683
+ st.info("💡 Tips: Coba kurangi jumlah features, gunakan sampling, atau pilih mode 'Fast'")
8684
+
8685
+ def optimize_memory_usage_feature(df):
8686
+ """Optimize memory usage for feature analysis"""
8687
+ start_mem = df.memory_usage(deep=True).sum() / 1024**2
8688
+
8689
+ for col in df.columns:
8690
+ col_type = df[col].dtype
8691
+
8692
+ if col_type == 'object':
8693
+ if df[col].nunique() / len(df) < 0.5: # Jika cardinality tidak terlalu tinggi
8694
+ df[col] = df[col].astype('category')
8695
+ elif col_type in ['int64', 'int32']:
8696
+ c_min = df[col].min()
8697
+ c_max = df[col].max()
8698
+ if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(np.int8).max:
8699
+ df[col] = df[col].astype(np.int8)
8700
+ elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(np.int16).max:
8701
+ df[col] = df[col].astype(np.int16)
8702
+ elif c_min > np.iinfo(np.int32).min and c_max < np.iinfo(np.int32).max:
8703
+ df[col] = df[col].astype(np.int32)
8704
+ elif col_type in ['float64', 'float32']:
8705
+ c_min = df[col].min()
8706
+ c_max = df[col].max()
8707
+ if c_min > np.finfo(np.float16).min and c_max < np.finfo(np.float16).max:
8708
+ df[col] = df[col].astype(np.float16)
8709
+ elif c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max:
8710
+ df[col] = df[col].astype(np.float32)
8711
+
8712
+ end_mem = df.memory_usage(deep=True).sum() / 1024**2
8713
+ st.success(f"💾 Memory reduced: {start_mem:.2f}MB → {end_mem:.2f}MB ({((start_mem - end_mem) / start_mem * 100):.1f}% reduction)")
8714
+
8715
+ return df
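+ # Editor's note on the float16 branch above: float16 keeps only ~3
+ # significant digits, which can distort downstream statistics. Quick check:
+ assert np.float16(3.14159265) != np.float64(3.14159265)  # precision is lost
+ # A more conservative variant of optimize_memory_usage_feature would stop
+ # the downcast at float32.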
8716
+
8717
+ def preprocess_features_optimized(X, numeric_cols, non_numeric_cols, remove_high_corr, threshold):
8718
+ """Preprocess features dengan optimasi untuk dataset besar"""
8719
+
8720
+ X_processed = X.copy()
8721
+ feature_names = list(X.columns)
8722
+
8723
+ # Encode categorical features efficiently
8724
+ categorical_columns = [col for col in X.columns if col in non_numeric_cols]
8725
+
8726
+ for col in categorical_columns:
8727
+ if X_processed[col].nunique() > 50: # Untuk categorical dengan banyak unique values
8728
+ # Gunakan frequency encoding
8729
+ freq_map = X_processed[col].value_counts().to_dict()
8730
+ X_processed[col] = X_processed[col].map(freq_map)
8731
+ X_processed[col].fillna(0, inplace=True)
8732
+ else:
8733
+ # Gunakan label encoding
8734
+ le = LabelEncoder()
8735
+ X_processed[col] = le.fit_transform(X_processed[col].astype(str))
8736
+
8737
+ # Remove highly correlated features
8738
+ if remove_high_corr and len(X_processed.columns) > 1:
8739
+ numeric_features = [col for col in X_processed.columns if col in numeric_cols or col in categorical_columns]
8740
+ if len(numeric_features) > 1:
8741
+ X_numeric = X_processed[numeric_features]
8742
+ corr_matrix = X_numeric.corr().abs()
8743
+
8744
+ # Drop one feature from each highly correlated pair
8745
+ upper_triangle = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(bool))
8746
+ to_drop = [column for column in upper_triangle.columns if any(upper_triangle[column] > threshold)]
8747
+
8748
+ if to_drop:
8749
+ X_processed = X_processed.drop(columns=to_drop)
8750
+ feature_names = [f for f in feature_names if f not in to_drop]
8751
+ st.info(f"🗑️ Removed {len(to_drop)} highly correlated features")
8752
+
8753
+ return X_processed, feature_names
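+ # Editor's demo of the upper-triangle pruning used above, on a tiny frame
+ # (illustrative data) to make the `to_drop` selection concrete.
+ _toy = pd.DataFrame({'a': [1, 2, 3, 4], 'b': [2, 4, 6, 8], 'c': [4, 1, 3, 2]})
+ _corr = _toy.corr().abs()
+ _upper = _corr.where(np.triu(np.ones(_corr.shape), k=1).astype(bool))
+ print([c for c in _upper.columns if any(_upper[c] > 0.9)])  # ['b'] duplicates 'a'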
8754
+
8755
+ def handle_missing_values_optimized(X):
8756
+ """Handle missing values dengan metode yang optimal"""
8757
+ X_processed = X.copy()
8758
+
8759
+ for col in X_processed.columns:
8760
+ if X_processed[col].isnull().sum() > 0:
8761
+ if X_processed[col].dtype in ['int8', 'int16', 'int32', 'int64', 'float16', 'float32', 'float64']:
8762
+ # For numeric columns, use the median (more robust to outliers)
8763
+ X_processed[col] = X_processed[col].fillna(X_processed[col].median())
8764
+ else:
8765
+ # For categorical columns, use the mode
8766
+ if len(X_processed[col].mode()) > 0:
8767
+ X_processed[col] = X_processed[col].fillna(X_processed[col].mode()[0])
8768
+ else:
8769
+ X_processed[col] = X_processed[col].fillna(0)
8770
+
8771
+ return X_processed
8772
+
8773
+ def calculate_rf_importance(X, y, problem_type, config, random_state):
8774
+ """Calculate Random Forest feature importance"""
8775
+ if problem_type == "Regression":
8776
+ model = RandomForestRegressor(
8777
+ n_estimators=config["n_estimators"],
8778
+ random_state=random_state,
8779
+ n_jobs=-1 # Parallel processing
8780
+ )
8781
+ else:
8782
+ model = RandomForestClassifier(
8783
+ n_estimators=config["n_estimators"],
8784
+ random_state=random_state,
8785
+ n_jobs=-1
8786
+ )
8787
+
8788
+ model.fit(X, y)
8789
+ importances = model.feature_importances_
8790
+
8791
+ return {
8792
+ 'importances': importances,
8793
+ 'model': model
8794
+ }
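+ # Editor's cross-check (sketch, synthetic data): impurity-based importances
+ # such as the ones returned above are known to inflate high-cardinality
+ # features; the permutation variant below is the usual sanity check.
+ _rng = np.random.default_rng(0)
+ _X = pd.DataFrame({'signal': _rng.normal(size=200), 'noise': _rng.normal(size=200)})
+ _y = 3 * _X['signal'] + _rng.normal(scale=0.1, size=200)
+ _demo = calculate_rf_importance(_X, _y, "Regression",
+                                 {"n_estimators": 50}, random_state=0)
+ print(dict(zip(_X.columns, _demo['importances'].round(3))))  # 'signal' dominates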
8795
+
8796
+ def calculate_permutation_importance(X, y, problem_type, config, random_state):
8797
+ """Calculate permutation importance"""
8798
+ if problem_type == "Regression":
8799
+ model = RandomForestRegressor(
8800
+ n_estimators=config["n_estimators"],
8801
+ random_state=random_state,
8802
+ n_jobs=-1
8803
+ )
8804
+ else:
8805
+ model = RandomForestClassifier(
8806
+ n_estimators=config["n_estimators"],
8807
+ random_state=random_state,
8808
+ n_jobs=-1
8809
+ )
8810
+
8811
+ model.fit(X, y)
8812
+
8813
+ # For large datasets, score permutation importance on a subsample
8814
+ if len(X) > 10000:
8815
+ X_subsample = X.sample(n=10000, random_state=random_state)
8816
+ y_subsample = y.loc[X_subsample.index]
8817
+ else:
8818
+ X_subsample = X
8819
+ y_subsample = y
8820
+
8821
+ perm_importance = permutation_importance(
8822
+ model, X_subsample, y_subsample,
8823
+ n_repeats=config["n_repeats"],
8824
+ random_state=random_state,
8825
+ n_jobs=-1 # Parallel processing
8826
+ )
8827
+
8828
+ return {
8829
+ 'importances': perm_importance.importances_mean,
8830
+ 'std': perm_importance.importances_std
8831
+ }
8832
+
8833
+ def calculate_mutual_info(X, y, problem_type):
8834
+ """Calculate mutual information"""
8835
+ if problem_type == "Regression":
8836
+ mi = mutual_info_regression(X, y, random_state=42, n_jobs=-1)
8837
+ else:
8838
+ mi = mutual_info_classif(X, y, random_state=42, n_jobs=-1)
8839
+
8840
+ return {
8841
+ 'importances': mi
8842
+ }
8843
+
8844
+ def display_feature_importance_results(results, feature_names, max_display, problem_type):
8845
+ """Display feature importance results dengan visualisasi yang komprehensif"""
8846
+
8847
+ st.subheader("📊 Feature Importance Results")
8848
+
8849
+ # Show every method in its own tab
8850
+ tabs = st.tabs(list(results.keys()))
8851
+
8852
+ for tab, (method_name, result) in zip(tabs, results.items()):
8853
+ with tab:
8854
+ importances = result['importances']
8855
+
8856
+ # Create importance dataframe
8857
+ importance_df = pd.DataFrame({
8858
+ 'feature': feature_names,
8859
+ 'importance': importances
8860
+ }).sort_values('importance', ascending=False)
8861
+
8862
+ # Display top features
8863
+ st.write(f"**Top {min(max_display, len(importance_df))} Features - {method_name}**")
8864
+
8865
+ col1, col2 = st.columns([2, 1])
8866
+
8867
+ with col1:
8868
+ # Bar chart
8869
+ fig = px.bar(
8870
+ importance_df.head(max_display),
8871
+ x='importance',
8872
+ y='feature',
8873
+ title=f"{method_name} Feature Importance",
8874
+ orientation='h',
8875
+ color='importance',
8876
+ color_continuous_scale='viridis'
8877
+ )
8878
+ fig.update_layout(showlegend=False)
8879
+ st.plotly_chart(fig, use_container_width=True)
8880
+
8881
+ with col2:
8882
+ # Table view
8883
+ st.dataframe(
8884
+ importance_df.head(10)[['feature', 'importance']].round(4),
8885
+ use_container_width=True
8886
+ )
8887
+
8888
+ # Additional detail for permutation importance
8889
+ if method_name == "Permutation" and 'std' in result:
8890
+ st.write("**Permutation Importance with Std Dev:**")
8891
+ perm_df = pd.DataFrame({
8892
+ 'feature': feature_names,
8893
+ 'importance': importances,
8894
+ 'std': result['std']
8895
+ }).sort_values('importance', ascending=False)
8896
+
8897
+ fig = px.bar(
8898
+ perm_df.head(max_display),
8899
+ x='importance',
8900
+ y='feature',
8901
+ error_x='std',
8902
+ title="Permutation Importance ± Std Dev",
8903
+ orientation='h'
8904
+ )
8905
+ st.plotly_chart(fig, use_container_width=True)
8906
+
8907
+ def show_feature_analysis_insights(results, X, y, problem_type):
8908
+ """Show additional insights dari feature analysis"""
8909
+
8910
+ st.subheader("💡 Analysis Insights")
8911
+
8912
+ col1, col2 = st.columns(2)
8913
+
8914
+ with col1:
8915
+ st.write("**Dataset Characteristics:**")
8916
+ st.write(f"- Total samples: {len(X):,}")
8917
+ st.write(f"- Total features: {len(X.columns)}")
8918
+ st.write(f"- Problem type: {problem_type}")
8919
+
8920
+ if problem_type == "Classification":
8921
+ st.write(f"- Number of classes: {len(np.unique(y))}")
8922
+ else:
8923
+ st.write(f"- Target range: {y.min():.2f} to {y.max():.2f}")
8924
+
8925
+ with col2:
8926
+ st.write("**Feature Importance Consensus:**")
8927
+
8928
+ # Compute a consensus across all methods
8929
+ consensus_scores = {}
8930
+ for method_name, result in results.items():
8931
+ importances = result['importances']
8932
+ for i, feature in enumerate(X.columns):
8933
+ if feature not in consensus_scores:
8934
+ consensus_scores[feature] = []
8935
+ consensus_scores[feature].append(importances[i])
8936
+
8937
+ # Average score across methods
8938
+ avg_scores = {feature: np.mean(scores) for feature, scores in consensus_scores.items()}
8939
+ top_features = sorted(avg_scores.items(), key=lambda x: x[1], reverse=True)[:5]
8940
+
8941
+ for feature, score in top_features:
8942
+ st.write(f"- {feature}: {score:.4f}")
8943
+
8944
+ # Correlation analysis for the top features
8945
+ if len(results) > 0:
8946
+ st.write("**Top Features Correlation Matrix:**")
8947
+
8948
+ # Take the top 8 features from the first method
8949
+ first_method = list(results.values())[0]
8950
+ top_indices = np.argsort(first_method['importances'])[-8:][::-1]
8951
+ top_features_corr = [X.columns[i] for i in top_indices if i < len(X.columns)]
8952
+
8953
+ if len(top_features_corr) > 1:
8954
+ corr_matrix = X[top_features_corr].corr()
8955
+
8956
+ fig = px.imshow(
8957
+ corr_matrix,
8958
+ text_auto=True,
8959
+ aspect="auto",
8960
+ color_continuous_scale="RdBu_r",
8961
+ title="Correlation Matrix of Top Features"
8962
+ )
8963
+ st.plotly_chart(fig, use_container_width=True)
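+ # Editor's demo of the consensus averaging used above (toy numbers):
+ _scores = {'RF': [0.6, 0.4], 'MI': [0.8, 0.2]}  # two features, two methods
+ print(np.mean(list(_scores.values()), axis=0))  # [0.7 0.3] -> feature 1 wins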
7287
8964
 
7288
8965
  # Fungsi untuk memuat data
7289
8966
  def load_data(uploaded_file):
@@ -8006,439 +9683,1033 @@ if uploaded_files:
8006
9683
  else:
8007
9684
  df = merge_datasets(datasets, merge_method)
8008
9685
 
9686
+ try:
9687
+ from stl import mesh
9688
+ import trimesh
9689
+ import os
9690
+ except ImportError:
9691
+ st.warning("Beberapa library 3D tidak terinstall. Install dengan: pip install numpy-stl trimesh plotly")
8009
9692
  REMOVE_BG_API_KEY = "xQH5KznYiupRrywK5yPcjeyi"
8010
9693
  PIXELS_API_KEY = "LH59shPdj1xO0lolnHPsClH23qsnHE4NjkCFBhKEXvR0CbqwkrXbqBnw"
8011
9694
  if df is not None:
8012
- tab1, tab2, tab3, tab4, tab5, tab6, tab7, tab8, tab9 = st.tabs([
9695
+ tab1, tab2, tab3, tab4, tab5, tab6, tab7, tab8, tab9, tab10 = st.tabs([
8013
9696
  "📊 Statistik",
8014
9697
  "📈 Visualisasi",
8015
9698
  "💾 Data",
8016
9699
  "ℹ️ Informasi",
8017
9700
  "🧮 Kalkulator",
8018
9701
  "🖼️ Vitures",
8019
- "📍 Flowchart",
9702
+ "📍 Flowchart",
8020
9703
  "📊 Grafik Saham",
8021
- "🗃️ SQL Style"
9704
+ "🗃️ SQL Style",
9705
+ "🔄 3D Model & Analisis"
8022
9706
  ])
9707
+
9708
+ with tab10:
9709
+ st.header("🔄 Konversi Gambar ke 3D Model dengan Analisis")
9710
+
9711
+ # Image upload
9712
+ uploaded_file = st.file_uploader("Upload an image to convert to 3D",
9713
+ type=['png', 'jpg', 'jpeg'],
9714
+ key="3d_converter")
9715
+
9716
+ col1, col2 = st.columns(2)
9717
+
9718
+ with col1:
9719
+ if uploaded_file is not None:
9720
+ # Display original image
9721
+ st.subheader("🖼️ Gambar Asli")
9722
+ st.image(uploaded_file, use_column_width=True)
9723
+
9724
+ # Image analysis
9725
+ st.subheader("📊 Analisis Gambar")
9726
+
9727
+ # Convert to numpy array for analysis
9728
+ import numpy as np
9729
+ from PIL import Image
9730
+
9731
+ image = Image.open(uploaded_file)
9732
+ img_array = np.array(image)
9733
+
9734
+ # Basic image statistics
9735
+ st.write(f"**Dimensi Gambar:** {img_array.shape}")
9736
+ st.write(f"**Tipe Data:** {img_array.dtype}")
9737
+ st.write(f"**Range Nilai:** {img_array.min()} - {img_array.max()}")
9738
+
9739
+ # Color distribution
9740
+ if len(img_array.shape) == 3: # Color image
9741
+ st.write("**Distribusi Warna RGB:**")
9742
+ colors = ['Red', 'Green', 'Blue']
9743
+ for i, color in enumerate(colors):
9744
+ channel_data = img_array[:, :, i]
9745
+ st.write(f"{color}: Mean={channel_data.mean():.2f}, Std={channel_data.std():.2f}")
9746
+
9747
+ with col2:
9748
+ if uploaded_file is not None:
9749
+ st.subheader("📈 Chart Analisis")
9750
+
9751
+ # Create some sample 3D data based on image
9752
+ height, width = img_array.shape[0], img_array.shape[1]
9753
+
9754
+ # Generate 3D surface data from image intensity
9755
+ if len(img_array.shape) == 3:
9756
+ gray_img = np.mean(img_array, axis=2) # Convert to grayscale
9757
+ else:
9758
+ gray_img = img_array
9759
+
9760
+ # Downsample for performance
9761
+ downsample_factor = max(1, gray_img.shape[0] // 50)
9762
+ gray_img_small = gray_img[::downsample_factor, ::downsample_factor]
9763
+
9764
+ # Create 3D surface plot
9765
+ fig_3d = go.Figure(data=[go.Surface(z=gray_img_small)])
9766
+ fig_3d.update_layout(
9767
+ title='3D Surface from Image',
9768
+ scene=dict(
9769
+ xaxis_title='X',
9770
+ yaxis_title='Y',
9771
+ zaxis_title='Intensity'
9772
+ )
9773
+ )
9774
+ st.plotly_chart(fig_3d, use_container_width=True)
9775
+
9776
+ # 2D Histogram of intensities
9777
+ fig_hist = px.histogram(x=gray_img.flatten(),
9778
+ title='Pixel Intensity Distribution',
9779
+ labels={'x': 'Intensity', 'y': 'Frequency'})
9780
+ st.plotly_chart(fig_hist, use_container_width=True)
9781
+
9782
+ # Additional analysis section
9783
+ if uploaded_file is not None:
9784
+ st.subheader("🔍 Analisis Detail")
9785
+
9786
+ col3, col4 = st.columns(2)
9787
+
9788
+ with col3:
9789
+ # Edge detection via Sobel gradients (actual filtering, not a simulation)
9790
+ st.write("**Edge Detection (Sobel):**")
9791
+
9792
+ # Simple edge detection using gradient
9793
+ from scipy import ndimage
9794
+
9795
+ # Calculate gradients
9796
+ grad_x = ndimage.sobel(gray_img, axis=0)
9797
+ grad_y = ndimage.sobel(gray_img, axis=1)
9798
+ gradient_magnitude = np.hypot(grad_x, grad_y)
9799
+
9800
+ # Display edge map
9801
+ fig_edges = px.imshow(gradient_magnitude,
9802
+ title='Edge Map',
9803
+ color_continuous_scale='gray')
9804
+ st.plotly_chart(fig_edges, use_container_width=True)
9805
+
9806
+ with col4:
9807
+ # Statistical summary
9808
+ st.write("**Ringkasan Statistik:**")
9809
+
9810
+ stats_data = {
9811
+ 'Metrik': ['Mean', 'Median', 'Std Dev', 'Varians', 'Entropi'],
9812
+ 'Nilai': [
9813
+ f"{gray_img.mean():.2f}",
9814
+ f"{np.median(gray_img):.2f}",
9815
+ f"{gray_img.std():.2f}",
9816
+ f"{gray_img.var():.2f}",
9817
+ f"{-np.sum(gray_img * np.log2(gray_img + 1e-8)):.2f}"
9818
+ ]
9819
+ }
9820
+
9821
+ st.dataframe(stats_data, use_container_width=True)
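+ # Editor's check on the entropy figure above: a perfectly uniform image
+ # (256 equally likely levels) reaches the maximum of log2(256) = 8 bits.
+ _levels = np.repeat(np.arange(256), 10)
+ _p = np.bincount(_levels, minlength=256) / _levels.size
+ print(-np.sum(_p * np.log2(_p)))  # 8.0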
9822
+
9823
+ # Date selection for analysis
9824
+ analysis_date = st.date_input("Pilih Tanggal Analisis",
9825
+ value=datetime.now().date(),
9826
+ key="3d_analysis_date")
9827
+
9828
+ st.write(f"**Analisis untuk tanggal:** {analysis_date}")
9829
+
9830
+ # Model conversion options
9831
+ if uploaded_file is not None:
9832
+ st.subheader("⚙️ Opsi Konversi 3D")
9833
+
9834
+ conversion_type = st.selectbox(
9835
+ "Pilih tipe model 3D:",
9836
+ ["Surface Mesh", "Point Cloud", "Voxel Grid", "Height Map"]
9837
+ )
9838
+
9839
+ resolution = st.slider("Resolusi Model 3D", 10, 100, 50)
9840
+ height_scale = st.slider("Skala Tinggi 3D", 0.1, 5.0, 1.0)
9841
+
9842
+ if st.button("🚀 Generate Model 3D", type="primary"):
9843
+ with st.spinner("Membuat model 3D..."):
9844
+ try:
9845
+ # Progress bar
9846
+ progress_bar = st.progress(0)
9847
+
9848
+ # Convert image to grayscale and normalize
9849
+ if len(img_array.shape) == 3:
9850
+ gray_img = np.mean(img_array, axis=2)
9851
+ else:
9852
+ gray_img = img_array
9853
+
9854
+ # Normalize to 0-1
9855
+ gray_img_normalized = gray_img.astype(np.float32) / 255.0
9856
+
9857
+ progress_bar.progress(25)
9858
+
9859
+ # Downsample image based on resolution
9860
+ downsample = max(1, gray_img_normalized.shape[0] // resolution)
9861
+ height_map = gray_img_normalized[::downsample, ::downsample]
9862
+
9863
+ progress_bar.progress(50)
9864
+
9865
+ # Generate 3D mesh from height map
9866
+ x, y = np.mgrid[0:height_map.shape[0], 0:height_map.shape[1]]
9867
+ z = height_map * height_scale
9868
+
9869
+ progress_bar.progress(75)
9870
+
9871
+ # Create vertices and faces for the mesh
9872
+ vertices = []
9873
+ faces = []
9874
+
9875
+ # Create vertices
9876
+ for i in range(z.shape[0]):
9877
+ for j in range(z.shape[1]):
9878
+ vertices.append([i, j, z[i, j]])
9879
+
9880
+ # Create faces
9881
+ for i in range(z.shape[0]-1):
9882
+ for j in range(z.shape[1]-1):
9883
+ # Two triangles per quad
9884
+ v1 = i * z.shape[1] + j
9885
+ v2 = v1 + 1
9886
+ v3 = (i + 1) * z.shape[1] + j
9887
+ v4 = v3 + 1
9888
+
9889
+ # First triangle
9890
+ faces.append([v1, v2, v3])
9891
+ # Second triangle
9892
+ faces.append([v2, v4, v3])
9893
+
9894
+ progress_bar.progress(90)
9895
+
9896
+ # Convert to numpy arrays
9897
+ vertices = np.array(vertices)
9898
+ faces = np.array(faces)
9899
+
9900
+ # Create STL mesh
9901
+ from stl import mesh
9902
+
9903
+ # Create the mesh object
9904
+ stl_mesh = mesh.Mesh(np.zeros(faces.shape[0], dtype=mesh.Mesh.dtype))
9905
+
9906
+ # Assign vertices to mesh
9907
+ for i, face in enumerate(faces):
9908
+ for j in range(3):
9909
+ stl_mesh.vectors[i][j] = vertices[face[j]]
9910
+
9911
+ progress_bar.progress(100)
9912
+
9913
+ # Save STL file to temporary file
9914
+ import tempfile
9915
+ import os
9916
+
9917
+ with tempfile.NamedTemporaryFile(delete=False, suffix='.stl') as tmp_file:
9918
+ stl_mesh.save(tmp_file.name)
9919
+
9920
+ # Read the file data for download
9921
+ with open(tmp_file.name, 'rb') as f:
9922
+ stl_data = f.read()
9923
+
9924
+ # Clean up temporary file
9925
+ os.unlink(tmp_file.name)
9926
+
9927
+ st.success("✅ Model 3D berhasil dibuat!")
9928
+
9929
+ # Display results
9930
+ st.info(f"**Model 3D tipe:** {conversion_type}")
9931
+ st.info(f"**Resolusi:** {resolution}")
9932
+ st.info(f"**Dimensi Mesh:** {len(vertices)} vertices, {len(faces)} faces")
9933
+ st.info(f"**Skala Tinggi:** {height_scale}")
9934
+
9935
+ # Download button for 3D model
9936
+ st.download_button(
9937
+ label="📥 Download Model 3D (STL)",
9938
+ data=stl_data,
9939
+ file_name=f"3d_model_{uploaded_file.name.split('.')[0]}.stl",
9940
+ mime="application/octet-stream"
9941
+ )
9942
+
9943
+ # Display mesh information
9944
+ col5, col6 = st.columns(2)
9945
+
9946
+ with col5:
9947
+ st.write("**Informasi Mesh:**")
9948
+ mesh_info = {
9949
+ 'Parameter': ['Jumlah Vertex', 'Jumlah Face', 'Dimensi X', 'Dimensi Y', 'Tinggi Maks'],
9950
+ 'Nilai': [
9951
+ len(vertices),
9952
+ len(faces),
9953
+ f"{z.shape[0]} points",
9954
+ f"{z.shape[1]} points",
9955
+ f"{z.max():.3f}"
9956
+ ]
9957
+ }
9958
+ st.dataframe(mesh_info)
9959
+
9960
+ with col6:
9961
+ # Display 3D preview using plotly
9962
+ st.write("**Preview 3D:**")
9963
+
9964
+ # Create simplified mesh for preview
9965
+ preview_downsample = max(1, len(vertices) // 1000)
9966
+ preview_vertices = vertices[::preview_downsample]
9967
+
9968
+ fig_3d_preview = go.Figure(data=[go.Mesh3d(
9969
+ x=preview_vertices[:, 0],
9970
+ y=preview_vertices[:, 1],
9971
+ z=preview_vertices[:, 2],
9972
+ opacity=0.7,
9973
+ color='lightblue'
9974
+ )])
9975
+
9976
+ fig_3d_preview.update_layout(
9977
+ title='3D Model Preview',
9978
+ scene=dict(
9979
+ xaxis_title='X',
9980
+ yaxis_title='Y',
9981
+ zaxis_title='Z'
9982
+ )
9983
+ )
9984
+
9985
+ st.plotly_chart(fig_3d_preview, use_container_width=True)
9986
+
9987
+ except Exception as e:
9988
+ st.error(f"❌ Error dalam membuat model 3D: {str(e)}")
9989
+ st.info("Pastikan library numpy-stl dan trimesh terinstall: `pip install numpy-stl trimesh`")
9990
+
8023
9991
 
8024
9992
  with tab9:
8025
- st.header("📁 Upload File & Analisis Lengkap SQL Style")
8026
- with st.expander("📜 Ketarangan Dalam Statistik Dan Analisis", expanded=False):
8027
- st.markdown("""
8028
- **Penjelasan Penting 📛**
9993
+ st.header("📁 Upload File & Analisis Lengkap Database SQL")
9994
+ with st.expander("📜 Keterangan Dalam Statistik Dan Analisis", expanded=False):
9995
+ st.markdown(
9996
+ """
9997
+ <img src="https://media.finebi.com/strapi/Annual_Sales_Summary_59110fda60.jpg" class="responsive-img">
9998
+ """,
9999
+ unsafe_allow_html=True
10000
+ )
10001
+ st.markdown("""
10002
+
10003
+ ### 🚀 Full Notes on Analysis and Statistics in SQL Style
10004
+ - Here is what SQL Style can now make possible.
10005
+ - With the data analysis feature in SQL Style, you can easily upload a CSV or Excel file containing data from your SQL database for a thorough analysis.
10006
+ - The feature is designed to give deep insight into the structure of your data, including date-column detection, basic statistical analysis, and informative visualizations.
10007
+ - After you upload a file, SQL Style automatically detects date columns and performs an in-depth analysis of the data.
10008
+ - You get basic statistics such as row and column counts, unique values, and information about missing values.
10009
+ - The visualization features help you understand distributions, time trends, and seasonal patterns with easy-to-read charts.
10010
+ - This is especially useful for data analysts, database developers, and anyone who wants a better understanding of their data.
10011
+ - We are continuously improving this feature to deliver a better, more comprehensive analysis experience.
10012
+ - SQL Style will be updated soon to make it even better.
10013
+ - Thank you for your understanding and support.
10014
+ """)
10015
+
10016
+ # Upload file
10017
+ uploaded_file = st.file_uploader(
10018
+ "Pilih file CSV atau Excel",
10019
+ type=['csv', 'xlsx', 'xls'],
10020
+ help="Upload file data untuk dianalisis"
10021
+ )
10022
+
10023
+ if uploaded_file is not None:
10024
+ try:
10025
+ # Read the file according to its type
10026
+ if uploaded_file.name.endswith('.csv'):
10027
+ df = pd.read_csv(uploaded_file)
10028
+ else:
10029
+ df = pd.read_excel(uploaded_file)
10030
+
10031
+ # Clean dataframe - handle mixed types and object dtypes
10032
+ def clean_dataframe(df):
10033
+ df_clean = df.copy()
10034
+
10035
+ # Convert object columns to appropriate types
10036
+ for col in df_clean.columns:
10037
+ # Skip if column is already numeric or datetime
10038
+ if pd.api.types.is_numeric_dtype(df_clean[col]):
10039
+ continue
10040
+ if pd.api.types.is_datetime64_any_dtype(df_clean[col]):
10041
+ continue
10042
+
10043
+ # Try numeric conversion first; errors='ignore' is deprecated in pandas,
10044
+ # so coerce and keep the result only when every non-null value parses
10045
+ converted = pd.to_numeric(df_clean[col], errors='coerce')
10046
+ if converted.notna().sum() == df_clean[col].notna().sum():
10047
+ df_clean[col] = converted
10048
+
10049
+ # If still object, try datetime with the same guard
10050
+ if df_clean[col].dtype == 'object':
10051
+ converted = pd.to_datetime(df_clean[col], errors='coerce')
10052
+ if converted.notna().sum() == df_clean[col].notna().sum():
10053
+ df_clean[col] = converted
10054
+
10055
+
10056
+ # Handle ObjectDType specifically
10057
+ if hasattr(df_clean[col].dtype, 'name') and df_clean[col].dtype.name == 'object':
10058
+ # Convert to string to avoid ObjectDType issues
10059
+ df_clean[col] = df_clean[col].astype(str)
10060
+
10061
+ return df_clean
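+ # Editor's demo of the coerce-with-guard pattern used in clean_dataframe
+ # above (toy data): conversion is kept only when nothing new becomes NaN.
+ _s = pd.Series(['1', '2', 'x'])
+ _conv = pd.to_numeric(_s, errors='coerce')
+ print(_conv.notna().sum() == _s.notna().sum())  # False -> keep the original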
8029
10062
 
8030
- ### 🚀 Ketrangan Lengkap Dalam Analisis Dan Statistik Pada SQL Style
8031
- - SQL ini masih tahap pemgembangan dan perbaikan, jadi mohon bersabar jika ada kekurangan
8032
- - dan kami akan segera update SQL Style ini agar lebih baik lagi kedepannya.
8033
- - Terima kasih atas pengertian dan dukungannya.
8034
- """)
8035
-
8036
- # Upload file
8037
- uploaded_file = st.file_uploader(
8038
- "Pilih file CSV atau Excel",
8039
- type=['csv', 'xlsx', 'xls'],
8040
- help="Upload file data untuk dianalisis"
8041
- )
8042
-
8043
- if uploaded_file is not None:
8044
- try:
8045
- # Baca file berdasarkan tipe
8046
- if uploaded_file.name.endswith('.csv'):
8047
- df = pd.read_csv(uploaded_file)
8048
- else:
8049
- df = pd.read_excel(uploaded_file)
10063
+ df = clean_dataframe(df)
10064
+
10065
+ st.success(f"File berhasil diupload! Shape: {df.shape}")
10066
+
10067
+ # Show a data preview
10068
+ st.subheader("📋 Data Preview")
10069
+ st.dataframe(df.head())
10070
+
10071
+ # Basic dataset information
10072
+ st.subheader("📊 Dataset Information")
10073
+ col1, col2, col3, col4 = st.columns(4)
10074
+
10075
+ with col1:
10076
+ st.metric("Jumlah Baris", df.shape[0])
10077
+ with col2:
10078
+ st.metric("Jumlah Kolom", df.shape[1])
10079
+ with col3:
10080
+ st.metric("Missing Values", df.isnull().sum().sum())
10081
+ with col4:
10082
+ st.metric("Duplikat", df.duplicated().sum())
10083
+
10084
+ # --- DATA STRUCTURE ANALYSIS FOR THE DYNAMIC ERD ---
10085
+ st.subheader("🔍 Data Structure Analysis for the ERD")
10086
+
10087
+ # Helper for safe dtype detection
10088
+ def safe_dtype_detection(df):
10089
+ numeric_cols = []
10090
+ categorical_cols = []
10091
+ date_cols = []
10092
+ bool_cols = []
10093
+ other_cols = []
8050
10094
 
8051
- st.success(f"File berhasil diupload! Shape: {df.shape}")
10095
+ for col in df.columns:
10096
+ col_dtype = str(df[col].dtype)
10097
+
10098
+ # Check numeric
10099
+ if pd.api.types.is_numeric_dtype(df[col]):
10100
+ numeric_cols.append(col)
10101
+ # Check datetime
10102
+ elif pd.api.types.is_datetime64_any_dtype(df[col]):
10103
+ date_cols.append(col)
10104
+ # Check boolean
10105
+ elif pd.api.types.is_bool_dtype(df[col]):
10106
+ bool_cols.append(col)
10107
+ # Check categorical (object but limited unique values)
10108
+ elif df[col].dtype == 'object':
10109
+ if df[col].nunique() <= 50: # Consider as categorical if <= 50 unique values
10110
+ categorical_cols.append(col)
10111
+ else:
10112
+ other_cols.append(col)
10113
+ else:
10114
+ other_cols.append(col)
8052
10115
 
8053
- # Tampilkan preview data
8054
- st.subheader("📋 Preview Data")
8055
- st.dataframe(df.head())
10116
+ return numeric_cols, categorical_cols, date_cols, bool_cols, other_cols
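+ # Editor's demo of safe_dtype_detection on a tiny frame (illustrative; in
+ # the app this helper runs on the uploaded dataframe).
+ _demo_df = pd.DataFrame({'qty': [1, 2], 'city': ['A', 'B'],
+                          'when': pd.to_datetime(['2024-01-01', '2024-01-02'])})
+ print(safe_dtype_detection(_demo_df))
+ # (['qty'], ['city'], ['when'], [], [])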
10117
+
10118
+ numeric_cols, categorical_cols, date_cols, bool_cols, other_cols = safe_dtype_detection(df)
10119
+
10120
+ # A more robust column-analysis helper
10121
+ def robust_column_analysis(df):
10122
+ column_analysis = {}
8056
10123
 
8057
- # Informasi dasar dataset
8058
- st.subheader("📊 Informasi Dataset")
8059
- col1, col2, col3 = st.columns(3)
10124
+ for col in df.columns:
10125
+ try:
10126
+ col_data = df[col]
10127
+
10128
+ # Handle ObjectDType and other problematic types
10129
+ if hasattr(col_data.dtype, 'name') and col_data.dtype.name == 'object':
10130
+ # Convert to string for analysis
10131
+ col_data = col_data.astype(str)
10132
+
10133
+ analysis = {
10134
+ 'dtype': str(col_data.dtype),
10135
+ 'unique_count': col_data.nunique(),
10136
+ 'null_count': col_data.isnull().sum(),
10137
+ 'null_percentage': (col_data.isnull().sum() / len(col_data)) * 100,
10138
+ 'sample_values': col_data.dropna().head(3).tolist() if not col_data.empty else []
10139
+ }
10140
+
10141
+ # Safe sample values conversion
10142
+ safe_samples = []
10143
+ for val in analysis['sample_values']:
10144
+ try:
10145
+ safe_samples.append(str(val))
10146
+ except:
10147
+ safe_samples.append('N/A')
10148
+ analysis['sample_values'] = safe_samples
10149
+
10150
+ # Detect the column's role for the ERD
10151
+ col_lower = str(col).lower()
10152
+
10153
+ # Primary Key detection
10154
+ if (analysis['unique_count'] == len(col_data) and
10155
+ analysis['null_count'] == 0 and
10156
+ any(keyword in col_lower for keyword in ['id', 'pk', 'key', 'code'])):
10157
+ analysis['role'] = 'PRIMARY_KEY'
10158
+ analysis['icon'] = '🔑'
10159
+
10160
+ # Foreign Key detection
10161
+ elif (any(keyword in col_lower for keyword in ['id', 'fk', 'ref', 'code']) and
10162
+ analysis['unique_count'] < len(col_data) * 0.8):
10163
+ analysis['role'] = 'FOREIGN_KEY'
10164
+ analysis['icon'] = '🔗'
10165
+
10166
+ # Measurement columns
10167
+ elif any(keyword in col_lower for keyword in ['amount', 'price', 'value', 'total', 'sum', 'avg', 'quantity']):
10168
+ analysis['role'] = 'MEASUREMENT'
10169
+ analysis['icon'] = '💰'
10170
+
10171
+ # Date/Time columns
10172
+ elif any(keyword in col_lower for keyword in ['date', 'time', 'year', 'month', 'day']):
10173
+ analysis['role'] = 'TEMPORAL'
10174
+ analysis['icon'] = '📅'
10175
+
10176
+ # Category columns
10177
+ elif (analysis['unique_count'] <= 20 and
10178
+ analysis['unique_count'] > 1 and
10179
+ str(col_data.dtype) == 'object'):
10180
+ analysis['role'] = 'CATEGORY'
10181
+ analysis['icon'] = '🏷️'
10182
+
10183
+ # Description columns
10184
+ elif (str(col_data.dtype) == 'object' and
10185
+ col_data.astype(str).str.len().mean() > 20):
10186
+ analysis['role'] = 'DESCRIPTION'
10187
+ analysis['icon'] = '📝'
10188
+
10189
+ # Numeric metrics
10190
+ elif pd.api.types.is_numeric_dtype(col_data):
10191
+ analysis['role'] = 'METRIC'
10192
+ analysis['icon'] = '📊'
10193
+
10194
+ else:
10195
+ analysis['role'] = 'ATTRIBUTE'
10196
+ analysis['icon'] = '📄'
10197
+
10198
+ column_analysis[col] = analysis
10199
+
10200
+ except Exception as e:
10201
+ # Fallback analysis for problematic columns
10202
+ column_analysis[col] = {
10203
+ 'dtype': 'unknown',
10204
+ 'role': 'ATTRIBUTE',
10205
+ 'icon': '❓',
10206
+ 'unique_count': 0,
10207
+ 'null_count': len(df[col]),
10208
+ 'null_percentage': 100.0,
10209
+ 'sample_values': ['Error in analysis']
10210
+ }
8060
10211
 
8061
- with col1:
8062
- st.metric("Jumlah Baris", df.shape[0])
8063
- with col2:
8064
- st.metric("Jumlah Kolom", df.shape[1])
8065
- with col3:
8066
- st.metric("Missing Values", df.isnull().sum().sum())
10212
+ return column_analysis
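+ # Editor's demo of the keyword-based role heuristic above (illustrative):
+ _toy_cols = pd.DataFrame({'customer_id': [1, 2, 3], 'total_amount': [9.5, 3.2, 7.1]})
+ _roles = robust_column_analysis(_toy_cols)
+ print({c: _roles[c]['role'] for c in _toy_cols.columns})
+ # {'customer_id': 'PRIMARY_KEY', 'total_amount': 'MEASUREMENT'}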
10213
+
10214
+ # Column analysis
10215
+ column_analysis = robust_column_analysis(df)
10216
+
10217
+ # Show the column analysis
10218
+ st.write("**Detailed Column Analysis:**")
10219
+ analysis_data = []
10220
+ for col, analysis in column_analysis.items():
10221
+ analysis_data.append({
10222
+ 'Column': col,
10223
+ 'Type': analysis['dtype'],
10224
+ 'Role': analysis['role'],
10225
+ 'Icon': analysis['icon'],
10226
+ 'Unique': analysis['unique_count'],
10227
+ 'Null %': f"{analysis['null_percentage']:.1f}%"
10228
+ })
10229
+
10230
+ analysis_df = pd.DataFrame(analysis_data)
10231
+ st.dataframe(analysis_df, use_container_width=True)
10232
+
10233
+ # --- A MORE ACCURATE DYNAMIC ERD ---
10234
+ st.subheader("🗄️ Dynamic Entity Relationship Diagram (ERD)")
10235
+
10236
+ # ERD configuration
10237
+ col1, col2, col3 = st.columns(3)
10238
+
10239
+ with col1:
10240
+ erd_style = st.selectbox(
10241
+ "Style ERD:",
10242
+ ['Vertical', 'Horizontal', 'Circular'],
10243
+ index=0
10244
+ )
10245
+
10246
+ with col2:
10247
+ show_relationships = st.checkbox("Tampilkan Relasi", value=True)
10248
+
10249
+ with col3:
10250
+ max_tables = st.slider("Max Tabel", 3, 15, 8)
10251
+
10252
+ try:
10253
+ import graphviz
10254
+
10255
+ # Build the ERD graph
10256
+ dot = graphviz.Digraph(comment='Dynamic Database ERD')
10257
+
10258
+ # Configure the layout
10259
+ if erd_style == 'Vertical':
10260
+ dot.attr(rankdir='TB', size='12,16')
10261
+ elif erd_style == 'Horizontal':
10262
+ dot.attr(rankdir='LR', size='16,12')
10263
+ else: # Circular
10264
+ dot.attr(rankdir='LR', size='14,14', layout='circo')
10265
+
10266
+ # Group columns by role to form the tables
10267
+ main_table_cols = []
10268
+ reference_tables = {}
10269
+
10270
+ for col, analysis in column_analysis.items():
10271
+ if analysis['role'] == 'FOREIGN_KEY':
10272
+ # Create a reference table for each foreign key
10273
+ ref_table_name = f"ref_{col}"
10274
+ if ref_table_name not in reference_tables:
10275
+ ref_display_name = col.replace('_id', '').replace('ID', '').replace('_', ' ').title()
10276
+ reference_tables[ref_table_name] = {
10277
+ 'name': ref_display_name,
10278
+ 'columns': []
10279
+ }
10280
+ reference_tables[ref_table_name]['columns'].append(col)
10281
+ else:
10282
+ main_table_cols.append((col, analysis))
8067
10283
 
8068
- # Tampilkan ERD (Entity Relationship Diagram) sederhana
8069
- st.subheader("🔗 Entity Relationship Diagram (ERD)")
8070
-
8071
- # Analisis relasi antar kolom
8072
- st.write("**Relasi antar Kolom:**")
8073
- numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
8074
- categorical_cols = df.select_dtypes(include=['object']).columns.tolist()
8075
-
8076
- col1, col2 = st.columns(2)
8077
-
8078
- with col1:
8079
- st.write("**Kolom Numerik:**")
8080
- df_numeric = pd.DataFrame({"Nama Kolom": numeric_cols})
8081
- st.table(df_numeric)
8082
-
8083
- with col2:
8084
- st.write("**Kolom Kategorikal:**")
8085
- df_categorical = pd.DataFrame({"Nama Kolom": categorical_cols})
8086
- st.table(df_categorical)
8087
-
8088
- # --- Visualisasi ERD yang Sesungguhnya ---
8089
- st.write("---")
8090
- st.subheader("🗄️ Entity Relationship Diagram Visualization")
8091
-
8092
- # Buat struktur entitas dan relasi
8093
- st.write("**Struktur Tabel Database:**")
8094
-
8095
- # Generate SQL CREATE TABLE statements
8096
- st.markdown("### 📝 SQL Schema Definition")
8097
-
8098
- # Buat diagram ERD menggunakan graphviz
8099
- try:
8100
- import graphviz
8101
-
8102
- # Buat graph untuk ERD
8103
- dot = graphviz.Digraph(comment='Database ERD')
8104
- dot.attr(rankdir='TB', size='8,8')
8105
-
8106
- # Buat node untuk tabel utama
10284
+ # Limit how many tables are displayed
10285
+ tables_to_show = min(max_tables, len(reference_tables) + 1)
10286
+
10287
+ # Build the main table
10288
+ if main_table_cols and tables_to_show > 0:
8107
10289
  with dot.subgraph(name='cluster_main') as c:
8108
- c.attr(label='Tabel Utama: dataset_table', style='filled', color='lightblue', fontsize='12')
8109
-
8110
- # Header tabel
8111
- c.node('table_header', f'📊 dataset_table', shape='plaintext', fontsize='14', fontname='Arial bold')
10290
+ table_name = uploaded_file.name.split('.')[0] # Remove extension
10291
+ c.attr(label=f'📊 {table_name}', style='filled',
10292
+ color='lightblue', fontsize='14', fontname='Arial Bold')
8112
10293
 
8113
- # Field-field dalam tabel
8114
10294
  fields = []
10295
+ for col, analysis in main_table_cols[:12]: # limit columns per table
10296
+ field_type = ""
10297
+ if pd.api.types.is_numeric_dtype(df[col]):
10298
+ field_type = "NUMERIC"
10299
+ elif pd.api.types.is_datetime64_any_dtype(df[col]):
10300
+ field_type = "DATETIME"
10301
+ elif df[col].dtype == 'object':
10302
+ try:
10303
+ max_len = df[col].astype(str).str.len().max()
10304
+ field_type = f"VARCHAR({min(255, max(50, int(max_len)))})"
10305
+ except Exception:
10306
+ field_type = "TEXT"
10307
+ elif df[col].dtype == 'bool':
10308
+ field_type = "BOOLEAN"
10309
+ else:
10310
+ field_type = "TEXT"
10311
+
10312
+ constraint = ""
10313
+ if analysis['role'] == 'PRIMARY_KEY':
10314
+ constraint = " [PK]"
10315
+ elif analysis['role'] == 'FOREIGN_KEY':
10316
+ constraint = " [FK]"
10317
+
10318
+ fields.append(f"<TR><TD ALIGN='LEFT'>{analysis['icon']} {col}</TD><TD ALIGN='LEFT'>{field_type}{constraint}</TD></TR>")
10319
+
10320
+ # Tambahkan indicator jika ada kolom yang tidak ditampilkan
10321
+ if len(main_table_cols) > 12:
10322
+ fields.append(f"<TR><TD ALIGN='LEFT'>...</TD><TD ALIGN='LEFT'>+{len(main_table_cols)-12} more</TD></TR>")
8115
10323
 
8116
- # Primary keys (asumsikan kolom pertama sebagai PK)
8117
- if len(df.columns) > 0:
8118
- pk_field = f"<TR><TD ALIGN='LEFT'><B>🔑 {df.columns[0]}</B></TD><TD ALIGN='LEFT'>[PK]</TD></TR>"
8119
- fields.append(pk_field)
10324
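+ # Label yang diapit '<' ... '>' diparse Graphviz sebagai HTML-like label,
+ # sehingga node ber-shape='none' dapat dirender sebagai tabel kolom/tipe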
+ table_html = f'''<
10325
+ <TABLE BORDER="1" CELLBORDER="0" CELLSPACING="0" CELLPADDING="4">
10326
+ <TR><TD ALIGN="CENTER" BGCOLOR="#e6f3ff"><B>COLUMN</B></TD><TD ALIGN="CENTER" BGCOLOR="#e6f3ff"><B>TYPE</B></TD></TR>
10327
+ {''.join(fields)}
10328
+ </TABLE>
10329
+ >'''
8120
10330
 
8121
- # Numeric fields
8122
- for col in numeric_cols[:5]: # Batasi agar tidak terlalu panjang
8123
- if col != df.columns[0]:
8124
- fields.append(f"<TR><TD ALIGN='LEFT'>📈 {col}</TD><TD ALIGN='LEFT'>NUMERIC</TD></TR>")
10331
+ c.node('main_table', table_html, shape='none', fontname='Arial')
10332
+
10333
+ # Buat tabel referensi
10334
+ colors = ['#e6ffe6', '#fff0e6', '#e6f9ff', '#ffe6ff', '#ffffe6', '#f0e6ff']
10335
+ for i, (ref_name, ref_info) in enumerate(list(reference_tables.items())[:tables_to_show-1]):
10336
+ color = colors[i % len(colors)]
10337
+ with dot.subgraph(name=f'cluster_{ref_name}') as c:
10338
+ c.attr(label=f'📁 {ref_info["name"]}', style='filled',
10339
+ color=color, fontsize='12', fontname='Arial')
8125
10340
 
8126
- # Categorical fields
8127
- for col in categorical_cols[:3]: # Batasi agar tidak terlalu panjang
8128
- fields.append(f"<TR><TD ALIGN='LEFT'>📝 {col}</TD><TD ALIGN='LEFT'>VARCHAR</TD></TR>")
10341
+ fields = []
10342
+ # Primary key untuk tabel referensi
10343
+ for fk_col in ref_info['columns']:
10344
+ fields.append(f"<TR><TD ALIGN='LEFT'><B>🔑 {fk_col}</B></TD><TD ALIGN='LEFT'>[PK]</TD></TR>")
8129
10345
 
8130
- # Jika ada field lebih dari yang ditampilkan
8131
- total_fields = len(numeric_cols) + len(categorical_cols)
8132
- if total_fields > 8:
8133
- fields.append(f"<TR><TD ALIGN='LEFT'>...</TD><TD ALIGN='LEFT'>+{total_fields-8} fields</TD></TR>")
10346
+ # Tambahkan kolom umum untuk tabel referensi
10347
+ fields.append(f"<TR><TD ALIGN='LEFT'>📝 Name</TD><TD ALIGN='LEFT'>VARCHAR(100)</TD></TR>")
10348
+ fields.append(f"<TR><TD ALIGN='LEFT'>📝 Description</TD><TD ALIGN='LEFT'>VARCHAR(255)</TD></TR>")
10349
+ fields.append(f"<TR><TD ALIGN='LEFT'>📅 Created_Date</TD><TD ALIGN='LEFT'>DATETIME</TD></TR>")
10350
+ fields.append(f"<TR><TD ALIGN='LEFT'>✅ Is_Active</TD><TD ALIGN='LEFT'>BOOLEAN</TD></TR>")
8134
10351
 
8135
10352
  table_html = f'''<
8136
- <TABLE BORDER="1" CELLBORDER="0" CELLSPACING="0" CELLPADDING="4">
8137
- <TR><TD ALIGN="CENTER"><B>COLUMN</B></TD><TD ALIGN="CENTER"><B>TYPE</B></TD></TR>
10353
+ <TABLE BORDER="1" CELLBORDER="0" CELLSPACING="0" CELLPADDING="3">
10354
+ <TR><TD ALIGN="CENTER" BGCOLOR="{color}"><B>COLUMN</B></TD><TD ALIGN="CENTER" BGCOLOR="{color}"><B>TYPE</B></TD></TR>
8138
10355
  {''.join(fields)}
8139
10356
  </TABLE>
8140
10357
  >'''
8141
10358
 
8142
- c.node('main_table', table_html, shape='none', fontname='Arial')
8143
-
8144
- # Tampilkan graph
8145
- st.graphviz_chart(dot)
8146
-
8147
- except ImportError:
8148
- st.warning("Graphviz tidak terinstall. Menggunakan visualisasi alternatif...")
10359
+ c.node(ref_name, table_html, shape='none', fontname='Arial')
8149
10360
 
8150
- # Visualisasi alternatif dengan Plotly
8151
- st.write("**Diagram Relasi Tabel:**")
8152
-
8153
- # Buat network graph sederhana
8154
- import plotly.graph_objects as go
8155
-
8156
- # Node positions
8157
- node_x = [0.5]
8158
- node_y = [0.5]
8159
- node_text = ["dataset_table"]
8160
- node_colors = ['lightblue']
8161
-
8162
- # Add some related tables (conceptual)
8163
- related_tables = ['metadata_table', 'category_table', 'log_table']
8164
- for i, table in enumerate(related_tables):
8165
- node_x.append(0.2 + i * 0.3)
8166
- node_y.append(0.8)
8167
- node_text.append(table)
8168
- node_colors.append('lightgreen')
8169
-
8170
- fig = go.Figure()
8171
-
8172
- # Add nodes
8173
- fig.add_trace(go.Scatter(
8174
- x=node_x, y=node_y,
8175
- mode='markers+text',
8176
- marker=dict(size=50, color=node_colors),
8177
- text=node_text,
8178
- textposition="middle center",
8179
- name="Tables"
8180
- ))
10361
+ # Tambahkan relasi
10362
+ if show_relationships:
10363
+ for fk_col in ref_info['columns']:
10364
+ dot.edge(ref_name, 'main_table', label=f'1:N ({fk_col})', style='dashed', color='#666666')
10365
+
10366
+ # Tampilkan ERD
10367
+ st.graphviz_chart(dot)
10368
+
10369
+ # Legenda
10370
+ st.markdown("""
10371
+ **📋 Legenda ERD:**
10372
+ - 🔑 Primary Key | 🔗 Foreign Key | 📊 Metric | 💰 Measurement
10373
+ - 📅 Temporal | 🏷️ Category | 📝 Description | 📄 Attribute
10374
+ - **Warna berbeda**: tabel dari domain yang berbeda
10375
+ """)
10376
+
10377
+ except ImportError:
10378
+ st.warning("Graphviz tidak terinstall. Menggunakan visualisasi alternatif...")
10379
+
10380
+ # Visualisasi alternatif yang lebih sederhana
10381
+ import plotly.graph_objects as go
10382
+
10383
+ # Hitung posisi node secara dinamis
+ # Palet warna didefinisikan ulang di sini: variabel `colors` dari blok try
+ # belum terbentuk bila `import graphviz` gagal (menghindari NameError)
+ colors = ['#e6ffe6', '#fff0e6', '#e6f9ff', '#ffe6ff', '#ffffe6', '#f0e6ff']
10384
+ num_tables = min(8, len(reference_tables) + 1)
10385
+ angles = np.linspace(0, 2*np.pi, num_tables, endpoint=False)
10386
+ radius = 0.4
10387
+
10388
+ fig = go.Figure()
10389
+
10390
+ # Node positions
10391
+ node_x = [0.5] # Main table di center
10392
+ node_y = [0.5]
10393
+ node_text = ["MAIN"]
10394
+ node_colors = ['#3366CC']
10395
+
10396
+ # Reference tables di sekeliling
10397
+ for i, (ref_name, ref_info) in enumerate(list(reference_tables.items())[:num_tables-1]):
10398
+ angle = angles[i]
10399
+ x = 0.5 + radius * np.cos(angle)
10400
+ y = 0.5 + radius * np.sin(angle)
8181
10401
 
8182
- # Add edges (relationships)
10402
+ node_x.append(x)
10403
+ node_y.append(y)
10404
+ node_text.append(ref_info['name'][:10])
10405
+ node_colors.append(colors[i % len(colors)])
10406
+
10407
+ # Add nodes
10408
+ fig.add_trace(go.Scatter(
10409
+ x=node_x, y=node_y,
10410
+ mode='markers+text',
10411
+ marker=dict(size=80, color=node_colors),
10412
+ text=node_text,
10413
+ textposition="middle center",
10414
+ textfont=dict(size=12, color='white'),
10415
+ name="Tables"
10416
+ ))
10417
+
10418
+ # Add relationships
10419
+ if show_relationships and len(node_x) > 1:
8183
10420
  for i in range(1, len(node_x)):
8184
10421
  fig.add_trace(go.Scatter(
8185
- x=[node_x[0], node_x[i]],
8186
- y=[node_y[0], node_y[i]],
10422
+ x=[node_x[i], node_x[0]], y=[node_y[i], node_y[0]],
8187
10423
  mode='lines',
8188
- line=dict(width=2, color='gray'),
8189
- hoverinfo='none'
10424
+ line=dict(width=2, color='gray', dash='dash'),
10425
+ hoverinfo='none',
10426
+ showlegend=False
8190
10427
  ))
8191
-
8192
- fig.update_layout(
8193
- title="Database Table Relationships",
8194
- showlegend=False,
8195
- height=400,
8196
- xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
8197
- yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
8198
- margin=dict(l=20, r=20, t=40, b=20)
8199
- )
8200
-
8201
- st.plotly_chart(fig, use_container_width=True)
8202
-
8203
- # --- Bagian Penyambung SQL ---
8204
- st.write("---")
8205
- st.subheader("🧩 Format SQL (Comma Separated)")
8206
-
8207
- numeric_sql = ", ".join(numeric_cols)
8208
- categorical_sql = ", ".join(categorical_cols)
8209
-
8210
- st.code(f"SELECT {numeric_sql}, {categorical_sql} FROM dataset_table;", language="sql")
8211
-
8212
- # Generate CREATE TABLE statement
8213
- st.markdown("### 🗃️ SQL CREATE TABLE Statement")
8214
-
8215
- # Deteksi tipe data untuk SQL
8216
- def infer_sql_type(dtype, sample_data):
8217
- if np.issubdtype(dtype, np.number):
8218
- return "DECIMAL(10,2)"
8219
- elif np.issubdtype(dtype, np.datetime64):
8220
- return "DATETIME"
8221
- else:
8222
- # Cek panjang string maksimum
8223
- max_len = sample_data.astype(str).str.len().max()
8224
- return f"VARCHAR({min(255, max(100, int(max_len * 1.5)))})"
8225
-
8226
- create_table_sql = "CREATE TABLE dataset_table (\n"
8227
- for i, col in enumerate(df.columns):
8228
- sql_type = infer_sql_type(df[col].dtype, df[col])
8229
- if i == 0:
8230
- create_table_sql += f" {col} {sql_type} PRIMARY KEY,\n"
8231
- else:
8232
- create_table_sql += f" {col} {sql_type},\n"
8233
-
8234
- create_table_sql = create_table_sql.rstrip(',\n') + "\n);"
8235
-
8236
- st.code(create_table_sql, language="sql")
8237
-
8238
- # Jika ingin lihat hanya daftar kolom
8239
- col3, col4 = st.columns(2)
8240
- with col3:
8241
- st.write("**Kolom Numerik (SQL String):**")
8242
- st.code(numeric_sql, language="sql")
8243
-
8244
- with col4:
8245
- st.write("**Kolom Kategorikal (SQL String):**")
8246
- st.code(categorical_sql, language="sql")
8247
-
8248
- # Visualisasi korelasi sebagai ERD sederhana
8249
- if len(numeric_cols) > 1:
8250
- st.write("---")
8251
- st.subheader("📊 Matriks Korelasi (Hubungan Numerik)")
8252
- corr_matrix = df[numeric_cols].corr()
8253
-
8254
- # Plot menggunakan Plotly
8255
- fig = px.imshow(
8256
- corr_matrix,
8257
- text_auto=".2f",
8258
- color_continuous_scale='RdBu_r',
8259
- zmin=-1,
8260
- zmax=1,
8261
- aspect="auto",
8262
- labels=dict(color="Korelasi")
8263
- )
8264
- fig.update_layout(
8265
- title="Matriks Korelasi Numerik",
8266
- xaxis_title="Fitur",
8267
- yaxis_title="Fitur",
8268
- autosize=True,
8269
- margin=dict(l=40, r=40, t=60, b=40),
8270
- height=600
8271
- )
8272
- st.plotly_chart(fig, use_container_width=True)
8273
-
8274
- # --- Linear Regression Analysis ---
8275
- st.write("---")
8276
- st.subheader("🧮 Linear Regression Analysis (SQL-Style LRS)")
10428
+
10429
+ fig.update_layout(
10430
+ title="Database Table Relationships",
10431
+ showlegend=False,
10432
+ height=500,
10433
+ xaxis=dict(showgrid=False, zeroline=False, showticklabels=False, range=[0, 1]),
10434
+ yaxis=dict(showgrid=False, zeroline=False, showticklabels=False, range=[0, 1]),
10435
+ margin=dict(l=20, r=20, t=60, b=20)
10436
+ )
10437
+
10438
+ st.plotly_chart(fig, use_container_width=True)
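Both ERD paths above rely on the roles recorded in `column_analysis`, which is built earlier in the file and not shown in this hunk. The sketch below illustrates the kind of heuristic that classification implies; `infer_role` and its rules are illustrative assumptions, not the package's actual code:

```python
# Illustrative sketch only: the real column_analysis builder is outside this hunk.
import pandas as pd

def infer_role(df: pd.DataFrame, col: str) -> str:
    s = df[col]
    # All values present and unique -> behaves like a primary key
    if s.notna().all() and s.nunique() == len(s):
        return "PRIMARY_KEY"
    # Non-unique *_id columns usually reference another table
    if col.lower().endswith("_id"):
        return "FOREIGN_KEY"
    return "ATTRIBUTE"

df = pd.DataFrame({"order_id": [1, 2, 3], "customer_id": [7, 7, 9]})
print(infer_role(df, "order_id"))     # PRIMARY_KEY
print(infer_role(df, "customer_id"))  # FOREIGN_KEY
```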
8277
10439
 
8278
- if len(numeric_cols) >= 2:
8279
- col1, col2 = st.columns(2)
10440
+ # --- VISUALISASI DATA YANG AMAN ---
10441
+ st.subheader("📈 Visualisasi Data")
10442
+
10443
+ # Warna konsisten untuk chart
10444
+ color_palette = px.colors.qualitative.Set3
10445
+
10446
+ # Fungsi safe plotting
10447
+ def safe_plotting(plot_function, *args, **kwargs):
10448
+ try:
10449
+ return plot_function(*args, **kwargs)
10450
+ except Exception as e:
10451
+ st.error(f"Error dalam membuat chart: {str(e)}")
10452
+ return None
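+ # Pola pemakaian (lihat tab di bawah): fig = safe_plotting(px.histogram, df, x=...)
+ # Fungsi mengembalikan None saat chart gagal dibuat, cukup dicek dengan `if fig:`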
10453
+
10454
+ # Tab untuk organisasi chart yang lebih baik
10455
+ tab111, tab222, tab333 = st.tabs(["📊 Distribusi Numerik", "🏷️ Analisis Kategorikal", "📋 Data Quality"])
10456
+
10457
+ with tab111:
10458
+ st.subheader("Analisis Distribusi Numerik")
10459
+
10460
+ if numeric_cols:
10461
+ col1, col2 = st.columns(2)
10462
+
10463
+ with col1:
10464
+ # Histogram dengan pengelompokan yang baik
10465
+ selected_num_hist = st.selectbox(
10466
+ "Pilih variabel untuk histogram:",
10467
+ numeric_cols,
10468
+ key="hist_num"
10469
+ )
8280
10470
 
8281
- with col1:
8282
- x_axis = st.selectbox("Pilih variabel X (Independent)", numeric_cols, key="lrs_x")
8283
- with col2:
8284
- y_axis = st.selectbox("Pilih variabel Y (Dependent)", numeric_cols, key="lrs_y")
10471
+ if selected_num_hist:
10472
+ fig_hist = safe_plotting(px.histogram,
10473
+ df,
10474
+ x=selected_num_hist,
10475
+ title=f"Distribusi {selected_num_hist}",
10476
+ nbins=30,
10477
+ color_discrete_sequence=['#3366CC'],
10478
+ opacity=0.8
10479
+ )
10480
+ if fig_hist:
10481
+ fig_hist.update_layout(
10482
+ bargap=0.1,
10483
+ xaxis_title=selected_num_hist,
10484
+ yaxis_title="Frekuensi"
10485
+ )
10486
+ st.plotly_chart(fig_hist, use_container_width=True)
10487
+
10488
+ with col2:
10489
+ # Box plot
10490
+ selected_num_box = st.selectbox(
10491
+ "Pilih variabel untuk box plot:",
10492
+ numeric_cols,
10493
+ key="box_num"
10494
+ )
8285
10495
 
8286
- if x_axis != y_axis:
8287
- # Hitung regresi linear
8288
- slope, intercept, r_value, p_value, std_err = stats.linregress(df[x_axis], df[y_axis])
8289
- correlation = df[x_axis].corr(df[y_axis])
8290
- r_squared = r_value**2
8291
-
8292
- # --- Tampilan SQL Query ---
8293
- st.markdown("### 🧩 SQL Query Representation")
8294
- st.code(f"""
8295
- SELECT
8296
- {x_axis} AS X,
8297
- {y_axis} AS Y,
8298
- ROUND(REGR_SLOPE({y_axis}, {x_axis}), 4) AS slope,
8299
- ROUND(REGR_INTERCEPT({y_axis}, {x_axis}), 4) AS intercept,
8300
- ROUND(CORR({y_axis}, {x_axis}), 4) AS correlation,
8301
- ROUND(POWER(CORR({y_axis}, {x_axis}), 2), 4) AS r_squared
8302
- FROM dataset_table;
8303
- """, language="sql")
8304
-
8305
- # --- Plot hubungan ---
8306
- fig = px.scatter(
10496
+ if selected_num_box:
10497
+ fig_box = safe_plotting(px.box,
8307
10498
  df,
8308
- x=x_axis,
8309
- y=y_axis,
8310
- trendline="ols",
8311
- title=f"📊 SQL Visualization: {y_axis} vs {x_axis}",
8312
- labels={x_axis: f"{x_axis}", y_axis: f"{y_axis}"}
8313
- )
8314
- fig.update_layout(
8315
- autosize=True,
8316
- margin=dict(l=40, r=40, t=60, b=40),
8317
- height=500,
8318
- title_x=0.5
10499
+ y=selected_num_box,
10500
+ title=f"Box Plot {selected_num_box}",
10501
+ color_discrete_sequence=['#FF6B6B']
8319
10502
  )
8320
- st.plotly_chart(fig, use_container_width=True)
8321
-
8322
- # --- Relationship Mapping ---
8323
- st.markdown("### 🔗 Relationship Mapping")
8324
-
8325
- # Buat diagram hubungan sederhana
8326
- rel_fig = go.Figure()
8327
-
8328
- # Add nodes
8329
- rel_fig.add_trace(go.Scatter(
8330
- x=[0.2, 0.8], y=[0.5, 0.5],
8331
- mode='markers+text',
8332
- marker=dict(size=80, color=['lightblue', 'lightgreen']),
8333
- text=[x_axis, y_axis],
8334
- textposition="middle center",
8335
- textfont=dict(size=14)
8336
- ))
8337
-
8338
- # Add relationship line dengan annotation korelasi
8339
- rel_fig.add_trace(go.Scatter(
8340
- x=[0.3, 0.7], y=[0.5, 0.5],
8341
- mode='lines+text',
8342
- line=dict(width=4, color='red'),
8343
- text=[f"r = {correlation:.3f}"],
8344
- textposition="middle center",
8345
- textfont=dict(size=12, color='red')
8346
- ))
8347
-
8348
- rel_fig.update_layout(
8349
- title=f"Relationship Diagram: {x_axis} → {y_axis}",
8350
- showlegend=False,
8351
- height=300,
8352
- xaxis=dict(showgrid=False, zeroline=False, showticklabels=False, range=[0, 1]),
8353
- yaxis=dict(showgrid=False, zeroline=False, showticklabels=False, range=[0, 1]),
8354
- margin=dict(l=20, r=20, t=60, b=20)
10503
+ if fig_box:
10504
+ st.plotly_chart(fig_box, use_container_width=True)
10505
+
10506
+ # Matriks korelasi
10507
+ if len(numeric_cols) >= 2:
10508
+ st.write("**Matriks Korelasi:**")
10509
+ try:
10510
+ corr_matrix = df[numeric_cols].corr()
10511
+ fig_corr = px.imshow(
10512
+ corr_matrix,
10513
+ text_auto=".2f",
10514
+ color_continuous_scale='RdBu_r',
10515
+ aspect="auto",
10516
+ title="Matriks Korelasi Numerik"
8355
10517
  )
8356
-
8357
- st.plotly_chart(rel_fig, use_container_width=True)
8358
-
8359
- # --- Tabel hasil regresi ---
8360
- st.markdown("### 📋 SQL-Style Result Table")
8361
- result_df = pd.DataFrame({
8362
- "Metric": ["X (Independent)", "Y (Dependent)", "Slope (β1)", "Intercept (β0)",
8363
- "R-Value", "R² (R-squared)", "P-Value", "Std Error", "Correlation"],
8364
- "Value": [x_axis, y_axis, f"{slope:.4f}", f"{intercept:.4f}",
8365
- f"{r_value:.4f}", f"{r_squared:.4f}", f"{p_value:.4f}",
8366
- f"{std_err:.4f}", f"{correlation:.4f}"]
8367
- })
8368
-
8369
- st.dataframe(result_df, use_container_width=True, hide_index=True)
8370
-
8371
- # Analisis statistik lengkap
8372
- st.subheader("📊 Analisis Statistik Lengkap")
8373
-
8374
- # Statistik deskriptif
8375
- st.write("**Statistik Deskriptif:**")
8376
- st.dataframe(df.describe())
8377
-
8378
- # Analisis missing values
8379
- st.write("**Analisis Missing Values:**")
8380
- missing_data = df.isnull().sum()
8381
- if missing_data.sum() > 0:
8382
- fig_missing = px.bar(x=missing_data.index, y=missing_data.values,
8383
- title="Missing Values per Kolom")
8384
- st.plotly_chart(fig_missing)
8385
- else:
8386
- st.success("Tidak ada missing values dalam dataset!")
10518
+ st.plotly_chart(fig_corr, use_container_width=True)
10519
+ except Exception as e:
10520
+ st.warning(f"Tidak dapat menghitung matriks korelasi: {str(e)}")
10521
+
10522
+ with tab222:
10523
+ st.subheader("Analisis Data Kategorikal")
8387
10524
 
8388
- # Data quality report
8389
- st.subheader("📋 Data Quality Report")
10525
+ if categorical_cols:
10526
+ col1, col2 = st.columns(2)
10527
+
10528
+ with col1:
10529
+ # Pie chart yang terorganisir
10530
+ selected_cat_pie = st.selectbox(
10531
+ "Pilih variabel kategorikal:",
10532
+ categorical_cols,
10533
+ key="pie_cat"
10534
+ )
10535
+
10536
+ if selected_cat_pie:
10537
+ try:
10538
+ value_counts = df[selected_cat_pie].value_counts().head(8)
10539
+ fig_pie = safe_plotting(px.pie,
10540
+ values=value_counts.values,
10541
+ names=value_counts.index,
10542
+ title=f"Distribusi {selected_cat_pie} (Top 8)",
10543
+ color_discrete_sequence=color_palette
10544
+ )
10545
+ if fig_pie:
10546
+ st.plotly_chart(fig_pie, use_container_width=True)
10547
+ except Exception as e:
10548
+ st.warning(f"Tidak dapat membuat pie chart: {str(e)}")
10549
+
10550
+ with col2:
10551
+ # Bar chart horizontal
10552
+ if selected_cat_pie:
10553
+ try:
10554
+ value_counts = df[selected_cat_pie].value_counts().head(10)
10555
+ fig_bar = safe_plotting(px.bar,
10556
+ x=value_counts.values,
10557
+ y=value_counts.index,
10558
+ orientation='h',
10559
+ title=f"Top 10 {selected_cat_pie}",
10560
+ color=value_counts.values,
10561
+ color_continuous_scale='Blues'
10562
+ )
10563
+ if fig_bar:
10564
+ fig_bar.update_layout(
10565
+ xaxis_title="Count",
10566
+ yaxis_title=selected_cat_pie,
10567
+ showlegend=False
10568
+ )
10569
+ st.plotly_chart(fig_bar, use_container_width=True)
10570
+ except Exception as e:
10571
+ st.warning(f"Tidak dapat membuat bar chart: {str(e)}")
10572
+
10573
+ with tab333:
10574
+ st.subheader("Data Quality Report")
8390
10575
 
8391
- quality_data = []
10576
+ # Buat laporan kualitas data yang komprehensif
10577
+ quality_report = []
8392
10578
  for col in df.columns:
8393
- quality_data.append({
10579
+ analysis = column_analysis[col]
10580
+ quality_report.append({
8394
10581
  'Kolom': col,
8395
- 'Tipe': df[col].dtype,
8396
- 'Missing': df[col].isnull().sum(),
8397
- 'Missing %': (df[col].isnull().sum() / len(df)) * 100,
8398
- 'Unique': df[col].nunique(),
8399
- 'Contoh Value': df[col].iloc[0] if not df[col].empty else 'N/A'
10582
+ 'Tipe Data': analysis['dtype'],
10583
+ 'Role': analysis['role'],
10584
+ 'Unique Values': analysis['unique_count'],
10585
+ 'Null Values': analysis['null_count'],
10586
+ 'Null %': f"{analysis['null_percentage']:.2f}%",
10587
+ 'Sample': analysis['sample_values'][0] if analysis['sample_values'] else 'N/A'
8400
10588
  })
8401
10589
 
8402
- quality_df = pd.DataFrame(quality_data)
8403
- st.dataframe(quality_df)
10590
+ quality_df = pd.DataFrame(quality_report)
10591
+ st.dataframe(quality_df, use_container_width=True)
8404
10592
 
8405
- # Download hasil analisis
8406
- st.subheader("💾 Download Hasil Analisis")
10593
+ # Visualisasi kualitas data sederhana
10594
+ col1, col2 = st.columns(2)
10595
+
10596
+ with col1:
10597
+ # Missing values bar chart
10598
+ missing_data = quality_df[['Kolom', 'Null Values']].set_index('Kolom')
10599
+ fig_missing = safe_plotting(px.bar,
10600
+ missing_data,
10601
+ y='Null Values',
10602
+ title="Missing Values per Kolom",
10603
+ color='Null Values',
10604
+ color_continuous_scale='Reds'
10605
+ )
10606
+ if fig_missing:
10607
+ st.plotly_chart(fig_missing, use_container_width=True)
8407
10608
 
8408
- # Convert quality report to CSV
8409
- csv = quality_df.to_csv(index=False)
10609
+ with col2:
10610
+ # Data types distribution
10611
+ type_dist = quality_df['Tipe Data'].value_counts()
10612
+ fig_types = safe_plotting(px.pie,
10613
+ values=type_dist.values,
10614
+ names=type_dist.index,
10615
+ title="Distribusi Tipe Data",
10616
+ color_discrete_sequence=color_palette
10617
+ )
10618
+ if fig_types:
10619
+ st.plotly_chart(fig_types, use_container_width=True)
10620
+
10621
+ # --- DOWNLOAD SECTION ---
10622
+ st.subheader("💾 Download Hasil Analisis")
10623
+
10624
+ col1, col2, col3 = st.columns(3)
10625
+
10626
+ with col1:
8410
10627
  st.download_button(
8411
- label="Download Data Quality Report",
8412
- data=csv,
8413
- file_name="data_quality_report.csv",
8414
- mime="text/csv"
10628
+ "📊 Download Quality Report",
10629
+ quality_df.to_csv(index=False),
10630
+ "data_quality_report.csv",
10631
+ "text/csv"
8415
10632
  )
10633
+
10634
+ with col2:
10635
+ # Buat summary report
10636
+ summary_report = {
10637
+ 'file_name': uploaded_file.name,
10638
+ 'file_size': f"{uploaded_file.size / 1024:.2f} KB",
10639
+ 'rows': df.shape[0],
10640
+ 'columns': df.shape[1],
10641
+ 'analysis_date': pd.Timestamp.now().strftime("%Y-%m-%d %H:%M:%S"),
10642
+ 'numeric_columns': numeric_cols,
10643
+ 'categorical_columns': categorical_cols,
10644
+ 'date_columns': date_cols,
10645
+ 'primary_keys': [col for col, analysis in column_analysis.items()
10646
+ if analysis['role'] == 'PRIMARY_KEY'],
10647
+ 'foreign_keys': [col for col, analysis in column_analysis.items()
10648
+ if analysis['role'] == 'FOREIGN_KEY']
10649
+ }
8416
10650
 
8417
- except Exception as e:
8418
- st.error(f"Error membaca file: {str(e)}")
8419
- else:
8420
- st.info("Silakan upload file CSV atau Excel untuk memulai analisis")
8421
-
8422
- # Contoh dataset
8423
- st.subheader("🎯 Contoh Format Data")
8424
- example_data = {
8425
- 'ID': [1, 2, 3, 4, 5],
8426
- 'Nama': ['Alice', 'Bob', 'Charlie', 'Diana', 'Eve'],
8427
- 'Usia': [25, 30, 35, 28, 32],
8428
- 'Gaji': [50000, 60000, 70000, 55000, 65000],
8429
- 'Departemen': ['IT', 'HR', 'IT', 'Finance', 'HR']
8430
- }
8431
- example_df = pd.DataFrame(example_data)
8432
- st.dataframe(example_df)
10651
+ import json
10652
+ st.download_button(
10653
+ "📋 Download Summary Report",
10654
+ json.dumps(summary_report, indent=2, ensure_ascii=False),
10655
+ "analysis_summary.json",
10656
+ "application/json"
10657
+ )
8433
10658
 
8434
- # Download template
8435
- csv_template = example_df.to_csv(index=False)
8436
- st.download_button(
8437
- label="Download Template CSV",
8438
- data=csv_template,
8439
- file_name="template_data.csv",
8440
- mime="text/csv"
8441
- )
10659
+ with col3:
10660
+ # Download processed data
10661
+ st.download_button(
10662
+ "💾 Download Processed Data",
10663
+ df.to_csv(index=False),
10664
+ "processed_data.csv",
10665
+ "text/csv"
10666
+ )
10667
+
10668
+ except Exception as e:
10669
+ st.error(f"Error dalam analisis data: {str(e)}")
10670
+ st.info("Pastikan file yang diupload berformat CSV atau Excel yang valid")
10671
+ st.code(f"Error details: {str(e)}", language='python')
10672
+ else:
10673
+ st.info("📤 Silakan upload file CSV atau Excel untuk memulai analisis")
10674
+
10675
+ # Template dan panduan
10676
+ st.subheader("🎯 Panduan Format Data")
10677
+
10678
+ col1, col2 = st.columns(2)
10679
+
10680
+ with col1:
10681
+ st.write("**Format yang Disarankan:**")
10682
+ sample_data = {
10683
+ 'customer_id': [1, 2, 3, 4, 5],
10684
+ 'order_id': [101, 102, 103, 104, 105],
10685
+ 'product_id': [201, 202, 203, 204, 205],
10686
+ 'order_date': pd.date_range('2024-01-01', periods=5),
10687
+ 'amount': [100.50, 75.25, 200.00, 150.75, 90.99],
10688
+ 'category': ['Electronics', 'Books', 'Electronics', 'Clothing', 'Books'],
10689
+ 'status': ['Completed', 'Pending', 'Completed', 'Shipped', 'Pending']
10690
+ }
10691
+ sample_df = pd.DataFrame(sample_data)
10692
+ st.dataframe(sample_df)
10693
+
10694
+ with col2:
10695
+ st.write("**Keterangan Fitur:**")
10696
+ st.markdown("""
10697
+ - **🔑 Primary Key**: Kolom dengan nilai unik (ID, code)
10698
+ - **🔗 Foreign Key**: Kolom referensi ke tabel lain
10699
+ - **📊 ERD Dinamis**: Diagram relasi otomatis
10700
+ - **📈 Visualisasi Aman**: Error handling untuk semua chart
10701
+ - **🎨 Warna Konsisten**: Skema warna yang harmonis
10702
+ - **📋 Analisis Komprehensif**: Statistik detail dan laporan
10703
+ """)
10704
+
10705
+ # Download template
10706
+ csv_template = sample_df.to_csv(index=False)
10707
+ st.download_button(
10708
+ "📥 Download Template CSV",
10709
+ csv_template,
10710
+ "analysis_template.csv",
10711
+ "text/csv"
10712
+ )
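For readers who want to verify the template offline before uploading it back, a quick pandas check looks like this; a sketch only, though the file name and columns match the sample above:

```python
# Sanity-check the downloaded template against what the analyzer expects.
import pandas as pd

df = pd.read_csv("analysis_template.csv", parse_dates=["order_date"])
print(df.dtypes)                      # order_date should parse as datetime64[ns]
print(df["customer_id"].is_unique)    # True -> primary-key candidate
print(df["category"].value_counts())  # categorical distribution used by the pie chart
```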
8442
10713
 
8443
10714
 
8444
10715
  with tab8:
@@ -8450,12 +10721,29 @@ if df is not None:
8450
10721
  type=['csv', 'xlsx', 'xls'],
8451
10722
  key="stock_uploader"
8452
10723
  )
8453
- with st.expander("📜 Ketarangan Lengkap Tentang Aalisis Saham", expanded=False):
10724
+ with st.expander("📜 Keterangan Lengkap Tentang Analisis Saham", expanded=False):
10725
+ st.markdown(
10726
+ """
10727
+ <img src="https://s3-ap-southeast-1.amazonaws.com/membership-media/public/uploads/posts/1653502344_Memahami_Apa_Itu_Saham_Dan_Cara_Kerjanya_1170x658.jpg" class="responsive-img">
10728
+ """,
10729
+ unsafe_allow_html=True
10730
+ )
8454
10731
  st.markdown("""
8455
- **Penjelasan Penting 📛**
10732
+
8456
10733
 
8457
10734
  ### 🧾 Pengembangan Saham
8458
- - Saham Ini Masih Tahap Pengembangan Dan Masih Tahap Uji Coba Dan kalian bisa menggunakan model yang ada
10735
+ - Saham merupakan salah satu instrumen investasi yang populer di kalangan investor. Dengan membeli saham, investor memiliki sebagian kepemilikan dalam sebuah perusahaan dan berhak atas sebagian keuntungan perusahaan tersebut.
10736
+ - Analisis saham melibatkan evaluasi berbagai faktor seperti kinerja keuangan perusahaan, kondisi pasar, tren industri, dan faktor ekonomi makro untuk membuat keputusan investasi yang lebih baik.
10737
+ - Analisis saham dapat dilakukan dengan alat yang umum digunakan seperti Microsoft Excel atau Google Sheets.
10738
+
10739
+ ### 📈 Analisis Grafik Saham
10740
+ - Analisis grafik saham adalah proses menyajikan data saham secara visual dalam bentuk grafik agar informasinya mudah dibaca.
10741
+ - Grafik saham dapat digunakan untuk membandingkan beberapa saham sekaligus, menampilkan tren, dan melihat pola pergerakan harga.
10742
+ - Dengan membaca tren tersebut, investor dapat memperkirakan waktu beli atau jual dan berpotensi meningkatkan keuntungan investasinya.
8459
10747
  """)
8460
10748
  if uploaded_file is not None:
8461
10749
  try:
@@ -9569,7 +11857,7 @@ if df is not None:
9569
11857
  # Sidebar untuk memilih jenis kalkulator
9570
11858
  calc_type = st.sidebar.selectbox(
9571
11859
  "Pilih Jenis Kalkulator",
9572
- ["Kalkulator Dasar", "Kalkulator Ilmiah", "Kalkulator Keuangan", "Konverter Satuan", "Kalkulator BMI", "Kalkulator Waktu"]
11860
+ ["🔢 Kalkulator Dasar", "🔬 Kalkulator Ilmiah", "💰 Kalkulator Keuangan", "📐 Konverter Satuan", "⚖️ Kalkulator BMI", "⏰ Kalkulator Waktu"]
9573
11861
  )
9574
11862
 
9575
11863
  # Initialize session state for history
@@ -9582,7 +11870,7 @@ if df is not None:
9582
11870
  if len(st.session_state.calc_history) > 10: # Batasi hanya 10 riwayat terakhir
9583
11871
  st.session_state.calc_history.pop(0)
9584
11872
 
9585
- if calc_type == "Kalkulator Dasar":
11873
+ if calc_type == "🔢 Kalkulator Dasar":
9586
11874
  st.subheader("🔢 Kalkulator Dasar")
9587
11875
 
9588
11876
  # Layout dengan columns untuk tampilan kalkulator
@@ -9659,7 +11947,7 @@ if df is not None:
9659
11947
  if st.button("🗑️ Reset", use_container_width=True):
9660
11948
  st.rerun()
9661
11949
 
9662
- elif calc_type == "Kalkulator Ilmiah":
11950
+ elif calc_type == "🔬 Kalkulator Ilmiah":
9663
11951
  st.subheader("🔬 Kalkulator Ilmiah")
9664
11952
 
9665
11953
  col1, col2 = st.columns(2)
@@ -9778,7 +12066,7 @@ if df is not None:
9778
12066
  except Exception as e:
9779
12067
  st.error(f"❌ Error: {str(e)}")
9780
12068
 
9781
- elif calc_type == "Kalkulator Keuangan":
12069
+ elif calc_type == "💰 Kalkulator Keuangan":
9782
12070
  st.subheader("💰 Kalkulator Keuangan")
9783
12071
 
9784
12072
  finance_option = st.selectbox(
@@ -9860,7 +12148,7 @@ if df is not None:
9860
12148
  """)
9861
12149
  add_to_history(f"Cicilan: Rp {loan_amount:,.0f} → Rp {monthly_payment:,.0f}/bulan")
9862
12150
 
9863
- elif calc_type == "Konverter Satuan":
12151
+ elif calc_type == "📐 Konverter Satuan":
9864
12152
  st.subheader("📐 Konverter Satuan")
9865
12153
 
9866
12154
  conversion_type = st.selectbox(
@@ -9945,8 +12233,8 @@ if df is not None:
9945
12233
  st.success(f"**Hasil:** {calc_str}")
9946
12234
  add_to_history(calc_str)
9947
12235
 
9948
- elif calc_type == "Kalkulator BMI":
9949
- st.subheader("💪 Kalkulator BMI (Body Mass Index)")
12236
+ elif calc_type == "⚖️ Kalkulator BMI":
12237
+ st.subheader("⚖️ Kalkulator BMI (Body Mass Index)")
9950
12238
 
9951
12239
  col1, col2 = st.columns(2)
9952
12240
 
@@ -9983,7 +12271,7 @@ if df is not None:
9983
12271
  """)
9984
12272
  add_to_history(f"BMI: {bmi:.1f} ({category})")
9985
12273
 
9986
- elif calc_type == "Kalkulator Waktu":
12274
+ elif calc_type == "⏰ Kalkulator Waktu":
9987
12275
  st.subheader("⏰ Kalkulator Waktu")
9988
12276
 
9989
12277
  time_option = st.selectbox("Pilih jenis perhitungan", [
@@ -10119,7 +12407,7 @@ if df is not None:
10119
12407
  st.error("**🧹 Pembersihan Data**\n\nAuto-clean missing values")
10120
12408
 
10121
12409
  # Video Tutorial (placeholder)
10122
- st.markdown("### 🎥 Video Tutorial Penggunaan V2.2.5")
12410
+ st.markdown("### 🎥 Video Tutorial Penggunaan V2.3.9")
10123
12411
  import streamlit.components.v1 as components
10124
12412
  google_drive_id = "1obx6q2jQS1fRrNi1E4VpAPlyI_rR9nO5"
10125
12413
 
@@ -10488,7 +12776,8 @@ if df is not None:
10488
12776
  with col3:
10489
12777
  st.markdown("""
10490
12778
  ### 🔄 Update
10491
- - Versi terbaru: 2.2.5
12779
+ - Versi terbaru: 2.3.9
12780
+ - Rilis: Oktober 2025
10492
12781
  - Last updated: 2025
10493
12782
  - Compatibility: Python 3.8+
10494
12783
  """)