streamlit-launcher 2.2.5__py3-none-any.whl → 2.3.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -34,6 +34,36 @@ from sklearn.preprocessing import StandardScaler, LabelEncoder
  from sklearn.metrics import mean_squared_error, r2_score, accuracy_score, classification_report, confusion_matrix
  from sklearn.inspection import permutation_importance
  from scipy.stats import gaussian_kde
+ import streamlit.components.v1 as components
+ import tensorflow as tf
+ import numpy as np
+ import pandas as pd
+ from sklearn.model_selection import train_test_split
+ from sklearn.preprocessing import StandardScaler, LabelEncoder
+ from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
+ from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
+ from sklearn.metrics import confusion_matrix, classification_report
+ import plotly.express as px
+ import plotly.graph_objects as go
+ from plotly.subplots import make_subplots
+ import xgboost as xgb
+ from sklearn.ensemble import GradientBoostingRegressor, GradientBoostingClassifier
+ from sklearn.feature_selection import mutual_info_regression, mutual_info_classif
+ import time
+ import warnings
+ warnings.filterwarnings('ignore')
+ from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
+ from xgboost import XGBRegressor, XGBClassifier
+ from sklearn.ensemble import VotingRegressor, VotingClassifier
+ from sklearn.ensemble import StackingRegressor, StackingClassifier
+ from sklearn.model_selection import cross_validate, GridSearchCV
+ from sklearn.metrics import get_scorer
+ from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
+ from sklearn.compose import ColumnTransformer
+ from sklearn.impute import SimpleImputer
+ from sklearn.pipeline import Pipeline
+ import keras
+

  # Performance configuration
  plt.style.use('default')
@@ -6434,8 +6467,13 @@ def create_ml_dl_analysis_dashboard(df, numeric_cols, non_numeric_cols):
  """
  Comprehensive dashboard for Machine Learning and Deep Learning analysis
  """
-
- st.title("🤖 Advanced ML/DL Analysis Dashboard")
+ st.markdown("""
+ <div style='text-align: center; padding: 10px; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+ border-radius: 10px; margin: 10px 0;'>
+ <h3 style='color: white; margin: 0;'>🧠 dwibaktindev AI</h3>
+ <p style='color: white; margin: 0;'>Sasha • Alisa • dwibaktindev Models</p>
+ </div>
+ """, unsafe_allow_html=True)

  # Detect data types
  data_size = len(df)
@@ -6873,10 +6911,25 @@ def create_outlier_analysis(df, numeric_cols):
  st.plotly_chart(fig, use_container_width=True)

  def machine_learning_analysis(df, numeric_cols, non_numeric_cols):
- """Machine Learning analysis"""
+ """Machine Learning analysis, optimized for large datasets"""

  st.header("🤖 Machine Learning Analysis")

+ # Dataset information
+ st.subheader("📊 Dataset Info")
+ col1, col2, col3 = st.columns(3)
+ with col1:
+ st.metric("Total Rows", f"{len(df):,}")
+ with col2:
+ st.metric("Total Columns", f"{len(df.columns):,}")
+ with col3:
+ st.metric("Memory Usage", f"{df.memory_usage(deep=True).sum() / 1024**2:.2f} MB")
+
+ # Memory usage optimization
+ if st.checkbox("Optimize Memory Usage", value=True):
+ df = optimize_memory_usage(df)
+ st.success("Memory usage optimized!")
+
  # Preprocessing
  st.subheader("🔧 Data Preprocessing")

@@ -6892,162 +6945,407 @@ def machine_learning_analysis(df, numeric_cols, non_numeric_cols):

  problem_type = st.selectbox(
  "Problem Type",
- ["Regression", "Classification"],
+ ["Regression", "Classification", "Auto Detect"],
  key="ml_problem_type"
  )
+
+ # Auto detect problem type
+ if problem_type == "Auto Detect":
+ if target_variable in numeric_cols:
+ problem_type = "Regression"
+ else:
+ problem_type = "Classification"
+ st.info(f"Auto-detected: {problem_type}")

  with col2:
  test_size = st.slider("Test Size Ratio", 0.1, 0.5, 0.2, 0.05, key="ml_test_size")
  random_state = st.number_input("Random State", value=42, key="ml_random_state")
+
+ # Sampling for large datasets
+ sample_size = st.slider("Sample Size (for large datasets)",
+ min_value=1000,
+ max_value=min(50000, len(df)),
+ value=min(10000, len(df)),
+ step=1000,
+ key="ml_sample_size")

- # Feature selection
+ # Feature selection with advanced options
  st.subheader("🎯 Feature Selection")
+
  available_features = [f for f in numeric_cols + non_numeric_cols if f != target_variable]
- selected_features = st.multiselect(
- "Select Features for the Model",
- available_features,
- default=available_features[:min(10, len(available_features))],
- key="ml_features_select"
- )

- if target_variable and selected_features:
- try:
- # Prepare data
- X = df[selected_features].copy()
- y = df[target_variable]
-
- # Encode categorical features and target for classification
- le_dict = {}
- for col in selected_features:
- if col in non_numeric_cols:
- le = LabelEncoder()
- X[col] = le.fit_transform(X[col].astype(str))
- le_dict[col] = le
-
- # Encode target for classification
- if problem_type == "Classification" and y.dtype == 'object':
- le_target = LabelEncoder()
- y = le_target.fit_transform(y.astype(str))
-
- # Handle missing values
- X = X.fillna(X.mean(numeric_only=True))
-
- # Split data
- X_train, X_test, y_train, y_test = train_test_split(
- X, y, test_size=test_size, random_state=random_state, stratify=y if problem_type == "Classification" else None
+ col1, col2 = st.columns([2, 1])
+
+ with col1:
+ feature_selection_method = st.radio(
+ "Feature Selection Method",
+ ["Manual Selection", "Auto Select Top Features"],
+ key="feature_selection_method"
+ )
+
+ if feature_selection_method == "Manual Selection":
+ selected_features = st.multiselect(
+ "Select Features for the Model",
+ available_features,
+ default=available_features[:min(10, len(available_features))],
+ key="ml_features_select"
  )
+ else:
+ top_k = st.slider("Number of Top Features", 5, 50, 15, key="top_k_features")
+ selected_features = available_features[:top_k]
+ st.info(f"Auto-selected top {top_k} features")
+
+ with col2:
+ # Advanced options
+ st.write("**Advanced Options:**")
+ use_feature_engineering = st.checkbox("Feature Engineering", value=False)
+ remove_high_correlation = st.checkbox("Remove High Correlation", value=True)
+ correlation_threshold = st.slider("Correlation Threshold", 0.7, 0.99, 0.9, 0.01)
+
+ if not target_variable or not selected_features:
+ st.warning("Select a target variable and features first")
+ return
+
+ try:
+ # Sampling for large datasets
+ if len(df) > sample_size:
+ st.info(f"Using sample of {sample_size} records for faster processing")
+ df_sampled = df.sample(n=sample_size, random_state=random_state)
+ else:
+ df_sampled = df
+
+ # Progress tracking
+ progress_bar = st.progress(0)
+ status_text = st.empty()
+
+ # Prepare data
+ status_text.text("Preparing data...")
+ X = df_sampled[selected_features].copy()
+ y = df_sampled[target_variable]
+ progress_bar.progress(20)
+
+ # Handle large dataset - incremental processing
+ chunk_size = min(1000, len(X))
+
+ # Encode categorical features
+ status_text.text("Encoding categorical features...")
+ le_dict = {}
+ categorical_columns = [col for col in selected_features if col in non_numeric_cols]
+
+ for col in categorical_columns:
+ # For large datasets, use a more efficient categorical encoding
+ if X[col].nunique() > 100: # too many categories: fall back to frequency encoding
+ freq_encoding = X[col].value_counts().to_dict()
+ X[col] = X[col].map(freq_encoding)
+ X[col].fillna(0, inplace=True)
+ else:
+ le = LabelEncoder()
+ X[col] = le.fit_transform(X[col].astype(str))
+ le_dict[col] = le
+ progress_bar.progress(40)
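
The branch above swaps label encoding for frequency encoding once a column exceeds 100 distinct values, replacing each category with its occurrence count. A minimal, self-contained sketch of the same idea (the color Series is a made-up example, not data from the package):

    import pandas as pd

    s = pd.Series(["red", "blue", "red", "green", "red"])
    freq = s.value_counts().to_dict()  # {'red': 3, 'blue': 1, 'green': 1}
    encoded = s.map(freq)              # 3, 1, 3, 1, 3 -- each category becomes its count
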
+
+ # Encode target variable
+ status_text.text("Encoding target variable...")
+ le_target = None
+ if problem_type == "Classification" and y.dtype == 'object':
+ le_target = LabelEncoder()
+ y = le_target.fit_transform(y.astype(str))
+
+ # Remove highly correlated features
+ if remove_high_correlation and len(selected_features) > 1:
+ status_text.text("Removing highly correlated features...")
+ X = remove_correlated_features(X, correlation_threshold)
+
+ progress_bar.progress(60)
+
+ # Handle missing values with a more robust method
+ status_text.text("Handling missing values...")
+ for col in X.columns:
+ if X[col].isnull().sum() > 0:
+ if X[col].dtype in ['int64', 'float64']:
+ X[col].fillna(X[col].median(), inplace=True)
+ else:
+ X[col].fillna(X[col].mode()[0] if len(X[col].mode()) > 0 else 0, inplace=True)
+
+ progress_bar.progress(80)
+
+ # Split data
+ status_text.text("Splitting data...")
+ X_train, X_test, y_train, y_test = train_test_split(
+ X, y,
+ test_size=test_size,
+ random_state=random_state,
+ stratify=y if problem_type == "Classification" else None
+ )
+
+ # Scale features - StandardScaler is efficient here
+ scaler = StandardScaler()
+ X_train_scaled = scaler.fit_transform(X_train)
+ X_test_scaled = scaler.transform(X_test)
+ progress_bar.progress(100)
+
+ # Model selection with progress tracking
+ st.subheader("🚀 Model Training & Evaluation")
+
+ # Model choices based on problem type and dataset size
+ if problem_type == "Regression":
+ models = {
+ "Linear Regression": LinearRegression(),
+ "Ridge Regression": Ridge(random_state=random_state),
+ "Random Forest": RandomForestRegressor(
+ n_estimators=50, # reduced for large datasets
+ random_state=random_state,
+ n_jobs=-1 # use all CPU cores
+ ),
+ "Gradient Boosting": GradientBoostingRegressor(
+ n_estimators=50,
+ random_state=random_state
+ )
+ }
+ elif problem_type == "Classification":
+ models = {
+ "Logistic Regression": LogisticRegression(
+ random_state=random_state,
+ n_jobs=-1,
+ max_iter=1000
+ ),
+ "Random Forest": RandomForestClassifier(
+ n_estimators=50,
+ random_state=random_state,
+ n_jobs=-1
+ ),
+ "Gradient Boosting": GradientBoostingClassifier(
+ n_estimators=50,
+ random_state=random_state
+ ),
+ "XGBoost": xgb.XGBClassifier(
+ n_estimators=50,
+ random_state=random_state,
+ n_jobs=-1,
+ verbosity=0
+ ) if 'xgb' in globals() else None
+ }
+ # Remove None models
+ models = {k: v for k, v in models.items() if v is not None}
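
The dict comprehension above drops the XGBoost entry when `xgb` is not importable, which keeps the dashboard usable without the optional dependency. A more conventional guarded-import sketch of the same pattern (not the package's code, just an illustration):

    from sklearn.ensemble import RandomForestClassifier

    try:
        import xgboost as xgb
    except ImportError:  # optional dependency missing
        xgb = None

    models = {"Random Forest": RandomForestClassifier(n_estimators=50)}
    if xgb is not None:
        # register the optional model only when the library is available
        models["XGBoost"] = xgb.XGBClassifier(n_estimators=50, verbosity=0)
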
+
+ # Train and evaluate models with a progress bar
+ results = {}
+ model_progress = st.progress(0)
+ total_models = len(models)
+
+ for i, (name, model) in enumerate(models.items()):
+ status_text.text(f"Training {name}...")

- # Scale features
- scaler = StandardScaler()
- X_train_scaled = scaler.fit_transform(X_train)
- X_test_scaled = scaler.transform(X_test)
-
- # Model selection based on problem type
- st.subheader("🚀 Model Training & Evaluation")
-
- if problem_type == "Regression":
- models = {
- "Linear Regression": LinearRegression(),
- "Ridge Regression": Ridge(random_state=random_state),
- "Random Forest": RandomForestRegressor(n_estimators=100, random_state=random_state)
- }
-
- elif problem_type == "Classification":
- models = {
- "Logistic Regression": LogisticRegression(random_state=random_state),
- "Random Forest": RandomForestClassifier(n_estimators=100, random_state=random_state),
- "SVM": SVC(random_state=random_state)
- }
-
- # Train and evaluate models
- results = {}
-
- for name, model in models.items():
- with st.spinner(f"Training {name}..."):
- try:
- # Train model
- model.fit(X_train_scaled, y_train)
- y_pred = model.predict(X_test_scaled)
-
- # Calculate metrics
- if problem_type == "Regression":
- mse = mean_squared_error(y_test, y_pred)
- r2 = r2_score(y_test, y_pred)
-
- results[name] = {
- 'MSE': mse,
- 'R2 Score': r2,
- 'predictions': y_pred,
- 'model': model
- }
-
- elif problem_type == "Classification":
- accuracy = accuracy_score(y_test, y_pred)
-
- results[name] = {
- 'Accuracy': accuracy,
- 'predictions': y_pred,
- 'model': model
- }
- except Exception as model_error:
- st.warning(f"Error training {name}: {str(model_error)}")
-
- # Display results
- if results:
- st.subheader("📊 Model Performance Comparison")
+ try:
+ # Train model
+ model.fit(X_train_scaled, y_train)
+ y_pred = model.predict(X_test_scaled)

+ # Calculate metrics
  if problem_type == "Regression":
- results_df = pd.DataFrame({
- 'Model': list(results.keys()),
- 'MSE': [results[name]['MSE'] for name in results.keys()],
- 'R2 Score': [results[name]['R2 Score'] for name in results.keys()]
- })
- else:
- results_df = pd.DataFrame({
- 'Model': list(results.keys()),
- 'Accuracy': [results[name]['Accuracy'] for name in results.keys()]
- })
+ mse = mean_squared_error(y_test, y_pred)
+ rmse = np.sqrt(mse)
+ mae = mean_absolute_error(y_test, y_pred)
+ r2 = r2_score(y_test, y_pred)
+
+ results[name] = {
+ 'MSE': mse,
+ 'RMSE': rmse,
+ 'MAE': mae,
+ 'R2 Score': r2,
+ 'predictions': y_pred,
+ 'model': model
+ }

- st.dataframe(results_df.sort_values(
- 'R2 Score' if problem_type == "Regression" else 'Accuracy',
- ascending=False
- ), use_container_width=True)
-
- # Feature importance for tree-based models
- st.subheader("🔍 Feature Importance")
- for name, result in results.items():
- model = result['model']
- if hasattr(model, 'feature_importances_'):
- feature_importance = pd.DataFrame({
- 'feature': selected_features,
- 'importance': model.feature_importances_
- }).sort_values('importance', ascending=False)
-
- fig = px.bar(
- feature_importance.head(10),
- x='importance',
- y='feature',
- title=f"Feature Importance - {name}",
- orientation='h'
- )
- st.plotly_chart(fig, use_container_width=True)
- else:
- st.warning("No model was trained successfully")
+ elif problem_type == "Classification":
+ accuracy = accuracy_score(y_test, y_pred)
+ precision = precision_score(y_test, y_pred, average='weighted', zero_division=0)
+ recall = recall_score(y_test, y_pred, average='weighted', zero_division=0)
+ f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)
+
+ results[name] = {
+ 'Accuracy': accuracy,
+ 'Precision': precision,
+ 'Recall': recall,
+ 'F1-Score': f1,
+ 'predictions': y_pred,
+ 'model': model
+ }
+
+ st.success(f"✅ {name} trained successfully")
+
+ except Exception as model_error:
+ st.warning(f"⚠️ Error training {name}: {str(model_error)}")
+
+ model_progress.progress((i + 1) / total_models)
+
+ status_text.text("Completed!")

- except Exception as e:
- st.error(f"Error in ML analysis: {str(e)}")
+ # Display results
+ if results:
+ display_ml_results(results, problem_type, X_test, y_test, selected_features, le_target)
+ else:
+ st.error("❌ No model was trained successfully")
+
+ except Exception as e:
+ st.error(f"❌ Error in ML analysis: {str(e)}")
+ st.info("💡 Tip: try fewer features or a smaller sample size")
+
+ def optimize_memory_usage(df):
+ """Optimize the memory usage of a dataframe"""
+ for col in df.columns:
+ if df[col].dtype == 'object':
+ df[col] = df[col].astype('category')
+ elif df[col].dtype in ['int64', 'int32']:
+ c_min = df[col].min()
+ c_max = df[col].max()
+ if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(np.int8).max:
+ df[col] = df[col].astype(np.int8)
+ elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(np.int16).max:
+ df[col] = df[col].astype(np.int16)
+ elif c_min > np.iinfo(np.int32).min and c_max < np.iinfo(np.int32).max:
+ df[col] = df[col].astype(np.int32)
+ elif df[col].dtype in ['float64', 'float32']:
+ c_min = df[col].min()
+ c_max = df[col].max()
+ if c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max:
+ df[col] = df[col].astype(np.float32)
+ return df
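
optimize_memory_usage downcasts integer and float columns to the narrowest dtype whose range still holds the observed min/max (the strict inequalities skip boundary values) and turns object columns into pandas categoricals. A usage sketch against the function defined above, with a synthetic frame:

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({
        "counts": np.arange(1000, dtype=np.int64),  # max 999 -> fits int16
        "ratio": np.random.rand(1000),              # float64 -> float32
        "city": ["Jakarta", "Bandung"] * 500,       # object -> category
    })
    before = df.memory_usage(deep=True).sum()
    df = optimize_memory_usage(df)
    after = df.memory_usage(deep=True).sum()
    print(f"{before:,} -> {after:,} bytes")

One caveat worth knowing: float32 carries only about 7 significant digits, so IDs or currency amounts stored as float64 can lose precision after the cast.
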
+
+ def remove_correlated_features(X, threshold=0.9):
+ """Remove highly correlated features"""
+ corr_matrix = X.corr().abs()
+ upper = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(bool))
+ to_drop = [column for column in upper.columns if any(upper[column] > threshold)]
+ return X.drop(columns=to_drop)
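
remove_correlated_features keeps only the upper triangle of the absolute correlation matrix (k=1 excludes the diagonal) so every pair is inspected once, then drops the later column of any pair whose |r| exceeds the threshold. A toy run under those assumptions:

    import numpy as np
    import pandas as pd

    rng = np.random.default_rng(0)
    a = rng.normal(size=200)
    X = pd.DataFrame({
        "a": a,
        "b": 2 * a + rng.normal(scale=0.01, size=200),  # near-duplicate of 'a'
        "c": rng.normal(size=200),                      # independent
    })
    print(remove_correlated_features(X, threshold=0.9).columns.tolist())  # ['a', 'c']
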
+
+ def display_ml_results(results, problem_type, X_test, y_test, selected_features, le_target):
+ """Display ML results with comprehensive visualizations"""
+
+ st.subheader("📊 Model Performance Comparison")
+
+ # Create results dataframe
+ if problem_type == "Regression":
+ metrics_df = pd.DataFrame({
+ 'Model': list(results.keys()),
+ 'MSE': [results[name]['MSE'] for name in results.keys()],
+ 'RMSE': [results[name]['RMSE'] for name in results.keys()],
+ 'MAE': [results[name]['MAE'] for name in results.keys()],
+ 'R2 Score': [results[name]['R2 Score'] for name in results.keys()]
+ })
+ sort_metric = 'R2 Score'
+ else:
+ metrics_df = pd.DataFrame({
+ 'Model': list(results.keys()),
+ 'Accuracy': [results[name]['Accuracy'] for name in results.keys()],
+ 'Precision': [results[name]['Precision'] for name in results.keys()],
+ 'Recall': [results[name]['Recall'] for name in results.keys()],
+ 'F1-Score': [results[name]['F1-Score'] for name in results.keys()]
+ })
+ sort_metric = 'Accuracy'
+
+ # Display metrics table
+ st.dataframe(metrics_df.sort_values(sort_metric, ascending=False), use_container_width=True)
+
+ # Visualization
+ col1, col2 = st.columns(2)
+
+ with col1:
+ # Performance comparison chart
+ if problem_type == "Regression":
+ fig = px.bar(metrics_df, x='Model', y='R2 Score', title="R2 Score Comparison")
+ else:
+ fig = px.bar(metrics_df, x='Model', y='Accuracy', title="Accuracy Comparison")
+ st.plotly_chart(fig, use_container_width=True)
+
+ with col2:
+ # Actual vs Predicted for the best model
+ best_model_name = metrics_df.loc[metrics_df[sort_metric].idxmax(), 'Model']
+ best_result = results[best_model_name]
+
+ if problem_type == "Regression":
+ fig = px.scatter(
+ x=y_test,
+ y=best_result['predictions'],
+ labels={'x': 'Actual', 'y': 'Predicted'},
+ title=f"Actual vs Predicted - {best_model_name}"
+ )
+ fig.add_trace(px.line(x=[y_test.min(), y_test.max()], y=[y_test.min(), y_test.max()]).data[0])
+ else:
+ # Confusion matrix
+ cm = confusion_matrix(y_test, best_result['predictions'])
+ fig = px.imshow(
+ cm,
+ labels=dict(x="Predicted", y="Actual", color="Count"),
+ title=f"Confusion Matrix - {best_model_name}"
+ )
+ st.plotly_chart(fig, use_container_width=True)
+
+ # Feature importance
+ st.subheader("🔍 Feature Importance")
+ for name, result in results.items():
+ model = result['model']
+ if hasattr(model, 'feature_importances_'):
+ feature_importance = pd.DataFrame({
+ 'feature': list(X_test.columns)[:len(model.feature_importances_)], # label with the columns that survived preprocessing
+ 'importance': model.feature_importances_
+ }).sort_values('importance', ascending=False)
+
+ fig = px.bar(
+ feature_importance.head(10),
+ x='importance',
+ y='feature',
+ title=f"Top 10 Feature Importance - {name}",
+ orientation='h'
+ )
+ st.plotly_chart(fig, use_container_width=True)

  def deep_learning_analysis(df, numeric_cols, non_numeric_cols):
- """Deep Learning analysis"""
+ """Complete Deep Learning analysis - Optimized for Large Datasets"""

- st.header("🧠 Deep Learning Analysis")
+ st.header("🧠 Deep Learning Analysis - High Performance")

- st.warning("⚠️ The Deep Learning feature is compute-intensive and needs a reasonably large dataset")
+ # Validate dataset
+ if df.empty:
+ st.error("❌ Dataset is empty! Please upload data first.")
+ return
+
+ if len(numeric_cols) < 2:
+ st.error("❌ At least 2 numeric columns are required for Deep Learning analysis")
+ return

- # DL Configuration
- col1, col2 = st.columns(2)
+ # Speed configuration
+ st.subheader("⚡ Speed & Performance Configuration")
+
+ col1, col2, col3 = st.columns(3)

  with col1:
+ processing_speed = st.selectbox(
+ "Processing Speed",
+ ["🚀 Very Fast", "⚡ Fast", "✅ Balanced", "🐢 Comprehensive"],
+ index=0,
+ key="processing_speed"
+ )
+
+ # Set parameters based on the speed mode
+ if processing_speed == "🚀 Very Fast":
+ sample_size = 0.3
+ epochs = 20
+ batch_size = 128
+ elif processing_speed == "⚡ Fast":
+ sample_size = 0.5
+ epochs = 30
+ batch_size = 64
+ elif processing_speed == "✅ Balanced":
+ sample_size = 0.7
+ epochs = 50
+ batch_size = 32
+ else:
+ sample_size = 1.0
+ epochs = 80
+ batch_size = 16
+
+ with col2:
  dl_target = st.selectbox(
  "Select Target Variable",
  numeric_cols,
@@ -7060,30 +7358,76 @@ def deep_learning_analysis(df, numeric_cols, non_numeric_cols):
  key="dl_problem_type"
  )

- with col2:
- epochs = st.slider("Epochs", 10, 200, 50, key="dl_epochs")
- batch_size = st.slider("Batch Size", 16, 256, 32, key="dl_batch_size")
- learning_rate = st.selectbox("Learning Rate", [0.001, 0.01, 0.1, 0.0001], key="dl_learning_rate")
+ with col3:
+ epochs = st.slider("Epochs", 10, 200, epochs, key="dl_epochs")
+ batch_size = st.slider("Batch Size", 16, 256, batch_size, key="dl_batch_size")
+ learning_rate = st.selectbox("Learning Rate", [0.001, 0.01, 0.0001, 0.00001],
+ index=0, key="dl_learning_rate")
+
+ # Large-dataset optimization
+ st.info(f"**Mode {processing_speed}** - Sample size: {sample_size*100:.0f}% - Dataset: {len(df):,} rows")

- # Feature selection for DL
+ # Feature selection with optimization
+ available_features = [f for f in numeric_cols if f != dl_target]
  dl_features = st.multiselect(
  "Select Features for Deep Learning",
- [f for f in numeric_cols if f != dl_target],
- default=[f for f in numeric_cols if f != dl_target][:5],
+ available_features,
+ default=available_features[:min(6, len(available_features))],
  key="dl_features_select"
  )

- if dl_target and dl_features:
- try:
- import tensorflow as tf
+ if not dl_target or not dl_features:
+ st.info("📝 Select a target variable and features to start the DL analysis")
+ return
+
+ try:
+
+ # Check GPU availability
+ gpu_available = len(tf.config.experimental.list_physical_devices('GPU')) > 0
+ if gpu_available:
+ st.success("🎯 GPU available - training will be accelerated!")
+ else:
+ st.info("💡 No GPU available - training runs on the CPU")
+
+ # Memory optimization for large datasets
+ @st.cache_data(show_spinner=False)
+ def prepare_data_optimized(_df, features, target, sample_frac=1.0, problem_type="Regression"):
+ """Prepare data with memory optimization"""
+ # Sampling for large datasets
+ if sample_frac < 1.0:
+ _df = _df.sample(frac=sample_frac, random_state=42)
+
+ X = _df[features].fillna(_df[features].mean())
+ y = _df[target]
+
+ # Preprocess target for classification
+ if problem_type != "Regression":
+ if problem_type == "Binary Classification":
+ # Ensure a binary target
+ unique_vals = y.unique()
+ if len(unique_vals) > 2:
+ st.warning(f"⚠️ Target has {len(unique_vals)} classes. Using the 2 most frequent classes.")
+ top_2_classes = y.value_counts().head(2).index
+ mask = y.isin(top_2_classes)
+ X = X[mask]
+ y = y[mask]
+ y = LabelEncoder().fit_transform(y)
+ else:
+ y = LabelEncoder().fit_transform(y)
+ else:
+ # Multi-class classification
+ y = LabelEncoder().fit_transform(y)

- # Prepare data
- X = df[dl_features].fillna(df[dl_features].mean())
- y = df[dl_target]
+ return X, y
+
+ # Prepare data with the optimizations
+ with st.spinner("🔄 Processing data with speed optimizations..."):
+ X, y = prepare_data_optimized(df, dl_features, dl_target, sample_size, dl_problem_type)

  # Split data
  X_train, X_test, y_train, y_test = train_test_split(
- X, y, test_size=0.2, random_state=42
+ X, y, test_size=0.2, random_state=42,
+ stratify=y if dl_problem_type != "Regression" else None
  )

  # Scale features
@@ -7091,198 +7435,1532 @@ def deep_learning_analysis(df, numeric_cols, non_numeric_cols):
  X_train_scaled = scaler.fit_transform(X_train)
  X_test_scaled = scaler.transform(X_test)

- # Model architecture
- st.subheader("🏗️ Neural Network Architecture")
+ # Convert to TensorFlow datasets for high performance
+ train_dataset = tf.data.Dataset.from_tensor_slices((X_train_scaled, y_train))
+ train_dataset = train_dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)

+ val_dataset = tf.data.Dataset.from_tensor_slices((X_test_scaled, y_test))
+ val_dataset = val_dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)
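
Wrapping the scaled arrays in tf.data pipelines lets batching and host-to-device transfer overlap with training; calling batch() before prefetch() means whole batches are staged ahead of the step that consumes them. A standalone sketch of the same input pipeline with synthetic arrays (shapes are arbitrary):

    import numpy as np
    import tensorflow as tf

    X = np.random.rand(1024, 8).astype("float32")
    y = np.random.rand(1024).astype("float32")

    ds = (tf.data.Dataset.from_tensor_slices((X, y))
          .batch(64)
          .prefetch(tf.data.AUTOTUNE))  # let TF pick the buffer size

    for xb, yb in ds.take(1):
        print(xb.shape, yb.shape)  # (64, 8) (64,)
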
+
+ # Show dataset info
+ st.success(f"✅ Data ready: {len(X_train):,} training samples, {len(X_test):,} test samples")
+
+ # Model architecture with optimizations
+ st.subheader("🏗️ Neural Network Architecture - Optimized")
+
+ col1, col2 = st.columns(2)
+
+ with col1:
+ hidden_layers = st.slider("Number of Hidden Layers", 1, 5, 2, key="dl_hidden_layers")
+ units_per_layer = st.slider("Units per Layer", 32, 512, 64, key="dl_units")
+ activation = st.selectbox("Activation Function", ["relu", "elu", "tanh", "selu"],
+ index=0, key="dl_activation")
+
+ with col2:
+ dropout_rate = st.slider("Dropout Rate", 0.0, 0.5, 0.2, 0.1, key="dl_dropout")
+ optimizer = st.selectbox("Optimizer", ["adam", "rmsprop", "nadam", "sgd"],
+ index=0, key="dl_optimizer")
+ use_batch_norm = st.checkbox("Use Batch Normalization", value=True, key="dl_batchnorm")
+ use_early_stopping = st.checkbox("Use Early Stopping", value=True, key="dl_earlystop")
+
+ # Advanced configuration
+ with st.expander("⚙️ Advanced Configuration"):
  col1, col2 = st.columns(2)
-
  with col1:
- hidden_layers = st.slider("Number of Hidden Layers", 1, 5, 2, key="dl_hidden_layers")
- units_per_layer = st.slider("Units per Layer", 16, 256, 64, key="dl_units")
- activation = st.selectbox("Activation Function", ["relu", "tanh", "sigmoid"], key="dl_activation")
-
+ weight_initializer = st.selectbox(
+ "Weight Initializer",
+ ["glorot_uniform", "he_normal", "lecun_uniform"],
+ index=0
+ )
+ use_l2_reg = st.checkbox("Use L2 Regularization", value=False)
+ l2_rate = st.slider("L2 Rate", 0.0001, 0.01, 0.001, 0.0001) if use_l2_reg else 0.0
+
  with col2:
- dropout_rate = st.slider("Dropout Rate", 0.0, 0.5, 0.2, 0.1, key="dl_dropout")
- optimizer = st.selectbox("Optimizer", ["adam", "rmsprop", "sgd"], key="dl_optimizer")
-
- # Build model
+ learning_rate_schedule = st.selectbox(
+ "Learning Rate Schedule",
+ ["Constant", "ExponentialDecay", "CosineDecay"],
+ index=0
+ )
+
+ # Build optimized model
+ with st.spinner("🔄 Building the neural network model..."):
  model = tf.keras.Sequential()

  # Input layer
- model.add(tf.keras.layers.Dense(units_per_layer, activation=activation, input_shape=(len(dl_features),)))
+ if use_l2_reg:
+ model.add(tf.keras.layers.Dense(
+ units_per_layer,
+ activation=activation,
+ input_shape=(len(dl_features),),
+ kernel_initializer=weight_initializer,
+ kernel_regularizer=tf.keras.regularizers.l2(l2_rate)
+ ))
+ else:
+ model.add(tf.keras.layers.Dense(
+ units_per_layer,
+ activation=activation,
+ input_shape=(len(dl_features),),
+ kernel_initializer=weight_initializer
+ ))
+
+ if use_batch_norm:
+ model.add(tf.keras.layers.BatchNormalization())
  model.add(tf.keras.layers.Dropout(dropout_rate))

- # Hidden layers
+ # Hidden layers with optimization
  for i in range(hidden_layers - 1):
- model.add(tf.keras.layers.Dense(units_per_layer, activation=activation))
+ # Reduce units in deeper layers for efficiency
+ units = max(32, units_per_layer // (2 ** (i + 1)))
+
+ if use_l2_reg:
+ model.add(tf.keras.layers.Dense(
+ units,
+ activation=activation,
+ kernel_regularizer=tf.keras.regularizers.l2(l2_rate)
+ ))
+ else:
+ model.add(tf.keras.layers.Dense(units, activation=activation))
+
+ if use_batch_norm:
+ model.add(tf.keras.layers.BatchNormalization())
  model.add(tf.keras.layers.Dropout(dropout_rate))

  # Output layer
  if dl_problem_type == "Regression":
  model.add(tf.keras.layers.Dense(1, activation='linear'))
  loss = 'mse'
- metrics = ['mae']
+ metrics = ['mae', 'mse']
+ monitor_metric = 'val_loss'
  else:
- num_classes = len(y.unique()) if dl_problem_type == "Multi-class Classification" else 1
+ num_classes = len(np.unique(y)) if dl_problem_type == "Multi-class Classification" else 1
  activation_output = 'softmax' if dl_problem_type == "Multi-class Classification" else 'sigmoid'
- model.add(tf.keras.layers.Dense(num_classes, activation=activation_output))
+ output_units = num_classes if dl_problem_type == "Multi-class Classification" else 1
+ model.add(tf.keras.layers.Dense(output_units, activation=activation_output))
  loss = 'sparse_categorical_crossentropy' if dl_problem_type == "Multi-class Classification" else 'binary_crossentropy'
  metrics = ['accuracy']
-
- # Compile model
- model.compile(
- optimizer=optimizer,
- loss=loss,
- metrics=metrics
+ monitor_metric = 'val_accuracy'
+
+ # Learning rate schedule
+ if learning_rate_schedule == "ExponentialDecay":
+ lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
+ initial_learning_rate=learning_rate,
+ decay_steps=1000,
+ decay_rate=0.9
  )
-
- # Display model summary
- st.text("Model Summary:")
- model_summary = []
- model.summary(print_fn=lambda x: model_summary.append(x))
- st.text("\n".join(model_summary))
-
- # Train model
- if st.button("🚀 Train Deep Learning Model", key="dl_train_button"):
- with st.spinner("Training neural network..."):
- # Callbacks
- early_stopping = tf.keras.callbacks.EarlyStopping(
- patience=10, restore_best_weights=True
- )
-
- # Train model
- history = model.fit(
- X_train_scaled, y_train,
- epochs=epochs,
- batch_size=batch_size,
- validation_split=0.2,
- callbacks=[early_stopping],
- verbose=0
- )
-
- # Plot training history
- fig = go.Figure()
- fig.add_trace(go.Scatter(
- y=history.history['loss'],
- mode='lines',
- name='Training Loss'
- ))
- if 'val_loss' in history.history:
- fig.add_trace(go.Scatter(
- y=history.history['val_loss'],
- mode='lines',
- name='Validation Loss'
- ))
- fig.update_layout(
- title="Training History - Loss",
- xaxis_title="Epoch",
- yaxis_title="Loss",
- height=400
- )
- st.plotly_chart(fig, use_container_width=True)
-
- # Evaluate model
- test_results = model.evaluate(X_test_scaled, y_test, verbose=0)
- st.success("✅ Model Training Complete!")
- st.metric("Test Loss", f"{test_results[0]:.4f}")
- if len(test_results) > 1:
- st.metric("Test Metric", f"{test_results[1]:.4f}")
+ elif learning_rate_schedule == "CosineDecay":
+ lr_schedule = tf.keras.optimizers.schedules.CosineDecay(
+ initial_learning_rate=learning_rate,
+ decay_steps=epochs * len(X_train) // batch_size
+ )
+ else:
+ lr_schedule = learning_rate
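
With the defaults chosen above, ExponentialDecay yields a smooth per-step rate lr(t) = learning_rate * 0.9^(t / 1000), while CosineDecay anneals toward zero as 0.5 * learning_rate * (1 + cos(pi * t / decay_steps)). A quick check of the exponential variant (values assume the schedule as configured above):

    import tensorflow as tf

    schedule = tf.keras.optimizers.schedules.ExponentialDecay(
        initial_learning_rate=0.001, decay_steps=1000, decay_rate=0.9)
    print(float(schedule(0)), float(schedule(1000)))  # 0.001 0.0009
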
+
+ # Compile model with the learning rate
+ if optimizer == "adam":
+ optimizer_obj = tf.keras.optimizers.Adam(learning_rate=lr_schedule)
+ elif optimizer == "rmsprop":
+ optimizer_obj = tf.keras.optimizers.RMSprop(learning_rate=lr_schedule)
+ elif optimizer == "nadam":
+ optimizer_obj = tf.keras.optimizers.Nadam(learning_rate=lr_schedule)
+ else:
+ optimizer_obj = tf.keras.optimizers.SGD(learning_rate=lr_schedule, momentum=0.9)

- except ImportError:
- st.error("❌ TensorFlow is not installed. Install it with: pip install tensorflow")
- except Exception as e:
- st.error(f"Error in DL analysis: {str(e)}")
+ model.compile(optimizer=optimizer_obj, loss=loss, metrics=metrics)
+
+ # Display model summary
+ st.subheader("📊 Model Summary")

- def model_comparison_analysis(df, numeric_cols, non_numeric_cols):
- """Comprehensive model comparison"""
-
- st.header("📊 Model Comparison Dashboard")
-
- st.info("🔄 Model Comparison - pick models from the Machine Learning and Deep Learning tabs to compare")
-
- # Placeholder for the full implementation
- col1, col2, col3 = st.columns(3)
-
- with col1:
- st.metric("ML Models", "3")
- with col2:
- st.metric("Evaluation Metrics", "5+")
- with col3:
- st.metric("Feature Importance", "✓")
+ # Capture the model summary output
+ model_summary = []
+ model.summary(print_fn=lambda x: model_summary.append(x))
+ summary_text = "\n".join(model_summary)

- def feature_analysis_dashboard(df, numeric_cols, non_numeric_cols):
- """Comprehensive feature analysis dashboard"""
-
- st.header("🔍 Advanced Feature Analysis")
-
- # Feature importance analysis
- st.subheader("🎯 Feature Importance Analysis")
-
- # Multiple methods for feature importance
- importance_method = st.selectbox(
- "Select Feature Importance Method",
- ["Random Forest", "Permutation Importance"],
- key="feature_importance_method"
- )
-
- target_feature = st.selectbox(
- "Select Target for Feature Importance",
- numeric_cols,
- key="feature_importance_target"
- )
-
- if st.button("Compute Feature Importance", key="feature_importance_button"):
- with st.spinner("Computing feature importance..."):
- # Feature importance calculation
- try:
- features = [f for f in numeric_cols if f != target_feature]
-
- X = df[features].fillna(df[features].mean())
- y = df[target_feature]
-
- if importance_method == "Random Forest":
- model = RandomForestRegressor(n_estimators=100, random_state=42)
- model.fit(X, y)
- importances = model.feature_importances_
-
- importance_df = pd.DataFrame({
- 'feature': features,
- 'importance': importances
- }).sort_values('importance', ascending=False)
-
- fig = px.bar(
- importance_df.head(15),
- x='importance',
- y='feature',
- title="Random Forest Feature Importance",
- orientation='h'
+ # Add CSS styling
+ st.markdown("""
+ <style>
+ .model-summary-box {
+ background-color: #fff; /* light background */
+ color: #000; /* dark text */
+ border-radius: 10px;
+ padding: 15px;
+ font-family: 'Courier New', monospace;
+ font-size: 14px;
+ line-height: 1.5;
+ white-space: pre-wrap;
+ box-shadow: 0 0 8px rgba(0,255,179,0.3);
+ border: 1px solid rgba(0,255,179,0.4);
+ overflow-x: auto;
+ }
+ </style>
+ """, unsafe_allow_html=True)
+
+ # Use an expander as a dropdown
+ with st.expander("🧠 Show / Hide Model Summary"):
+ st.markdown(f"<div class='model-summary-box'>{summary_text}</div>", unsafe_allow_html=True)
+
+ # Calculate total parameters
+ total_params = model.count_params()
+ st.info(f"📈 Total Parameters: {total_params:,}")
+
+ # Training section
+ st.subheader("🚀 Model Training")
+
+ if st.button("🎯 Start Deep Learning Training", type="primary", key="dl_train_button"):
+ start_time = time.time()
+
+ with st.spinner("🧠 Training neural network... please wait..."):
+ # Callbacks for faster training
+ callbacks = []
+
+ if use_early_stopping:
+ early_stopping = tf.keras.callbacks.EarlyStopping(
+ monitor=monitor_metric,
+ patience=10,
+ restore_best_weights=True,
+ mode='min' if dl_problem_type == "Regression" else 'max',
+ verbose=1
  )
- st.plotly_chart(fig, use_container_width=True)
+ callbacks.append(early_stopping)
+
+ reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
+ monitor='val_loss',
+ factor=0.5,
+ patience=5,
+ min_lr=0.00001,
+ verbose=1
+ )
+ callbacks.append(reduce_lr)
+
+ # TensorBoard callback (optional)
+ # callbacks.append(tf.keras.callbacks.TensorBoard(log_dir='./logs'))
+
+ # Train model with a progress bar
+ progress_bar = st.progress(0)
+ status_text = st.empty()
+ time_estimator = st.empty()
+ metrics_display = st.empty()

- elif importance_method == "Permutation Importance":
- model = RandomForestRegressor(n_estimators=100, random_state=42)
- model.fit(X, y)
+ class TrainingCallback(tf.keras.callbacks.Callback):
+ def on_epoch_begin(self, epoch, logs=None):
+ self.epoch_start_time = time.time()

- perm_importance = permutation_importance(
- model, X, y, n_repeats=5, random_state=42
- )
+ def on_epoch_end(self, epoch, logs=None):
+ progress = (epoch + 1) / epochs
+ progress_bar.progress(min(progress, 1.0))
+
+ # Metrics display
+ if dl_problem_type == "Regression":
+ metrics_str = f"Loss: {logs['loss']:.4f}, Val Loss: {logs['val_loss']:.4f}, MAE: {logs['mae']:.4f}"
+ else:
+ metrics_str = f"Loss: {logs['loss']:.4f}, Val Loss: {logs['val_loss']:.4f}, Acc: {logs['accuracy']:.4f}"
+
+ status_text.text(f"Epoch {epoch+1}/{epochs}")
+ metrics_display.text(f"📊 {metrics_str}")
+
+ # Time estimation
+ elapsed = time.time() - start_time
+ epoch_time = time.time() - self.epoch_start_time
+ remaining = epoch_time * (epochs - epoch - 1)
+
+ time_estimator.text(f"⏱️ Elapsed: {elapsed:.1f}s | Est. remaining: {remaining:.1f}s")
+
+ callbacks.append(TrainingCallback())
+
+ # Train model
+ history = model.fit(
+ train_dataset,
+ epochs=epochs,
+ validation_data=val_dataset,
+ callbacks=callbacks,
+ verbose=0
+ )
+
+ training_time = time.time() - start_time
+ progress_bar.progress(1.0)
+ status_text.text(f"✅ Training complete! Time: {training_time:.1f} s")
+ time_estimator.text("")
+ metrics_display.text("")
+
+ # ==================== DETAILED EVALUATION ====================
+ st.subheader("📈 Detailed Evaluation Results")
+
+ # Predictions
+ y_pred = model.predict(X_test_scaled, verbose=0)
+
+ # 1. COMPREHENSIVE PERFORMANCE METRICS
+ st.subheader("🎯 Model Performance Dashboard")
+
+ if dl_problem_type == "Regression":
+ # Regression metrics
+ y_pred_flat = y_pred.flatten()
+ mse = mean_squared_error(y_test, y_pred_flat)
+ mae = mean_absolute_error(y_test, y_pred_flat)
+ r2 = r2_score(y_test, y_pred_flat)
+ rmse = np.sqrt(mse)

- importance_df = pd.DataFrame({
- 'feature': features,
- 'importance': perm_importance.importances_mean
- }).sort_values('importance', ascending=False)
-
- fig = px.bar(
- importance_df.head(15),
- x='importance',
- y='feature',
- title="Permutation Feature Importance",
- orientation='h'
- )
- st.plotly_chart(fig, use_container_width=True)
-
- except Exception as e:
- st.error(f"Error in feature importance analysis: {str(e)}")
+ # Additional metrics
+ mape = np.mean(np.abs((y_test - y_pred_flat) / np.where(y_test != 0, y_test, 1))) * 100
+ accuracy_percentage = max(0, min(100, (1 - mae / (y_test.max() - y_test.min())) * 100))
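
The np.where guard in the MAPE line substitutes 1 for zero actuals so the division never blows up; the trade-off is that those terms degrade to the absolute error itself rather than a percentage. For non-zero actuals the line computes MAPE = 100/n * sum(|y_i - yhat_i| / |y_i|). A tiny numeric check:

    import numpy as np

    y_true = np.array([100.0, 0.0, 50.0])
    y_pred = np.array([110.0, 5.0, 45.0])
    ratio = np.abs((y_true - y_pred) / np.where(y_true != 0, y_true, 1))
    print(ratio)               # [0.1 5.  0.1] -- the zero-actual term is just |error|
    print(ratio.mean() * 100)  # 173.33...
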
+
+ # Display metrics
+ col1, col2, col3, col4 = st.columns(4)
+
+ with col1:
+ st.metric("R² Score", f"{r2:.4f}",
+ delta="Excellent" if r2 > 0.8 else "Good" if r2 > 0.6 else "Needs Improvement")
+ with col2:
+ st.metric("MAE", f"{mae:.4f}")
+ with col3:
+ st.metric("RMSE", f"{rmse:.4f}")
+ with col4:
+ st.metric("MAPE", f"{mape:.2f}%")
+
+ else:
+ # Classification metrics
+ if dl_problem_type == "Binary Classification":
+ y_pred_class = (y_pred > 0.5).astype(int).flatten()
+ else:
+ y_pred_class = np.argmax(y_pred, axis=1)
+
+ accuracy = accuracy_score(y_test, y_pred_class)
+ precision = precision_score(y_test, y_pred_class, average='weighted', zero_division=0)
+ recall = recall_score(y_test, y_pred_class, average='weighted', zero_division=0)
+ f1 = f1_score(y_test, y_pred_class, average='weighted', zero_division=0)
+
+ # Display metrics
+ col1, col2, col3, col4 = st.columns(4)
+
+ with col1:
+ st.metric("Accuracy", f"{accuracy:.4f}",
+ delta="Excellent" if accuracy > 0.9 else "Good" if accuracy > 0.8 else "Needs Improvement")
+ with col2:
+ st.metric("Precision", f"{precision:.4f}")
+ with col3:
+ st.metric("Recall", f"{recall:.4f}")
+ with col4:
+ st.metric("F1-Score", f"{f1:.4f}")
+
+ # 2. FULL VISUALIZATIONS
+ st.subheader("📊 Comprehensive Visualizations")
+
+ # Training history visualization
+ fig_history = make_subplots(
+ rows=1, cols=2,
+ subplot_titles=('Loss Progression', 'Metrics Progression'),
+ specs=[[{"secondary_y": False}, {"secondary_y": False}]]
+ )
+
+ # Loss plot
+ fig_history.add_trace(
+ go.Scatter(x=list(range(1, len(history.history['loss'])+1)),
+ y=history.history['loss'],
+ name='Training Loss', line=dict(color='blue')),
+ row=1, col=1
+ )
+ fig_history.add_trace(
+ go.Scatter(x=list(range(1, len(history.history['val_loss'])+1)),
+ y=history.history['val_loss'],
+ name='Validation Loss', line=dict(color='red')),
+ row=1, col=1
+ )
+
+ # Metrics plot
+ if dl_problem_type == "Regression":
+ fig_history.add_trace(
+ go.Scatter(x=list(range(1, len(history.history['mae'])+1)),
+ y=history.history['mae'],
+ name='Training MAE', line=dict(color='green')),
+ row=1, col=2
+ )
+ if 'val_mae' in history.history:
+ fig_history.add_trace(
+ go.Scatter(x=list(range(1, len(history.history['val_mae'])+1)),
+ y=history.history['val_mae'],
+ name='Validation MAE', line=dict(color='orange')),
+ row=1, col=2
+ )
+ else:
+ fig_history.add_trace(
+ go.Scatter(x=list(range(1, len(history.history['accuracy'])+1)),
+ y=history.history['accuracy'],
+ name='Training Accuracy', line=dict(color='green')),
+ row=1, col=2
+ )
+ fig_history.add_trace(
+ go.Scatter(x=list(range(1, len(history.history['val_accuracy'])+1)),
+ y=history.history['val_accuracy'],
+ name='Validation Accuracy', line=dict(color='orange')),
+ row=1, col=2
+ )
+
+ fig_history.update_layout(height=400, title_text="Training History")
+ st.plotly_chart(fig_history, use_container_width=True)
+
+ # 3. PREDICTION VISUALIZATION
+ if dl_problem_type == "Regression":
+ # Regression plots
+ col1, col2 = st.columns(2)
+
+ with col1:
+ # Actual vs Predicted
+ fig_actual_pred = px.scatter(
+ x=y_test, y=y_pred_flat,
+ title="Actual vs Predicted",
+ labels={'x': 'Actual', 'y': 'Predicted'},
+ trendline="lowess"
+ )
+ fig_actual_pred.add_trace(
+ go.Scatter(x=[y_test.min(), y_test.max()],
+ y=[y_test.min(), y_test.max()],
+ mode='lines', name='Perfect Prediction',
+ line=dict(color='red', dash='dash'))
+ )
+ st.plotly_chart(fig_actual_pred, use_container_width=True)
+
+ with col2:
+ # Residual plot
+ residuals = y_test - y_pred_flat
+ fig_residual = px.scatter(
+ x=y_pred_flat, y=residuals,
+ title="Residual Plot",
+ labels={'x': 'Predicted', 'y': 'Residuals'},
+ trendline="lowess"
+ )
+ fig_residual.add_hline(y=0, line_dash="dash", line_color="red")
+ st.plotly_chart(fig_residual, use_container_width=True)
+
+ else:
+ # Classification plots
+ col1, col2 = st.columns(2)
+
+ with col1:
+ # Confusion Matrix
+ cm = confusion_matrix(y_test, y_pred_class)
+ fig_cm = px.imshow(
+ cm,
+ text_auto=True,
+ title="Confusion Matrix",
+ color_continuous_scale='Blues',
+ aspect="auto"
+ )
+ st.plotly_chart(fig_cm, use_container_width=True)
+
+ with col2:
+ # Classification report heatmap
+ report = classification_report(y_test, y_pred_class, output_dict=True)
+ report_df = pd.DataFrame(report).transpose().iloc[:-1, :3]
+ fig_report = px.imshow(
+ report_df.values,
+ x=report_df.columns,
+ y=report_df.index,
+ text_auto=".2f",
+ title="Classification Report",
+ color_continuous_scale='Viridis',
+ aspect="auto"
+ )
+ st.plotly_chart(fig_report, use_container_width=True)
+
+ # 4. FEATURE IMPORTANCE ANALYSIS
+ st.subheader("🔍 Feature Importance Analysis")
+
+ try:
+ # Simplified feature importance using permutation
+ @st.cache_data
+ def calculate_feature_importance(_model, X_test_scaled, y_test, feature_names, problem_type):
+ # leading underscore tells st.cache_data not to hash the Keras model
+ baseline_score = _model.evaluate(X_test_scaled, y_test, verbose=0)
+ baseline_loss = baseline_score[0] if problem_type == "Regression" else 1 - baseline_score[1]
+
+ importance_scores = []
+ for i in range(len(feature_names)):
+ X_permuted = X_test_scaled.copy()
+ np.random.shuffle(X_permuted[:, i])
+ permuted_score = _model.evaluate(X_permuted, y_test, verbose=0)
+ permuted_loss = permuted_score[0] if problem_type == "Regression" else 1 - permuted_score[1]
+ importance = max(0, permuted_loss - baseline_loss) # permuting an informative feature should increase the loss
+ importance_scores.append(importance)
+
+ return pd.DataFrame({
+ 'Feature': feature_names,
+ 'Importance': importance_scores
+ }).sort_values('Importance', ascending=False)
+
+ feature_importance_df = calculate_feature_importance(
+ model, X_test_scaled, y_test, dl_features, dl_problem_type
+ )
+
+ col1, col2 = st.columns(2)
+
+ with col1:
+ fig_importance = px.bar(
+ feature_importance_df,
+ x='Importance',
+ y='Feature',
+ orientation='h',
+ title="Feature Importance",
+ color='Importance',
+ color_continuous_scale='Viridis'
+ )
+ st.plotly_chart(fig_importance, use_container_width=True)
+
+ with col2:
+ fig_importance_pie = px.pie(
+ feature_importance_df,
+ values='Importance',
+ names='Feature',
+ title="Feature Importance Distribution"
+ )
+ st.plotly_chart(fig_importance_pie, use_container_width=True)
+
+ except Exception as e:
+ st.warning(f"⚠️ Feature importance calculation skipped: {str(e)}")
+
+ # 5. MODEL PERFORMANCE GAUGE
+ st.subheader("📈 Performance Summary")
+
+ if dl_problem_type == "Regression":
+ performance_score = min(100, max(0, (r2 + (1 - mae/y_test.std())) * 50))
+ performance_level = "Excellent" if performance_score > 85 else \
+ "Good" if performance_score > 70 else \
+ "Fair" if performance_score > 60 else "Needs Improvement"
+ else:
+ performance_score = accuracy * 100
+ performance_level = "Excellent" if performance_score > 90 else \
+ "Good" if performance_score > 80 else \
+ "Fair" if performance_score > 70 else "Needs Improvement"
+
+ # Gauge chart
+ fig_gauge = go.Figure(go.Indicator(
+ mode = "gauge+number+delta",
+ value = performance_score,
+ domain = {'x': [0, 1], 'y': [0, 1]},
+ title = {'text': f"Model Performance: {performance_level}"},
+ gauge = {
+ 'axis': {'range': [None, 100]},
+ 'bar': {'color': "darkblue"},
+ 'steps': [
+ {'range': [0, 60], 'color': "red"},
+ {'range': [60, 75], 'color': "yellow"},
+ {'range': [75, 90], 'color': "lightgreen"},
+ {'range': [90, 100], 'color': "green"}],
+ 'threshold': {
+ 'line': {'color': "red", 'width': 4},
+ 'thickness': 0.75,
+ 'value': 90}}
+ ))
+ st.plotly_chart(fig_gauge, use_container_width=True)
+
+ # 6. MODEL DOWNLOAD AND EXPORT
+ st.subheader("💾 Export Model")
+
+ col1, col2 = st.columns(2)
+
+ with col1:
+ # Save model
+ if st.button("💾 Save TensorFlow Model"):
+ model.save('saved_model.h5')
+ with open('saved_model.h5', 'rb') as f:
+ st.download_button(
+ label="📥 Download Model",
+ data=f,
+ file_name="deep_learning_model.h5",
+ mime="application/octet-stream"
+ )
+
+ with col2:
+ # Export predictions
+ predictions_df = pd.DataFrame({
+ 'Actual': y_test,
+ 'Predicted': y_pred.flatten() if dl_problem_type == "Regression" else y_pred_class
+ })
+ csv = predictions_df.to_csv(index=False)
+ st.download_button(
+ label="📥 Download Predictions",
+ data=csv,
+ file_name="model_predictions.csv",
+ mime="text/csv"
+ )
+
+ # 7. RECOMMENDATIONS AND INSIGHTS
+ st.subheader("💡 Insights & Recommendations")
+
+ # Training insights
+ final_epoch = len(history.history['loss'])
+ final_loss = history.history['loss'][-1]
+ final_val_loss = history.history['val_loss'][-1]
+
+ col1, col2, col3 = st.columns(3)
+ with col1:
+ st.metric("Final Training Loss", f"{final_loss:.4f}")
+ with col2:
+ st.metric("Final Validation Loss", f"{final_val_loss:.4f}")
+ with col3:
+ st.metric("Training Time", f"{training_time:.1f}s")
+
+ # Recommendations based on performance
+ st.info("""
+ **🎯 Improvement recommendations:**
+ - **Data Quality**: check for missing values and outliers
+ - **Feature Engineering**: add more relevant features
+ - **Hyperparameter Tuning**: experiment with different architectures
+ - **Regularization**: adjust dropout and L2 regularization
+ - **Learning Rate**: try learning rate scheduling
+ """)
+
+ # Performance tips
+ if performance_score < 70:
+ st.warning("""
+ **⚠️ Areas to improve:**
+ - Consider better feature selection
+ - Try a deeper/wider model architecture
+ - Use more training data
+ - Experiment with different optimizers
+ """)
+ else:
+ st.success("""
+ **✅ Good performance!**
+ - The model is showing promising results
+ - Consider deployment for real-time use
+ - Monitor model performance periodically
+ """)
+
+ except Exception as e:
+ st.error(f"❌ Error in DL analysis: {str(e)}")
+ st.info("""
+ 💡 Troubleshooting tips:
+ - Make sure the dataset is large enough (>100 samples)
+ - Use a faster speed mode for large datasets
+ - Reduce the number of features on memory errors
+ - Make sure the target variable matches the problem type
+ - Try a smaller learning rate
+ """)
+
+ # Additional utility functions if needed
+ def validate_tensorflow_installation():
+ """Validate TensorFlow installation"""
+ try:
+ import tensorflow as tf
+ version = tf.__version__
+ gpu_available = tf.config.list_physical_devices('GPU')
+ return True, version, len(gpu_available) > 0
+ except ImportError:
+ return False, None, False
+
+ def model_comparison_analysis(df, numeric_cols, non_numeric_cols):
+ """Comprehensive comparative data analysis without machine learning models"""
+
+ st.header("📊 Advanced Data Analysis Dashboard")
+
+ # Dataset information
+ st.subheader("📋 Dataset Overview")
+ col1, col2, col3, col4 = st.columns(4)
+ with col1:
+ st.metric("Total Samples", f"{len(df):,}")
+ with col2:
+ st.metric("Features", f"{len(numeric_cols) + len(non_numeric_cols):,}")
+ with col3:
+ st.metric("Numeric", f"{len(numeric_cols):,}")
+ with col4:
+ st.metric("Categorical", f"{len(non_numeric_cols):,}")
+
+ # Configuration section
+ st.subheader("⚙️ Analysis Configuration")
+
+ col1, col2 = st.columns(2)
+
+ with col1:
+ # Target selection for the analysis
+ target_variable = st.selectbox(
+ "dwibaktindev AI",
+ numeric_cols + non_numeric_cols,
+ key="analysis_target"
+ )
+
+ # Analysis type
+ analysis_type = st.selectbox(
+ "Alisa AI",
+ ["Descriptive Statistics", "Correlation Analysis", "Distribution Analysis",
+ "Relationship Analysis", "Comparative Analysis"],
+ key="analysis_type"
+ )
+
+ with col2:
+ # Feature selection
+ available_features = [f for f in numeric_cols + non_numeric_cols if f != target_variable]
+ selected_features = st.multiselect(
+ "Sasha AI",
+ available_features,
+ default=available_features[:min(10, len(available_features))],
+ key="analysis_features"
+ )
+
+ # Sample size for visualization
+ sample_size = st.slider("Sample Size for Visualization", 100, len(df),
+ min(1000, len(df)), 100, key="sample_size")
+
+ if st.button("🚀 Start Model AI", type="primary", key="start_analysis"):
+ if not target_variable or not selected_features:
+ st.error("❌ Please select target variable and features")
+ return
+
+ try:
+ # Run the analysis by type
+ with st.spinner("🔄 Performing analysis..."):
+ if analysis_type == "Descriptive Statistics":
+ perform_descriptive_analysis(df, target_variable, selected_features)
+
+ elif analysis_type == "Correlation Analysis":
+ perform_correlation_analysis(df, target_variable, selected_features)
+
+ elif analysis_type == "Distribution Analysis":
+ perform_distribution_analysis(df, target_variable, selected_features, sample_size)
+
+ elif analysis_type == "Relationship Analysis":
+ perform_relationship_analysis(df, target_variable, selected_features, sample_size)
+
+ elif analysis_type == "Comparative Analysis":
+ perform_comparative_analysis(df, target_variable, selected_features)
+
+ st.success("✅ Analysis completed!")
+
+ except Exception as e:
+ st.error(f"❌ Error in data analysis: {str(e)}")
+
+ def perform_descriptive_analysis(df, target, features):
+ """Descriptive statistical analysis"""
+ import pandas as pd
+ import numpy as np
+
+ st.subheader("📊 Descriptive Statistics")
+
+ # Statistics for the target variable
+ st.write(f"### Target Variable: `{target}`")
+
+ if pd.api.types.is_numeric_dtype(df[target]):
+ col1, col2, col3, col4 = st.columns(4)
+
+ with col1:
+ st.metric("Mean", f"{df[target].mean():.2f}")
+ with col2:
+ st.metric("Median", f"{df[target].median():.2f}")
+ with col3:
+ st.metric("Std Dev", f"{df[target].std():.2f}")
+ with col4:
+ st.metric("Missing", f"{df[target].isnull().sum()}")
+
+ # Detailed statistics
+ st.dataframe(df[target].describe(), use_container_width=True)
+
+ else:
+ col1, col2, col3 = st.columns(3)
+
+ with col1:
+ st.metric("Unique Values", df[target].nunique())
+ with col2:
+ st.metric("Most Frequent", df[target].mode().iloc[0] if not df[target].mode().empty else "N/A")
+ with col3:
+ st.metric("Missing", f"{df[target].isnull().sum()}")
+
+ # Value counts
+ value_counts = df[target].value_counts()
+ st.write("**Value Distribution:**")
+ st.dataframe(value_counts, use_container_width=True)
+
+ # Statistics for numeric features
+ numeric_features = [f for f in features if pd.api.types.is_numeric_dtype(df[f])]
+ if numeric_features:
+ st.write("### Numeric Features Summary")
+ st.dataframe(df[numeric_features].describe(), use_container_width=True)
+
8174
+ # Statistik untuk features kategorik
8175
+ categorical_features = [f for f in features if not pd.api.types.is_numeric_dtype(df[f])]
8176
+ if categorical_features:
8177
+ st.write("### Categorical Features Summary")
8178
+ for feature in categorical_features:
8179
+ with st.expander(f"`{feature}`"):
8180
+ value_counts = df[feature].value_counts()
8181
+ st.dataframe(value_counts, use_container_width=True)
8182
+
8183
+ def perform_correlation_analysis(df, target, features):
8184
+ """Analisis korelasi"""
8185
+ import pandas as pd
8186
+ import numpy as np
8187
+ import plotly.express as px
8188
+ import plotly.graph_objects as go
8189
+
8190
+ st.subheader("🔗 Correlation Analysis")
8191
+
8192
+ # Pilih hanya features numerik untuk korelasi
8193
+ numeric_features = [f for f in features if pd.api.types.is_numeric_dtype(df[f])]
8194
+
8195
+ if pd.api.types.is_numeric_dtype(df[target]):
8196
+ numeric_features.append(target)
8197
+
8198
+ if len(numeric_features) < 2:
8199
+ st.warning("⚠️ Need at least 2 numeric features for correlation analysis")
8200
+ return
8201
+
8202
+ correlation_df = df[numeric_features].corr()
8203
+
8204
+ # Heatmap korelasi
8205
+ st.write("### Correlation Heatmap")
8206
+ fig = px.imshow(correlation_df,
8207
+ title="Feature Correlation Heatmap",
8208
+ color_continuous_scale="RdBu_r",
8209
+ aspect="auto")
8210
+ st.plotly_chart(fig, use_container_width=True)
8211
+
8212
+ # Korelasi dengan target
8213
+ if pd.api.types.is_numeric_dtype(df[target]):
8214
+ st.write("### Correlation with Target")
8215
+ target_corr = correlation_df[target].drop(target).sort_values(ascending=False)
8216
+
8217
+ col1, col2 = st.columns(2)
8218
+
8219
+ with col1:
8220
+ fig = px.bar(x=target_corr.values, y=target_corr.index,
8221
+ orientation='h',
8222
+ title=f"Correlation with {target}",
8223
+ labels={'x': 'Correlation', 'y': 'Feature'})
8224
+ st.plotly_chart(fig, use_container_width=True)
8225
+
8226
+ with col2:
8227
+ # Tabel korelasi
8228
+ st.dataframe(target_corr.round(4), use_container_width=True)
8229
+
8230
+ def perform_distribution_analysis(df, target, features, sample_size):
8231
+ """Analisis distribusi"""
8232
+ import pandas as pd
8233
+ import plotly.express as px
8234
+ import plotly.graph_objects as go
8235
+ from plotly.subplots import make_subplots
8236
+
8237
+ st.subheader("📈 Distribution Analysis")
8238
+
8239
+ # Sample data untuk performa visualisasi
8240
+ sample_df = df.sample(min(sample_size, len(df)), random_state=42)
8241
+
8242
+ # Distribusi target variable
8243
+ st.write(f"### Target Variable Distribution: `{target}`")
8244
+
8245
+ if pd.api.types.is_numeric_dtype(df[target]):
8246
+ col1, col2 = st.columns(2)
8247
+
8248
+ with col1:
8249
+ # Histogram
8250
+ fig = px.histogram(sample_df, x=target,  # use the sample drawn above
8251
+ title=f"Distribution of {target}",
8252
+ nbins=50)
8253
+ st.plotly_chart(fig, use_container_width=True)
8254
+
8255
+ with col2:
8256
+ # Box plot
8257
+ fig = px.box(sample_df, y=target,  # use the sample drawn above
8258
+ title=f"Box Plot of {target}")
8259
+ st.plotly_chart(fig, use_container_width=True)
8260
+ else:
8261
+ # Untuk variabel kategorik
8262
+ value_counts = df[target].value_counts()
8263
+ fig = px.pie(values=value_counts.values,
8264
+ names=value_counts.index,
8265
+ title=f"Distribution of {target}")
8266
+ st.plotly_chart(fig, use_container_width=True)
8267
+
8268
+ # Distribusi features numerik
8269
+ numeric_features = [f for f in features if pd.api.types.is_numeric_dtype(df[f])]
8270
+ if numeric_features:
8271
+ st.write("### Numeric Features Distribution")
8272
+
8273
+ # Pilih features untuk ditampilkan
8274
+ selected_numeric = st.multiselect(
8275
+ "Select numeric features to visualize:",
8276
+ numeric_features,
8277
+ default=numeric_features[:min(3, len(numeric_features))]
8278
+ )
8279
+
8280
+ if selected_numeric:
8281
+ # Histogram multiple
8282
+ fig = make_subplots(rows=len(selected_numeric), cols=1,
8283
+ subplot_titles=selected_numeric)
8284
+
8285
+ for i, feature in enumerate(selected_numeric, 1):
8286
+ fig.add_trace(
8287
+ go.Histogram(x=sample_df[feature], name=feature, nbinsx=30),
8288
+ row=i, col=1
8289
+ )
8290
+
8291
+ fig.update_layout(height=300*len(selected_numeric),
8292
+ title_text="Distribution of Numeric Features")
8293
+ st.plotly_chart(fig, use_container_width=True)
8294
+
8295
+ # Distribusi features kategorik
8296
+ categorical_features = [f for f in features if not pd.api.types.is_numeric_dtype(df[f])]
8297
+ if categorical_features:
8298
+ st.write("### Categorical Features Distribution")
8299
+
8300
+ selected_categorical = st.multiselect(
8301
+ "Select categorical features to visualize:",
8302
+ categorical_features,
8303
+ default=categorical_features[:min(2, len(categorical_features))]
8304
+ )
8305
+
8306
+ if selected_categorical:
8307
+ for feature in selected_categorical:
8308
+ value_counts = df[feature].value_counts().head(10) # Top 10 saja
8309
+ fig = px.bar(x=value_counts.values, y=value_counts.index,
8310
+ orientation='h',
8311
+ title=f"Top 10 Values in {feature}")
8312
+ st.plotly_chart(fig, use_container_width=True)
8313
+
8314
+ def perform_relationship_analysis(df, target, features, sample_size):
8315
+ """Analisis hubungan antara variabel"""
8316
+ import pandas as pd
8317
+ import plotly.express as px
8318
+ import plotly.graph_objects as go
8319
+
8320
+ st.subheader("🔄 Relationship Analysis")
8321
+
8322
+ sample_df = df.sample(min(sample_size, len(df)), random_state=42)
8323
+
8324
+ # Pilih features numerik untuk scatter plot
8325
+ numeric_features = [f for f in features if pd.api.types.is_numeric_dtype(df[f])]
8326
+
8327
+ if pd.api.types.is_numeric_dtype(df[target]) and len(numeric_features) >= 1:
8328
+ st.write("### Scatter Plots with Target")
8329
+
8330
+ col1, col2 = st.columns(2)
8331
+
8332
+ with col1:
8333
+ x_feature = st.selectbox("X-axis feature:", numeric_features, key="scatter_x")
8334
+
8335
+ with col2:
8336
+ color_feature = st.selectbox("Color by (optional):",
8337
+ [None] + [f for f in features if f != x_feature],
8338
+ key="scatter_color")
8339
+
8340
+ if x_feature:
8341
+ fig = px.scatter(sample_df, x=x_feature, y=target,
8342
+ color=color_feature if color_feature else None,
8343
+ title=f"{target} vs {x_feature}",
8344
+ opacity=0.6)
8345
+ st.plotly_chart(fig, use_container_width=True)
8346
+
8347
+ # Pair plot untuk multiple numeric features
8348
+ if len(numeric_features) >= 2:
8349
+ st.write("### Pairwise Relationships")
8350
+
8351
+ # Build the option list once so the default never includes a
+ # non-numeric target that would be absent from the options
+ pairplot_options = numeric_features + ([target] if pd.api.types.is_numeric_dtype(df[target]) else [])
+ selected_for_pairplot = st.multiselect(
+ "Select features for pair plot:",
+ pairplot_options,
+ default=pairplot_options[:min(4, len(pairplot_options))]
+ )
8356
+
8357
+ if len(selected_for_pairplot) >= 2:
8358
+ fig = px.scatter_matrix(sample_df[selected_for_pairplot],
8359
+ dimensions=selected_for_pairplot,
8360
+ height=800)
8361
+ st.plotly_chart(fig, use_container_width=True)
8362
+
8363
+ # Analisis hubungan kategorik-numerik
8364
+ categorical_features = [f for f in features if not pd.api.types.is_numeric_dtype(df[f])]
8365
+ if categorical_features and pd.api.types.is_numeric_dtype(df[target]):
8366
+ st.write("### Categorical vs Numerical Analysis")
8367
+
8368
+ cat_feature = st.selectbox("Select categorical feature:", categorical_features)
8369
+ num_feature = st.selectbox("Select numerical feature:",
8370
+ [target] + numeric_features)
8371
+
8372
+ if cat_feature and num_feature:
8373
+ col1, col2 = st.columns(2)
8374
+
8375
+ with col1:
8376
+ # Box plot
8377
+ fig = px.box(df, x=cat_feature, y=num_feature,
8378
+ title=f"{num_feature} by {cat_feature}")
8379
+ st.plotly_chart(fig, use_container_width=True)
8380
+
8381
+ with col2:
8382
+ # Violin plot
8383
+ fig = px.violin(df, x=cat_feature, y=num_feature,
8384
+ title=f"Distribution of {num_feature} by {cat_feature}")
8385
+ st.plotly_chart(fig, use_container_width=True)
8386
+
8387
+ def perform_comparative_analysis(df, target, features):
8388
+ """Analisis komparatif"""
8389
+ import pandas as pd
8390
+ import plotly.express as px
8391
+ import plotly.graph_objects as go
8392
+
8393
+ st.subheader("⚖️ Comparative Analysis")
8394
+
8395
+ # Group by analysis
8396
+ st.write("### Group-wise Analysis")
8397
+
8398
+ group_feature = st.selectbox(
8399
+ "Group by feature:",
8400
+ [None] + [f for f in features if not pd.api.types.is_numeric_dtype(df[f])]
8401
+ )
8402
+
8403
+ if group_feature:
8404
+ if pd.api.types.is_numeric_dtype(df[target]):
8405
+ # Untuk target numerik
8406
+ summary = df.groupby(group_feature)[target].agg(['mean', 'median', 'std', 'count']).round(2)
8407
+ st.dataframe(summary, use_container_width=True)
8408
+
8409
+ # Visualisasi
8410
+ col1, col2 = st.columns(2)
8411
+
8412
+ with col1:
8413
+ fig = px.bar(summary.reset_index(), x=group_feature, y='mean',
8414
+ title=f"Average {target} by {group_feature}")
8415
+ st.plotly_chart(fig, use_container_width=True)
8416
+
8417
+ with col2:
8418
+ fig = px.box(df, x=group_feature, y=target,
8419
+ title=f"Distribution of {target} by {group_feature}")
8420
+ st.plotly_chart(fig, use_container_width=True)
8421
+
8422
+ else:
8423
+ # Untuk target kategorik
8424
+ cross_tab = pd.crosstab(df[group_feature], df[target], normalize='index') * 100
8425
+ st.write("**Percentage Distribution:**")
8426
+ st.dataframe(cross_tab.round(2), use_container_width=True)
8427
+
8428
+ # Stacked bar chart
8429
+ fig = px.bar(cross_tab.reset_index(),
8430
+ x=group_feature,
8431
+ y=cross_tab.columns.tolist(),
8432
+ title=f"Distribution of {target} by {group_feature}",
8433
+ barmode='stack')
8434
+ st.plotly_chart(fig, use_container_width=True)
8435
+
8436
+ # Time series analysis (jika ada kolom datetime)
8437
+ datetime_columns = df.select_dtypes(include=['datetime64']).columns.tolist()
8438
+ if datetime_columns and pd.api.types.is_numeric_dtype(df[target]):
8439
+ st.write("### Time Series Analysis")
8440
+
8441
+ date_col = st.selectbox("Select date column:", datetime_columns)
8442
+
8443
+ if date_col:
8444
+ # Aggregasi berdasarkan waktu
8445
+ df_sorted = df.sort_values(date_col)
8446
+
8447
+ # Pilih frekuensi aggregasi
8448
+ freq = st.selectbox("Aggregation frequency:",
8449
+ ['D', 'W', 'M', 'Q'],
8450
+ format_func=lambda x: {'D': 'Daily', 'W': 'Weekly',
8451
+ 'M': 'Monthly', 'Q': 'Quarterly'}[x])
8452
+
8453
+ time_series = df_sorted.set_index(date_col)[target].resample(freq).mean()
8454
+
8455
+ fig = px.line(time_series.reset_index(),
8456
+ x=date_col, y=target,
8457
+ title=f"{target} Over Time")
8458
+ st.plotly_chart(fig, use_container_width=True)
8459
+
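For reference, the percentage table in the comparative analysis above comes from `pd.crosstab(..., normalize='index')`, which makes each row sum to 100%. A toy example on synthetic data (illustrative only):

```python
# What normalize='index' produces, on a tiny synthetic frame.
import pandas as pd

toy = pd.DataFrame({
    "region": ["A", "A", "A", "B", "B"],
    "churn":  ["yes", "no", "no", "yes", "yes"],
})
pct = pd.crosstab(toy["region"], toy["churn"], normalize="index") * 100
print(pct.round(1))
# churn     no    yes
# region
# A       66.7   33.3
# B        0.0  100.0
```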
8460
+ def feature_analysis_dashboard(df, numeric_cols, non_numeric_cols):
8461
+ """Dashboard analisis feature yang komprehensif dengan optimasi dataset besar"""
8462
+
8463
+ st.header("🔍 Advanced Feature Analysis")
8464
+
8465
+ # Informasi dataset
8466
+ st.subheader("📊 Dataset Overview")
8467
+ col1, col2, col3 = st.columns(3)
8468
+ with col1:
8469
+ st.metric("Total Features", f"{len(numeric_cols) + len(non_numeric_cols):,}")
8470
+ with col2:
8471
+ st.metric("Numeric Features", f"{len(numeric_cols):,}")
8472
+ with col3:
8473
+ st.metric("Categorical Features", f"{len(non_numeric_cols):,}")
8474
+
8475
+ # Optimasi memory
8476
+ if st.checkbox("Optimize Memory Usage", value=True, key="feature_optimize_mem"):
8477
+ df = optimize_memory_usage_feature(df)
8478
+ st.success("✅ Memory usage optimized!")
8479
+
8480
+ # Performance configuration
8481
+ st.subheader("⚡ Performance Configuration")
8482
+
8483
+ col1, col2 = st.columns(2)
8484
+
8485
+ with col1:
8486
+ # Sampling options untuk dataset besar
8487
+ use_sampling = st.checkbox("Use Sampling for Large Dataset", value=len(df) > 10000,
8488
+ key="feature_use_sampling")
8489
+
8490
+ if use_sampling:
8491
+ sample_size = st.slider(
8492
+ "Sample Size",
8493
+ min_value=1000,
8494
+ max_value=min(50000, len(df)),
8495
+ value=min(20000, len(df)),
8496
+ step=1000,
8497
+ key="feature_sample_size"
8498
+ )
8499
+ st.info(f"🎯 Using {sample_size} samples from {len(df):,} total records")
8500
+
8501
+ # Processing speed control
8502
+ processing_speed = st.select_slider(
8503
+ "Processing Speed",
8504
+ options=["Fast", "Balanced", "Comprehensive"],
8505
+ value="Balanced",
8506
+ key="feature_processing_speed"
8507
+ )
8508
+
8509
+ # Configure parameters based on speed selection
8510
+ speed_config = {
8511
+ "Fast": {"n_estimators": 50, "n_repeats": 3, "max_features": 20},
8512
+ "Balanced": {"n_estimators": 100, "n_repeats": 5, "max_features": 30},
8513
+ "Comprehensive": {"n_estimators": 200, "n_repeats": 10, "max_features": 50}
8514
+ }
8515
+ config = speed_config[processing_speed]
8516
+
8517
+ with col2:
8518
+ # Advanced options
8519
+ st.write("**Advanced Options:**")
8520
+
8521
+ max_features_display = st.slider(
8522
+ "Max Features to Display",
8523
+ 5, 50, 15,
8524
+ key="max_features_display"
8525
+ )
8526
+
8527
+ remove_high_corr = st.checkbox(
8528
+ "Remove Highly Correlated Features",
8529
+ value=True,
8530
+ key="feature_remove_corr"
8531
+ )
8532
+
8533
+ correlation_threshold = st.slider(
8534
+ "Correlation Threshold",
8535
+ 0.7, 0.99, 0.9, 0.01,
8536
+ key="feature_corr_threshold"
8537
+ )
8538
+
8539
+ random_state = st.number_input(
8540
+ "Random State",
8541
+ value=42,
8542
+ key="feature_random_state"
8543
+ )
8544
+
8545
+ # Feature importance analysis
8546
+ st.subheader("🎯 Feature Importance Analysis")
8547
+
8548
+ col1, col2 = st.columns(2)
8549
+
8550
+ with col1:
8551
+ # Multiple methods untuk feature importance
8552
+ importance_method = st.selectbox(
8553
+ "Pilih Feature Importance Method",
8554
+ ["Random Forest", "Permutation Importance", "Mutual Information", "All Methods"],
8555
+ key="feature_importance_method"
8556
+ )
8557
+
8558
+ # Problem type selection
8559
+ problem_type = st.radio(
8560
+ "Problem Type",
8561
+ ["Regression", "Classification", "Auto Detect"],
8562
+ key="feature_problem_type"
8563
+ )
8564
+
8565
+ with col2:
8566
+ target_feature = st.selectbox(
8567
+ "Pilih Target untuk Feature Importance",
8568
+ numeric_cols + non_numeric_cols,
8569
+ key="feature_importance_target"
8570
+ )
8571
+
8572
+ # Feature selection
8573
+ available_features = [f for f in numeric_cols + non_numeric_cols if f != target_feature]
8574
+
8575
+ if len(available_features) > config["max_features"]:
8576
+ st.warning(f"⚠️ Showing first {config['max_features']} features. Use comprehensive mode for more.")
8577
+ available_features = available_features[:config["max_features"]]
8578
+
8579
+ selected_features = st.multiselect(
8580
+ "Pilih Features untuk Analysis",
8581
+ available_features,
8582
+ default=available_features[:min(10, len(available_features))],
8583
+ key="feature_analysis_features"
8584
+ )
8585
+
8586
+ if not target_feature or not selected_features:
8587
+ st.warning("📝 Pilih target feature dan features untuk analysis")
8588
+ return
8589
+
8590
+ # Progress tracking
8591
+ progress_bar = st.progress(0)
8592
+ status_text = st.empty()
8593
+
8594
+ if st.button("🚀 Hitung Feature Importance", key="feature_importance_button"):
8595
+ try:
8596
+ # Apply sampling jika diperlukan
8597
+ if use_sampling and len(df) > sample_size:
8598
+ df_analysis = df.sample(n=sample_size, random_state=random_state)
8599
+ st.info(f"🔬 Analyzing {sample_size:,} sampled records")
8600
+ else:
8601
+ df_analysis = df
8602
+
8603
+ status_text.text("🔄 Preparing data...")
8604
+ progress_bar.progress(10)
8605
+
8606
+ # Prepare features and target
8607
+ X = df_analysis[selected_features].copy()
8608
+ y = df_analysis[target_feature]
8609
+
8610
+ # Auto-detect problem type
8611
+ if problem_type == "Auto Detect":
8612
+ if target_feature in numeric_cols:
8613
+ problem_type_detected = "Regression"
8614
+ else:
8615
+ problem_type_detected = "Classification"
8616
+ st.info(f"🔍 Auto-detected: {problem_type_detected}")
8617
+ else:
8618
+ problem_type_detected = problem_type
8619
+
8620
+ progress_bar.progress(20)
8621
+
8622
+ # Preprocessing dengan optimasi
8623
+ status_text.text("🔧 Preprocessing features...")
8624
+ X_processed, feature_names = preprocess_features_optimized(
8625
+ X, numeric_cols, non_numeric_cols, remove_high_corr, correlation_threshold
8626
+ )
8627
+
8628
+ progress_bar.progress(40)
8629
+
8630
+ # Encode target variable jika classification
8631
+ le_target = None
8632
+ if problem_type_detected == "Classification" and y.dtype == 'object':
8633
+ le_target = LabelEncoder()
8634
+ # Wrap in a Series so index-based subsampling downstream still works
+ y = pd.Series(le_target.fit_transform(y.astype(str)), index=y.index)
8635
+ st.info(f"🎯 Target encoded: {len(le_target.classes_)} classes")
8636
+
8637
+ progress_bar.progress(50)
8638
+
8639
+ # Handle missing values
8640
+ X_processed = handle_missing_values_optimized(X_processed)
8641
+
8642
+ progress_bar.progress(60)
8643
+
8644
+ # Calculate feature importance berdasarkan method yang dipilih
8645
+ status_text.text("📊 Calculating feature importance...")
8646
+
8647
+ results = {}
8648
+
8649
+ if importance_method in ["Random Forest", "All Methods"]:
8650
+ results["Random Forest"] = calculate_rf_importance(
8651
+ X_processed, y, problem_type_detected, config, random_state
8652
+ )
8653
+ progress_bar.progress(70)
8654
+
8655
+ if importance_method in ["Permutation Importance", "All Methods"]:
8656
+ results["Permutation"] = calculate_permutation_importance(
8657
+ X_processed, y, problem_type_detected, config, random_state
8658
+ )
8659
+ progress_bar.progress(80)
8660
+
8661
+ if importance_method in ["Mutual Information", "All Methods"]:
8662
+ results["Mutual Info"] = calculate_mutual_info(
8663
+ X_processed, y, problem_type_detected
8664
+ )
8665
+ progress_bar.progress(90)
8666
+
8667
+ progress_bar.progress(95)
8668
+
8669
+ # Display results
8670
+ status_text.text("📈 Displaying results...")
8671
+ display_feature_importance_results(
8672
+ results, feature_names, max_features_display, problem_type_detected
8673
+ )
8674
+
8675
+ progress_bar.progress(100)
8676
+ status_text.text("✅ Analysis completed!")
8677
+
8678
+ # Additional insights
8679
+ show_feature_analysis_insights(results, X_processed, y, problem_type_detected)
8680
+
8681
+ except Exception as e:
8682
+ st.error(f"❌ Error dalam feature importance analysis: {str(e)}")
8683
+ st.info("💡 Tips: Coba kurangi jumlah features, gunakan sampling, atau pilih mode 'Fast'")
8684
+
8685
+ def optimize_memory_usage_feature(df):
8686
+ """Optimize memory usage for feature analysis"""
8687
+ start_mem = df.memory_usage(deep=True).sum() / 1024**2
8688
+
8689
+ for col in df.columns:
8690
+ col_type = df[col].dtype
8691
+
8692
+ if col_type == 'object':
8693
+ if df[col].nunique() / len(df) < 0.5: # Jika cardinality tidak terlalu tinggi
8694
+ df[col] = df[col].astype('category')
8695
+ elif col_type in ['int64', 'int32']:
8696
+ c_min = df[col].min()
8697
+ c_max = df[col].max()
8698
+ if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(np.int8).max:
8699
+ df[col] = df[col].astype(np.int8)
8700
+ elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(np.int16).max:
8701
+ df[col] = df[col].astype(np.int16)
8702
+ elif c_min > np.iinfo(np.int32).min and c_max < np.iinfo(np.int32).max:
8703
+ df[col] = df[col].astype(np.int32)
8704
+ elif col_type == 'float64':
+ # Downcast to float32 at most: float16 carries only ~3 significant
+ # digits and is poorly supported by scikit-learn estimators
+ c_min = df[col].min()
+ c_max = df[col].max()
+ if c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max:
+ df[col] = df[col].astype(np.float32)
8711
+
8712
+ end_mem = df.memory_usage(deep=True).sum() / 1024**2
8713
+ st.success(f"💾 Memory reduced: {start_mem:.2f}MB → {end_mem:.2f}MB ({((start_mem - end_mem) / start_mem * 100):.1f}% reduction)")
8714
+
8715
+ return df
8716
+
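A quick, self-contained way to sanity-check the savings this kind of downcasting gives; the toy frame below is illustrative only, and `pd.to_numeric(..., downcast=...)` is an off-the-shelf alternative to the manual range checks:

```python
# Sketch: measure memory before/after downcasting a synthetic frame.
import numpy as np
import pandas as pd

toy = pd.DataFrame({
    "small_int": np.random.randint(0, 100, 100_000),     # fits int8
    "price": np.random.rand(100_000) * 1_000.0,          # fits float32
    "city": np.random.choice(["Jakarta", "Bandung"], 100_000),
})
before = toy.memory_usage(deep=True).sum() / 1024**2

toy["small_int"] = pd.to_numeric(toy["small_int"], downcast="integer")
toy["price"] = toy["price"].astype(np.float32)
toy["city"] = toy["city"].astype("category")

after = toy.memory_usage(deep=True).sum() / 1024**2
print(f"{before:.2f} MB -> {after:.2f} MB")
```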
8717
+ def preprocess_features_optimized(X, numeric_cols, non_numeric_cols, remove_high_corr, threshold):
8718
+ """Preprocess features dengan optimasi untuk dataset besar"""
8719
+
8720
+ X_processed = X.copy()
8721
+ feature_names = list(X.columns)
8722
+
8723
+ # Encode categorical features dengan metode yang efisien
8724
+ categorical_columns = [col for col in X.columns if col in non_numeric_cols]
8725
+
8726
+ for col in categorical_columns:
+ if X_processed[col].nunique() > 50: # high-cardinality categoricals
+ # Frequency encoding; assign the result rather than using a
+ # chained inplace fillna, which pandas warns about
+ freq_map = X_processed[col].value_counts().to_dict()
+ X_processed[col] = X_processed[col].map(freq_map).fillna(0)
+ else:
+ # Label encoding for low-cardinality categoricals
+ le = LabelEncoder()
+ X_processed[col] = le.fit_transform(X_processed[col].astype(str))
8736
+
8737
+ # Remove highly correlated features
8738
+ if remove_high_corr and len(X_processed.columns) > 1:
8739
+ numeric_features = [col for col in X_processed.columns if col in numeric_cols or col in categorical_columns]
8740
+ if len(numeric_features) > 1:
8741
+ X_numeric = X_processed[numeric_features]
8742
+ corr_matrix = X_numeric.corr().abs()
8743
+
8744
+ # Hapus feature yang highly correlated
8745
+ upper_triangle = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(bool))
8746
+ to_drop = [column for column in upper_triangle.columns if any(upper_triangle[column] > threshold)]
8747
+
8748
+ if to_drop:
8749
+ X_processed = X_processed.drop(columns=to_drop)
8750
+ feature_names = [f for f in feature_names if f not in to_drop]
8751
+ st.info(f"🗑️ Removed {len(to_drop)} highly correlated features")
8752
+
8753
+ return X_processed, feature_names
8754
+
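The frequency-encoding branch above replaces each category with its occurrence count. A stand-alone sketch of that transform (illustrative, not package API):

```python
# Frequency encoding on a toy Series.
import pandas as pd

s = pd.Series(["a", "b", "a", "c", "a", "b"])
freq_map = s.value_counts().to_dict()   # {'a': 3, 'b': 2, 'c': 1}
encoded = s.map(freq_map).fillna(0)
print(encoded.tolist())                 # [3, 2, 3, 1, 3, 2]
```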
8755
+ def handle_missing_values_optimized(X):
8756
+ """Handle missing values dengan metode yang optimal"""
8757
+ X_processed = X.copy()
8758
+
8759
+ for col in X_processed.columns:
+ if X_processed[col].isnull().sum() > 0:
+ if X_processed[col].dtype in ['int8', 'int16', 'int32', 'int64', 'float16', 'float32', 'float64']:
+ # Numeric: impute with the median (more robust to outliers than the mean)
+ X_processed[col] = X_processed[col].fillna(X_processed[col].median())
+ else:
+ # Categorical: impute with the mode, falling back to 0
+ if len(X_processed[col].mode()) > 0:
+ X_processed[col] = X_processed[col].fillna(X_processed[col].mode()[0])
+ else:
+ X_processed[col] = X_processed[col].fillna(0)
8770
+
8771
+ return X_processed
8772
+
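For comparison, the same imputation policy can be expressed with scikit-learn's imputation tools; a minimal sketch, assuming scikit-learn is available:

```python
# Median for numeric columns, most-frequent for everything else.
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer

def build_imputer(X: pd.DataFrame) -> ColumnTransformer:
    numeric = X.select_dtypes(include="number").columns.tolist()
    categorical = [c for c in X.columns if c not in numeric]
    return ColumnTransformer([
        ("num", SimpleImputer(strategy="median"), numeric),
        ("cat", SimpleImputer(strategy="most_frequent"), categorical),
    ])

# X_imputed = build_imputer(X).fit_transform(X)
```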
8773
+ def calculate_rf_importance(X, y, problem_type, config, random_state):
8774
+ """Calculate Random Forest feature importance"""
8775
+ if problem_type == "Regression":
8776
+ model = RandomForestRegressor(
8777
+ n_estimators=config["n_estimators"],
8778
+ random_state=random_state,
8779
+ n_jobs=-1 # Parallel processing
8780
+ )
8781
+ else:
8782
+ model = RandomForestClassifier(
8783
+ n_estimators=config["n_estimators"],
8784
+ random_state=random_state,
8785
+ n_jobs=-1
8786
+ )
8787
+
8788
+ model.fit(X, y)
8789
+ importances = model.feature_importances_
8790
+
8791
+ return {
8792
+ 'importances': importances,
8793
+ 'model': model
8794
+ }
8795
+
8796
+ def calculate_permutation_importance(X, y, problem_type, config, random_state):
8797
+ """Calculate permutation importance"""
8798
+ if problem_type == "Regression":
8799
+ model = RandomForestRegressor(
8800
+ n_estimators=config["n_estimators"],
8801
+ random_state=random_state,
8802
+ n_jobs=-1
8803
+ )
8804
+ else:
8805
+ model = RandomForestClassifier(
8806
+ n_estimators=config["n_estimators"],
8807
+ random_state=random_state,
8808
+ n_jobs=-1
8809
+ )
8810
+
8811
+ model.fit(X, y)
8812
+
8813
+ # Untuk dataset besar, gunakan subsample
8814
+ if len(X) > 10000:
8815
+ X_subsample = X.sample(n=10000, random_state=random_state)
8816
+ y_subsample = y.loc[X_subsample.index]
8817
+ else:
8818
+ X_subsample = X
8819
+ y_subsample = y
8820
+
8821
+ perm_importance = permutation_importance(
8822
+ model, X_subsample, y_subsample,
8823
+ n_repeats=config["n_repeats"],
8824
+ random_state=random_state,
8825
+ n_jobs=-1 # Parallel processing
8826
+ )
8827
+
8828
+ return {
8829
+ 'importances': perm_importance.importances_mean,
8830
+ 'std': perm_importance.importances_std
8831
+ }
8832
+
8833
+ def calculate_mutual_info(X, y, problem_type):
8834
+ """Calculate mutual information"""
8835
+ if problem_type == "Regression":
8836
+ mi = mutual_info_regression(X, y, random_state=42, n_jobs=-1)
8837
+ else:
8838
+ mi = mutual_info_classif(X, y, random_state=42, n_jobs=-1)
8839
+
8840
+ return {
8841
+ 'importances': mi
8842
+ }
8843
+
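The three helpers above measure importance differently (impurity-based, permutation, mutual information), so their rankings can disagree. A toy comparison on synthetic data, shown purely as an illustration:

```python
# Contrast the three importance signals on a known ground truth.
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.feature_selection import mutual_info_regression
from sklearn.inspection import permutation_importance

rng = np.random.default_rng(42)
X = pd.DataFrame(rng.normal(size=(500, 3)), columns=["signal", "weak", "noise"])
y = 3.0 * X["signal"] + 0.3 * X["weak"] + rng.normal(scale=0.1, size=500)

rf = RandomForestRegressor(n_estimators=100, random_state=42, n_jobs=-1).fit(X, y)
perm = permutation_importance(rf, X, y, n_repeats=5, random_state=42)
mi = mutual_info_regression(X, y, random_state=42)

for name, scores in [("RF", rf.feature_importances_),
                     ("Permutation", perm.importances_mean),
                     ("Mutual info", mi)]:
    print(name, dict(zip(X.columns, np.round(scores, 3))))
```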
8844
+ def display_feature_importance_results(results, feature_names, max_display, problem_type):
8845
+ """Display feature importance results dengan visualisasi yang komprehensif"""
8846
+
8847
+ st.subheader("📊 Feature Importance Results")
8848
+
8849
+ # Tampilkan semua methods dalam tabs
8850
+ tabs = st.tabs(list(results.keys()))
8851
+
8852
+ for tab, (method_name, result) in zip(tabs, results.items()):
8853
+ with tab:
8854
+ importances = result['importances']
8855
+
8856
+ # Create importance dataframe
8857
+ importance_df = pd.DataFrame({
8858
+ 'feature': feature_names,
8859
+ 'importance': importances
8860
+ }).sort_values('importance', ascending=False)
8861
+
8862
+ # Display top features
8863
+ st.write(f"**Top {min(max_display, len(importance_df))} Features - {method_name}**")
8864
+
8865
+ col1, col2 = st.columns([2, 1])
8866
+
8867
+ with col1:
8868
+ # Bar chart
8869
+ fig = px.bar(
8870
+ importance_df.head(max_display),
8871
+ x='importance',
8872
+ y='feature',
8873
+ title=f"{method_name} Feature Importance",
8874
+ orientation='h',
8875
+ color='importance',
8876
+ color_continuous_scale='viridis'
8877
+ )
8878
+ fig.update_layout(showlegend=False)
8879
+ st.plotly_chart(fig, use_container_width=True)
8880
+
8881
+ with col2:
8882
+ # Table view
8883
+ st.dataframe(
8884
+ importance_df.head(10)[['feature', 'importance']].round(4),
8885
+ use_container_width=True
8886
+ )
8887
+
8888
+ # Additional info untuk permutation importance
8889
+ if method_name == "Permutation" and 'std' in result:
8890
+ st.write("**Permutation Importance with Std Dev:**")
8891
+ perm_df = pd.DataFrame({
8892
+ 'feature': feature_names,
8893
+ 'importance': importances,
8894
+ 'std': result['std']
8895
+ }).sort_values('importance', ascending=False)
8896
+
8897
+ fig = px.bar(
8898
+ perm_df.head(max_display),
8899
+ x='importance',
8900
+ y='feature',
8901
+ error_x='std',
8902
+ title="Permutation Importance ± Std Dev",
8903
+ orientation='h'
8904
+ )
8905
+ st.plotly_chart(fig, use_container_width=True)
8906
+
8907
+ def show_feature_analysis_insights(results, X, y, problem_type):
8908
+ """Show additional insights dari feature analysis"""
8909
+
8910
+ st.subheader("💡 Analysis Insights")
8911
+
8912
+ col1, col2 = st.columns(2)
8913
+
8914
+ with col1:
8915
+ st.write("**Dataset Characteristics:**")
8916
+ st.write(f"- Total samples: {len(X):,}")
8917
+ st.write(f"- Total features: {len(X.columns)}")
8918
+ st.write(f"- Problem type: {problem_type}")
8919
+
8920
+ if problem_type == "Classification":
8921
+ st.write(f"- Number of classes: {len(np.unique(y))}")
8922
+ else:
8923
+ st.write(f"- Target range: {y.min():.2f} to {y.max():.2f}")
8924
+
8925
+ with col2:
8926
+ st.write("**Feature Importance Consensus:**")
8927
+
8928
+ # Hitung consensus dari semua methods
8929
+ consensus_scores = {}
8930
+ for method_name, result in results.items():
8931
+ importances = result['importances']
8932
+ for i, feature in enumerate(X.columns):
8933
+ if feature not in consensus_scores:
8934
+ consensus_scores[feature] = []
8935
+ consensus_scores[feature].append(importances[i])
8936
+
8937
+ # Rata-rata score across methods
8938
+ avg_scores = {feature: np.mean(scores) for feature, scores in consensus_scores.items()}
8939
+ top_features = sorted(avg_scores.items(), key=lambda x: x[1], reverse=True)[:5]
8940
+
8941
+ for feature, score in top_features:
8942
+ st.write(f"- {feature}: {score:.4f}")
8943
+
8944
+ # Correlation analysis untuk top features
8945
+ if len(results) > 0:
8946
+ st.write("**Top Features Correlation Matrix:**")
8947
+
8948
+ # Ambil top 8 features dari method pertama
8949
+ first_method = list(results.values())[0]
8950
+ top_indices = np.argsort(first_method['importances'])[-8:][::-1]
8951
+ top_features_corr = [X.columns[i] for i in top_indices if i < len(X.columns)]
8952
+
8953
+ if len(top_features_corr) > 1:
8954
+ corr_matrix = X[top_features_corr].corr()
8955
+
8956
+ fig = px.imshow(
8957
+ corr_matrix,
8958
+ text_auto=True,
8959
+ aspect="auto",
8960
+ color_continuous_scale="RdBu_r",
8961
+ title="Correlation Matrix of Top Features"
8962
+ )
8963
+ st.plotly_chart(fig, use_container_width=True)
7286
8964
 
7287
8965
  # Fungsi untuk memuat data
7288
8966
  def load_data(uploaded_file):
@@ -7958,6 +9636,29 @@ uploaded_files = st.sidebar.file_uploader(
7958
9636
  accept_multiple_files=True
7959
9637
  )
7960
9638
 
9639
+ # Pilihan website
9640
+ website_option = st.sidebar.selectbox(
9641
+ "Pilih Website:",
9642
+ ["https://streamlit-launcher.vercel.app/", "Custom URL"]
9643
+ )
9644
+
9645
+ if website_option == "Custom URL":
9646
+ custom_url = st.sidebar.text_input("Masukkan URL custom:")
9647
+ if custom_url:
9648
+ website_url = custom_url
9649
+ else:
9650
+ website_url = "https://streamlit-launcher.vercel.app/"
9651
+ else:
9652
+ website_url = website_option
9653
+
9654
+ # Tampilkan iframe
9655
+ if st.sidebar.button("🌐 Tampilkan Website"):
9656
+ st.markdown(f"""
9657
+ <div style="border: 2px solid #e0e0e0; border-radius: 10px; padding: 10px; margin: 10px 0;">
9658
+ <iframe src="{website_url}" width="100%" height="600" style="border: none; border-radius: 8px;"></iframe>
9659
+ </div>
9660
+ """, unsafe_allow_html=True)
9661
+
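As an aside, the same embed can be done without hand-written HTML via Streamlit's components API; a sketch (the extra button label is hypothetical, and `streamlit.components.v1` is assumed to be importable):

```python
# Equivalent embed using Streamlit's components API.
import streamlit.components.v1 as components

if st.sidebar.button("🌐 Tampilkan Website (components)"):
    components.iframe(website_url, height=600, scrolling=True)
```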
7961
9662
  merge_method = "concat"
7962
9663
  if uploaded_files and len(uploaded_files) > 1:
7963
9664
  merge_method = st.sidebar.selectbox(
@@ -7982,439 +9683,1033 @@ if uploaded_files:
7982
9683
  else:
7983
9684
  df = merge_datasets(datasets, merge_method)
7984
9685
 
9686
+ try:
9687
+ from stl import mesh
9688
+ import trimesh
9689
+ import os
9690
+ except ImportError:
9691
+ st.warning("Beberapa library 3D tidak terinstall. Install dengan: pip install numpy-stl trimesh plotly")
7985
9692
  REMOVE_BG_API_KEY = "xQH5KznYiupRrywK5yPcjeyi"
7986
9693
  PIXELS_API_KEY = "LH59shPdj1xO0lolnHPsClH23qsnHE4NjkCFBhKEXvR0CbqwkrXbqBnw"
7987
9694
  if df is not None:
7988
- tab1, tab2, tab3, tab4, tab5, tab6, tab7, tab8, tab9 = st.tabs([
9695
+ tab1, tab2, tab3, tab4, tab5, tab6, tab7, tab8, tab9, tab10 = st.tabs([
7989
9696
  "📊 Statistik",
7990
9697
  "📈 Visualisasi",
7991
9698
  "💾 Data",
7992
9699
  "ℹ️ Informasi",
7993
9700
  "🧮 Kalkulator",
7994
9701
  "🖼️ Vitures",
7995
- "📍 Flowchart",
9702
+ "📍 Flowchart",
7996
9703
  "📊 Grafik Saham",
7997
- "🗃️ SQL Style"
9704
+ "🗃️ SQL Style",
9705
+ "🔄 3D Model & Analisis"
7998
9706
  ])
9707
+
9708
+ with tab10:
9709
+ st.header("🔄 Konversi Gambar ke 3D Model dengan Analisis")
9710
+
9711
+ # Upload gambar
9712
+ uploaded_file = st.file_uploader("Unggah gambar untuk dikonversi ke 3D",
9713
+ type=['png', 'jpg', 'jpeg'],
9714
+ key="3d_converter")
9715
+
9716
+ col1, col2 = st.columns(2)
9717
+
9718
+ with col1:
9719
+ if uploaded_file is not None:
9720
+ # Display original image
9721
+ st.subheader("🖼️ Gambar Asli")
9722
+ st.image(uploaded_file, use_container_width=True)  # use_column_width is deprecated
9723
+
9724
+ # Image analysis
9725
+ st.subheader("📊 Analisis Gambar")
9726
+
9727
+ # Convert to numpy array for analysis
9728
+ import numpy as np
9729
+ from PIL import Image
9730
+
9731
+ image = Image.open(uploaded_file)
9732
+ img_array = np.array(image)
9733
+
9734
+ # Basic image statistics
9735
+ st.write(f"**Dimensi Gambar:** {img_array.shape}")
9736
+ st.write(f"**Tipe Data:** {img_array.dtype}")
9737
+ st.write(f"**Range Nilai:** {img_array.min()} - {img_array.max()}")
9738
+
9739
+ # Color distribution
9740
+ if len(img_array.shape) == 3: # Color image
9741
+ st.write("**Distribusi Warna RGB:**")
9742
+ colors = ['Red', 'Green', 'Blue']
9743
+ for i, color in enumerate(colors):
9744
+ channel_data = img_array[:, :, i]
9745
+ st.write(f"{color}: Mean={channel_data.mean():.2f}, Std={channel_data.std():.2f}")
9746
+
9747
+ with col2:
9748
+ if uploaded_file is not None:
9749
+ st.subheader("📈 Chart Analisis")
9750
+
9751
+ # Create some sample 3D data based on image
9752
+ height, width = img_array.shape[0], img_array.shape[1]
9753
+
9754
+ # Generate 3D surface data from image intensity
9755
+ if len(img_array.shape) == 3:
9756
+ gray_img = np.mean(img_array, axis=2) # Convert to grayscale
9757
+ else:
9758
+ gray_img = img_array
9759
+
9760
+ # Downsample for performance
9761
+ downsample_factor = max(1, gray_img.shape[0] // 50)
9762
+ gray_img_small = gray_img[::downsample_factor, ::downsample_factor]
9763
+
9764
+ # Create 3D surface plot
9765
+ fig_3d = go.Figure(data=[go.Surface(z=gray_img_small)])
9766
+ fig_3d.update_layout(
9767
+ title='3D Surface dari Gambar',
9768
+ scene=dict(
9769
+ xaxis_title='X',
9770
+ yaxis_title='Y',
9771
+ zaxis_title='Intensitas'
9772
+ )
9773
+ )
9774
+ st.plotly_chart(fig_3d, use_container_width=True)
9775
+
9776
+ # 2D Histogram of intensities
9777
+ fig_hist = px.histogram(x=gray_img.flatten(),
9778
+ title='Distribusi Intensitas Pixel',
9779
+ labels={'x': 'Intensitas', 'y': 'Frekuensi'})
9780
+ st.plotly_chart(fig_hist, use_container_width=True)
9781
+
9782
+ # Additional analysis section
9783
+ if uploaded_file is not None:
9784
+ st.subheader("🔍 Analisis Detail")
9785
+
9786
+ col3, col4 = st.columns(2)
9787
+
9788
+ with col3:
9789
+ # Edge detection simulation
9790
+ st.write("**Deteksi Tepi (Simulasi):**")
9791
+
9792
+ # Simple edge detection using gradient
9793
+ from scipy import ndimage
9794
+
9795
+ # Calculate gradients
9796
+ grad_x = ndimage.sobel(gray_img, axis=0)
9797
+ grad_y = ndimage.sobel(gray_img, axis=1)
9798
+ gradient_magnitude = np.hypot(grad_x, grad_y)
9799
+
9800
+ # Display edge map
9801
+ fig_edges = px.imshow(gradient_magnitude,
9802
+ title='Peta Tepi',
9803
+ color_continuous_scale='gray')
9804
+ st.plotly_chart(fig_edges, use_container_width=True)
9805
+
9806
+ with col4:
9807
+ # Statistical summary
9808
+ st.write("**Ringkasan Statistik:**")
9809
+
9810
+ # Shannon entropy needs a probability distribution, so build it from
+ # the normalized intensity histogram rather than raw pixel values
+ hist, _ = np.histogram(gray_img, bins=256)
+ probs = hist[hist > 0] / hist.sum()
+ stats_data = {
+ 'Metrik': ['Mean', 'Median', 'Std Dev', 'Varians', 'Entropi'],
+ 'Nilai': [
+ f"{gray_img.mean():.2f}",
+ f"{np.median(gray_img):.2f}",
+ f"{gray_img.std():.2f}",
+ f"{gray_img.var():.2f}",
+ f"{-np.sum(probs * np.log2(probs)):.2f}"
+ ]
+ }
9820
+
9821
+ st.dataframe(stats_data, use_container_width=True)
9822
+
9823
+ # Date selection for analysis
9824
+ analysis_date = st.date_input("Pilih Tanggal Analisis",
9825
+ value=datetime.now().date(),
9826
+ key="3d_analysis_date")
9827
+
9828
+ st.write(f"**Analisis untuk tanggal:** {analysis_date}")
9829
+
9830
+ # Model conversion options
9831
+ if uploaded_file is not None:
9832
+ st.subheader("⚙️ Opsi Konversi 3D")
9833
+
9834
+ conversion_type = st.selectbox(
+ "Pilih tipe model 3D:",
+ ["Surface Mesh", "Point Cloud", "Voxel Grid", "Height Map"]
+ )
+ # NOTE: the generator below always produces a height-map surface mesh;
+ # the other options are placeholders for future conversion modes
9838
+
9839
+ resolution = st.slider("Resolusi Model 3D", 10, 100, 50)
9840
+ height_scale = st.slider("Skala Tinggi 3D", 0.1, 5.0, 1.0)
9841
+
9842
+ if st.button("🚀 Generate Model 3D", type="primary"):
9843
+ with st.spinner("Membuat model 3D..."):
9844
+ try:
9845
+ # Progress bar
9846
+ progress_bar = st.progress(0)
9847
+
9848
+ # Convert image to grayscale and normalize
9849
+ if len(img_array.shape) == 3:
9850
+ gray_img = np.mean(img_array, axis=2)
9851
+ else:
9852
+ gray_img = img_array
9853
+
9854
+ # Normalize to 0-1
9855
+ gray_img_normalized = gray_img.astype(np.float32) / 255.0
9856
+
9857
+ progress_bar.progress(25)
9858
+
9859
+ # Downsample image based on resolution
9860
+ downsample = max(1, gray_img_normalized.shape[0] // resolution)
9861
+ height_map = gray_img_normalized[::downsample, ::downsample]
9862
+
9863
+ progress_bar.progress(50)
9864
+
9865
+ # Generate the 3D mesh from the height map (grid indices come from
+ # z's shape below, so no intermediate mgrid coordinates are needed)
+ z = height_map * height_scale
9868
+
9869
+ progress_bar.progress(75)
9870
+
9871
+ # Create vertices and faces for the mesh
9872
+ vertices = []
9873
+ faces = []
9874
+
9875
+ # Create vertices
9876
+ for i in range(z.shape[0]):
9877
+ for j in range(z.shape[1]):
9878
+ vertices.append([i, j, z[i, j]])
9879
+
9880
+ # Create faces
9881
+ for i in range(z.shape[0]-1):
9882
+ for j in range(z.shape[1]-1):
9883
+ # Two triangles per quad
9884
+ v1 = i * z.shape[1] + j
9885
+ v2 = v1 + 1
9886
+ v3 = (i + 1) * z.shape[1] + j
9887
+ v4 = v3 + 1
9888
+
9889
+ # First triangle
9890
+ faces.append([v1, v2, v3])
9891
+ # Second triangle
9892
+ faces.append([v2, v4, v3])
9893
+
9894
+ progress_bar.progress(90)
9895
+
9896
+ # Convert to numpy arrays
9897
+ vertices = np.array(vertices)
9898
+ faces = np.array(faces)
9899
+
9900
+ # Create STL mesh
9901
+ from stl import mesh
9902
+
9903
+ # Create the mesh object
9904
+ stl_mesh = mesh.Mesh(np.zeros(faces.shape[0], dtype=mesh.Mesh.dtype))
9905
+
9906
+ # Assign vertices to mesh
9907
+ for i, face in enumerate(faces):
9908
+ for j in range(3):
9909
+ stl_mesh.vectors[i][j] = vertices[face[j]]
9910
+
9911
+ progress_bar.progress(100)
9912
+
9913
+ # Save STL file to temporary file
9914
+ import tempfile
9915
+ import os
9916
+
9917
+ with tempfile.NamedTemporaryFile(delete=False, suffix='.stl') as tmp_file:
9918
+ stl_mesh.save(tmp_file.name)
9919
+
9920
+ # Read the file data for download
9921
+ with open(tmp_file.name, 'rb') as f:
9922
+ stl_data = f.read()
9923
+
9924
+ # Clean up temporary file
9925
+ os.unlink(tmp_file.name)
9926
+
9927
+ st.success("✅ Model 3D berhasil dibuat!")
9928
+
9929
+ # Display results
9930
+ st.info(f"**Model 3D tipe:** {conversion_type}")
9931
+ st.info(f"**Resolusi:** {resolution}")
9932
+ st.info(f"**Dimensi Mesh:** {len(vertices)} vertices, {len(faces)} faces")
9933
+ st.info(f"**Skala Tinggi:** {height_scale}")
9934
+
9935
+ # Download button for 3D model
9936
+ st.download_button(
9937
+ label="📥 Download Model 3D (STL)",
9938
+ data=stl_data,
9939
+ file_name=f"3d_model_{uploaded_file.name.split('.')[0]}.stl",
9940
+ mime="application/octet-stream"
9941
+ )
9942
+
9943
+ # Display mesh information
9944
+ col5, col6 = st.columns(2)
9945
+
9946
+ with col5:
9947
+ st.write("**Informasi Mesh:**")
9948
+ mesh_info = {
9949
+ 'Parameter': ['Jumlah Vertex', 'Jumlah Face', 'Dimensi X', 'Dimensi Y', 'Tinggi Maks'],
9950
+ 'Nilai': [
9951
+ len(vertices),
9952
+ len(faces),
9953
+ f"{z.shape[0]} points",
9954
+ f"{z.shape[1]} points",
9955
+ f"{z.max():.3f}"
9956
+ ]
9957
+ }
9958
+ st.dataframe(mesh_info)
9959
+
9960
+ with col6:
9961
+ # Display 3D preview using plotly
9962
+ st.write("**Preview 3D:**")
9963
+
9964
+ # Create simplified mesh for preview
9965
+ preview_downsample = max(1, len(vertices) // 1000)
9966
+ preview_vertices = vertices[::preview_downsample]
9967
+
9968
+ fig_3d_preview = go.Figure(data=[go.Mesh3d(
9969
+ x=preview_vertices[:, 0],
9970
+ y=preview_vertices[:, 1],
9971
+ z=preview_vertices[:, 2],
9972
+ opacity=0.7,
9973
+ color='lightblue'
9974
+ )])
9975
+
9976
+ fig_3d_preview.update_layout(
9977
+ title='Preview Model 3D',
9978
+ scene=dict(
9979
+ xaxis_title='X',
9980
+ yaxis_title='Y',
9981
+ zaxis_title='Z'
9982
+ )
9983
+ )
9984
+
9985
+ st.plotly_chart(fig_3d_preview, use_container_width=True)
9986
+
9987
+ except Exception as e:
9988
+ st.error(f"❌ Error dalam membuat model 3D: {str(e)}")
9989
+ st.info("Pastikan library numpy-stl dan trimesh terinstall: `pip install numpy-stl trimesh`")
9990
+
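The per-vertex Python loops above become slow at higher resolutions. A vectorized NumPy sketch of the same grid triangulation, with the same vertex indexing and winding, shown as an alternative rather than the package's code:

```python
# Vectorized height-map triangulation (sketch).
import numpy as np

def heightmap_to_mesh(z: np.ndarray):
    rows, cols = z.shape
    # Vertex (i, j) -> flat index i * cols + j
    ii, jj = np.meshgrid(np.arange(rows), np.arange(cols), indexing="ij")
    vertices = np.column_stack([ii.ravel(), jj.ravel(), z.ravel()])

    # Top-left corner of every quad
    i, j = np.meshgrid(np.arange(rows - 1), np.arange(cols - 1), indexing="ij")
    v1 = (i * cols + j).ravel()
    v2, v3 = v1 + 1, v1 + cols
    v4 = v3 + 1
    # Two triangles per quad, matching the loop version's winding
    faces = np.concatenate([
        np.column_stack([v1, v2, v3]),
        np.column_stack([v2, v4, v3]),
    ])
    return vertices, faces

# vertices, faces = heightmap_to_mesh(height_map * height_scale)
```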
7999
9991
 
8000
9992
  with tab9:
8001
- st.header("📁 Upload File & Analisis Lengkap SQL Style")
8002
- with st.expander("📜 Ketarangan Dalam Statistik Dan Analisis", expanded=False):
8003
- st.markdown("""
8004
- **Penjelasan Penting 📛**
9993
+ st.header("📁 Upload File & Analisis Lengkap Database SQL")
9994
+ with st.expander("📜 Keterangan Dalam Statistik Dan Analisis", expanded=False):
9995
+ st.markdown(
9996
+ """
9997
+ <img src="https://media.finebi.com/strapi/Annual_Sales_Summary_59110fda60.jpg" class="responsive-img">
9998
+ """,
9999
+ unsafe_allow_html=True
10000
+ )
10001
+ st.markdown("""
10002
+
10003
+ ### 🚀 Keterangan Lengkap Dalam Analisis Dan Statistik Pada SQL Style
10004
+ - Akankah hal gila dapat terjadi? Inilah yang mungkin menjadi kenyataan pada SQL Style.
10005
+ - Dengan adanya fitur analisis data pada SQL Style, kini Anda dapat dengan mudah mengunggah file CSV atau Excel berisi data dari database SQL Anda untuk dianalisis secara menyeluruh.
10006
+ - Fitur ini dirancang untuk memberikan wawasan mendalam tentang struktur data Anda, termasuk deteksi kolom tanggal, analisis statistik dasar, dan visualisasi data yang informatif.
10007
+ - Setelah mengunggah file, SQL Style akan secara otomatis mendeteksi kolom tanggal dan melakukan analisis mendalam terhadap data tersebut.
10008
+ - Anda akan mendapatkan statistik dasar seperti jumlah baris dan kolom, nilai unik, serta informasi tentang missing values.
10009
+ - Selain itu, fitur visualisasi data akan membantu Anda memahami distribusi data, tren waktu, dan pola musiman dengan grafik yang mudah dipahami.
10010
+ - Fitur ini sangat berguna bagi para analis data, pengembang database, dan siapa saja yang ingin mendapatkan pemahaman lebih baik tentang data mereka.
10011
+ - Kami terus berupaya untuk meningkatkan fitur ini agar dapat memberikan pengalaman analisis data yang lebih baik dan lebih komprehensif.
10012
+ - Dan kami akan segera memperbarui SQL Style ini agar lebih baik lagi ke depannya.
10013
+ - Terima kasih atas pengertian dan dukungannya.
10014
+ """)
10015
+
10016
+ # Upload file
10017
+ uploaded_file = st.file_uploader(
10018
+ "Pilih file CSV atau Excel",
10019
+ type=['csv', 'xlsx', 'xls'],
10020
+ help="Upload file data untuk dianalisis"
10021
+ )
10022
+
10023
+ if uploaded_file is not None:
10024
+ try:
10025
+ # Baca file berdasarkan tipe
10026
+ if uploaded_file.name.endswith('.csv'):
10027
+ df = pd.read_csv(uploaded_file)
10028
+ else:
10029
+ df = pd.read_excel(uploaded_file)
10030
+
10031
+ # Clean dataframe - handle mixed types and object dtypes
10032
+ def clean_dataframe(df):
10033
+ df_clean = df.copy()
10034
+
10035
+ # Convert object columns to appropriate types
10036
+ for col in df_clean.columns:
10037
+ # Skip if column is already numeric or datetime
10038
+ if pd.api.types.is_numeric_dtype(df_clean[col]):
10039
+ continue
10040
+ if pd.api.types.is_datetime64_any_dtype(df_clean[col]):
10041
+ continue
10042
+
10043
+ # Try numeric conversion first; calling pd.to_numeric without
+ # errors='ignore' (deprecated in pandas 2.x) and catching the failure
+ # keeps the original column intact when conversion fails
+ try:
+ df_clean[col] = pd.to_numeric(df_clean[col])
+ except (ValueError, TypeError):
+ pass
+
+ # If still object, try datetime conversion the same way
+ if df_clean[col].dtype == 'object':
+ try:
+ df_clean[col] = pd.to_datetime(df_clean[col])
+ except (ValueError, TypeError, OverflowError):
+ pass
10055
+
10056
+ # Handle ObjectDType specifically
10057
+ if hasattr(df_clean[col].dtype, 'name') and df_clean[col].dtype.name == 'object':
10058
+ # Convert to string to avoid ObjectDType issues
10059
+ df_clean[col] = df_clean[col].astype(str)
10060
+
10061
+ return df_clean
8005
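A toy run showing what `clean_dataframe` does to mixed-type columns; a sketch that assumes the helper above is in scope:

```python
# Demonstrate the type coercion on a small messy frame.
import pandas as pd

messy = pd.DataFrame({
    "qty": ["1", "2", "3"],            # numeric stored as text
    "when": ["2024-01-01", "2024-02-01", "2024-03-01"],
    "note": ["ok", "late", "ok"],      # stays a string column
})
cleaned = clean_dataframe(messy)
print(cleaned.dtypes)
# qty               int64
# when     datetime64[ns]
# note             object
```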
10062
 
8006
- ### 🚀 Ketrangan Lengkap Dalam Analisis Dan Statistik Pada SQL Style
8007
- - SQL ini masih tahap pemgembangan dan perbaikan, jadi mohon bersabar jika ada kekurangan
8008
- - dan kami akan segera update SQL Style ini agar lebih baik lagi kedepannya.
8009
- - Terima kasih atas pengertian dan dukungannya.
8010
- """)
8011
-
8012
- # Upload file
8013
- uploaded_file = st.file_uploader(
8014
- "Pilih file CSV atau Excel",
8015
- type=['csv', 'xlsx', 'xls'],
8016
- help="Upload file data untuk dianalisis"
8017
- )
8018
-
8019
- if uploaded_file is not None:
8020
- try:
8021
- # Baca file berdasarkan tipe
8022
- if uploaded_file.name.endswith('.csv'):
8023
- df = pd.read_csv(uploaded_file)
8024
- else:
8025
- df = pd.read_excel(uploaded_file)
10063
+ df = clean_dataframe(df)
10064
+
10065
+ st.success(f"File berhasil diupload! Shape: {df.shape}")
10066
+
10067
+ # Tampilkan preview data
10068
+ st.subheader("📋 Preview Data")
10069
+ st.dataframe(df.head())
10070
+
10071
+ # Informasi dasar dataset
10072
+ st.subheader("📊 Informasi Dataset")
10073
+ col1, col2, col3, col4 = st.columns(4)
10074
+
10075
+ with col1:
10076
+ st.metric("Jumlah Baris", df.shape[0])
10077
+ with col2:
10078
+ st.metric("Jumlah Kolom", df.shape[1])
10079
+ with col3:
10080
+ st.metric("Missing Values", df.isnull().sum().sum())
10081
+ with col4:
10082
+ st.metric("Duplikat", df.duplicated().sum())
10083
+
10084
+ # --- ANALISIS STRUKTUR DATA UNTUK ERD DINAMIS ---
10085
+ st.subheader("🔍 Analisis Struktur Data untuk ERD")
10086
+
10087
+ # Fungsi untuk deteksi tipe data yang aman
10088
+ def safe_dtype_detection(df):
10089
+ numeric_cols = []
10090
+ categorical_cols = []
10091
+ date_cols = []
10092
+ bool_cols = []
10093
+ other_cols = []
8026
10094
 
8027
- st.success(f"File berhasil diupload! Shape: {df.shape}")
10095
+ for col in df.columns:
10096
+ col_dtype = str(df[col].dtype)
10097
+
10098
+ # Check numeric
10099
+ if pd.api.types.is_numeric_dtype(df[col]):
10100
+ numeric_cols.append(col)
10101
+ # Check datetime
10102
+ elif pd.api.types.is_datetime64_any_dtype(df[col]):
10103
+ date_cols.append(col)
10104
+ # Check boolean
10105
+ elif pd.api.types.is_bool_dtype(df[col]):
10106
+ bool_cols.append(col)
10107
+ # Check categorical (object but limited unique values)
10108
+ elif df[col].dtype == 'object':
10109
+ if df[col].nunique() <= 50: # Consider as categorical if <= 50 unique values
10110
+ categorical_cols.append(col)
10111
+ else:
10112
+ other_cols.append(col)
10113
+ else:
10114
+ other_cols.append(col)
8028
10115
 
8029
- # Tampilkan preview data
8030
- st.subheader("📋 Preview Data")
8031
- st.dataframe(df.head())
10116
+ return numeric_cols, categorical_cols, date_cols, bool_cols, other_cols
10117
+
10118
+ numeric_cols, categorical_cols, date_cols, bool_cols, other_cols = safe_dtype_detection(df)
10119
+
10120
+ # Fungsi analisis yang lebih robust
10121
+ def robust_column_analysis(df):
10122
+ column_analysis = {}
8032
10123
 
8033
- # Informasi dasar dataset
8034
- st.subheader("📊 Informasi Dataset")
8035
- col1, col2, col3 = st.columns(3)
10124
+ for col in df.columns:
10125
+ try:
10126
+ col_data = df[col]
10127
+
10128
+ # Handle ObjectDType and other problematic types
10129
+ if hasattr(col_data.dtype, 'name') and col_data.dtype.name == 'object':
10130
+ # Convert to string for analysis
10131
+ col_data = col_data.astype(str)
10132
+
10133
+ analysis = {
10134
+ 'dtype': str(col_data.dtype),
10135
+ 'unique_count': col_data.nunique(),
10136
+ 'null_count': col_data.isnull().sum(),
10137
+ 'null_percentage': (col_data.isnull().sum() / len(col_data)) * 100,
10138
+ 'sample_values': col_data.dropna().head(3).tolist() if not col_data.empty else []
10139
+ }
10140
+
10141
+ # Safe sample values conversion
10142
+ safe_samples = []
10143
+ for val in analysis['sample_values']:
10144
+ try:
10145
+ safe_samples.append(str(val))
10146
+ except:
10147
+ safe_samples.append('N/A')
10148
+ analysis['sample_values'] = safe_samples
10149
+
10150
+ # Deteksi tipe kolom untuk ERD
10151
+ col_lower = str(col).lower()
10152
+
10153
+ # Primary Key detection
10154
+ if (analysis['unique_count'] == len(col_data) and
10155
+ analysis['null_count'] == 0 and
10156
+ any(keyword in col_lower for keyword in ['id', 'pk', 'key', 'code'])):
10157
+ analysis['role'] = 'PRIMARY_KEY'
10158
+ analysis['icon'] = '🔑'
10159
+
10160
+ # Foreign Key detection
10161
+ elif (any(keyword in col_lower for keyword in ['id', 'fk', 'ref', 'code']) and
10162
+ analysis['unique_count'] < len(col_data) * 0.8):
10163
+ analysis['role'] = 'FOREIGN_KEY'
10164
+ analysis['icon'] = '🔗'
10165
+
10166
+ # Measurement columns
10167
+ elif any(keyword in col_lower for keyword in ['amount', 'price', 'value', 'total', 'sum', 'avg', 'quantity']):
10168
+ analysis['role'] = 'MEASUREMENT'
10169
+ analysis['icon'] = '💰'
10170
+
10171
+ # Date/Time columns
10172
+ elif any(keyword in col_lower for keyword in ['date', 'time', 'year', 'month', 'day']):
10173
+ analysis['role'] = 'TEMPORAL'
10174
+ analysis['icon'] = '📅'
10175
+
10176
+ # Category columns
10177
+ elif (analysis['unique_count'] <= 20 and
10178
+ analysis['unique_count'] > 1 and
10179
+ str(col_data.dtype) == 'object'):
10180
+ analysis['role'] = 'CATEGORY'
10181
+ analysis['icon'] = '🏷️'
10182
+
10183
+ # Description columns
10184
+ elif (str(col_data.dtype) == 'object' and
10185
+ col_data.astype(str).str.len().mean() > 20):
10186
+ analysis['role'] = 'DESCRIPTION'
10187
+ analysis['icon'] = '📝'
10188
+
10189
+ # Numeric metrics
10190
+ elif pd.api.types.is_numeric_dtype(col_data):
10191
+ analysis['role'] = 'METRIC'
10192
+ analysis['icon'] = '📊'
10193
+
10194
+ else:
10195
+ analysis['role'] = 'ATTRIBUTE'
10196
+ analysis['icon'] = '📄'
10197
+
10198
+ column_analysis[col] = analysis
10199
+
10200
+ except Exception as e:
10201
+ # Fallback analysis for problematic columns
10202
+ column_analysis[col] = {
10203
+ 'dtype': 'unknown',
10204
+ 'role': 'ATTRIBUTE',
10205
+ 'icon': '❓',
10206
+ 'unique_count': 0,
10207
+ 'null_count': len(df[col]),
10208
+ 'null_percentage': 100.0,
10209
+ 'sample_values': ['Error in analysis']
10210
+ }
8036
10211
 
8037
- with col1:
8038
- st.metric("Jumlah Baris", df.shape[0])
8039
- with col2:
8040
- st.metric("Jumlah Kolom", df.shape[1])
8041
- with col3:
8042
- st.metric("Missing Values", df.isnull().sum().sum())
10212
+ return column_analysis
10213
+
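The role heuristics above key off column names, uniqueness, and cardinality. A toy frame makes the assignments concrete; this sketch assumes `robust_column_analysis` is in scope:

```python
# Expected roles: PRIMARY_KEY, FOREIGN_KEY, MEASUREMENT, TEMPORAL.
import pandas as pd

toy = pd.DataFrame({
    "customer_id": range(1, 6),                    # unique + 'id'  -> PRIMARY_KEY
    "region_code": ["A", "B", "A", "B", "A"],      # repeats + 'code' -> FOREIGN_KEY
    "total_amount": [10.5, 20.0, 7.5, 12.0, 9.9],  # 'amount' -> MEASUREMENT
    "order_date": pd.date_range("2024-01-01", periods=5),  # 'date' -> TEMPORAL
})
roles = {col: info["role"] for col, info in robust_column_analysis(toy).items()}
print(roles)
```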
10214
+ # Analisis kolom
10215
+ column_analysis = robust_column_analysis(df)
10216
+
10217
+ # Tampilkan analisis kolom
10218
+ st.write("**Analisis Detail Kolom:**")
10219
+ analysis_data = []
10220
+ for col, analysis in column_analysis.items():
10221
+ analysis_data.append({
10222
+ 'Kolom': col,
10223
+ 'Tipe': analysis['dtype'],
10224
+ 'Role': analysis['role'],
10225
+ 'Icon': analysis['icon'],
10226
+ 'Unique': analysis['unique_count'],
10227
+ 'Null %': f"{analysis['null_percentage']:.1f}%"
10228
+ })
10229
+
10230
+ analysis_df = pd.DataFrame(analysis_data)
10231
+ st.dataframe(analysis_df, use_container_width=True)
10232
+
10233
+ # --- ERD DINAMIS YANG LEBIH AKURAT ---
10234
+ st.subheader("🗄️ Entity Relationship Diagram (ERD) Dinamis")
10235
+
10236
+ # Konfigurasi ERD
10237
+ col1, col2, col3 = st.columns(3)
10238
+
10239
+ with col1:
10240
+ erd_style = st.selectbox(
10241
+ "Style ERD:",
10242
+ ['Vertical', 'Horizontal', 'Circular'],
10243
+ index=0
10244
+ )
10245
+
10246
+ with col2:
10247
+ show_relationships = st.checkbox("Tampilkan Relasi", value=True)
10248
+
10249
+ with col3:
10250
+ max_tables = st.slider("Max Tabel", 3, 15, 8)
10251
+
10252
+ try:
10253
+ import graphviz
10254
+
10255
+ # Buat graph ERD
10256
+ dot = graphviz.Digraph(comment='Dynamic Database ERD')
10257
+
10258
+ # Atur layout
10259
+ if erd_style == 'Vertical':
10260
+ dot.attr(rankdir='TB', size='12,16')
10261
+ elif erd_style == 'Horizontal':
10262
+ dot.attr(rankdir='LR', size='16,12')
10263
+ else: # Circular
10264
+ dot.attr(rankdir='LR', size='14,14', layout='circo')
10265
+
10266
+ # Kelompokkan kolom berdasarkan role untuk membuat tabel
10267
+ main_table_cols = []
10268
+ reference_tables = {}
10269
+
10270
+ for col, analysis in column_analysis.items():
10271
+ if analysis['role'] == 'FOREIGN_KEY':
10272
+ # Buat tabel referensi untuk foreign key
10273
+ ref_table_name = f"ref_{col}"
10274
+ if ref_table_name not in reference_tables:
10275
+ ref_display_name = col.replace('_id', '').replace('ID', '').replace('_', ' ').title()
10276
+ reference_tables[ref_table_name] = {
10277
+ 'name': ref_display_name,
10278
+ 'columns': []
10279
+ }
10280
+ reference_tables[ref_table_name]['columns'].append(col)
10281
+ else:
10282
+ main_table_cols.append((col, analysis))
8043
10283
 
8044
- # Tampilkan ERD (Entity Relationship Diagram) sederhana
8045
- st.subheader("🔗 Entity Relationship Diagram (ERD)")
8046
-
8047
- # Analisis relasi antar kolom
8048
- st.write("**Relasi antar Kolom:**")
8049
- numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
8050
- categorical_cols = df.select_dtypes(include=['object']).columns.tolist()
8051
-
8052
- col1, col2 = st.columns(2)
8053
-
8054
- with col1:
8055
- st.write("**Kolom Numerik:**")
8056
- df_numeric = pd.DataFrame({"Nama Kolom": numeric_cols})
8057
- st.table(df_numeric)
8058
-
8059
- with col2:
8060
- st.write("**Kolom Kategorikal:**")
8061
- df_categorical = pd.DataFrame({"Nama Kolom": categorical_cols})
8062
- st.table(df_categorical)
8063
-
8064
- # --- Visualisasi ERD yang Sesungguhnya ---
8065
- st.write("---")
8066
- st.subheader("🗄️ Entity Relationship Diagram Visualization")
8067
-
8068
- # Buat struktur entitas dan relasi
8069
- st.write("**Struktur Tabel Database:**")
8070
-
8071
- # Generate SQL CREATE TABLE statements
8072
- st.markdown("### 📝 SQL Schema Definition")
8073
-
8074
- # Buat diagram ERD menggunakan graphviz
8075
- try:
8076
- import graphviz
8077
-
8078
- # Buat graph untuk ERD
8079
- dot = graphviz.Digraph(comment='Database ERD')
8080
- dot.attr(rankdir='TB', size='8,8')
8081
-
8082
- # Buat node untuk tabel utama
10284
+ # Batasi jumlah tabel yang ditampilkan
10285
+ tables_to_show = min(max_tables, len(reference_tables) + 1)
10286
+
10287
+ # Buat tabel utama
10288
+ if main_table_cols and tables_to_show > 0:
8083
10289
  with dot.subgraph(name='cluster_main') as c:
8084
- c.attr(label='Tabel Utama: dataset_table', style='filled', color='lightblue', fontsize='12')
8085
-
8086
- # Header tabel
8087
- c.node('table_header', f'📊 dataset_table', shape='plaintext', fontsize='14', fontname='Arial bold')
10290
+ table_name = uploaded_file.name.rsplit('.', 1)[0] # remove extension (rsplit keeps dotted filenames intact)
10291
+ c.attr(label=f'📊 {table_name}', style='filled',
10292
+ color='lightblue', fontsize='14', fontname='Arial Bold')
8088
10293
 
8089
- # Field-field dalam tabel
8090
10294
  fields = []
10295
+ for col, analysis in main_table_cols[:12]: # Batasi kolom per tabel
10296
+ field_type = ""
10297
+ if pd.api.types.is_numeric_dtype(df[col]):
10298
+ field_type = "NUMERIC"
10299
+ elif pd.api.types.is_datetime64_any_dtype(df[col]):
10300
+ field_type = "DATETIME"
10301
+ elif df[col].dtype == 'object':
10302
+ try:
10303
+ max_len = df[col].astype(str).str.len().max()
10304
+ field_type = f"VARCHAR({min(255, max(50, int(max_len)))})"
10305
+ except Exception:
10306
+ field_type = "TEXT"
10307
+ elif df[col].dtype == 'bool':
10308
+ field_type = "BOOLEAN"
10309
+ else:
10310
+ field_type = "TEXT"
10311
+
10312
+ constraint = ""
10313
+ if analysis['role'] == 'PRIMARY_KEY':
10314
+ constraint = " [PK]"
10315
+ elif analysis['role'] == 'FOREIGN_KEY':
10316
+ constraint = " [FK]"
10317
+
10318
+ fields.append(f"<TR><TD ALIGN='LEFT'>{analysis['icon']} {col}</TD><TD ALIGN='LEFT'>{field_type}{constraint}</TD></TR>")
10319
+
10320
+ # Tambahkan indicator jika ada kolom yang tidak ditampilkan
10321
+ if len(main_table_cols) > 12:
10322
+ fields.append(f"<TR><TD ALIGN='LEFT'>...</TD><TD ALIGN='LEFT'>+{len(main_table_cols)-12} more</TD></TR>")
8091
10323
 
8092
- # Primary keys (asumsikan kolom pertama sebagai PK)
8093
- if len(df.columns) > 0:
8094
- pk_field = f"<TR><TD ALIGN='LEFT'><B>🔑 {df.columns[0]}</B></TD><TD ALIGN='LEFT'>[PK]</TD></TR>"
8095
- fields.append(pk_field)
10324
+ table_html = f'''<
10325
+ <TABLE BORDER="1" CELLBORDER="0" CELLSPACING="0" CELLPADDING="4">
10326
+ <TR><TD ALIGN="CENTER" BGCOLOR="#e6f3ff"><B>COLUMN</B></TD><TD ALIGN="CENTER" BGCOLOR="#e6f3ff"><B>TYPE</B></TD></TR>
10327
+ {''.join(fields)}
10328
+ </TABLE>
10329
+ >'''
8096
10330
 
8097
- # Numeric fields
8098
- for col in numeric_cols[:5]: # Batasi agar tidak terlalu panjang
8099
- if col != df.columns[0]:
8100
- fields.append(f"<TR><TD ALIGN='LEFT'>📈 {col}</TD><TD ALIGN='LEFT'>NUMERIC</TD></TR>")
10331
+ c.node('main_table', table_html, shape='none', fontname='Arial')
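The `main_table` node relies on Graphviz's HTML-like labels: an HTML table string wrapped in angle brackets, rendered with `shape='none'`. A self-contained sketch of the same technique, with made-up field contents:

```python
# Standalone demo of a Graphviz HTML-like table label, the same
# technique used for main_table above (needs the graphviz package).
import graphviz

dot = graphviz.Digraph(comment='HTML label demo')
label = '''<
<TABLE BORDER="1" CELLBORDER="0" CELLSPACING="0" CELLPADDING="4">
  <TR><TD BGCOLOR="#e6f3ff"><B>COLUMN</B></TD><TD BGCOLOR="#e6f3ff"><B>TYPE</B></TD></TR>
  <TR><TD ALIGN="LEFT">🔑 id</TD><TD ALIGN="LEFT">NUMERIC [PK]</TD></TR>
</TABLE>
>'''
dot.node('demo_table', label, shape='none', fontname='Arial')
print(dot.source)  # or st.graphviz_chart(dot) inside a Streamlit app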
10332
+
10333
+ # Buat tabel referensi
10334
+ colors = ['#e6ffe6', '#fff0e6', '#e6f9ff', '#ffe6ff', '#ffffe6', '#f0e6ff']
10335
+ for i, (ref_name, ref_info) in enumerate(list(reference_tables.items())[:tables_to_show-1]):
10336
+ color = colors[i % len(colors)]
10337
+ with dot.subgraph(name=f'cluster_{ref_name}') as c:
10338
+ c.attr(label=f'📁 {ref_info["name"]}', style='filled',
10339
+ color=color, fontsize='12', fontname='Arial')
8101
10340
 
8102
- # Categorical fields
8103
- for col in categorical_cols[:3]: # Batasi agar tidak terlalu panjang
8104
- fields.append(f"<TR><TD ALIGN='LEFT'>📝 {col}</TD><TD ALIGN='LEFT'>VARCHAR</TD></TR>")
10341
+ fields = []
10342
+ # Primary key untuk tabel referensi
10343
+ for fk_col in ref_info['columns']:
10344
+ fields.append(f"<TR><TD ALIGN='LEFT'><B>🔑 {fk_col}</B></TD><TD ALIGN='LEFT'>[PK]</TD></TR>")
8105
10345
 
8106
- # Jika ada field lebih dari yang ditampilkan
8107
- total_fields = len(numeric_cols) + len(categorical_cols)
8108
- if total_fields > 8:
8109
- fields.append(f"<TR><TD ALIGN='LEFT'>...</TD><TD ALIGN='LEFT'>+{total_fields-8} fields</TD></TR>")
10346
+ # Tambahkan kolom umum untuk tabel referensi
10347
+ fields.append(f"<TR><TD ALIGN='LEFT'>📝 Name</TD><TD ALIGN='LEFT'>VARCHAR(100)</TD></TR>")
10348
+ fields.append(f"<TR><TD ALIGN='LEFT'>📝 Description</TD><TD ALIGN='LEFT'>VARCHAR(255)</TD></TR>")
10349
+ fields.append(f"<TR><TD ALIGN='LEFT'>📅 Created_Date</TD><TD ALIGN='LEFT'>DATETIME</TD></TR>")
10350
+ fields.append(f"<TR><TD ALIGN='LEFT'>✅ Is_Active</TD><TD ALIGN='LEFT'>BOOLEAN</TD></TR>")
8110
10351
 
8111
10352
  table_html = f'''<
8112
- <TABLE BORDER="1" CELLBORDER="0" CELLSPACING="0" CELLPADDING="4">
8113
- <TR><TD ALIGN="CENTER"><B>COLUMN</B></TD><TD ALIGN="CENTER"><B>TYPE</B></TD></TR>
10353
+ <TABLE BORDER="1" CELLBORDER="0" CELLSPACING="0" CELLPADDING="3">
10354
+ <TR><TD ALIGN="CENTER" BGCOLOR="{color}"><B>COLUMN</B></TD><TD ALIGN="CENTER" BGCOLOR="{color}"><B>TYPE</B></TD></TR>
8114
10355
  {''.join(fields)}
8115
10356
  </TABLE>
8116
10357
  >'''
8117
10358
 
8118
- c.node('main_table', table_html, shape='none', fontname='Arial')
8119
-
8120
- # Tampilkan graph
8121
- st.graphviz_chart(dot)
8122
-
8123
- except ImportError:
8124
- st.warning("Graphviz tidak terinstall. Menggunakan visualisasi alternatif...")
10359
+ c.node(ref_name, table_html, shape='none', fontname='Arial')
8125
10360
 
8126
- # Visualisasi alternatif dengan Plotly
8127
- st.write("**Diagram Relasi Tabel:**")
8128
-
8129
- # Buat network graph sederhana
8130
- import plotly.graph_objects as go
8131
-
8132
- # Node positions
8133
- node_x = [0.5]
8134
- node_y = [0.5]
8135
- node_text = ["dataset_table"]
8136
- node_colors = ['lightblue']
8137
-
8138
- # Add some related tables (conceptual)
8139
- related_tables = ['metadata_table', 'category_table', 'log_table']
8140
- for i, table in enumerate(related_tables):
8141
- node_x.append(0.2 + i * 0.3)
8142
- node_y.append(0.8)
8143
- node_text.append(table)
8144
- node_colors.append('lightgreen')
8145
-
8146
- fig = go.Figure()
8147
-
8148
- # Add nodes
8149
- fig.add_trace(go.Scatter(
8150
- x=node_x, y=node_y,
8151
- mode='markers+text',
8152
- marker=dict(size=50, color=node_colors),
8153
- text=node_text,
8154
- textposition="middle center",
8155
- name="Tables"
8156
- ))
10361
+ # Tambahkan relasi
10362
+ if show_relationships:
10363
+ for fk_col in ref_info['columns']:
10364
+ dot.edge(ref_name, 'main_table', label=f'1:N ({fk_col})', style='dashed', color='#666666')
10365
+
10366
+ # Tampilkan ERD
10367
+ st.graphviz_chart(dot)
10368
+
10369
+ # Legenda
10370
+ st.markdown("""
10371
+ **📋 Legenda ERD:**
10372
+ - 🔑 Primary Key | 🔗 Foreign Key | 📊 Metric | 💰 Measurement
10373
+ - 📅 Temporal | 🏷️ Category | 📝 Description | 📄 Attribute
10374
+ - **Warna berbeda**: Tabel yang berbeda domain
10375
+ """)
10376
+
10377
+ except ImportError:
10378
+ st.warning("Graphviz tidak terinstall. Menggunakan visualisasi alternatif...")
10379
+
10380
+ # Visualisasi alternatif yang lebih sederhana
10381
+ import plotly.graph_objects as go
10382
+
+ # Saat import graphviz gagal, variabel dari blok try (reference_tables,
+ # colors) belum sempat terdefinisi, jadi bangun ulang dari column_analysis
+ reference_tables = {
+     f"ref_{col}": {'name': col.replace('_id', '').replace('ID', '').replace('_', ' ').title(), 'columns': [col]}
+     for col, analysis in column_analysis.items()
+     if analysis['role'] == 'FOREIGN_KEY'
+ }
+ colors = ['#e6ffe6', '#fff0e6', '#e6f9ff', '#ffe6ff', '#ffffe6', '#f0e6ff']
10383
+ # Hitung posisi node secara dinamis
10384
+ num_tables = min(8, len(reference_tables) + 1)
10385
+ angles = np.linspace(0, 2*np.pi, num_tables, endpoint=False)
10386
+ radius = 0.4
10387
+
10388
+ fig = go.Figure()
10389
+
10390
+ # Node positions
10391
+ node_x = [0.5] # Main table di center
10392
+ node_y = [0.5]
10393
+ node_text = ["MAIN"]
10394
+ node_colors = ['#3366CC']
10395
+
10396
+ # Reference tables di sekeliling
10397
+ for i, (ref_name, ref_info) in enumerate(list(reference_tables.items())[:num_tables-1]):
10398
+ angle = angles[i]
10399
+ x = 0.5 + radius * np.cos(angle)
10400
+ y = 0.5 + radius * np.sin(angle)
8157
10401
 
8158
- # Add edges (relationships)
10402
+ node_x.append(x)
10403
+ node_y.append(y)
10404
+ node_text.append(ref_info['name'][:10])
10405
+ node_colors.append(colors[i % len(colors)])
10406
+
10407
+ # Add nodes
10408
+ fig.add_trace(go.Scatter(
10409
+ x=node_x, y=node_y,
10410
+ mode='markers+text',
10411
+ marker=dict(size=80, color=node_colors),
10412
+ text=node_text,
10413
+ textposition="middle center",
10414
+ textfont=dict(size=12, color='white'),
10415
+ name="Tables"
10416
+ ))
10417
+
10418
+ # Add relationships
10419
+ if show_relationships and len(node_x) > 1:
8159
10420
  for i in range(1, len(node_x)):
8160
10421
  fig.add_trace(go.Scatter(
8161
- x=[node_x[0], node_x[i]],
8162
- y=[node_y[0], node_y[i]],
10422
+ x=[node_x[i], node_x[0]], y=[node_y[i], node_y[0]],
8163
10423
  mode='lines',
8164
- line=dict(width=2, color='gray'),
8165
- hoverinfo='none'
10424
+ line=dict(width=2, color='gray', dash='dash'),
10425
+ hoverinfo='none',
10426
+ showlegend=False
8166
10427
  ))
8167
-
8168
- fig.update_layout(
8169
- title="Database Table Relationships",
8170
- showlegend=False,
8171
- height=400,
8172
- xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
8173
- yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
8174
- margin=dict(l=20, r=20, t=40, b=20)
8175
- )
8176
-
8177
- st.plotly_chart(fig, use_container_width=True)
8178
-
8179
- # --- Bagian Penyambung SQL ---
8180
- st.write("---")
8181
- st.subheader("🧩 Format SQL (Comma Separated)")
8182
-
8183
- numeric_sql = ", ".join(numeric_cols)
8184
- categorical_sql = ", ".join(categorical_cols)
8185
-
8186
- st.code(f"SELECT {numeric_sql}, {categorical_sql} FROM dataset_table;", language="sql")
8187
-
8188
- # Generate CREATE TABLE statement
8189
- st.markdown("### 🗃️ SQL CREATE TABLE Statement")
8190
-
8191
- # Deteksi tipe data untuk SQL
8192
- def infer_sql_type(dtype, sample_data):
8193
- if np.issubdtype(dtype, np.number):
8194
- return "DECIMAL(10,2)"
8195
- elif np.issubdtype(dtype, np.datetime64):
8196
- return "DATETIME"
8197
- else:
8198
- # Cek panjang string maksimum
8199
- max_len = sample_data.astype(str).str.len().max()
8200
- return f"VARCHAR({min(255, max(100, int(max_len * 1.5)))})"
8201
-
8202
- create_table_sql = "CREATE TABLE dataset_table (\n"
8203
- for i, col in enumerate(df.columns):
8204
- sql_type = infer_sql_type(df[col].dtype, df[col])
8205
- if i == 0:
8206
- create_table_sql += f" {col} {sql_type} PRIMARY KEY,\n"
8207
- else:
8208
- create_table_sql += f" {col} {sql_type},\n"
8209
-
8210
- create_table_sql = create_table_sql.rstrip(',\n') + "\n);"
8211
-
8212
- st.code(create_table_sql, language="sql")
8213
-
8214
- # Jika ingin lihat hanya daftar kolom
8215
- col3, col4 = st.columns(2)
8216
- with col3:
8217
- st.write("**Kolom Numerik (SQL String):**")
8218
- st.code(numeric_sql, language="sql")
8219
-
8220
- with col4:
8221
- st.write("**Kolom Kategorikal (SQL String):**")
8222
- st.code(categorical_sql, language="sql")
8223
-
8224
- # Visualisasi korelasi sebagai ERD sederhana
8225
- if len(numeric_cols) > 1:
8226
- st.write("---")
8227
- st.subheader("📊 Matriks Korelasi (Hubungan Numerik)")
8228
- corr_matrix = df[numeric_cols].corr()
8229
-
8230
- # Plot menggunakan Plotly
8231
- fig = px.imshow(
8232
- corr_matrix,
8233
- text_auto=".2f",
8234
- color_continuous_scale='RdBu_r',
8235
- zmin=-1,
8236
- zmax=1,
8237
- aspect="auto",
8238
- labels=dict(color="Korelasi")
8239
- )
8240
- fig.update_layout(
8241
- title="Matriks Korelasi Numerik",
8242
- xaxis_title="Fitur",
8243
- yaxis_title="Fitur",
8244
- autosize=True,
8245
- margin=dict(l=40, r=40, t=60, b=40),
8246
- height=600
8247
- )
8248
- st.plotly_chart(fig, use_container_width=True)
8249
-
8250
- # --- Linear Regression Analysis ---
8251
- st.write("---")
8252
- st.subheader("🧮 Linear Regression Analysis (SQL-Style LRS)")
10428
+
10429
+ fig.update_layout(
10430
+ title="Database Table Relationships",
10431
+ showlegend=False,
10432
+ height=500,
10433
+ xaxis=dict(showgrid=False, zeroline=False, showticklabels=False, range=[0, 1]),
10434
+ yaxis=dict(showgrid=False, zeroline=False, showticklabels=False, range=[0, 1]),
10435
+ margin=dict(l=20, r=20, t=60, b=20)
10436
+ )
10437
+
10438
+ st.plotly_chart(fig, use_container_width=True)
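The fallback layout places the satellite tables evenly on a circle around the main node at (0.5, 0.5): node i sits at angle 2πi/N at radius 0.4. A quick check of that math:

```python
# Sanity check of the circular placement: N nodes evenly spaced at
# radius 0.4 around the center (0.5, 0.5), as computed in the loop above.
import numpy as np

n, radius = 4, 0.4
angles = np.linspace(0, 2 * np.pi, n, endpoint=False)
points = [(0.5 + radius * np.cos(a), 0.5 + radius * np.sin(a)) for a in angles]
print(np.round(points, 2))  # [[0.9 0.5] [0.5 0.9] [0.1 0.5] [0.5 0.1]]
```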
8253
10439
 
8254
- if len(numeric_cols) >= 2:
8255
- col1, col2 = st.columns(2)
10440
+ # --- VISUALISASI DATA YANG AMAN ---
10441
+ st.subheader("📈 Visualisasi Data")
10442
+
10443
+ # Warna konsisten untuk chart
10444
+ color_palette = px.colors.qualitative.Set3
10445
+
10446
+ # Fungsi safe plotting
10447
+ def safe_plotting(plot_function, *args, **kwargs):
10448
+ try:
10449
+ return plot_function(*args, **kwargs)
10450
+ except Exception as e:
10451
+ st.error(f"Error dalam membuat chart: {str(e)}")
10452
+ return None
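Callers of this wrapper get back either a figure or `None`, so every call site below guards with `if fig:` before rendering. A typical usage sketch (the column name `amount` is hypothetical):

```python
# Typical call pattern for safe_plotting: render only when a figure
# came back; "amount" is a hypothetical column for illustration.
fig = safe_plotting(px.histogram, df, x="amount", nbins=30)
if fig:
    st.plotly_chart(fig, use_container_width=True)
```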
10453
+
10454
+ # Tab untuk organisasi chart yang lebih baik
10455
+ tab111, tab222, tab333 = st.tabs(["📊 Distribusi Numerik", "🏷️ Analisis Kategorikal", "📋 Data Quality"])
10456
+
10457
+ with tab111:
10458
+ st.subheader("Analisis Distribusi Numerik")
10459
+
10460
+ if numeric_cols:
10461
+ col1, col2 = st.columns(2)
10462
+
10463
+ with col1:
10464
+ # Histogram dengan pengelompokan yang baik
10465
+ selected_num_hist = st.selectbox(
10466
+ "Pilih variabel untuk histogram:",
10467
+ numeric_cols,
10468
+ key="hist_num"
10469
+ )
8256
10470
 
8257
- with col1:
8258
- x_axis = st.selectbox("Pilih variabel X (Independent)", numeric_cols, key="lrs_x")
8259
- with col2:
8260
- y_axis = st.selectbox("Pilih variabel Y (Dependent)", numeric_cols, key="lrs_y")
10471
+ if selected_num_hist:
10472
+ fig_hist = safe_plotting(px.histogram,
10473
+ df,
10474
+ x=selected_num_hist,
10475
+ title=f"Distribusi {selected_num_hist}",
10476
+ nbins=30,
10477
+ color_discrete_sequence=['#3366CC'],
10478
+ opacity=0.8
10479
+ )
10480
+ if fig_hist:
10481
+ fig_hist.update_layout(
10482
+ bargap=0.1,
10483
+ xaxis_title=selected_num_hist,
10484
+ yaxis_title="Frekuensi"
10485
+ )
10486
+ st.plotly_chart(fig_hist, use_container_width=True)
10487
+
10488
+ with col2:
10489
+ # Box plot
10490
+ selected_num_box = st.selectbox(
10491
+ "Pilih variabel untuk box plot:",
10492
+ numeric_cols,
10493
+ key="box_num"
10494
+ )
8261
10495
 
8262
- if x_axis != y_axis:
8263
- # Hitung regresi linear
8264
- slope, intercept, r_value, p_value, std_err = stats.linregress(df[x_axis], df[y_axis])
8265
- correlation = df[x_axis].corr(df[y_axis])
8266
- r_squared = r_value**2
8267
-
8268
- # --- Tampilan SQL Query ---
8269
- st.markdown("### 🧩 SQL Query Representation")
8270
- st.code(f"""
8271
- SELECT
8272
- {x_axis} AS X,
8273
- {y_axis} AS Y,
8274
- ROUND(REGR_SLOPE({y_axis}, {x_axis}), 4) AS slope,
8275
- ROUND(REGR_INTERCEPT({y_axis}, {x_axis}), 4) AS intercept,
8276
- ROUND(CORR({y_axis}, {x_axis}), 4) AS correlation,
8277
- ROUND(POWER(CORR({y_axis}, {x_axis}), 2), 4) AS r_squared
8278
- FROM dataset_table;
8279
- """, language="sql")
8280
-
8281
- # --- Plot hubungan ---
8282
- fig = px.scatter(
10496
+ if selected_num_box:
10497
+ fig_box = safe_plotting(px.box,
8283
10498
  df,
8284
- x=x_axis,
8285
- y=y_axis,
8286
- trendline="ols",
8287
- title=f"📊 SQL Visualization: {y_axis} vs {x_axis}",
8288
- labels={x_axis: f"{x_axis}", y_axis: f"{y_axis}"}
8289
- )
8290
- fig.update_layout(
8291
- autosize=True,
8292
- margin=dict(l=40, r=40, t=60, b=40),
8293
- height=500,
8294
- title_x=0.5
10499
+ y=selected_num_box,
10500
+ title=f"Box Plot {selected_num_box}",
10501
+ color_discrete_sequence=['#FF6B6B']
8295
10502
  )
8296
- st.plotly_chart(fig, use_container_width=True)
8297
-
8298
- # --- Relationship Mapping ---
8299
- st.markdown("### 🔗 Relationship Mapping")
8300
-
8301
- # Buat diagram hubungan sederhana
8302
- rel_fig = go.Figure()
8303
-
8304
- # Add nodes
8305
- rel_fig.add_trace(go.Scatter(
8306
- x=[0.2, 0.8], y=[0.5, 0.5],
8307
- mode='markers+text',
8308
- marker=dict(size=80, color=['lightblue', 'lightgreen']),
8309
- text=[x_axis, y_axis],
8310
- textposition="middle center",
8311
- textfont=dict(size=14)
8312
- ))
8313
-
8314
- # Add relationship line dengan annotation korelasi
8315
- rel_fig.add_trace(go.Scatter(
8316
- x=[0.3, 0.7], y=[0.5, 0.5],
8317
- mode='lines+text',
8318
- line=dict(width=4, color='red'),
8319
- text=[f"r = {correlation:.3f}"],
8320
- textposition="middle center",
8321
- textfont=dict(size=12, color='red')
8322
- ))
8323
-
8324
- rel_fig.update_layout(
8325
- title=f"Relationship Diagram: {x_axis} → {y_axis}",
8326
- showlegend=False,
8327
- height=300,
8328
- xaxis=dict(showgrid=False, zeroline=False, showticklabels=False, range=[0, 1]),
8329
- yaxis=dict(showgrid=False, zeroline=False, showticklabels=False, range=[0, 1]),
8330
- margin=dict(l=20, r=20, t=60, b=20)
10503
+ if fig_box:
10504
+ st.plotly_chart(fig_box, use_container_width=True)
10505
+
10506
+ # Matriks korelasi
10507
+ if len(numeric_cols) >= 2:
10508
+ st.write("**Matriks Korelasi:**")
10509
+ try:
10510
+ corr_matrix = df[numeric_cols].corr()
10511
+ fig_corr = px.imshow(
10512
+ corr_matrix,
10513
+ text_auto=".2f",
10514
+ color_continuous_scale='RdBu_r',
10515
+ aspect="auto",
10516
+ title="Matriks Korelasi Numerik"
8331
10517
  )
8332
-
8333
- st.plotly_chart(rel_fig, use_container_width=True)
8334
-
8335
- # --- Tabel hasil regresi ---
8336
- st.markdown("### 📋 SQL-Style Result Table")
8337
- result_df = pd.DataFrame({
8338
- "Metric": ["X (Independent)", "Y (Dependent)", "Slope (β1)", "Intercept (β0)",
8339
- "R-Value", "R² (R-squared)", "P-Value", "Std Error", "Correlation"],
8340
- "Value": [x_axis, y_axis, f"{slope:.4f}", f"{intercept:.4f}",
8341
- f"{r_value:.4f}", f"{r_squared:.4f}", f"{p_value:.4f}",
8342
- f"{std_err:.4f}", f"{correlation:.4f}"]
8343
- })
8344
-
8345
- st.dataframe(result_df, use_container_width=True, hide_index=True)
8346
-
8347
- # Analisis statistik lengkap
8348
- st.subheader("📊 Analisis Statistik Lengkap")
8349
-
8350
- # Statistik deskriptif
8351
- st.write("**Statistik Deskriptif:**")
8352
- st.dataframe(df.describe())
8353
-
8354
- # Analisis missing values
8355
- st.write("**Analisis Missing Values:**")
8356
- missing_data = df.isnull().sum()
8357
- if missing_data.sum() > 0:
8358
- fig_missing = px.bar(x=missing_data.index, y=missing_data.values,
8359
- title="Missing Values per Kolom")
8360
- st.plotly_chart(fig_missing)
8361
- else:
8362
- st.success("Tidak ada missing values dalam dataset!")
10518
+ st.plotly_chart(fig_corr, use_container_width=True)
10519
+ except Exception as e:
10520
+ st.warning(f"Tidak dapat menghitung matriks korelasi: {str(e)}")
10521
+
10522
+ with tab222:
10523
+ st.subheader("Analisis Data Kategorikal")
8363
10524
 
8364
- # Data quality report
8365
- st.subheader("📋 Data Quality Report")
10525
+ if categorical_cols:
10526
+ col1, col2 = st.columns(2)
10527
+
10528
+ with col1:
10529
+ # Pie chart yang terorganisir
10530
+ selected_cat_pie = st.selectbox(
10531
+ "Pilih variabel kategorikal:",
10532
+ categorical_cols,
10533
+ key="pie_cat"
10534
+ )
10535
+
10536
+ if selected_cat_pie:
10537
+ try:
10538
+ value_counts = df[selected_cat_pie].value_counts().head(8)
10539
+ fig_pie = safe_plotting(px.pie,
10540
+ values=value_counts.values,
10541
+ names=value_counts.index,
10542
+ title=f"Distribusi {selected_cat_pie} (Top 8)",
10543
+ color_discrete_sequence=color_palette
10544
+ )
10545
+ if fig_pie:
10546
+ st.plotly_chart(fig_pie, use_container_width=True)
10547
+ except Exception as e:
10548
+ st.warning(f"Tidak dapat membuat pie chart: {str(e)}")
10549
+
10550
+ with col2:
10551
+ # Bar chart horizontal
10552
+ if selected_cat_pie:
10553
+ try:
10554
+ value_counts = df[selected_cat_pie].value_counts().head(10)
10555
+ fig_bar = safe_plotting(px.bar,
10556
+ x=value_counts.values,
10557
+ y=value_counts.index,
10558
+ orientation='h',
10559
+ title=f"Top 10 {selected_cat_pie}",
10560
+ color=value_counts.values,
10561
+ color_continuous_scale='Blues'
10562
+ )
10563
+ if fig_bar:
10564
+ fig_bar.update_layout(
10565
+ xaxis_title="Count",
10566
+ yaxis_title=selected_cat_pie,
10567
+ showlegend=False
10568
+ )
10569
+ st.plotly_chart(fig_bar, use_container_width=True)
10570
+ except Exception as e:
10571
+ st.warning(f"Tidak dapat membuat bar chart: {str(e)}")
10572
+
10573
+ with tab333:
10574
+ st.subheader("Data Quality Report")
8366
10575
 
8367
- quality_data = []
10576
+ # Buat laporan kualitas data yang komprehensif
10577
+ quality_report = []
8368
10578
  for col in df.columns:
8369
- quality_data.append({
10579
+ analysis = column_analysis[col]
10580
+ quality_report.append({
8370
10581
  'Kolom': col,
8371
- 'Tipe': df[col].dtype,
8372
- 'Missing': df[col].isnull().sum(),
8373
- 'Missing %': (df[col].isnull().sum() / len(df)) * 100,
8374
- 'Unique': df[col].nunique(),
8375
- 'Contoh Value': df[col].iloc[0] if not df[col].empty else 'N/A'
10582
+ 'Tipe Data': analysis['dtype'],
10583
+ 'Role': analysis['role'],
10584
+ 'Unique Values': analysis['unique_count'],
10585
+ 'Null Values': analysis['null_count'],
10586
+ 'Null %': f"{analysis['null_percentage']:.2f}%",
10587
+ 'Sample': analysis['sample_values'][0] if analysis['sample_values'] else 'N/A'
8376
10588
  })
8377
10589
 
8378
- quality_df = pd.DataFrame(quality_data)
8379
- st.dataframe(quality_df)
10590
+ quality_df = pd.DataFrame(quality_report)
10591
+ st.dataframe(quality_df, use_container_width=True)
8380
10592
 
8381
- # Download hasil analisis
8382
- st.subheader("💾 Download Hasil Analisis")
10593
+ # Visualisasi kualitas data sederhana
10594
+ col1, col2 = st.columns(2)
10595
+
10596
+ with col1:
10597
+ # Missing values bar chart
10598
+ missing_data = quality_df[['Kolom', 'Null Values']].set_index('Kolom')
10599
+ fig_missing = safe_plotting(px.bar,
10600
+ missing_data,
10601
+ y='Null Values',
10602
+ title="Missing Values per Kolom",
10603
+ color='Null Values',
10604
+ color_continuous_scale='Reds'
10605
+ )
10606
+ if fig_missing:
10607
+ st.plotly_chart(fig_missing, use_container_width=True)
8383
10608
 
8384
- # Convert quality report to CSV
8385
- csv = quality_df.to_csv(index=False)
10609
+ with col2:
10610
+ # Data types distribution
10611
+ type_dist = quality_df['Tipe Data'].value_counts()
10612
+ fig_types = safe_plotting(px.pie,
10613
+ values=type_dist.values,
10614
+ names=type_dist.index,
10615
+ title="Distribusi Tipe Data",
10616
+ color_discrete_sequence=color_palette
10617
+ )
10618
+ if fig_types:
10619
+ st.plotly_chart(fig_types, use_container_width=True)
10620
+
10621
+ # --- DOWNLOAD SECTION ---
10622
+ st.subheader("💾 Download Hasil Analisis")
10623
+
10624
+ col1, col2, col3 = st.columns(3)
10625
+
10626
+ with col1:
8386
10627
  st.download_button(
8387
- label="Download Data Quality Report",
8388
- data=csv,
8389
- file_name="data_quality_report.csv",
8390
- mime="text/csv"
10628
+ "📊 Download Quality Report",
10629
+ quality_df.to_csv(index=False),
10630
+ "data_quality_report.csv",
10631
+ "text/csv"
8391
10632
  )
10633
+
10634
+ with col2:
10635
+ # Buat summary report
10636
+ summary_report = {
10637
+ 'file_name': uploaded_file.name,
10638
+ 'file_size': f"{uploaded_file.size / 1024:.2f} KB",
10639
+ 'rows': df.shape[0],
10640
+ 'columns': df.shape[1],
10641
+ 'analysis_date': pd.Timestamp.now().strftime("%Y-%m-%d %H:%M:%S"),
10642
+ 'numeric_columns': numeric_cols,
10643
+ 'categorical_columns': categorical_cols,
10644
+ 'date_columns': date_cols,
10645
+ 'primary_keys': [col for col, analysis in column_analysis.items()
10646
+ if analysis['role'] == 'PRIMARY_KEY'],
10647
+ 'foreign_keys': [col for col, analysis in column_analysis.items()
10648
+ if analysis['role'] == 'FOREIGN_KEY']
10649
+ }
8392
10650
 
8393
- except Exception as e:
8394
- st.error(f"Error membaca file: {str(e)}")
8395
- else:
8396
- st.info("Silakan upload file CSV atau Excel untuk memulai analisis")
8397
-
8398
- # Contoh dataset
8399
- st.subheader("🎯 Contoh Format Data")
8400
- example_data = {
8401
- 'ID': [1, 2, 3, 4, 5],
8402
- 'Nama': ['Alice', 'Bob', 'Charlie', 'Diana', 'Eve'],
8403
- 'Usia': [25, 30, 35, 28, 32],
8404
- 'Gaji': [50000, 60000, 70000, 55000, 65000],
8405
- 'Departemen': ['IT', 'HR', 'IT', 'Finance', 'HR']
8406
- }
8407
- example_df = pd.DataFrame(example_data)
8408
- st.dataframe(example_df)
10651
+ import json
10652
+ st.download_button(
10653
+ "📋 Download Summary Report",
10654
+ json.dumps(summary_report, indent=2, ensure_ascii=False),
10655
+ "analysis_summary.json",
10656
+ "application/json"
10657
+ )
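One robustness note on this download: `json.dumps` raises `TypeError` on numpy scalars or `pd.Timestamp` values, so if such objects ever land in `summary_report`, a `default=str` fallback keeps the export working — a defensive sketch, not something the current dict strictly needs:

```python
# Defensive serialization: numpy scalars and pandas Timestamps are not
# JSON-serializable by default; default=str stringifies anything unknown.
import json
import numpy as np
import pandas as pd

payload = {'rows': np.int64(1000), 'analysis_date': pd.Timestamp.now()}
print(json.dumps(payload, indent=2, default=str))
```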
8409
10658
 
8410
- # Download template
8411
- csv_template = example_df.to_csv(index=False)
8412
- st.download_button(
8413
- label="Download Template CSV",
8414
- data=csv_template,
8415
- file_name="template_data.csv",
8416
- mime="text/csv"
8417
- )
10659
+ with col3:
10660
+ # Download processed data
10661
+ st.download_button(
10662
+ "💾 Download Processed Data",
10663
+ df.to_csv(index=False),
10664
+ "processed_data.csv",
10665
+ "text/csv"
10666
+ )
10667
+
10668
+ except Exception as e:
10669
+ st.error(f"Error dalam analisis data: {str(e)}")
10670
+ st.info("Pastikan file yang diupload berformat CSV atau Excel yang valid")
10671
+ st.code(f"Error details: {str(e)}", language='python')
10672
+ else:
10673
+ st.info("📤 Silakan upload file CSV atau Excel untuk memulai analisis")
10674
+
10675
+ # Template dan panduan
10676
+ st.subheader("🎯 Panduan Format Data")
10677
+
10678
+ col1, col2 = st.columns(2)
10679
+
10680
+ with col1:
10681
+ st.write("**Format yang Disarankan:**")
10682
+ sample_data = {
10683
+ 'customer_id': [1, 2, 3, 4, 5],
10684
+ 'order_id': [101, 102, 103, 104, 105],
10685
+ 'product_id': [201, 202, 203, 204, 205],
10686
+ 'order_date': pd.date_range('2024-01-01', periods=5),
10687
+ 'amount': [100.50, 75.25, 200.00, 150.75, 90.99],
10688
+ 'category': ['Electronics', 'Books', 'Electronics', 'Clothing', 'Books'],
10689
+ 'status': ['Completed', 'Pending', 'Completed', 'Shipped', 'Pending']
10690
+ }
10691
+ sample_df = pd.DataFrame(sample_data)
10692
+ st.dataframe(sample_df)
10693
+
10694
+ with col2:
10695
+ st.write("**Keterangan Fitur:**")
10696
+ st.markdown("""
10697
+ - **🔑 Primary Key**: Kolom dengan nilai unik (ID, code)
10698
+ - **🔗 Foreign Key**: Kolom referensi ke tabel lain
10699
+ - **📊 ERD Dinamis**: Diagram relasi otomatis
10700
+ - **📈 Visualisasi Aman**: Error handling untuk semua chart
10701
+ - **🎨 Warna Konsisten**: Skema warna yang harmonis
10702
+ - **📋 Analisis Komprehensif**: Statistik detail dan laporan
10703
+ """)
10704
+
10705
+ # Download template
10706
+ csv_template = sample_df.to_csv(index=False)
10707
+ st.download_button(
10708
+ "📥 Download Template CSV",
10709
+ csv_template,
10710
+ "analysis_template.csv",
10711
+ "text/csv"
10712
+ )
8418
10713
 
8419
10714
 
8420
10715
  with tab8:
@@ -8426,12 +10721,29 @@ if df is not None:
8426
10721
  type=['csv', 'xlsx', 'xls'],
8427
10722
  key="stock_uploader"
8428
10723
  )
8429
- with st.expander("📜 Ketarangan Lengkap Tentang Aalisis Saham", expanded=False):
10724
+ with st.expander("📜 Ketarangan Lengkap Tentang Analisis Saham", expanded=False):
10725
+ st.markdown(
10726
+ """
10727
+ <img src="https://s3-ap-southeast-1.amazonaws.com/membership-media/public/uploads/posts/1653502344_Memahami_Apa_Itu_Saham_Dan_Cara_Kerjanya_1170x658.jpg" class="responsive-img">
10728
+ """,
10729
+ unsafe_allow_html=True
10730
+ )
8430
10731
  st.markdown("""
8431
- **Penjelasan Penting 📛**
10732
+
8432
10733
 
8433
10734
 ### 🧾 Pengembangan Saham
8434
- - Saham Ini Masih Tahap Pengembangan Dan Masih Tahap Uji Coba Dan kalian bisa menggunakan model yang ada
10735
+ - Saham merupakan salah satu instrumen investasi yang populer di kalangan investor. Dengan membeli saham, investor memiliki sebagian kepemilikan dalam sebuah perusahaan dan berhak atas sebagian keuntungan perusahaan tersebut.
10736
+ - Analisis saham melibatkan evaluasi berbagai faktor seperti kinerja keuangan perusahaan, kondisi pasar, tren industri, dan faktor ekonomi makro untuk membuat keputusan investasi yang lebih baik.
10737
+ - Analisis saham dapat dilakukan dengan perangkat yang populer seperti Microsoft Excel atau Google Sheets.
10738
+
10739
+ ### 📈 Analisis Grafik Saham
10740
+ - Analisis grafik saham adalah proses menganalisis data saham untuk membuat grafik yang menampilkan informasi tentang saham secara visual.
10741
+ - Grafik saham membantu membandingkan beberapa saham sekaligus, memperlihatkan tren harga, dan menilai peluang investasi dengan lebih baik sehingga dapat meningkatkan keuntungan investasi.
10743
+
8435
10747
  """)
8436
10748
  if uploaded_file is not None:
8437
10749
  try:
@@ -9545,7 +11857,7 @@ if df is not None:
9545
11857
  # Sidebar untuk memilih jenis kalkulator
9546
11858
  calc_type = st.sidebar.selectbox(
9547
11859
  "Pilih Jenis Kalkulator",
9548
- ["Kalkulator Dasar", "Kalkulator Ilmiah", "Kalkulator Keuangan", "Konverter Satuan", "Kalkulator BMI", "Kalkulator Waktu"]
11860
+ ["🔢 Kalkulator Dasar", "🔬 Kalkulator Ilmiah", "💰 Kalkulator Keuangan", "📐 Konverter Satuan", "⚖️ Kalkulator BMI", "Kalkulator Waktu"]
9549
11861
  )
9550
11862
 
9551
11863
  # Initialize session state for history
@@ -9558,7 +11870,7 @@ if df is not None:
9558
11870
  if len(st.session_state.calc_history) > 10: # Batasi hanya 10 riwayat terakhir
9559
11871
  st.session_state.calc_history.pop(0)
9560
11872
 
9561
- if calc_type == "Kalkulator Dasar":
11873
+ if calc_type == "🔢 Kalkulator Dasar":
9562
11874
  st.subheader("🔢 Kalkulator Dasar")
9563
11875
 
9564
11876
  # Layout dengan columns untuk tampilan kalkulator
@@ -9635,7 +11947,7 @@ if df is not None:
9635
11947
  if st.button("🗑️ Reset", use_container_width=True):
9636
11948
  st.rerun()
9637
11949
 
9638
- elif calc_type == "Kalkulator Ilmiah":
11950
+ elif calc_type == "🔬 Kalkulator Ilmiah":
9639
11951
  st.subheader("🔬 Kalkulator Ilmiah")
9640
11952
 
9641
11953
  col1, col2 = st.columns(2)
@@ -9754,7 +12066,7 @@ if df is not None:
9754
12066
  except Exception as e:
9755
12067
  st.error(f"❌ Error: {str(e)}")
9756
12068
 
9757
- elif calc_type == "Kalkulator Keuangan":
12069
+ elif calc_type == "💰 Kalkulator Keuangan":
9758
12070
  st.subheader("💰 Kalkulator Keuangan")
9759
12071
 
9760
12072
  finance_option = st.selectbox(
@@ -9836,7 +12148,7 @@ if df is not None:
9836
12148
  """)
9837
12149
  add_to_history(f"Cicilan: Rp {loan_amount:,.0f} → Rp {monthly_payment:,.0f}/bulan")
9838
12150
 
9839
- elif calc_type == "Konverter Satuan":
12151
+ elif calc_type == "📐 Konverter Satuan":
9840
12152
  st.subheader("📐 Konverter Satuan")
9841
12153
 
9842
12154
  conversion_type = st.selectbox(
@@ -9921,8 +12233,8 @@ if df is not None:
9921
12233
  st.success(f"**Hasil:** {calc_str}")
9922
12234
  add_to_history(calc_str)
9923
12235
 
9924
- elif calc_type == "Kalkulator BMI":
9925
- st.subheader("💪 Kalkulator BMI (Body Mass Index)")
12236
+ elif calc_type == "⚖️ Kalkulator BMI":
12237
+ st.subheader("⚖️ Kalkulator BMI (Body Mass Index)")
9926
12238
 
9927
12239
  col1, col2 = st.columns(2)
9928
12240
 
@@ -9959,7 +12271,7 @@ if df is not None:
9959
12271
  """)
9960
12272
  add_to_history(f"BMI: {bmi:.1f} ({category})")
9961
12273
 
9962
- elif calc_type == "Kalkulator Waktu":
12274
+ elif calc_type == "Kalkulator Waktu":
9963
12275
  st.subheader("⏰ Kalkulator Waktu")
9964
12276
 
9965
12277
  time_option = st.selectbox("Pilih jenis perhitungan", [
@@ -10095,7 +12407,7 @@ if df is not None:
10095
12407
  st.error("**🧹 Pembersihan Data**\n\nAuto-clean missing values")
10096
12408
 
10097
12409
  # Video Tutorial (placeholder)
10098
- st.markdown("### 🎥 Video Tutorial Penggunaan V2.2.5")
12410
+ st.markdown("### 🎥 Video Tutorial Penggunaan V2.3.8")
10099
12411
  import streamlit.components.v1 as components
10100
12412
  google_drive_id = "1obx6q2jQS1fRrNi1E4VpAPlyI_rR9nO5"
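Google Drive videos are typically embedded by iframing the file's `/preview` URL; a hedged sketch of how the ID above is usually wired up (the app's actual embed markup sits outside this hunk):

```python
# Assumed embed pattern: Google Drive exposes a /preview URL per file ID
# that can be iframed. The app's real embed code follows this hunk.
import streamlit.components.v1 as components

google_drive_id = "1obx6q2jQS1fRrNi1E4VpAPlyI_rR9nO5"
components.iframe(
    f"https://drive.google.com/file/d/{google_drive_id}/preview",
    height=480,
)
```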
10101
12413
 
@@ -10464,7 +12776,8 @@ if df is not None:
10464
12776
  with col3:
10465
12777
  st.markdown("""
10466
12778
  ### 🔄 Update
10467
- - Versi terbaru: 2.2.5
12779
+ - Versi terbaru: 2.3.8
12780
+ - Rilis: Oktober 2025
10468
12781
  - Last updated: 2025
10469
12782
  - Compatibility: Python 3.8+
10470
12783
  """)