vn-backtest 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,20 @@
1
+ import logging
2
+
3
+ from .strategy import Strategy
4
+ from .engine import BacktestEngine
5
+ from .analysis import PerformanceAnalyzer
6
+ from .reporter import ReportGenerator
7
+ from .optimizer import ParameterOptimizer
8
+
9
+ __all__ = [
10
+ "Strategy",
11
+ "BacktestEngine",
12
+ "PerformanceAnalyzer",
13
+ "ReportGenerator",
14
+ "ParameterOptimizer",
15
+ ]
16
+
17
+ # Set the default handler of the library-wide "vn_backtest" logger to NullHandler.
18
+ # This ensures the library never prints anything to the screen on its own
19
+ # unless the user explicitly configures logging in their own application.
20
+ logging.getLogger("vn_backtest").addHandler(logging.NullHandler())
@@ -0,0 +1,405 @@
1
+ import pandas as pd
2
+ import numpy as np
3
+ from typing import Dict, Any
4
+
5
+
6
class PerformanceAnalyzer:
    """
    Computes key performance metrics for trading strategies,
    comparing the results with the VN-Index benchmark.
    """

    # Value of the "Note" column that marks a trade closed automatically
    # at the end of a backtest.
    _AUTO_CLOSE_NOTE = "Auto-closed at end of backtest"

    @staticmethod
    def calculate_metrics(
        equity_curve: pd.DataFrame,
        trades: pd.DataFrame,
        benchmark_data: pd.DataFrame = None,
        initial_cash: float = 100_000_000.0,
        risk_free_rate: float = 0.04,  # 4% risk-free rate typical in VN
        include_auto_close: bool = True,
    ) -> Dict[str, Any]:
        """
        Calculate portfolio-level, trade-level and benchmark-relative metrics.

        Args:
            equity_curve (pd.DataFrame): DataFrame with a datetime index and an
                'Equity' column. It is only read, never modified.
            trades (pd.DataFrame): DataFrame of executed trades. The columns read
                are 'Date', 'Ticker', 'Action', 'Quantity', 'Price', 'Fee', 'Tax',
                'TotalValue' (cash dividends) and, when present, 'AdvanceFee' and
                'Note'. ``None`` is treated as "no trades".
            benchmark_data (pd.DataFrame or dict, optional): DataFrame with a
                datetime index and a 'Close' column. A dict of such DataFrames is
                also accepted; only its first value is used.
            initial_cash (float): Starting portfolio value.
            risk_free_rate (float): Annualized risk-free rate.
            include_auto_close (bool): When False, trades whose 'Note' equals
                "Auto-closed at end of backtest" are left out of the trade-level
                statistics.

        Returns:
            Dict: Financial metrics. An empty dict is returned when
            ``equity_curve`` is empty. 'sharpe_ratio' and 'sortino_ratio' are
            (CAGR - risk_free_rate) divided by the annualized (downside)
            volatility. 'total_trades' counts completed round-trips while
            'total_orders' counts raw BUY + SELL rows.
        """
        if equity_curve.empty:
            return {}
        if trades is None:
            trades = pd.DataFrame()

        # Only derived Series are created below, so the caller's frame is untouched.
        equity = equity_curve["Equity"]

        final_equity = equity.iloc[-1]
        total_return = (final_equity - initial_cash) / initial_cash

        # Calendar duration of the backtest
        start_date = equity.index[0]
        end_date = equity.index[-1]
        duration_days = (end_date - start_date).days
        years = duration_days / 365.25

        # CAGR (a wiped-out account is reported as -100%)
        if final_equity <= 0:
            cagr = -1.0
        elif years > 0:
            cagr = (final_equity / initial_cash) ** (1 / years) - 1
        else:
            cagr = 0.0

        # Daily returns; the first row has no previous day and is set to 0.
        daily_returns = equity.pct_change().fillna(0.0)

        # Annualized volatility. std() of a single observation is NaN; report
        # 0.0 instead so the result stays a plain finite number.
        daily_vol = daily_returns.std()
        if pd.isna(daily_vol):
            daily_vol = 0.0
        ann_vol = daily_vol * np.sqrt(252)

        sharpe_ratio = (cagr - risk_free_rate) / ann_vol if ann_vol > 0 else 0.0

        # Sortino: downside deviation treats every positive return as 0.
        downside_diff = np.minimum(daily_returns, 0.0)
        downside_vol = np.sqrt(np.mean(downside_diff**2)) * np.sqrt(252)
        sortino_ratio = (
            (cagr - risk_free_rate) / downside_vol if downside_vol > 0 else 0.0
        )

        # Drawdowns relative to the running peak
        running_max = equity.cummax()
        drawdown = (equity - running_max) / running_max
        max_drawdown = drawdown.min()

        # Longest run of consecutive rows spent below the previous peak.
        # Each row at a peak starts a new group, so the cumulative sum inside a
        # group is the length of the current underwater streak.
        is_in_drawdown = drawdown < 0
        drawdown_streaks = is_in_drawdown.groupby((~is_in_drawdown).cumsum()).cumsum()
        max_dd_duration = (
            int(drawdown_streaks.max()) if not drawdown_streaks.empty else 0
        )

        # Trade statistics. Optionally drop auto-closed trades so they do not
        # skew win rate / profit factor.
        strategy_trades = trades
        if not include_auto_close and not trades.empty and "Note" in trades.columns:
            strategy_trades = trades[
                trades["Note"].isna()
                | (trades["Note"] != PerformanceAnalyzer._AUTO_CLOSE_NOTE)
            ]

        total_orders = 0  # raw BUY + SELL row count
        completed_trades = []
        if not strategy_trades.empty:
            actions = strategy_trades["Action"]
            total_orders = int(actions.isin(["BUY", "SELL"]).sum())
            if total_orders > 0:
                matching_trades = strategy_trades[
                    actions.isin(["BUY", "SELL", "DIVIDEND_STOCK", "DIVIDEND_CASH"])
                ]
                completed_trades = PerformanceAnalyzer._match_round_trips(
                    matching_trades
                )

        # total_trades is the number of completed round-trips (BUY -> SELL),
        # not the raw order count.
        total_trades = len(completed_trades)

        win_rate = 0.0
        profit_factor = 0.0
        avg_trade_return = 0.0
        best_trade = 0.0
        worst_trade = 0.0
        avg_hold_days = 0.0

        if total_trades > 0:
            trade_returns = [tc["return"] for tc in completed_trades]
            trade_profits = [tc["profit"] for tc in completed_trades]

            wins = [p for p in trade_profits if p > 0]
            losses = [p for p in trade_profits if p <= 0]

            win_rate = len(wins) / total_trades

            sum_wins = float(sum(wins))
            sum_losses = float(abs(sum(losses)))
            if sum_losses > 1e-4:
                profit_factor = sum_wins / sum_losses
            else:
                # No losses at all: infinite when there were gains, else 0.
                profit_factor = float("inf") if sum_wins > 1e-4 else 0.0

            avg_trade_return = np.mean(trade_returns)
            best_trade = np.max(trade_returns)
            worst_trade = np.min(trade_returns)
            avg_hold_days = np.mean([tc["hold_days"] for tc in completed_trades])

        # Benchmark metrics (defaults are used when no benchmark is supplied)
        benchmark_return = 0.0
        benchmark_cagr = 0.0
        alpha = 0.0
        beta = 1.0
        outperformance = 0.0

        primary_bench_data = None
        if isinstance(benchmark_data, dict):
            if benchmark_data:
                primary_bench_data = next(iter(benchmark_data.values()))
        elif isinstance(benchmark_data, pd.DataFrame):
            primary_bench_data = benchmark_data

        if primary_bench_data is not None and not primary_bench_data.empty:
            (
                benchmark_return,
                benchmark_cagr,
                outperformance,
                alpha,
                beta,
            ) = PerformanceAnalyzer._benchmark_metrics(
                equity.index,
                daily_returns,
                primary_bench_data,
                years,
                total_return,
                risk_free_rate,
            )

        return {
            "duration_days": duration_days,
            "years": round(years, 2),
            "initial_cash": initial_cash,
            "final_equity": final_equity,
            "total_return": total_return,
            "cagr": cagr,
            "annualized_vol": ann_vol,
            "sharpe_ratio": sharpe_ratio,
            "sortino_ratio": sortino_ratio,
            "max_drawdown": max_drawdown,
            "max_drawdown_duration": max_dd_duration,
            "total_trades": total_trades,  # completed round-trips (BUY -> SELL)
            "total_orders": total_orders,  # total BUY + SELL orders (raw order count)
            "win_rate": win_rate,
            "profit_factor": profit_factor,
            "avg_trade_return": avg_trade_return,
            "best_trade": best_trade,
            "worst_trade": worst_trade,
            "avg_hold_days": round(avg_hold_days, 1),
            "benchmark_return": benchmark_return,
            "benchmark_cagr": benchmark_cagr,
            "outperformance": outperformance,
            "alpha": alpha,
            "beta": beta,
            "risk_free_rate": risk_free_rate,
            "completed_trades": completed_trades,
        }

    @staticmethod
    def _match_round_trips(matching_trades: pd.DataFrame) -> list:
        """
        Pair SELL rows with earlier BUY lots, FIFO per ticker.

        Args:
            matching_trades (pd.DataFrame): Rows whose 'Action' is BUY, SELL,
                DIVIDEND_STOCK or DIVIDEND_CASH.

        Returns:
            list: One dict per SELL that matched at least one lot, with keys
            'ticker', 'profit', 'return', 'hold_days' (quantity-weighted) and
            'buy_cost'.
        """
        import logging

        # Module logger instead of the root logger: the root-level
        # logging.warning() installs a stderr handler when none is configured.
        logger = logging.getLogger(__name__)

        completed_trades = []
        buy_queues = {}

        # Chronological order. A stable sort keeps the original row order for
        # trades sharing the same date, which FIFO matching depends on.
        trades_sorted = matching_trades.sort_values("Date", kind="mergesort")

        for _, t in trades_sorted.iterrows():
            ticker = t["Ticker"]
            lots = buy_queues.setdefault(ticker, [])
            action = t["Action"]

            if action == "BUY":
                # Fold the cash-advance fee, when present, into the buy fee.
                advance_fee = (
                    t["AdvanceFee"]
                    if "AdvanceFee" in t.index and pd.notna(t["AdvanceFee"])
                    else 0.0
                )
                lots.append(
                    {
                        "qty": t["Quantity"],
                        "price": t["Price"],
                        "date": t["Date"],
                        "fee": t["Fee"] + advance_fee,
                    }
                )

            elif action == "DIVIDEND_STOCK":
                # Spread the bonus shares over the existing lots (more shares,
                # lower unit price) instead of adding a zero-cost lot.
                total_qty_before = sum(lot["qty"] for lot in lots)
                if total_qty_before > 0:
                    ratio = t["Quantity"] / total_qty_before
                    for lot in lots:
                        lot["qty"] *= 1.0 + ratio
                        lot["price"] /= 1.0 + ratio
                else:
                    # Anomaly: bonus shares with nothing held. Record them as
                    # a zero-cost lot and warn.
                    logger.warning(
                        f"CẢNH BÁO: Nhận cổ tức cổ phiếu cho {ticker} vào ngày {t['Date'].strftime('%d/%m/%Y')} "
                        f"nhưng hàng đợi mua trống (không nắm giữ cổ phiếu trước ngày chốt quyền). "
                        f"Điều này có thể do sai lệch dữ liệu lịch sử hoặc giao dịch."
                    )
                    lots.append(
                        {
                            "qty": t["Quantity"],
                            "price": 0.0,
                            "date": t["Date"],
                            "fee": 0.0,
                        }
                    )

            elif action == "DIVIDEND_CASH":
                net_amount = t["TotalValue"]
                total_qty = sum(lot["qty"] for lot in lots)
                if total_qty > 0:
                    # Lower the cost basis of open lots by the dividend per share.
                    per_share = net_amount / total_qty
                    for lot in lots:
                        lot["price"] -= per_share
                else:
                    # Nothing open: credit the most recent completed trade of
                    # this ticker, if there is one.
                    last_trade = next(
                        (
                            tc
                            for tc in reversed(completed_trades)
                            if tc["ticker"] == ticker
                        ),
                        None,
                    )
                    if last_trade is not None:
                        last_trade["profit"] += net_amount
                        if last_trade.get("buy_cost", 0) > 0:
                            last_trade["return"] = (
                                last_trade["profit"] / last_trade["buy_cost"]
                            )

            elif action == "SELL":
                sell_qty = t["Quantity"]
                sell_date = t["Date"]

                total_buy_cost = 0.0
                days_held_sum = 0.0
                matched_qty_sum = 0

                while sell_qty > 1e-5 and lots:
                    buy_lot = lots[0]
                    matched_qty = min(sell_qty, buy_lot["qty"])

                    # Cost and fee share of the matched part of this lot
                    prop_buy_cost = matched_qty * buy_lot["price"]
                    prop_buy_fee = buy_lot["fee"] * (matched_qty / buy_lot["qty"])
                    total_buy_cost += prop_buy_cost + prop_buy_fee

                    hold_days = (sell_date - buy_lot["date"]).days
                    days_held_sum += hold_days * matched_qty
                    matched_qty_sum += matched_qty

                    # Consume the lot; drop it once (numerically) empty
                    buy_lot["fee"] -= prop_buy_fee
                    buy_lot["qty"] -= matched_qty
                    sell_qty -= matched_qty
                    if buy_lot["qty"] < 1e-5:
                        lots.pop(0)

                if matched_qty_sum > 0:
                    # Only the matched fraction of the SELL's proceeds, fee and
                    # tax belongs to this round-trip.
                    matched_fraction = matched_qty_sum / t["Quantity"]
                    net_proceeds = (
                        matched_qty_sum * t["Price"]
                        - t["Fee"] * matched_fraction
                        - t["Tax"] * matched_fraction
                    )
                    trade_profit = net_proceeds - total_buy_cost
                    trade_return = (
                        trade_profit / total_buy_cost if total_buy_cost > 0 else 0.0
                    )
                    completed_trades.append(
                        {
                            "ticker": ticker,
                            "profit": trade_profit,
                            "return": trade_return,
                            "hold_days": days_held_sum / matched_qty_sum,
                            "buy_cost": total_buy_cost,
                        }
                    )

        return completed_trades

    @staticmethod
    def _benchmark_metrics(
        equity_index,
        daily_returns: pd.Series,
        bench_frame: pd.DataFrame,
        years: float,
        total_return: float,
        risk_free_rate: float,
    ) -> tuple:
        """
        Compare the strategy with a benchmark price series.

        Args:
            equity_index: Index of the equity curve (datetime-like).
            daily_returns (pd.Series): Strategy daily returns on ``equity_index``.
            bench_frame (pd.DataFrame): Benchmark data with a 'Close' column.
            years (float): Backtest length in years.
            total_return (float): Strategy total return.
            risk_free_rate (float): Annualized risk-free rate.

        Returns:
            tuple: (benchmark_return, benchmark_cagr, outperformance, alpha, beta).
            Alpha is the annualized intercept (daily intercept * 252) and beta
            the slope of an OLS fit of daily excess returns.
        """
        import logging

        logger = logging.getLogger(__name__)

        # Compare dates tz-naive on both sides to avoid timezone mismatches.
        bench_close = bench_frame["Close"].copy()
        if getattr(bench_close.index, "tz", None) is not None:
            bench_close.index = bench_close.index.tz_localize(None)
        strategy_index = (
            equity_index.tz_localize(None)
            if getattr(equity_index, "tz", None) is not None
            else equity_index
        )

        # Forward-fill only: back-filling would leak future prices.
        bench_close_aligned = bench_close.reindex(strategy_index).ffill()
        missing = bench_close_aligned.isna()
        if missing.any():
            logger.warning(
                "Dữ liệu benchmark có giá trị NaN ở các ngày đầu của backtest. "
                "Chiến lược bắt đầu trước khi có dữ liệu benchmark; "
                "các ngày này sẽ bị loại khỏi tính toán Alpha/Beta."
            )

        # Attach the values positionally. Assigning the tz-naive Series to a
        # tz-aware index by label would realign it and lose every value.
        close_on_equity = pd.Series(
            bench_close_aligned.to_numpy(), index=equity_index
        )

        # Benchmark daily returns. Days without a known previous benchmark
        # price stay NaN so they are excluded from the regression below; only
        # the very first row (where the strategy return is 0 by construction
        # as well) is set to 0.
        bench_returns = close_on_equity.pct_change()
        if not bool(missing.iloc[0]):
            bench_returns.iloc[0] = 0.0

        # Total return from the first to the last available benchmark price
        bench_valid = close_on_equity.dropna()
        if not bench_valid.empty:
            bench_start = bench_valid.iloc[0]
            bench_end = bench_valid.iloc[-1]
            benchmark_return = (
                (bench_end - bench_start) / bench_start if bench_start > 0 else 0.0
            )
        else:
            bench_start = 0.0
            bench_end = 0.0
            benchmark_return = 0.0

        if years > 0 and bench_end > 0 and bench_start > 0:
            benchmark_cagr = (bench_end / bench_start) ** (1 / years) - 1
        else:
            benchmark_cagr = 0.0

        outperformance = total_return - benchmark_return

        # OLS on daily excess returns (return minus risk_free_rate / 252)
        daily_rf = risk_free_rate / 252.0
        excess_strat = daily_returns.to_numpy(dtype=float) - daily_rf
        excess_bench = bench_returns.to_numpy(dtype=float) - daily_rf
        usable = ~np.isnan(excess_bench) & ~np.isnan(excess_strat)

        if int(usable.sum()) > 1:
            # excess_strat = beta * excess_bench + alpha_daily
            beta, alpha_daily = np.polyfit(
                excess_bench[usable], excess_strat[usable], 1
            )
            alpha = alpha_daily * 252.0  # annualized alpha
        else:
            beta = 1.0
            alpha = 0.0

        return benchmark_return, benchmark_cagr, outperformance, alpha, beta