vm-tool 1.0.32__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- examples/README.md +5 -0
- examples/__init__.py +1 -0
- examples/cloud/README.md +3 -0
- examples/cloud/__init__.py +1 -0
- examples/cloud/ssh_identity_file.py +27 -0
- examples/cloud/ssh_password.py +27 -0
- examples/cloud/template_cloud_setup.py +36 -0
- examples/deploy_full_setup.py +44 -0
- examples/docker-compose.example.yml +47 -0
- examples/ec2-setup.sh +95 -0
- examples/github-actions-ec2.yml +245 -0
- examples/github-actions-full-setup.yml +58 -0
- examples/local/.keep +1 -0
- examples/local/README.md +3 -0
- examples/local/__init__.py +1 -0
- examples/local/template_local_setup.py +27 -0
- examples/production-deploy.sh +70 -0
- examples/rollback.sh +52 -0
- examples/setup.sh +52 -0
- examples/ssh_key_management.py +22 -0
- examples/version_check.sh +3 -0
- vm_tool/__init__.py +0 -0
- vm_tool/alerting.py +274 -0
- vm_tool/audit.py +118 -0
- vm_tool/backup.py +125 -0
- vm_tool/benchmarking.py +200 -0
- vm_tool/cli.py +761 -0
- vm_tool/cloud.py +125 -0
- vm_tool/completion.py +200 -0
- vm_tool/compliance.py +104 -0
- vm_tool/config.py +92 -0
- vm_tool/drift.py +98 -0
- vm_tool/generator.py +462 -0
- vm_tool/health.py +197 -0
- vm_tool/history.py +131 -0
- vm_tool/kubernetes.py +89 -0
- vm_tool/metrics.py +183 -0
- vm_tool/notifications.py +152 -0
- vm_tool/plugins.py +119 -0
- vm_tool/policy.py +197 -0
- vm_tool/rbac.py +140 -0
- vm_tool/recovery.py +169 -0
- vm_tool/reporting.py +218 -0
- vm_tool/runner.py +445 -0
- vm_tool/secrets.py +285 -0
- vm_tool/ssh.py +150 -0
- vm_tool/state.py +122 -0
- vm_tool/strategies/__init__.py +16 -0
- vm_tool/strategies/ab_testing.py +258 -0
- vm_tool/strategies/blue_green.py +227 -0
- vm_tool/strategies/canary.py +277 -0
- vm_tool/validation.py +267 -0
- vm_tool/vm_setup/cleanup.yml +27 -0
- vm_tool/vm_setup/docker/create_docker_service.yml +63 -0
- vm_tool/vm_setup/docker/docker_setup.yml +7 -0
- vm_tool/vm_setup/docker/install_docker_and_compose.yml +92 -0
- vm_tool/vm_setup/docker/login_to_docker_hub.yml +6 -0
- vm_tool/vm_setup/github/git_configuration.yml +68 -0
- vm_tool/vm_setup/inventory.yml +1 -0
- vm_tool/vm_setup/k8s.yml +15 -0
- vm_tool/vm_setup/main.yml +27 -0
- vm_tool/vm_setup/monitoring.yml +42 -0
- vm_tool/vm_setup/project_service.yml +17 -0
- vm_tool/vm_setup/push_code.yml +40 -0
- vm_tool/vm_setup/setup.yml +17 -0
- vm_tool/vm_setup/setup_project_env.yml +7 -0
- vm_tool/webhooks.py +83 -0
- vm_tool-1.0.32.dist-info/METADATA +213 -0
- vm_tool-1.0.32.dist-info/RECORD +73 -0
- vm_tool-1.0.32.dist-info/WHEEL +5 -0
- vm_tool-1.0.32.dist-info/entry_points.txt +2 -0
- vm_tool-1.0.32.dist-info/licenses/LICENSE +21 -0
- vm_tool-1.0.32.dist-info/top_level.txt +2 -0
vm_tool/policy.py
ADDED
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
"""Policy as Code framework for deployment policies."""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
from typing import Dict, Any, Callable, List, Optional
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
|
|
7
|
+
logger = logging.getLogger(__name__)
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclass
class PolicyViolation:
    """Record describing one finding produced while evaluating a policy."""

    # Name of the policy the finding is reported under.
    policy_name: str
    # Human-readable explanation of the finding.
    message: str
    # One of "error", "warning" or "info".
    severity: str
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class Policy:
    """Abstract base for named policies; subclasses supply ``evaluate``."""

    def __init__(self, name: str, description: str):
        # Both values are stored verbatim as public attributes.
        self.name, self.description = name, description

    def evaluate(self, context: Dict[str, Any]) -> List[PolicyViolation]:
        """Return the violations found in ``context``.

        The base class carries no rules of its own, so this always raises
        ``NotImplementedError``; concrete policies must override it.
        """
        raise NotImplementedError
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class DeploymentPolicy(Policy):
    """Policy composed of independent rule callables.

    Each rule is called with the evaluation context and is expected to return
    a ``PolicyViolation`` when the rule is broken, or a falsy value (such as
    ``None``) when it is satisfied.
    """

    def __init__(self, name: str, description: str, rules: List[Callable]):
        super().__init__(name, description)
        self.rules = rules

    def evaluate(self, context: Dict[str, Any]) -> List[PolicyViolation]:
        """Run every rule against ``context`` and collect the violations.

        A rule that raises does not abort the evaluation: the failure is
        logged together with the policy name, the rule name and the traceback,
        and the remaining rules still run.

        NOTE(review): a crashing rule contributes no violation, i.e. the
        policy fails open for that rule -- confirm this is intended.

        Args:
            context: Mapping of deployment facts handed to every rule.

        Returns:
            The truthy results returned by the rules, in rule order.
        """
        violations = []

        for rule in self.rules:
            try:
                violation = rule(context)
                if violation:
                    violations.append(violation)
            except Exception:
                # Identify *which* rule of *which* policy broke; the bare
                # exception message alone is not enough to debug a rule.
                logger.exception(
                    "Policy rule failed: policy=%s rule=%s",
                    self.name,
                    getattr(rule, "__name__", repr(rule)),
                )

        return violations
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class PolicyEngine:
    """Registry of policies with helpers to evaluate and enforce them.

    A new engine starts with two built-in ``DeploymentPolicy`` objects,
    ``"production_safety"`` and ``"security"``, whose rules are the private
    rule methods defined at the bottom of this class.
    """

    def __init__(self):
        self.policies: List[Policy] = []
        self._register_default_policies()

    def _register_default_policies(self):
        """Register the built-in production-safety and security policies."""
        # Production deployment policy
        self.add_policy(
            DeploymentPolicy(
                name="production_safety",
                description="Safety checks for production deployments",
                rules=[
                    self._require_approval_for_production,
                    self._require_backup_before_deploy,
                    self._require_health_checks,
                ],
            )
        )

        # Security policy
        self.add_policy(
            DeploymentPolicy(
                name="security",
                description="Security requirements",
                rules=[
                    self._require_secrets_encryption,
                    self._no_hardcoded_credentials,
                ],
            )
        )

    def add_policy(self, policy: Policy):
        """Append ``policy`` to the engine and log its registration."""
        self.policies.append(policy)
        logger.info(f"Policy registered: {policy.name}")

    def evaluate_all(self, context: Dict[str, Any]) -> List[PolicyViolation]:
        """Evaluate every registered policy against ``context``.

        Returns:
            All violations, concatenated in policy registration order.
        """
        all_violations = []

        for policy in self.policies:
            all_violations.extend(policy.evaluate(context))

        return all_violations

    def enforce(self, context: Dict[str, Any]) -> bool:
        """Evaluate all policies and report whether deployment may proceed.

        Violations with severity ``"warning"`` are logged but do not fail
        enforcement; violations with severity ``"error"`` are logged and make
        the result ``False``. Other severities are neither logged nor counted.

        Returns:
            ``True`` when no ``"error"`` violation was found, else ``False``.
        """
        violations = self.evaluate_all(context)

        errors = [v for v in violations if v.severity == "error"]
        warnings = [v for v in violations if v.severity == "warning"]

        for v in warnings:
            logger.warning(f"Policy warning [{v.policy_name}]: {v.message}")

        if errors:
            for v in errors:
                logger.error(f"Policy violation [{v.policy_name}]: {v.message}")
            return False

        return True

    # Default policy rules
    def _require_approval_for_production(
        self, context: Dict[str, Any]
    ) -> Optional[PolicyViolation]:
        """Error when ``environment`` is "production" and ``approved`` is falsy."""
        if context.get("environment") == "production" and not context.get("approved"):
            return PolicyViolation(
                policy_name="production_safety",
                message="Production deployments require manual approval",
                severity="error",
            )
        return None

    def _require_backup_before_deploy(
        self, context: Dict[str, Any]
    ) -> Optional[PolicyViolation]:
        """Warn when a production deployment has no ``backup_created`` flag."""
        if context.get("environment") == "production" and not context.get(
            "backup_created"
        ):
            return PolicyViolation(
                policy_name="production_safety",
                message="Backup required before production deployment",
                severity="warning",
            )
        return None

    def _require_health_checks(
        self, context: Dict[str, Any]
    ) -> Optional[PolicyViolation]:
        """Warn when ``health_checks_enabled`` is missing or falsy.

        Unlike the other production-safety rules this one is not restricted
        to the "production" environment.
        """
        if not context.get("health_checks_enabled"):
            return PolicyViolation(
                policy_name="production_safety",
                message="Health checks must be enabled",
                severity="warning",
            )
        return None

    def _require_secrets_encryption(
        self, context: Dict[str, Any]
    ) -> Optional[PolicyViolation]:
        """Error when the context flags ``secrets_plaintext`` as truthy."""
        if context.get("secrets_plaintext"):
            return PolicyViolation(
                policy_name="security",
                message="Secrets must be encrypted",
                severity="error",
            )
        return None

    def _no_hardcoded_credentials(
        self, context: Dict[str, Any]
    ) -> Optional[PolicyViolation]:
        """Warn when the compose file text contains "password:" or "secret:".

        The match is a case-insensitive substring test on
        ``context["compose_file_content"]``. A missing key, an explicit
        ``None`` or an empty value is treated as empty text, and ``bytes``
        content is decoded as UTF-8, so the check is not silently skipped
        because ``.lower()`` failed on an unexpected value.
        """
        content = context.get("compose_file_content") or ""
        if isinstance(content, bytes):
            content = content.decode("utf-8", errors="replace")

        lowered = content.lower()  # lowercase once, test both markers
        if "password:" in lowered or "secret:" in lowered:
            return PolicyViolation(
                policy_name="security",
                message="Possible hardcoded credentials detected",
                severity="warning",
            )
        return None
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
# Global policy engine
|
|
189
|
+
_policy_engine = None
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def get_policy_engine() -> PolicyEngine:
    """Return the module-wide ``PolicyEngine``, creating it on first use."""
    global _policy_engine
    engine = _policy_engine
    if engine is None:
        # First call: build the engine and remember it for later callers.
        engine = _policy_engine = PolicyEngine()
    return engine
|
vm_tool/rbac.py
ADDED
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
"""Role-Based Access Control (RBAC) system."""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
from typing import Dict, Set, Optional, List
|
|
5
|
+
from enum import Enum
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
|
|
8
|
+
logger = logging.getLogger(__name__)
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class Permission(Enum):
    """Actions that can be granted to a user.

    Each member's value is the lowercase form of its name.
    """

    # Deployment lifecycle
    DEPLOY = "deploy"
    ROLLBACK = "rollback"
    VIEW_HISTORY = "view_history"
    # Administration
    MANAGE_CONFIG = "manage_config"
    MANAGE_SECRETS = "manage_secrets"
    MANAGE_USERS = "manage_users"
    # Observability and backups
    VIEW_METRICS = "view_metrics"
    MANAGE_BACKUPS = "manage_backups"
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class Role(Enum):
    """Built-in roles a user can hold.

    The permissions each role implies are listed in
    ``RBAC.ROLE_PERMISSIONS``.
    """

    ADMIN = "admin"
    DEPLOYER = "deployer"
    VIEWER = "viewer"
    OPERATOR = "operator"
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@dataclass
class User:
    """Account record holding a user's roles and directly granted permissions."""

    # Unique login name; also the key under which RBAC stores the user.
    username: str
    # Roles held by the user.
    roles: Set[Role]
    # Permissions granted individually, on top of what the roles provide.
    custom_permissions: Set[Permission]
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class RBAC:
    """In-memory role-based access control manager.

    Users are kept in ``self.users`` keyed by username. A user holds a
    permission when it was granted individually or when any of the user's
    roles lists it in ``ROLE_PERMISSIONS``.
    """

    # Default role permissions
    ROLE_PERMISSIONS = {
        Role.ADMIN: {
            Permission.DEPLOY,
            Permission.ROLLBACK,
            Permission.VIEW_HISTORY,
            Permission.MANAGE_CONFIG,
            Permission.MANAGE_SECRETS,
            Permission.MANAGE_USERS,
            Permission.VIEW_METRICS,
            Permission.MANAGE_BACKUPS,
        },
        Role.DEPLOYER: {
            Permission.DEPLOY,
            Permission.ROLLBACK,
            Permission.VIEW_HISTORY,
            Permission.VIEW_METRICS,
        },
        Role.OPERATOR: {
            Permission.DEPLOY,
            Permission.VIEW_HISTORY,
            Permission.VIEW_METRICS,
            Permission.MANAGE_BACKUPS,
        },
        Role.VIEWER: {Permission.VIEW_HISTORY, Permission.VIEW_METRICS},
    }

    def __init__(self):
        self.users: Dict[str, User] = {}

    def add_user(self, username: str, roles: List[Role]):
        """Register ``username`` with ``roles``.

        An existing entry with the same username is replaced, which also
        clears any custom permissions it had.
        """
        # Materialise once so a one-shot iterable is not already exhausted
        # by ``set(...)`` when the roles are logged below.
        role_list = list(roles)
        self.users[username] = User(
            username=username, roles=set(role_list), custom_permissions=set()
        )
        logger.info(f"User added: {username} with roles {[r.value for r in role_list]}")

    def grant_permission(self, username: str, permission: Permission):
        """Grant ``permission`` directly to ``username``.

        Unknown users are left untouched; a warning is logged instead of
        failing silently.
        """
        user = self.users.get(username)
        if user is None:
            logger.warning(
                f"Cannot grant {permission.value}: unknown user {username}"
            )
            return
        user.custom_permissions.add(permission)
        logger.info(f"Granted {permission.value} to {username}")

    def revoke_permission(self, username: str, permission: Permission):
        """Remove a directly granted ``permission`` from ``username``.

        Only custom permissions are affected; a permission the user still
        receives through a role remains effective. Unknown users are left
        untouched and a warning is logged.
        """
        user = self.users.get(username)
        if user is None:
            logger.warning(
                f"Cannot revoke {permission.value}: unknown user {username}"
            )
            return
        user.custom_permissions.discard(permission)
        logger.info(f"Revoked {permission.value} from {username}")

    def has_permission(self, username: str, permission: Permission) -> bool:
        """Return whether ``username`` holds ``permission``.

        Unknown users never have any permission.
        """
        user = self.users.get(username)
        if user is None:
            return False

        # Check custom permissions
        if permission in user.custom_permissions:
            return True

        # Check role permissions
        return any(
            permission in self.ROLE_PERMISSIONS.get(role, set())
            for role in user.roles
        )

    def require_permission(self, username: str, permission: Permission):
        """Raise ``PermissionError`` unless ``username`` holds ``permission``.

        A denial is also reported to the audit log as a ``PERMISSION_DENIED``
        event. Auditing is best effort: if the audit module cannot be
        imported or logging the event fails, the problem is logged and the
        ``PermissionError`` is still raised.

        Raises:
            PermissionError: If the user lacks the permission.
        """
        if self.has_permission(username, permission):
            return

        try:
            # Imported here rather than at module level, as in the original
            # code (presumably to avoid an import cycle -- TODO confirm).
            from vm_tool.audit import get_audit_logger, AuditEventType

            audit = get_audit_logger()
            audit.log_event(
                AuditEventType.PERMISSION_DENIED,
                user=username,
                action=permission.value,
                resource="system",
                success=False,
            )
        except Exception:
            # An audit failure must not replace the PermissionError that
            # callers rely on.
            logger.exception(
                "Failed to record audit event for denied permission"
            )

        raise PermissionError(
            f"User {username} does not have permission: {permission.value}"
        )
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
# Global RBAC instance
|
|
132
|
+
_rbac = None
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def get_rbac() -> RBAC:
    """Return the module-wide ``RBAC`` manager, creating it on first use."""
    global _rbac
    manager = _rbac
    if manager is None:
        # First call: build the manager and remember it for later callers.
        manager = _rbac = RBAC()
    return manager
|
vm_tool/recovery.py
ADDED
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
"""Error recovery mechanisms for failed deployments."""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
from typing import Optional, Callable, Any
|
|
5
|
+
from enum import Enum
|
|
6
|
+
|
|
7
|
+
logger = logging.getLogger(__name__)
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class RecoveryStrategy(Enum):
    """How ``ErrorRecovery.execute_with_recovery`` reacts to a failing call."""

    RETRY = "retry"  # call again, up to the configured number of attempts
    ROLLBACK = "rollback"  # attempt a rollback, then re-raise the error
    SKIP = "skip"  # log the error and return None instead of raising
    FAIL = "fail"  # no recovery: the error propagates unchanged
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class ErrorRecovery:
    """Run a callable under one of the ``RecoveryStrategy`` behaviours."""

    def __init__(
        self,
        max_retries: int = 3,
        retry_delay: int = 30,
        auto_rollback: bool = True,
    ):
        """Store the recovery settings.

        Args:
            max_retries: Total number of attempts made by the RETRY strategy.
                Values below 1 are treated as a single attempt.
            retry_delay: Seconds to sleep between attempts. Negative values
                are treated as 0.
            auto_rollback: Whether the ROLLBACK strategy attempts a rollback
                before re-raising the error.
        """
        self.max_retries = max_retries
        self.retry_delay = retry_delay
        self.auto_rollback = auto_rollback

    def execute_with_recovery(
        self,
        func: Callable,
        *args,
        strategy: RecoveryStrategy = RecoveryStrategy.RETRY,
        **kwargs,
    ) -> Any:
        """Execute function with error recovery.

        Args:
            func: Function to execute
            strategy: Recovery strategy to use
            *args, **kwargs: Arguments to pass to function

        Returns:
            Function result (``None`` when SKIP swallowed an error)

        Raises:
            Exception: Whatever ``func`` raised, for every strategy but SKIP.
        """
        if strategy == RecoveryStrategy.RETRY:
            return self._retry_on_failure(func, *args, **kwargs)
        elif strategy == RecoveryStrategy.ROLLBACK:
            return self._rollback_on_failure(func, *args, **kwargs)
        elif strategy == RecoveryStrategy.SKIP:
            return self._skip_on_failure(func, *args, **kwargs)
        else:  # FAIL
            return func(*args, **kwargs)

    def _retry_on_failure(self, func: Callable, *args, **kwargs) -> Any:
        """Call ``func`` until it succeeds or the attempts are used up.

        Re-raises the last error once every attempt has failed.
        """
        import time

        # Always try at least once: with max_retries <= 0 the loop would not
        # run at all and ``raise last_error`` would raise a TypeError on None.
        attempts = max(1, self.max_retries)
        # time.sleep() rejects negative values with ValueError.
        delay = max(0, self.retry_delay)

        last_error = None

        for attempt in range(1, attempts + 1):
            try:
                logger.info(f"Attempt {attempt}/{attempts}")
                result = func(*args, **kwargs)
                logger.info(f"✅ Success on attempt {attempt}")
                return result
            except Exception as e:
                last_error = e
                logger.warning(f"❌ Attempt {attempt} failed: {e}")

                if attempt < attempts:
                    logger.info(f"⏳ Retrying in {delay}s...")
                    time.sleep(delay)

        logger.error(f"❌ All {attempts} attempts failed")
        raise last_error

    def _rollback_on_failure(self, func: Callable, *args, **kwargs) -> Any:
        """Call ``func`` once; on error optionally roll back, then re-raise.

        A failure of the rollback itself is logged and does not replace the
        original error.
        """
        try:
            return func(*args, **kwargs)
        except Exception as e:
            logger.error(f"❌ Execution failed: {e}")

            if self.auto_rollback:
                logger.info("🔄 Initiating automatic rollback...")
                try:
                    # Only an explicit False means "nothing to roll back to";
                    # None (e.g. from an override) still counts as success.
                    if self._perform_rollback() is not False:
                        logger.info("✅ Rollback successful")
                except Exception as rollback_error:
                    logger.error(f"❌ Rollback failed: {rollback_error}")

            raise

    def _skip_on_failure(self, func: Callable, *args, **kwargs) -> Optional[Any]:
        """Call ``func`` once; on error log a warning and return ``None``."""
        try:
            return func(*args, **kwargs)
        except Exception as e:
            logger.warning(f"⏭️ Skipping due to error: {e}")
            return None

    def _perform_rollback(self) -> bool:
        """Look up the previous deployment to roll back to.

        NOTE: the rollback itself is not implemented yet (see TODO below);
        this only locates and logs the rollback target.

        Returns:
            ``True`` when a previous deployment was found, ``False`` when
            there is nothing to roll back to.
        """
        from vm_tool.history import DeploymentHistory

        history = DeploymentHistory()
        previous = history.get_previous_deployment()

        if previous:
            logger.info(f" Rolling back to deployment: {previous['id']}")
            # TODO: Implement actual rollback
            return True

        logger.warning(" No previous deployment found for rollback")
        return False
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
class CircuitBreaker:
    """Circuit breaker pattern for deployment failures.

    ``state`` is one of ``"closed"`` (calls pass through), ``"open"`` (calls
    are rejected) or ``"half-open"`` (one trial call is let through after the
    recovery timeout).
    """

    def __init__(
        self,
        failure_threshold: int = 5,
        recovery_timeout: int = 300,
    ):
        """Create a closed breaker.

        Args:
            failure_threshold: Number of consecutive failures that opens the
                circuit.
            recovery_timeout: Seconds after the last failure before an open
                circuit lets a trial call through.
        """
        self.failure_threshold = failure_threshold
        self.recovery_timeout = recovery_timeout
        self.failure_count = 0
        self.last_failure_time = 0
        self.state = "closed"  # closed, open, half-open

    def call(self, func: Callable, *args, **kwargs) -> Any:
        """Call ``func(*args, **kwargs)`` through the circuit breaker.

        Returns:
            Whatever ``func`` returns.

        Raises:
            RuntimeError: If the circuit is open and the recovery timeout has
                not elapsed; ``func`` is not called in that case.
            Exception: Any error raised by ``func`` is counted as a failure
                and re-raised unchanged.
        """
        import time

        # Check if circuit should recover
        if self.state == "open":
            if time.time() - self.last_failure_time > self.recovery_timeout:
                logger.info("🔄 Circuit breaker entering half-open state")
                self.state = "half-open"
            else:
                # RuntimeError instead of bare Exception: more specific, and
                # still caught by existing ``except Exception`` handlers.
                raise RuntimeError(
                    "Circuit breaker is OPEN - too many recent failures"
                )

        try:
            result = func(*args, **kwargs)
        except Exception:
            self.failure_count += 1
            self.last_failure_time = time.time()

            logger.warning(
                f"Circuit breaker failure {self.failure_count}/{self.failure_threshold}"
            )

            if self.failure_count >= self.failure_threshold:
                logger.error("❌ Circuit breaker OPEN - failure threshold reached")
                self.state = "open"

            raise
        else:
            # Success - reset or close circuit
            if self.state == "half-open":
                logger.info("✅ Circuit breaker closing - recovery successful")
                self.state = "closed"
            self.failure_count = 0

            return result
|