strands-agents-evals 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- strands_agents_evals-0.1.0/.github/ISSUE_TEMPLATE/bug_report.yml +104 -0
- strands_agents_evals-0.1.0/.github/ISSUE_TEMPLATE/config.yml +5 -0
- strands_agents_evals-0.1.0/.github/ISSUE_TEMPLATE/feature_request.yml +41 -0
- strands_agents_evals-0.1.0/.github/PULL_REQUEST_TEMPLATE.md +38 -0
- strands_agents_evals-0.1.0/.github/dependabot.yml +20 -0
- strands_agents_evals-0.1.0/.github/workflows/integration-test.yml +73 -0
- strands_agents_evals-0.1.0/.github/workflows/pr-and-push.yml +19 -0
- strands_agents_evals-0.1.0/.github/workflows/pypi-publish-on-release.yml +82 -0
- strands_agents_evals-0.1.0/.github/workflows/test-lint.yml +94 -0
- strands_agents_evals-0.1.0/.gitignore +17 -0
- strands_agents_evals-0.1.0/.pre-commit-config.yaml +29 -0
- strands_agents_evals-0.1.0/CODE_OF_CONDUCT.md +4 -0
- strands_agents_evals-0.1.0/CONTRIBUTING.md +170 -0
- strands_agents_evals-0.1.0/LICENSE +175 -0
- strands_agents_evals-0.1.0/NOTICE +1 -0
- strands_agents_evals-0.1.0/PKG-INFO +408 -0
- strands_agents_evals-0.1.0/README.md +376 -0
- strands_agents_evals-0.1.0/STYLE_GUIDE.md +59 -0
- strands_agents_evals-0.1.0/pyproject.toml +175 -0
- strands_agents_evals-0.1.0/src/__init__.py +0 -0
- strands_agents_evals-0.1.0/src/strands_evals/__init__.py +22 -0
- strands_agents_evals-0.1.0/src/strands_evals/case.py +53 -0
- strands_agents_evals-0.1.0/src/strands_evals/display/display_console.py +150 -0
- strands_agents_evals-0.1.0/src/strands_evals/evaluators/__init__.py +23 -0
- strands_agents_evals-0.1.0/src/strands_evals/evaluators/evaluator.py +182 -0
- strands_agents_evals-0.1.0/src/strands_evals/evaluators/faithfulness_evaluator.py +116 -0
- strands_agents_evals-0.1.0/src/strands_evals/evaluators/goal_success_rate_evaluator.py +90 -0
- strands_agents_evals-0.1.0/src/strands_evals/evaluators/harmfulness_evaluator.py +135 -0
- strands_agents_evals-0.1.0/src/strands_evals/evaluators/helpfulness_evaluator.py +148 -0
- strands_agents_evals-0.1.0/src/strands_evals/evaluators/interactions_evaluator.py +244 -0
- strands_agents_evals-0.1.0/src/strands_evals/evaluators/output_evaluator.py +72 -0
- strands_agents_evals-0.1.0/src/strands_evals/evaluators/prompt_templates/case_prompt_template.py +63 -0
- strands_agents_evals-0.1.0/src/strands_evals/evaluators/prompt_templates/faithfulness/__init__.py +11 -0
- strands_agents_evals-0.1.0/src/strands_evals/evaluators/prompt_templates/faithfulness/faithfulness_v0.py +30 -0
- strands_agents_evals-0.1.0/src/strands_evals/evaluators/prompt_templates/goal_success_rate/__init__.py +11 -0
- strands_agents_evals-0.1.0/src/strands_evals/evaluators/prompt_templates/goal_success_rate/goal_success_rate_v0.py +17 -0
- strands_agents_evals-0.1.0/src/strands_evals/evaluators/prompt_templates/harmfulness/__init__.py +11 -0
- strands_agents_evals-0.1.0/src/strands_evals/evaluators/prompt_templates/harmfulness/harmfulness_v0.py +8 -0
- strands_agents_evals-0.1.0/src/strands_evals/evaluators/prompt_templates/helpfulness/__init__.py +11 -0
- strands_agents_evals-0.1.0/src/strands_evals/evaluators/prompt_templates/helpfulness/helpfulness_v0.py +38 -0
- strands_agents_evals-0.1.0/src/strands_evals/evaluators/prompt_templates/prompt_templates.py +176 -0
- strands_agents_evals-0.1.0/src/strands_evals/evaluators/prompt_templates/tool_parameter_accuracy/__init__.py +11 -0
- strands_agents_evals-0.1.0/src/strands_evals/evaluators/prompt_templates/tool_parameter_accuracy/tool_parameter_accuracy_v0.py +40 -0
- strands_agents_evals-0.1.0/src/strands_evals/evaluators/prompt_templates/tool_selection_accuracy/__init__.py +11 -0
- strands_agents_evals-0.1.0/src/strands_evals/evaluators/prompt_templates/tool_selection_accuracy/tool_selection_accuracy_v0.py +23 -0
- strands_agents_evals-0.1.0/src/strands_evals/evaluators/tool_parameter_accuracy_evaluator.py +112 -0
- strands_agents_evals-0.1.0/src/strands_evals/evaluators/tool_selection_accuracy_evaluator.py +112 -0
- strands_agents_evals-0.1.0/src/strands_evals/evaluators/trajectory_evaluator.py +100 -0
- strands_agents_evals-0.1.0/src/strands_evals/experiment.py +652 -0
- strands_agents_evals-0.1.0/src/strands_evals/extractors/__init__.py +3 -0
- strands_agents_evals-0.1.0/src/strands_evals/extractors/graph_extractor.py +30 -0
- strands_agents_evals-0.1.0/src/strands_evals/extractors/swarm_extractor.py +73 -0
- strands_agents_evals-0.1.0/src/strands_evals/extractors/tools_use_extractor.py +164 -0
- strands_agents_evals-0.1.0/src/strands_evals/extractors/trace_extractor.py +166 -0
- strands_agents_evals-0.1.0/src/strands_evals/generators/__init__.py +3 -0
- strands_agents_evals-0.1.0/src/strands_evals/generators/experiment_generator.py +498 -0
- strands_agents_evals-0.1.0/src/strands_evals/generators/prompt_template/prompt_templates.py +75 -0
- strands_agents_evals-0.1.0/src/strands_evals/generators/topic_planner.py +60 -0
- strands_agents_evals-0.1.0/src/strands_evals/mappers/__init__.py +6 -0
- strands_agents_evals-0.1.0/src/strands_evals/mappers/session_mapper.py +27 -0
- strands_agents_evals-0.1.0/src/strands_evals/mappers/strands_in_memory_session_mapper.py +473 -0
- strands_agents_evals-0.1.0/src/strands_evals/simulation/README.md +323 -0
- strands_agents_evals-0.1.0/src/strands_evals/simulation/__init__.py +6 -0
- strands_agents_evals-0.1.0/src/strands_evals/simulation/actor_simulator.py +292 -0
- strands_agents_evals-0.1.0/src/strands_evals/simulation/profiles/__init__.py +5 -0
- strands_agents_evals-0.1.0/src/strands_evals/simulation/profiles/actor_profile.py +26 -0
- strands_agents_evals-0.1.0/src/strands_evals/simulation/prompt_templates/__init__.py +11 -0
- strands_agents_evals-0.1.0/src/strands_evals/simulation/prompt_templates/actor_profile_extraction.py +25 -0
- strands_agents_evals-0.1.0/src/strands_evals/simulation/prompt_templates/actor_system_prompt.py +64 -0
- strands_agents_evals-0.1.0/src/strands_evals/simulation/prompt_templates/goal_completion.py +27 -0
- strands_agents_evals-0.1.0/src/strands_evals/simulation/tools/__init__.py +5 -0
- strands_agents_evals-0.1.0/src/strands_evals/simulation/tools/goal_completion.py +93 -0
- strands_agents_evals-0.1.0/src/strands_evals/telemetry/__init__.py +15 -0
- strands_agents_evals-0.1.0/src/strands_evals/telemetry/_cloudwatch_logger.py +209 -0
- strands_agents_evals-0.1.0/src/strands_evals/telemetry/config.py +207 -0
- strands_agents_evals-0.1.0/src/strands_evals/telemetry/tracer.py +38 -0
- strands_agents_evals-0.1.0/src/strands_evals/tools/evaluation_tools.py +67 -0
- strands_agents_evals-0.1.0/src/strands_evals/types/__init__.py +11 -0
- strands_agents_evals-0.1.0/src/strands_evals/types/evaluation.py +105 -0
- strands_agents_evals-0.1.0/src/strands_evals/types/evaluation_report.py +244 -0
- strands_agents_evals-0.1.0/src/strands_evals/types/simulation/__init__.py +5 -0
- strands_agents_evals-0.1.0/src/strands_evals/types/simulation/actor.py +34 -0
- strands_agents_evals-0.1.0/src/strands_evals/types/trace.py +205 -0
- strands_agents_evals-0.1.0/tests/__init__.py +0 -0
- strands_agents_evals-0.1.0/tests/strands_evals/evaluators/test_evaluator.py +151 -0
- strands_agents_evals-0.1.0/tests/strands_evals/evaluators/test_faithfulness_evaluator.py +111 -0
- strands_agents_evals-0.1.0/tests/strands_evals/evaluators/test_goal_success_rate_evaluator.py +125 -0
- strands_agents_evals-0.1.0/tests/strands_evals/evaluators/test_harmfulness_evaluator.py +114 -0
- strands_agents_evals-0.1.0/tests/strands_evals/evaluators/test_helpfulness_evaluator.py +115 -0
- strands_agents_evals-0.1.0/tests/strands_evals/evaluators/test_interactions_evaluator.py +398 -0
- strands_agents_evals-0.1.0/tests/strands_evals/evaluators/test_output_evaluator.py +174 -0
- strands_agents_evals-0.1.0/tests/strands_evals/evaluators/test_tool_parameter_accuracy_evaluator.py +137 -0
- strands_agents_evals-0.1.0/tests/strands_evals/evaluators/test_tool_selection_accuracy_evaluator.py +127 -0
- strands_agents_evals-0.1.0/tests/strands_evals/evaluators/test_trajectory_evaluator.py +254 -0
- strands_agents_evals-0.1.0/tests/strands_evals/extractors/test_graph_extractor.py +109 -0
- strands_agents_evals-0.1.0/tests/strands_evals/extractors/test_swarm_extractor.py +154 -0
- strands_agents_evals-0.1.0/tests/strands_evals/extractors/test_tools_use_extractor.py +211 -0
- strands_agents_evals-0.1.0/tests/strands_evals/extractors/test_trace_extractor.py +159 -0
- strands_agents_evals-0.1.0/tests/strands_evals/generators/test_experiment_generator.py +407 -0
- strands_agents_evals-0.1.0/tests/strands_evals/generators/test_topic_planner.py +26 -0
- strands_agents_evals-0.1.0/tests/strands_evals/mappers/__init__.py +0 -0
- strands_agents_evals-0.1.0/tests/strands_evals/mappers/test_strands_in_memory_mapper.py +575 -0
- strands_agents_evals-0.1.0/tests/strands_evals/simulation/__init__.py +1 -0
- strands_agents_evals-0.1.0/tests/strands_evals/simulation/test_actor_simulator.py +213 -0
- strands_agents_evals-0.1.0/tests/strands_evals/simulation/test_goal_completion.py +196 -0
- strands_agents_evals-0.1.0/tests/strands_evals/telemetry/test_config.py +305 -0
- strands_agents_evals-0.1.0/tests/strands_evals/telemetry/test_tracer.py +125 -0
- strands_agents_evals-0.1.0/tests/strands_evals/test_cases.py +71 -0
- strands_agents_evals-0.1.0/tests/strands_evals/test_experiment.py +1054 -0
- strands_agents_evals-0.1.0/tests/strands_evals/tools/test_evaluation_tools.py +117 -0
- strands_agents_evals-0.1.0/tests/strands_evals/types/test_trace.py +181 -0
- strands_agents_evals-0.1.0/tests/test_integration.py +350 -0
- strands_agents_evals-0.1.0/tests_integ/test_output_evaluator.py +259 -0
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
name: Bug Report
|
|
2
|
+
description: Report a bug in Strands Agents Evals
|
|
3
|
+
title: "[BUG] "
|
|
4
|
+
labels: ["bug", "triage"]
|
|
5
|
+
assignees: []
|
|
6
|
+
body:
|
|
7
|
+
- type: markdown
|
|
8
|
+
attributes:
|
|
9
|
+
value: |
|
|
10
|
+
Thanks for taking the time to fill out this bug report for Strands Agents Evals!
|
|
11
|
+
- type: checkboxes
|
|
12
|
+
id: "checks"
|
|
13
|
+
attributes:
|
|
14
|
+
label: "Checks"
|
|
15
|
+
options:
|
|
16
|
+
- label: "I have updated to the latest minor and patch version of Strands and evals"
|
|
17
|
+
required: true
|
|
18
|
+
- label: "I have checked the documentation and this is not expected behavior"
|
|
19
|
+
required: true
|
|
20
|
+
- label: "I have searched [./issues](./issues?q=) and there are no duplicates of my issue"
|
|
21
|
+
required: true
|
|
22
|
+
- type: input
|
|
23
|
+
id: strands-version
|
|
24
|
+
attributes:
|
|
25
|
+
label: Strands Version
|
|
26
|
+
description: Which version of Strands are you using?
|
|
27
|
+
placeholder: e.g., 0.5.2
|
|
28
|
+
validations:
|
|
29
|
+
required: true
|
|
30
|
+
- type: input
|
|
31
|
+
id: strands-evals-version
|
|
32
|
+
attributes:
|
|
33
|
+
label: Strands Evals Version
|
|
34
|
+
description: Which version of Strands Evals are you using?
|
|
35
|
+
placeholder: e.g., 0.5.2
|
|
36
|
+
validations:
|
|
37
|
+
required: true
|
|
38
|
+
- type: input
|
|
39
|
+
id: python-version
|
|
40
|
+
attributes:
|
|
41
|
+
label: Python Version
|
|
42
|
+
description: Which version of Python are you using?
|
|
43
|
+
placeholder: e.g., 3.10.5
|
|
44
|
+
validations:
|
|
45
|
+
required: true
|
|
46
|
+
- type: input
|
|
47
|
+
id: os
|
|
48
|
+
attributes:
|
|
49
|
+
label: Operating System
|
|
50
|
+
description: Which operating system are you using?
|
|
51
|
+
placeholder: e.g., macOS 12.6
|
|
52
|
+
validations:
|
|
53
|
+
required: true
|
|
54
|
+
- type: dropdown
|
|
55
|
+
id: installation-method
|
|
56
|
+
attributes:
|
|
57
|
+
label: Installation Method
|
|
58
|
+
description: How did you install Strands?
|
|
59
|
+
options:
|
|
60
|
+
- pip
|
|
61
|
+
- git clone
|
|
62
|
+
- binary
|
|
63
|
+
- other
|
|
64
|
+
- type: textarea
|
|
65
|
+
id: steps-to-reproduce
|
|
66
|
+
attributes:
|
|
67
|
+
label: Steps to Reproduce
|
|
68
|
+
description: Detailed steps to reproduce the behavior
|
|
69
|
+
placeholder: |
|
|
70
|
+
1. Code Snippet (Minimal reproducible example)
|
|
71
|
+
2. Install Strands using...
|
|
72
|
+
3. Run the command...
|
|
73
|
+
4. See error...
|
|
74
|
+
validations:
|
|
75
|
+
required: true
|
|
76
|
+
- type: textarea
|
|
77
|
+
id: expected-behavior
|
|
78
|
+
attributes:
|
|
79
|
+
label: Expected Behavior
|
|
80
|
+
description: A clear description of what you expected to happen
|
|
81
|
+
validations:
|
|
82
|
+
required: true
|
|
83
|
+
- type: textarea
|
|
84
|
+
id: actual-behavior
|
|
85
|
+
attributes:
|
|
86
|
+
label: Actual Behavior
|
|
87
|
+
description: What actually happened
|
|
88
|
+
validations:
|
|
89
|
+
required: true
|
|
90
|
+
- type: textarea
|
|
91
|
+
id: additional-context
|
|
92
|
+
attributes:
|
|
93
|
+
label: Additional Context
|
|
94
|
+
description: Any other relevant information, logs, screenshots, etc.
|
|
95
|
+
- type: textarea
|
|
96
|
+
id: possible-solution
|
|
97
|
+
attributes:
|
|
98
|
+
label: Possible Solution
|
|
99
|
+
description: Optional - If you have suggestions on how to fix the bug
|
|
100
|
+
- type: input
|
|
101
|
+
id: related-issues
|
|
102
|
+
attributes:
|
|
103
|
+
label: Related Issues
|
|
104
|
+
description: Optional - Link to related issues if applicable
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
name: Feature Request
|
|
2
|
+
description: Suggest a new feature or enhancement for Strands Evals SDK
|
|
3
|
+
title: "[FEATURE] "
|
|
4
|
+
labels: ["enhancement", "triage"]
|
|
5
|
+
assignees: []
|
|
6
|
+
body:
|
|
7
|
+
- type: markdown
|
|
8
|
+
attributes:
|
|
9
|
+
value: |
|
|
10
|
+
Thanks for suggesting a new feature for Strands Evals SDK!
|
|
11
|
+
- type: textarea
|
|
12
|
+
id: problem-statement
|
|
13
|
+
attributes:
|
|
14
|
+
label: Problem Statement
|
|
15
|
+
description: Describe the problem you're trying to solve. What is currently difficult or impossible to do?
|
|
16
|
+
placeholder: I would like Strands Evals to...
|
|
17
|
+
validations:
|
|
18
|
+
required: true
|
|
19
|
+
- type: textarea
|
|
20
|
+
id: proposed-solution
|
|
21
|
+
attributes:
|
|
22
|
+
label: Proposed Solution
|
|
23
|
+
description: Optional - Describe your proposed solution in detail. How would this feature work?
|
|
24
|
+
- type: textarea
|
|
25
|
+
id: use-case
|
|
26
|
+
attributes:
|
|
27
|
+
label: Use Case
|
|
28
|
+
description: Provide specific use cases for the feature. How would people use it?
|
|
29
|
+
placeholder: This would help with...
|
|
30
|
+
validations:
|
|
31
|
+
required: true
|
|
32
|
+
- type: textarea
|
|
33
|
+
id: alternatives-solutions
|
|
34
|
+
attributes:
|
|
35
|
+
label: Alternative Solutions
|
|
36
|
+
description: Optional - Have you considered alternative approaches? What are their pros and cons?
|
|
37
|
+
- type: textarea
|
|
38
|
+
id: additional-context
|
|
39
|
+
attributes:
|
|
40
|
+
label: Additional Context
|
|
41
|
+
description: Include any other context, screenshots, code examples, or references that might help understand the feature request.
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
## Description
|
|
2
|
+
<!-- Provide a detailed description of the changes in this PR -->
|
|
3
|
+
|
|
4
|
+
## Related Issues
|
|
5
|
+
|
|
6
|
+
<!-- Link to related issues using #issue-number format -->
|
|
7
|
+
|
|
8
|
+
## Documentation PR
|
|
9
|
+
|
|
10
|
+
<!-- Link to the associated PR in the agent-docs repo -->
|
|
11
|
+
|
|
12
|
+
## Type of Change
|
|
13
|
+
|
|
14
|
+
<!-- Choose one of the following types of changes, delete the rest -->
|
|
15
|
+
|
|
16
|
+
Bug fix
|
|
17
|
+
New feature
|
|
18
|
+
Breaking change
|
|
19
|
+
Documentation update
|
|
20
|
+
Other (please describe):
|
|
21
|
+
|
|
22
|
+
## Testing
|
|
23
|
+
|
|
24
|
+
How have you tested the change? Verify that the changes do not break functionality or introduce warnings in consuming repositories: agents-docs, agents-tools, agents-cli
|
|
25
|
+
|
|
26
|
+
- [ ] I ran `hatch run prepare`
|
|
27
|
+
|
|
28
|
+
## Checklist
|
|
29
|
+
- [ ] I have read the CONTRIBUTING document
|
|
30
|
+
- [ ] I have added any necessary tests that prove my fix is effective or my feature works
|
|
31
|
+
- [ ] I have updated the documentation accordingly
|
|
32
|
+
- [ ] I have added an appropriate example to the documentation to outline the feature, or no new docs are needed
|
|
33
|
+
- [ ] My changes generate no new warnings
|
|
34
|
+
- [ ] Any dependent changes have been merged and published
|
|
35
|
+
|
|
36
|
+
----
|
|
37
|
+
|
|
38
|
+
By submitting this pull request, I confirm that you can use, modify, copy, and redistribute this contribution, under the terms of your choice.
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
version: 2
|
|
2
|
+
updates:
|
|
3
|
+
- package-ecosystem: "pip"
|
|
4
|
+
directory: "/"
|
|
5
|
+
schedule:
|
|
6
|
+
interval: "daily"
|
|
7
|
+
open-pull-requests-limit: 100
|
|
8
|
+
commit-message:
|
|
9
|
+
prefix: ci
|
|
10
|
+
groups:
|
|
11
|
+
dev-dependencies:
|
|
12
|
+
patterns:
|
|
13
|
+
- "pytest"
|
|
14
|
+
- package-ecosystem: "github-actions"
|
|
15
|
+
directory: "/"
|
|
16
|
+
schedule:
|
|
17
|
+
interval: "daily"
|
|
18
|
+
open-pull-requests-limit: 100
|
|
19
|
+
commit-message:
|
|
20
|
+
prefix: ci
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
name: Secure Integration test
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
pull_request_target:
|
|
5
|
+
branches: main
|
|
6
|
+
|
|
7
|
+
jobs:
|
|
8
|
+
authorization-check:
|
|
9
|
+
permissions: read-all
|
|
10
|
+
runs-on: ubuntu-latest
|
|
11
|
+
outputs:
|
|
12
|
+
approval-env: ${{ steps.collab-check.outputs.result }}
|
|
13
|
+
steps:
|
|
14
|
+
- name: Collaborator Check
|
|
15
|
+
uses: actions/github-script@v8
|
|
16
|
+
id: collab-check
|
|
17
|
+
with:
|
|
18
|
+
result-encoding: string
|
|
19
|
+
script: |
|
|
20
|
+
try {
|
|
21
|
+
const permissionResponse = await github.rest.repos.getCollaboratorPermissionLevel({
|
|
22
|
+
owner: context.repo.owner,
|
|
23
|
+
repo: context.repo.repo,
|
|
24
|
+
username: context.payload.pull_request.user.login,
|
|
25
|
+
});
|
|
26
|
+
const permission = permissionResponse.data.permission;
|
|
27
|
+
const hasWriteAccess = ['write', 'admin'].includes(permission);
|
|
28
|
+
if (!hasWriteAccess) {
|
|
29
|
+
console.log(`User ${context.payload.pull_request.user.login} does not have write access to the repository (permission: ${permission})`);
|
|
30
|
+
return "manual-approval"
|
|
31
|
+
} else {
|
|
32
|
+
console.log(`Verified ${context.payload.pull_request.user.login} has write access. Auto Approving PR Checks.`)
|
|
33
|
+
return "auto-approve"
|
|
34
|
+
}
|
|
35
|
+
} catch (error) {
|
|
36
|
+
console.log(`${context.payload.pull_request.user.login} does not have write access. Requiring Manual Approval to run PR Checks.`)
|
|
37
|
+
return "manual-approval"
|
|
38
|
+
}
|
|
39
|
+
check-access-and-checkout:
|
|
40
|
+
runs-on: ubuntu-latest
|
|
41
|
+
needs: authorization-check
|
|
42
|
+
environment: ${{ needs.authorization-check.outputs.approval-env }}
|
|
43
|
+
permissions:
|
|
44
|
+
id-token: write
|
|
45
|
+
pull-requests: read
|
|
46
|
+
contents: read
|
|
47
|
+
steps:
|
|
48
|
+
- name: Configure Credentials
|
|
49
|
+
uses: aws-actions/configure-aws-credentials@v5
|
|
50
|
+
with:
|
|
51
|
+
role-to-assume: ${{ secrets.STRANDS_INTEG_TEST_ROLE }}
|
|
52
|
+
aws-region: us-east-1
|
|
53
|
+
mask-aws-account-id: true
|
|
54
|
+
- name: Checkout head commit
|
|
55
|
+
uses: actions/checkout@v6
|
|
56
|
+
with:
|
|
57
|
+
ref: ${{ github.event.pull_request.head.sha }} # Pull the commit from the forked repo
|
|
58
|
+
persist-credentials: false # Don't persist credentials for subsequent actions
|
|
59
|
+
- name: Set up Python
|
|
60
|
+
uses: actions/setup-python@v6
|
|
61
|
+
with:
|
|
62
|
+
python-version: '3.10'
|
|
63
|
+
- name: Install dependencies
|
|
64
|
+
run: |
|
|
65
|
+
pip install --no-cache-dir hatch
|
|
66
|
+
- name: Run integration tests
|
|
67
|
+
env:
|
|
68
|
+
AWS_REGION: us-east-1
|
|
69
|
+
AWS_REGION_NAME: us-east-1
|
|
70
|
+
STRANDS_TEST_API_KEYS_SECRET_NAME: ${{ secrets.STRANDS_TEST_API_KEYS_SECRET_NAME }}
|
|
71
|
+
id: tests
|
|
72
|
+
run: |
|
|
73
|
+
hatch test tests_integ
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
name: Pull Request and Push Action
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
pull_request: # Safer than pull_request_target for untrusted code
|
|
5
|
+
branches: [ main ]
|
|
6
|
+
types: [opened, synchronize, reopened, ready_for_review]
|
|
7
|
+
push:
|
|
8
|
+
branches: [ main ] # Also run on direct pushes to main
|
|
9
|
+
concurrency:
|
|
10
|
+
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
|
|
11
|
+
cancel-in-progress: true
|
|
12
|
+
|
|
13
|
+
jobs:
|
|
14
|
+
call-test-lint:
|
|
15
|
+
uses: ./.github/workflows/test-lint.yml
|
|
16
|
+
permissions:
|
|
17
|
+
contents: read
|
|
18
|
+
with:
|
|
19
|
+
ref: ${{ github.event.pull_request.head.sha }}
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
name: Publish Python Package
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
release:
|
|
5
|
+
types:
|
|
6
|
+
- published
|
|
7
|
+
|
|
8
|
+
jobs:
|
|
9
|
+
call-test-lint:
|
|
10
|
+
uses: ./.github/workflows/test-lint.yml
|
|
11
|
+
permissions:
|
|
12
|
+
contents: read
|
|
13
|
+
with:
|
|
14
|
+
ref: ${{ github.event.release.target_commitish }}
|
|
15
|
+
|
|
16
|
+
build:
|
|
17
|
+
name: Build distribution 📦
|
|
18
|
+
permissions:
|
|
19
|
+
contents: read
|
|
20
|
+
needs:
|
|
21
|
+
- call-test-lint
|
|
22
|
+
runs-on: ubuntu-latest
|
|
23
|
+
|
|
24
|
+
steps:
|
|
25
|
+
- uses: actions/checkout@v6
|
|
26
|
+
with:
|
|
27
|
+
persist-credentials: false
|
|
28
|
+
|
|
29
|
+
- name: Set up Python
|
|
30
|
+
uses: actions/setup-python@v6
|
|
31
|
+
with:
|
|
32
|
+
python-version: '3.10'
|
|
33
|
+
|
|
34
|
+
- name: Install dependencies
|
|
35
|
+
run: |
|
|
36
|
+
python -m pip install --upgrade pip
|
|
37
|
+
pip install hatch twine
|
|
38
|
+
|
|
39
|
+
- name: Validate version
|
|
40
|
+
run: |
|
|
41
|
+
version=$(hatch version)
|
|
42
|
+
if [[ $version =~ ^[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
|
|
43
|
+
echo "Valid version format"
|
|
44
|
+
exit 0
|
|
45
|
+
else
|
|
46
|
+
echo "Invalid version format"
|
|
47
|
+
exit 1
|
|
48
|
+
fi
|
|
49
|
+
|
|
50
|
+
- name: Build
|
|
51
|
+
run: |
|
|
52
|
+
hatch build
|
|
53
|
+
|
|
54
|
+
- name: Store the distribution packages
|
|
55
|
+
uses: actions/upload-artifact@v5
|
|
56
|
+
with:
|
|
57
|
+
name: python-package-distributions
|
|
58
|
+
path: dist/
|
|
59
|
+
|
|
60
|
+
deploy:
|
|
61
|
+
name: Upload release to PyPI
|
|
62
|
+
needs:
|
|
63
|
+
- build
|
|
64
|
+
runs-on: ubuntu-latest
|
|
65
|
+
|
|
66
|
+
# environment is used by PyPI Trusted Publisher and is strongly encouraged
|
|
67
|
+
# https://docs.pypi.org/trusted-publishers/adding-a-publisher/
|
|
68
|
+
environment:
|
|
69
|
+
name: pypi
|
|
70
|
+
url: https://pypi.org/p/strands-agents-evals
|
|
71
|
+
permissions:
|
|
72
|
+
# IMPORTANT: this permission is mandatory for Trusted Publishing
|
|
73
|
+
id-token: write
|
|
74
|
+
|
|
75
|
+
steps:
|
|
76
|
+
- name: Download all the dists
|
|
77
|
+
uses: actions/download-artifact@v4
|
|
78
|
+
with:
|
|
79
|
+
name: python-package-distributions
|
|
80
|
+
path: dist/
|
|
81
|
+
- name: Publish distribution 📦 to PyPI
|
|
82
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
name: Test and Lint
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
workflow_call:
|
|
5
|
+
inputs:
|
|
6
|
+
ref:
|
|
7
|
+
required: true
|
|
8
|
+
type: string
|
|
9
|
+
|
|
10
|
+
jobs:
|
|
11
|
+
unit-test:
|
|
12
|
+
name: Unit Tests - Python ${{ matrix.python-version }} - ${{ matrix.os-name }}
|
|
13
|
+
permissions:
|
|
14
|
+
contents: read
|
|
15
|
+
strategy:
|
|
16
|
+
matrix:
|
|
17
|
+
include:
|
|
18
|
+
# Linux
|
|
19
|
+
- os: ubuntu-latest
|
|
20
|
+
os-name: 'linux'
|
|
21
|
+
python-version: "3.10"
|
|
22
|
+
- os: ubuntu-latest
|
|
23
|
+
os-name: 'linux'
|
|
24
|
+
python-version: "3.11"
|
|
25
|
+
- os: ubuntu-latest
|
|
26
|
+
os-name: 'linux'
|
|
27
|
+
python-version: "3.12"
|
|
28
|
+
- os: ubuntu-latest
|
|
29
|
+
os-name: 'linux'
|
|
30
|
+
python-version: "3.13"
|
|
31
|
+
# Windows
|
|
32
|
+
- os: windows-latest
|
|
33
|
+
os-name: 'windows'
|
|
34
|
+
python-version: "3.10"
|
|
35
|
+
- os: windows-latest
|
|
36
|
+
os-name: 'windows'
|
|
37
|
+
python-version: "3.11"
|
|
38
|
+
- os: windows-latest
|
|
39
|
+
os-name: 'windows'
|
|
40
|
+
python-version: "3.12"
|
|
41
|
+
- os: windows-latest
|
|
42
|
+
os-name: 'windows'
|
|
43
|
+
python-version: "3.13"
|
|
44
|
+
# MacOS - latest only; not enough runners for macOS
|
|
45
|
+
- os: macos-latest
|
|
46
|
+
os-name: 'macOS'
|
|
47
|
+
python-version: "3.13"
|
|
48
|
+
fail-fast: true
|
|
49
|
+
runs-on: ${{ matrix.os }}
|
|
50
|
+
env:
|
|
51
|
+
LOG_LEVEL: DEBUG
|
|
52
|
+
steps:
|
|
53
|
+
- name: Checkout code
|
|
54
|
+
uses: actions/checkout@v6
|
|
55
|
+
with:
|
|
56
|
+
ref: ${{ inputs.ref }} # Explicitly define which commit to check out
|
|
57
|
+
persist-credentials: false # Don't persist credentials for subsequent actions
|
|
58
|
+
- name: Set up Python
|
|
59
|
+
uses: actions/setup-python@v6
|
|
60
|
+
with:
|
|
61
|
+
python-version: ${{ matrix.python-version }}
|
|
62
|
+
- name: Install dependencies
|
|
63
|
+
run: |
|
|
64
|
+
pip install --no-cache-dir hatch
|
|
65
|
+
- name: Run Unit tests
|
|
66
|
+
id: tests
|
|
67
|
+
run: hatch test tests --cover
|
|
68
|
+
continue-on-error: false
|
|
69
|
+
lint:
|
|
70
|
+
name: Lint
|
|
71
|
+
runs-on: ubuntu-latest
|
|
72
|
+
permissions:
|
|
73
|
+
contents: read
|
|
74
|
+
steps:
|
|
75
|
+
- name: Checkout code
|
|
76
|
+
uses: actions/checkout@v6
|
|
77
|
+
with:
|
|
78
|
+
ref: ${{ inputs.ref }}
|
|
79
|
+
persist-credentials: false
|
|
80
|
+
|
|
81
|
+
- name: Set up Python
|
|
82
|
+
uses: actions/setup-python@v6
|
|
83
|
+
with:
|
|
84
|
+
python-version: '3.10'
|
|
85
|
+
cache: 'pip'
|
|
86
|
+
|
|
87
|
+
- name: Install dependencies
|
|
88
|
+
run: |
|
|
89
|
+
pip install --no-cache-dir hatch
|
|
90
|
+
|
|
91
|
+
- name: Run lint
|
|
92
|
+
id: lint
|
|
93
|
+
run: hatch run test-lint
|
|
94
|
+
continue-on-error: false
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
repos:
|
|
2
|
+
- repo: local
|
|
3
|
+
hooks:
|
|
4
|
+
- id: hatch-format
|
|
5
|
+
name: Format code
|
|
6
|
+
entry: hatch fmt --formatter --check
|
|
7
|
+
language: system
|
|
8
|
+
pass_filenames: false
|
|
9
|
+
types: [python]
|
|
10
|
+
stages: [pre-commit]
|
|
11
|
+
- id: hatch-lint
|
|
12
|
+
name: Lint code
|
|
13
|
+
entry: hatch fmt --linter --check
|
|
14
|
+
language: system
|
|
15
|
+
pass_filenames: false
|
|
16
|
+
types: [python]
|
|
17
|
+
stages: [pre-commit]
|
|
18
|
+
- id: hatch-test
|
|
19
|
+
name: Unit tests
|
|
20
|
+
entry: hatch test
|
|
21
|
+
language: system
|
|
22
|
+
pass_filenames: false
|
|
23
|
+
types: [python]
|
|
24
|
+
stages: [pre-commit]
|
|
25
|
+
- id: commitizen-check
|
|
26
|
+
name: Check commit message
|
|
27
|
+
entry: hatch run cz check --commit-msg-file
|
|
28
|
+
language: system
|
|
29
|
+
stages: [commit-msg]
|
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
## Code of Conduct
|
|
2
|
+
This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct).
|
|
3
|
+
For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact
|
|
4
|
+
opensource-codeofconduct@amazon.com with any additional questions or comments.
|