strands-agents-evals 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113) hide show
  1. strands_agents_evals-0.1.0/.github/ISSUE_TEMPLATE/bug_report.yml +104 -0
  2. strands_agents_evals-0.1.0/.github/ISSUE_TEMPLATE/config.yml +5 -0
  3. strands_agents_evals-0.1.0/.github/ISSUE_TEMPLATE/feature_request.yml +41 -0
  4. strands_agents_evals-0.1.0/.github/PULL_REQUEST_TEMPLATE.md +38 -0
  5. strands_agents_evals-0.1.0/.github/dependabot.yml +20 -0
  6. strands_agents_evals-0.1.0/.github/workflows/integration-test.yml +73 -0
  7. strands_agents_evals-0.1.0/.github/workflows/pr-and-push.yml +19 -0
  8. strands_agents_evals-0.1.0/.github/workflows/pypi-publish-on-release.yml +82 -0
  9. strands_agents_evals-0.1.0/.github/workflows/test-lint.yml +94 -0
  10. strands_agents_evals-0.1.0/.gitignore +17 -0
  11. strands_agents_evals-0.1.0/.pre-commit-config.yaml +29 -0
  12. strands_agents_evals-0.1.0/CODE_OF_CONDUCT.md +4 -0
  13. strands_agents_evals-0.1.0/CONTRIBUTING.md +170 -0
  14. strands_agents_evals-0.1.0/LICENSE +175 -0
  15. strands_agents_evals-0.1.0/NOTICE +1 -0
  16. strands_agents_evals-0.1.0/PKG-INFO +408 -0
  17. strands_agents_evals-0.1.0/README.md +376 -0
  18. strands_agents_evals-0.1.0/STYLE_GUIDE.md +59 -0
  19. strands_agents_evals-0.1.0/pyproject.toml +175 -0
  20. strands_agents_evals-0.1.0/src/__init__.py +0 -0
  21. strands_agents_evals-0.1.0/src/strands_evals/__init__.py +22 -0
  22. strands_agents_evals-0.1.0/src/strands_evals/case.py +53 -0
  23. strands_agents_evals-0.1.0/src/strands_evals/display/display_console.py +150 -0
  24. strands_agents_evals-0.1.0/src/strands_evals/evaluators/__init__.py +23 -0
  25. strands_agents_evals-0.1.0/src/strands_evals/evaluators/evaluator.py +182 -0
  26. strands_agents_evals-0.1.0/src/strands_evals/evaluators/faithfulness_evaluator.py +116 -0
  27. strands_agents_evals-0.1.0/src/strands_evals/evaluators/goal_success_rate_evaluator.py +90 -0
  28. strands_agents_evals-0.1.0/src/strands_evals/evaluators/harmfulness_evaluator.py +135 -0
  29. strands_agents_evals-0.1.0/src/strands_evals/evaluators/helpfulness_evaluator.py +148 -0
  30. strands_agents_evals-0.1.0/src/strands_evals/evaluators/interactions_evaluator.py +244 -0
  31. strands_agents_evals-0.1.0/src/strands_evals/evaluators/output_evaluator.py +72 -0
  32. strands_agents_evals-0.1.0/src/strands_evals/evaluators/prompt_templates/case_prompt_template.py +63 -0
  33. strands_agents_evals-0.1.0/src/strands_evals/evaluators/prompt_templates/faithfulness/__init__.py +11 -0
  34. strands_agents_evals-0.1.0/src/strands_evals/evaluators/prompt_templates/faithfulness/faithfulness_v0.py +30 -0
  35. strands_agents_evals-0.1.0/src/strands_evals/evaluators/prompt_templates/goal_success_rate/__init__.py +11 -0
  36. strands_agents_evals-0.1.0/src/strands_evals/evaluators/prompt_templates/goal_success_rate/goal_success_rate_v0.py +17 -0
  37. strands_agents_evals-0.1.0/src/strands_evals/evaluators/prompt_templates/harmfulness/__init__.py +11 -0
  38. strands_agents_evals-0.1.0/src/strands_evals/evaluators/prompt_templates/harmfulness/harmfulness_v0.py +8 -0
  39. strands_agents_evals-0.1.0/src/strands_evals/evaluators/prompt_templates/helpfulness/__init__.py +11 -0
  40. strands_agents_evals-0.1.0/src/strands_evals/evaluators/prompt_templates/helpfulness/helpfulness_v0.py +38 -0
  41. strands_agents_evals-0.1.0/src/strands_evals/evaluators/prompt_templates/prompt_templates.py +176 -0
  42. strands_agents_evals-0.1.0/src/strands_evals/evaluators/prompt_templates/tool_parameter_accuracy/__init__.py +11 -0
  43. strands_agents_evals-0.1.0/src/strands_evals/evaluators/prompt_templates/tool_parameter_accuracy/tool_parameter_accuracy_v0.py +40 -0
  44. strands_agents_evals-0.1.0/src/strands_evals/evaluators/prompt_templates/tool_selection_accuracy/__init__.py +11 -0
  45. strands_agents_evals-0.1.0/src/strands_evals/evaluators/prompt_templates/tool_selection_accuracy/tool_selection_accuracy_v0.py +23 -0
  46. strands_agents_evals-0.1.0/src/strands_evals/evaluators/tool_parameter_accuracy_evaluator.py +112 -0
  47. strands_agents_evals-0.1.0/src/strands_evals/evaluators/tool_selection_accuracy_evaluator.py +112 -0
  48. strands_agents_evals-0.1.0/src/strands_evals/evaluators/trajectory_evaluator.py +100 -0
  49. strands_agents_evals-0.1.0/src/strands_evals/experiment.py +652 -0
  50. strands_agents_evals-0.1.0/src/strands_evals/extractors/__init__.py +3 -0
  51. strands_agents_evals-0.1.0/src/strands_evals/extractors/graph_extractor.py +30 -0
  52. strands_agents_evals-0.1.0/src/strands_evals/extractors/swarm_extractor.py +73 -0
  53. strands_agents_evals-0.1.0/src/strands_evals/extractors/tools_use_extractor.py +164 -0
  54. strands_agents_evals-0.1.0/src/strands_evals/extractors/trace_extractor.py +166 -0
  55. strands_agents_evals-0.1.0/src/strands_evals/generators/__init__.py +3 -0
  56. strands_agents_evals-0.1.0/src/strands_evals/generators/experiment_generator.py +498 -0
  57. strands_agents_evals-0.1.0/src/strands_evals/generators/prompt_template/prompt_templates.py +75 -0
  58. strands_agents_evals-0.1.0/src/strands_evals/generators/topic_planner.py +60 -0
  59. strands_agents_evals-0.1.0/src/strands_evals/mappers/__init__.py +6 -0
  60. strands_agents_evals-0.1.0/src/strands_evals/mappers/session_mapper.py +27 -0
  61. strands_agents_evals-0.1.0/src/strands_evals/mappers/strands_in_memory_session_mapper.py +473 -0
  62. strands_agents_evals-0.1.0/src/strands_evals/simulation/README.md +323 -0
  63. strands_agents_evals-0.1.0/src/strands_evals/simulation/__init__.py +6 -0
  64. strands_agents_evals-0.1.0/src/strands_evals/simulation/actor_simulator.py +292 -0
  65. strands_agents_evals-0.1.0/src/strands_evals/simulation/profiles/__init__.py +5 -0
  66. strands_agents_evals-0.1.0/src/strands_evals/simulation/profiles/actor_profile.py +26 -0
  67. strands_agents_evals-0.1.0/src/strands_evals/simulation/prompt_templates/__init__.py +11 -0
  68. strands_agents_evals-0.1.0/src/strands_evals/simulation/prompt_templates/actor_profile_extraction.py +25 -0
  69. strands_agents_evals-0.1.0/src/strands_evals/simulation/prompt_templates/actor_system_prompt.py +64 -0
  70. strands_agents_evals-0.1.0/src/strands_evals/simulation/prompt_templates/goal_completion.py +27 -0
  71. strands_agents_evals-0.1.0/src/strands_evals/simulation/tools/__init__.py +5 -0
  72. strands_agents_evals-0.1.0/src/strands_evals/simulation/tools/goal_completion.py +93 -0
  73. strands_agents_evals-0.1.0/src/strands_evals/telemetry/__init__.py +15 -0
  74. strands_agents_evals-0.1.0/src/strands_evals/telemetry/_cloudwatch_logger.py +209 -0
  75. strands_agents_evals-0.1.0/src/strands_evals/telemetry/config.py +207 -0
  76. strands_agents_evals-0.1.0/src/strands_evals/telemetry/tracer.py +38 -0
  77. strands_agents_evals-0.1.0/src/strands_evals/tools/evaluation_tools.py +67 -0
  78. strands_agents_evals-0.1.0/src/strands_evals/types/__init__.py +11 -0
  79. strands_agents_evals-0.1.0/src/strands_evals/types/evaluation.py +105 -0
  80. strands_agents_evals-0.1.0/src/strands_evals/types/evaluation_report.py +244 -0
  81. strands_agents_evals-0.1.0/src/strands_evals/types/simulation/__init__.py +5 -0
  82. strands_agents_evals-0.1.0/src/strands_evals/types/simulation/actor.py +34 -0
  83. strands_agents_evals-0.1.0/src/strands_evals/types/trace.py +205 -0
  84. strands_agents_evals-0.1.0/tests/__init__.py +0 -0
  85. strands_agents_evals-0.1.0/tests/strands_evals/evaluators/test_evaluator.py +151 -0
  86. strands_agents_evals-0.1.0/tests/strands_evals/evaluators/test_faithfulness_evaluator.py +111 -0
  87. strands_agents_evals-0.1.0/tests/strands_evals/evaluators/test_goal_success_rate_evaluator.py +125 -0
  88. strands_agents_evals-0.1.0/tests/strands_evals/evaluators/test_harmfulness_evaluator.py +114 -0
  89. strands_agents_evals-0.1.0/tests/strands_evals/evaluators/test_helpfulness_evaluator.py +115 -0
  90. strands_agents_evals-0.1.0/tests/strands_evals/evaluators/test_interactions_evaluator.py +398 -0
  91. strands_agents_evals-0.1.0/tests/strands_evals/evaluators/test_output_evaluator.py +174 -0
  92. strands_agents_evals-0.1.0/tests/strands_evals/evaluators/test_tool_parameter_accuracy_evaluator.py +137 -0
  93. strands_agents_evals-0.1.0/tests/strands_evals/evaluators/test_tool_selection_accuracy_evaluator.py +127 -0
  94. strands_agents_evals-0.1.0/tests/strands_evals/evaluators/test_trajectory_evaluator.py +254 -0
  95. strands_agents_evals-0.1.0/tests/strands_evals/extractors/test_graph_extractor.py +109 -0
  96. strands_agents_evals-0.1.0/tests/strands_evals/extractors/test_swarm_extractor.py +154 -0
  97. strands_agents_evals-0.1.0/tests/strands_evals/extractors/test_tools_use_extractor.py +211 -0
  98. strands_agents_evals-0.1.0/tests/strands_evals/extractors/test_trace_extractor.py +159 -0
  99. strands_agents_evals-0.1.0/tests/strands_evals/generators/test_experiment_generator.py +407 -0
  100. strands_agents_evals-0.1.0/tests/strands_evals/generators/test_topic_planner.py +26 -0
  101. strands_agents_evals-0.1.0/tests/strands_evals/mappers/__init__.py +0 -0
  102. strands_agents_evals-0.1.0/tests/strands_evals/mappers/test_strands_in_memory_mapper.py +575 -0
  103. strands_agents_evals-0.1.0/tests/strands_evals/simulation/__init__.py +1 -0
  104. strands_agents_evals-0.1.0/tests/strands_evals/simulation/test_actor_simulator.py +213 -0
  105. strands_agents_evals-0.1.0/tests/strands_evals/simulation/test_goal_completion.py +196 -0
  106. strands_agents_evals-0.1.0/tests/strands_evals/telemetry/test_config.py +305 -0
  107. strands_agents_evals-0.1.0/tests/strands_evals/telemetry/test_tracer.py +125 -0
  108. strands_agents_evals-0.1.0/tests/strands_evals/test_cases.py +71 -0
  109. strands_agents_evals-0.1.0/tests/strands_evals/test_experiment.py +1054 -0
  110. strands_agents_evals-0.1.0/tests/strands_evals/tools/test_evaluation_tools.py +117 -0
  111. strands_agents_evals-0.1.0/tests/strands_evals/types/test_trace.py +181 -0
  112. strands_agents_evals-0.1.0/tests/test_integration.py +350 -0
  113. strands_agents_evals-0.1.0/tests_integ/test_output_evaluator.py +259 -0
@@ -0,0 +1,104 @@
1
+ name: Bug Report
2
+ description: Report a bug in Strands Agents Evals
3
+ title: "[BUG] "
4
+ labels: ["bug", "triage"]
5
+ assignees: []
6
+ body:
7
+ - type: markdown
8
+ attributes:
9
+ value: |
10
+ Thanks for taking the time to fill out this bug report for Strands Agents Evals!
11
+ - type: checkboxes
12
+ id: "checks"
13
+ attributes:
14
+ label: "Checks"
15
+ options:
16
+ - label: "I have updated to the latest minor and patch version of Strands and evals"
17
+ required: true
18
+ - label: "I have checked the documentation and this is not expected behavior"
19
+ required: true
20
+ - label: "I have searched [./issues](./issues?q=) and there are no duplicates of my issue"
21
+ required: true
22
+ - type: input
23
+ id: strands-version
24
+ attributes:
25
+ label: Strands Version
26
+ description: Which version of Strands are you using?
27
+ placeholder: e.g., 0.5.2
28
+ validations:
29
+ required: true
30
+ - type: input
31
+ id: strands-evals-version
32
+ attributes:
33
+ label: Strands Evals Version
34
+ description: Which version of Strands Evals are you using?
35
+ placeholder: e.g., 0.5.2
36
+ validations:
37
+ required: true
38
+ - type: input
39
+ id: python-version
40
+ attributes:
41
+ label: Python Version
42
+ description: Which version of Python are you using?
43
+ placeholder: e.g., 3.10.5
44
+ validations:
45
+ required: true
46
+ - type: input
47
+ id: os
48
+ attributes:
49
+ label: Operating System
50
+ description: Which operating system are you using?
51
+ placeholder: e.g., macOS 12.6
52
+ validations:
53
+ required: true
54
+ - type: dropdown
55
+ id: installation-method
56
+ attributes:
57
+ label: Installation Method
58
+ description: How did you install Strands?
59
+ options:
60
+ - pip
61
+ - git clone
62
+ - binary
63
+ - other
64
+ - type: textarea
65
+ id: steps-to-reproduce
66
+ attributes:
67
+ label: Steps to Reproduce
68
+ description: Detailed steps to reproduce the behavior
69
+ placeholder: |
70
+ 1. Code Snippet (Minimal reproducible example)
71
+ 2. Install Strands using...
72
+ 3. Run the command...
73
+ 4. See error...
74
+ validations:
75
+ required: true
76
+ - type: textarea
77
+ id: expected-behavior
78
+ attributes:
79
+ label: Expected Behavior
80
+ description: A clear description of what you expected to happen
81
+ validations:
82
+ required: true
83
+ - type: textarea
84
+ id: actual-behavior
85
+ attributes:
86
+ label: Actual Behavior
87
+ description: What actually happened
88
+ validations:
89
+ required: true
90
+ - type: textarea
91
+ id: additional-context
92
+ attributes:
93
+ label: Additional Context
94
+ description: Any other relevant information, logs, screenshots, etc.
95
+ - type: textarea
96
+ id: possible-solution
97
+ attributes:
98
+ label: Possible Solution
99
+ description: Optional - If you have suggestions on how to fix the bug
100
+ - type: input
101
+ id: related-issues
102
+ attributes:
103
+ label: Related Issues
104
+ description: Optional - Link to related issues if applicable
@@ -0,0 +1,5 @@
1
+ blank_issues_enabled: false
2
+ contact_links:
3
+ - name: Strands Evals SDK Documentation
4
+ url: https://github.com/strands-agents/docs
5
+ about: Visit our documentation for help
@@ -0,0 +1,41 @@
1
+ name: Feature Request
2
+ description: Suggest a new feature or enhancement for Strands Evals SDK
3
+ title: "[FEATURE] "
4
+ labels: ["enhancement", "triage"]
5
+ assignees: []
6
+ body:
7
+ - type: markdown
8
+ attributes:
9
+ value: |
10
+ Thanks for suggesting a new feature for Strands Evals SDK!
11
+ - type: textarea
12
+ id: problem-statement
13
+ attributes:
14
+ label: Problem Statement
15
+ description: Describe the problem you're trying to solve. What is currently difficult or impossible to do?
16
+ placeholder: I would like Strands Evals to...
17
+ validations:
18
+ required: true
19
+ - type: textarea
20
+ id: proposed-solution
21
+ attributes:
22
+ label: Proposed Solution
23
+ description: Optional - Describe your proposed solution in detail. How would this feature work?
24
+ - type: textarea
25
+ id: use-case
26
+ attributes:
27
+ label: Use Case
28
+ description: Provide specific use cases for the feature. How would people use it?
29
+ placeholder: This would help with...
30
+ validations:
31
+ required: true
32
+ - type: textarea
33
+ id: alternatives-solutions
34
+ attributes:
35
+ label: Alternative Solutions
36
+ description: Optional - Have you considered alternative approaches? What are their pros and cons?
37
+ - type: textarea
38
+ id: additional-context
39
+ attributes:
40
+ label: Additional Context
41
+ description: Include any other context, screenshots, code examples, or references that might help understand the feature request.
@@ -0,0 +1,38 @@
1
+ ## Description
2
+ <!-- Provide a detailed description of the changes in this PR -->
3
+
4
+ ## Related Issues
5
+
6
+ <!-- Link to related issues using #issue-number format -->
7
+
8
+ ## Documentation PR
9
+
10
+ <!-- Link to the associated PR in the agent-docs repo -->
11
+
12
+ ## Type of Change
13
+
14
+ <!-- Choose one of the following types of changes, delete the rest -->
15
+
16
+ Bug fix
17
+ New feature
18
+ Breaking change
19
+ Documentation update
20
+ Other (please describe):
21
+
22
+ ## Testing
23
+
24
+ How have you tested the change? Verify that the changes do not break functionality or introduce warnings in consuming repositories: agents-docs, agents-tools, agents-cli
25
+
26
+ - [ ] I ran `hatch run prepare`
27
+
28
+ ## Checklist
29
+ - [ ] I have read the CONTRIBUTING document
30
+ - [ ] I have added any necessary tests that prove my fix is effective or my feature works
31
+ - [ ] I have updated the documentation accordingly
32
+ - [ ] I have added an appropriate example to the documentation to outline the feature, or no new docs are needed
33
+ - [ ] My changes generate no new warnings
34
+ - [ ] Any dependent changes have been merged and published
35
+
36
+ ----
37
+
38
+ By submitting this pull request, I confirm that you can use, modify, copy, and redistribute this contribution, under the terms of your choice.
@@ -0,0 +1,20 @@
1
+ version: 2
2
+ updates:
3
+ - package-ecosystem: "pip"
4
+ directory: "/"
5
+ schedule:
6
+ interval: "daily"
7
+ open-pull-requests-limit: 100
8
+ commit-message:
9
+ prefix: ci
10
+ groups:
11
+ dev-dependencies:
12
+ patterns:
13
+ - "pytest"
14
+ - package-ecosystem: "github-actions"
15
+ directory: "/"
16
+ schedule:
17
+ interval: "daily"
18
+ open-pull-requests-limit: 100
19
+ commit-message:
20
+ prefix: ci
@@ -0,0 +1,73 @@
1
+ name: Secure Integration test
2
+
3
+ on:
4
+ pull_request_target:
5
+ branches: main
6
+
7
+ jobs:
8
+ authorization-check:
9
+ permissions: read-all
10
+ runs-on: ubuntu-latest
11
+ outputs:
12
+ approval-env: ${{ steps.collab-check.outputs.result }}
13
+ steps:
14
+ - name: Collaborator Check
15
+ uses: actions/github-script@v8
16
+ id: collab-check
17
+ with:
18
+ result-encoding: string
19
+ script: |
20
+ try {
21
+ const permissionResponse = await github.rest.repos.getCollaboratorPermissionLevel({
22
+ owner: context.repo.owner,
23
+ repo: context.repo.repo,
24
+ username: context.payload.pull_request.user.login,
25
+ });
26
+ const permission = permissionResponse.data.permission;
27
+ const hasWriteAccess = ['write', 'admin'].includes(permission);
28
+ if (!hasWriteAccess) {
29
+ console.log(`User ${context.payload.pull_request.user.login} does not have write access to the repository (permission: ${permission})`);
30
+ return "manual-approval"
31
+ } else {
32
+ console.log(`Verified ${context.payload.pull_request.user.login} has write access. Auto Approving PR Checks.`)
33
+ return "auto-approve"
34
+ }
35
+ } catch (error) {
36
+ console.log(`${context.payload.pull_request.user.login} does not have write access. Requiring Manual Approval to run PR Checks.`)
37
+ return "manual-approval"
38
+ }
39
+ check-access-and-checkout:
40
+ runs-on: ubuntu-latest
41
+ needs: authorization-check
42
+ environment: ${{ needs.authorization-check.outputs.approval-env }}
43
+ permissions:
44
+ id-token: write
45
+ pull-requests: read
46
+ contents: read
47
+ steps:
48
+ - name: Configure Credentials
49
+ uses: aws-actions/configure-aws-credentials@v5
50
+ with:
51
+ role-to-assume: ${{ secrets.STRANDS_INTEG_TEST_ROLE }}
52
+ aws-region: us-east-1
53
+ mask-aws-account-id: true
54
+ - name: Checkout head commit
55
+ uses: actions/checkout@v6
56
+ with:
57
+ ref: ${{ github.event.pull_request.head.sha }} # Pull the commit from the forked repo
58
+ persist-credentials: false # Don't persist credentials for subsequent actions
59
+ - name: Set up Python
60
+ uses: actions/setup-python@v6
61
+ with:
62
+ python-version: '3.10'
63
+ - name: Install dependencies
64
+ run: |
65
+ pip install --no-cache-dir hatch
66
+ - name: Run integration tests
67
+ env:
68
+ AWS_REGION: us-east-1
69
+ AWS_REGION_NAME: us-east-1
70
+ STRANDS_TEST_API_KEYS_SECRET_NAME: ${{ secrets.STRANDS_TEST_API_KEYS_SECRET_NAME }}
71
+ id: tests
72
+ run: |
73
+ hatch test tests_integ
@@ -0,0 +1,19 @@
1
+ name: Pull Request and Push Action
2
+
3
+ on:
4
+ pull_request: # Safer than pull_request_target for untrusted code
5
+ branches: [ main ]
6
+ types: [opened, synchronize, reopened, ready_for_review]
7
+ push:
8
+ branches: [ main ] # Also run on direct pushes to main
9
+ concurrency:
10
+ group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
11
+ cancel-in-progress: true
12
+
13
+ jobs:
14
+ call-test-lint:
15
+ uses: ./.github/workflows/test-lint.yml
16
+ permissions:
17
+ contents: read
18
+ with:
19
+ ref: ${{ github.event.pull_request.head.sha }}
@@ -0,0 +1,82 @@
1
+ name: Publish Python Package
2
+
3
+ on:
4
+ release:
5
+ types:
6
+ - published
7
+
8
+ jobs:
9
+ call-test-lint:
10
+ uses: ./.github/workflows/test-lint.yml
11
+ permissions:
12
+ contents: read
13
+ with:
14
+ ref: ${{ github.event.release.target_commitish }}
15
+
16
+ build:
17
+ name: Build distribution 📦
18
+ permissions:
19
+ contents: read
20
+ needs:
21
+ - call-test-lint
22
+ runs-on: ubuntu-latest
23
+
24
+ steps:
25
+ - uses: actions/checkout@v6
26
+ with:
27
+ persist-credentials: false
28
+
29
+ - name: Set up Python
30
+ uses: actions/setup-python@v6
31
+ with:
32
+ python-version: '3.10'
33
+
34
+ - name: Install dependencies
35
+ run: |
36
+ python -m pip install --upgrade pip
37
+ pip install hatch twine
38
+
39
+ - name: Validate version
40
+ run: |
41
+ version=$(hatch version)
42
+ if [[ $version =~ ^[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
43
+ echo "Valid version format"
44
+ exit 0
45
+ else
46
+ echo "Invalid version format"
47
+ exit 1
48
+ fi
49
+
50
+ - name: Build
51
+ run: |
52
+ hatch build
53
+
54
+ - name: Store the distribution packages
55
+ uses: actions/upload-artifact@v5
56
+ with:
57
+ name: python-package-distributions
58
+ path: dist/
59
+
60
+ deploy:
61
+ name: Upload release to PyPI
62
+ needs:
63
+ - build
64
+ runs-on: ubuntu-latest
65
+
66
+ # environment is used by PyPI Trusted Publisher and is strongly encouraged
67
+ # https://docs.pypi.org/trusted-publishers/adding-a-publisher/
68
+ environment:
69
+ name: pypi
70
+ url: https://pypi.org/p/strands-agents-evals
71
+ permissions:
72
+ # IMPORTANT: this permission is mandatory for Trusted Publishing
73
+ id-token: write
74
+
75
+ steps:
76
+ - name: Download all the dists
77
+ uses: actions/download-artifact@v4
78
+ with:
79
+ name: python-package-distributions
80
+ path: dist/
81
+ - name: Publish distribution 📦 to PyPI
82
+ uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,94 @@
1
+ name: Test and Lint
2
+
3
+ on:
4
+ workflow_call:
5
+ inputs:
6
+ ref:
7
+ required: true
8
+ type: string
9
+
10
+ jobs:
11
+ unit-test:
12
+ name: Unit Tests - Python ${{ matrix.python-version }} - ${{ matrix.os-name }}
13
+ permissions:
14
+ contents: read
15
+ strategy:
16
+ matrix:
17
+ include:
18
+ # Linux
19
+ - os: ubuntu-latest
20
+ os-name: 'linux'
21
+ python-version: "3.10"
22
+ - os: ubuntu-latest
23
+ os-name: 'linux'
24
+ python-version: "3.11"
25
+ - os: ubuntu-latest
26
+ os-name: 'linux'
27
+ python-version: "3.12"
28
+ - os: ubuntu-latest
29
+ os-name: 'linux'
30
+ python-version: "3.13"
31
+ # Windows
32
+ - os: windows-latest
33
+ os-name: 'windows'
34
+ python-version: "3.10"
35
+ - os: windows-latest
36
+ os-name: 'windows'
37
+ python-version: "3.11"
38
+ - os: windows-latest
39
+ os-name: 'windows'
40
+ python-version: "3.12"
41
+ - os: windows-latest
42
+ os-name: 'windows'
43
+ python-version: "3.13"
44
+ # macOS - latest only; not enough runners for macOS
45
+ - os: macos-latest
46
+ os-name: 'macOS'
47
+ python-version: "3.13"
48
+ fail-fast: true
49
+ runs-on: ${{ matrix.os }}
50
+ env:
51
+ LOG_LEVEL: DEBUG
52
+ steps:
53
+ - name: Checkout code
54
+ uses: actions/checkout@v6
55
+ with:
56
+ ref: ${{ inputs.ref }} # Explicitly define which commit to check out
57
+ persist-credentials: false # Don't persist credentials for subsequent actions
58
+ - name: Set up Python
59
+ uses: actions/setup-python@v6
60
+ with:
61
+ python-version: ${{ matrix.python-version }}
62
+ - name: Install dependencies
63
+ run: |
64
+ pip install --no-cache-dir hatch
65
+ - name: Run Unit tests
66
+ id: tests
67
+ run: hatch test tests --cover
68
+ continue-on-error: false
69
+ lint:
70
+ name: Lint
71
+ runs-on: ubuntu-latest
72
+ permissions:
73
+ contents: read
74
+ steps:
75
+ - name: Checkout code
76
+ uses: actions/checkout@v6
77
+ with:
78
+ ref: ${{ inputs.ref }}
79
+ persist-credentials: false
80
+
81
+ - name: Set up Python
82
+ uses: actions/setup-python@v6
83
+ with:
84
+ python-version: '3.10'
85
+ cache: 'pip'
86
+
87
+ - name: Install dependencies
88
+ run: |
89
+ pip install --no-cache-dir hatch
90
+
91
+ - name: Run lint
92
+ id: lint
93
+ run: hatch run test-lint
94
+ continue-on-error: false
@@ -0,0 +1,17 @@
1
+ __pycache__*
2
+ .pytest_cache
3
+ slack_events
4
+ build
5
+ .coverage*
6
+ .env
7
+ .venv
8
+ .mypy_cache
9
+ .ruff_cache
10
+ *.bak
11
+ .vscode
12
+ dist
13
+ repl_state
14
+ dataset_files
15
+ report_files
16
+ .venv
17
+ *.DS_Store*
@@ -0,0 +1,29 @@
1
+ repos:
2
+ - repo: local
3
+ hooks:
4
+ - id: hatch-format
5
+ name: Format code
6
+ entry: hatch fmt --formatter --check
7
+ language: system
8
+ pass_filenames: false
9
+ types: [python]
10
+ stages: [pre-commit]
11
+ - id: hatch-lint
12
+ name: Lint code
13
+ entry: hatch fmt --linter --check
14
+ language: system
15
+ pass_filenames: false
16
+ types: [python]
17
+ stages: [pre-commit]
18
+ - id: hatch-test
19
+ name: Unit tests
20
+ entry: hatch test
21
+ language: system
22
+ pass_filenames: false
23
+ types: [python]
24
+ stages: [pre-commit]
25
+ - id: commitizen-check
26
+ name: Check commit message
27
+ entry: hatch run cz check --commit-msg-file
28
+ language: system
29
+ stages: [commit-msg]
@@ -0,0 +1,4 @@
1
+ ## Code of Conduct
2
+ This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct).
3
+ For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact
4
+ opensource-codeofconduct@amazon.com with any additional questions or comments.