synthetic-graph-benchmarks 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- synthetic_graph_benchmarks-0.1.0/.gitignore +12 -0
- synthetic_graph_benchmarks-0.1.0/.python-version +1 -0
- synthetic_graph_benchmarks-0.1.0/LICENSE +21 -0
- synthetic_graph_benchmarks-0.1.0/MANIFEST.in +6 -0
- synthetic_graph_benchmarks-0.1.0/PKG-INFO +227 -0
- synthetic_graph_benchmarks-0.1.0/PUBLISH.md +117 -0
- synthetic_graph_benchmarks-0.1.0/README.md +191 -0
- synthetic_graph_benchmarks-0.1.0/notebooks/dataset.ipynb +69 -0
- synthetic_graph_benchmarks-0.1.0/pyproject.toml +81 -0
- synthetic_graph_benchmarks-0.1.0/src/synthetic_graph_benchmarks/__init__.py +23 -0
- synthetic_graph_benchmarks-0.1.0/src/synthetic_graph_benchmarks/benchmarks.py +85 -0
- synthetic_graph_benchmarks-0.1.0/src/synthetic_graph_benchmarks/dataset.py +47 -0
- synthetic_graph_benchmarks-0.1.0/src/synthetic_graph_benchmarks/dist_helper.py +222 -0
- synthetic_graph_benchmarks-0.1.0/src/synthetic_graph_benchmarks/spectre_utils.py +1230 -0
- synthetic_graph_benchmarks-0.1.0/src/synthetic_graph_benchmarks/utils.py +56 -0
- synthetic_graph_benchmarks-0.1.0/test/test_benchmarks_match_paper.py +141 -0
- synthetic_graph_benchmarks-0.1.0/uv.lock +1462 -0
@@ -0,0 +1 @@
|
|
1
|
+
3.12
|
@@ -0,0 +1,21 @@
|
|
1
|
+
MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2025 Ole Petersen
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|
@@ -0,0 +1,227 @@
|
|
1
|
+
Metadata-Version: 2.4
|
2
|
+
Name: synthetic-graph-benchmarks
|
3
|
+
Version: 0.1.0
|
4
|
+
Summary: Standardized benchmarks for evaluating synthetic graph generation methods
|
5
|
+
Project-URL: Homepage, https://github.com/peteole/synthetic_graph_benchmarks
|
6
|
+
Project-URL: Repository, https://github.com/peteole/synthetic_graph_benchmarks
|
7
|
+
Project-URL: Documentation, https://github.com/peteole/synthetic_graph_benchmarks#readme
|
8
|
+
Project-URL: Bug Tracker, https://github.com/peteole/synthetic_graph_benchmarks/issues
|
9
|
+
Author-email: Ole Petersen <peteole2707@gmail.com>
|
10
|
+
Maintainer-email: Ole Petersen <peteole2707@gmail.com>
|
11
|
+
License: MIT
|
12
|
+
License-File: LICENSE
|
13
|
+
Keywords: benchmarks,evaluation-metrics,graph-generation,graph-neural-networks,machine-learning,networkx,synthetic-graphs
|
14
|
+
Classifier: Development Status :: 4 - Beta
|
15
|
+
Classifier: Intended Audience :: Developers
|
16
|
+
Classifier: Intended Audience :: Science/Research
|
17
|
+
Classifier: License :: OSI Approved :: MIT License
|
18
|
+
Classifier: Operating System :: OS Independent
|
19
|
+
Classifier: Programming Language :: Python :: 3
|
20
|
+
Classifier: Programming Language :: Python :: 3.10
|
21
|
+
Classifier: Programming Language :: Python :: 3.11
|
22
|
+
Classifier: Programming Language :: Python :: 3.12
|
23
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
24
|
+
Classifier: Topic :: Scientific/Engineering :: Mathematics
|
25
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
26
|
+
Requires-Python: >=3.10
|
27
|
+
Requires-Dist: networkx>=3.4.2
|
28
|
+
Requires-Dist: numpy>=2.2.6
|
29
|
+
Requires-Dist: orca-graphlets>=0.1.4
|
30
|
+
Requires-Dist: pygsp>=0.5.1
|
31
|
+
Requires-Dist: requests>=2.32.4
|
32
|
+
Requires-Dist: scikit-learn>=1.7.1
|
33
|
+
Requires-Dist: scipy>=1.15.3
|
34
|
+
Requires-Dist: torch>=2.3.0
|
35
|
+
Description-Content-Type: text/markdown
|
36
|
+
|
37
|
+
# Synthetic Graph Benchmarks
|
38
|
+
|
39
|
+
[![PyPI version](https://badge.fury.io/py/synthetic-graph-benchmarks.svg)](https://badge.fury.io/py/synthetic-graph-benchmarks)
|
40
|
+
[![Python 3.10+](https://img.shields.io/badge/python-3.10+-blue.svg)](https://www.python.org/downloads/)
|
41
|
+
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
|
42
|
+
|
43
|
+
A Python package implementing standardized benchmarks for evaluating synthetic graph generation methods, based on the evaluation frameworks introduced in:
|
44
|
+
|
45
|
+
- [**SPECTRE: Spectral Conditioning Helps to Overcome the Expressivity Limits of One-shot Graph Generators**](https://arxiv.org/pdf/2204.01613) (ICML 2022)
|
46
|
+
- [**Efficient and Scalable Graph Generation through Iterative Local Expansion**](https://arxiv.org/html/2312.11529v4) (2023)
|
47
|
+
|
48
|
+
This package provides a unified interface for benchmarking graph generation algorithms against established datasets and metrics used in the graph generation literature.
|
49
|
+
|
50
|
+
## Features
|
51
|
+
|
52
|
+
- **Standardized Datasets**: Access to benchmark datasets including Stochastic Block Model (SBM), Planar graphs, and Tree graphs
|
53
|
+
- **Comprehensive Metrics**: Implementation of key evaluation metrics including:
|
54
|
+
- Degree distribution comparison (MMD)
|
55
|
+
- Clustering coefficient analysis
|
56
|
+
- Orbit count statistics (using ORCA)
|
57
|
+
- Spectral properties analysis
|
58
|
+
- Wavelet coefficient comparison
|
59
|
+
- **Validation Metrics**: Graph-type specific validation (planarity, tree properties, SBM likelihood)
|
60
|
+
- **Reproducible Evaluation**: Consistent benchmarking across different graph generation methods
|
61
|
+
- **Easy Integration**: Simple API for evaluating your own graph generation algorithms
|
62
|
+
|
63
|
+
## Installation
|
64
|
+
|
65
|
+
### From PyPI (recommended)
|
66
|
+
|
67
|
+
```bash
|
68
|
+
pip install synthetic-graph-benchmarks
|
69
|
+
```
|
70
|
+
|
71
|
+
### From Source
|
72
|
+
|
73
|
+
```bash
|
74
|
+
git clone https://github.com/peteole/synthetic_graph_benchmarks.git
|
75
|
+
cd synthetic_graph_benchmarks
|
76
|
+
pip install -e .
|
77
|
+
```
|
78
|
+
|
79
|
+
## Quick Start
|
80
|
+
|
81
|
+
```python
|
82
|
+
import networkx as nx
|
83
|
+
from synthetic_graph_benchmarks import (
|
84
|
+
benchmark_planar_results,
|
85
|
+
benchmark_sbm_results,
|
86
|
+
benchmark_tree_results
|
87
|
+
)
|
88
|
+
|
89
|
+
# Generate some example graphs (replace with your graph generation method)
|
90
|
+
generated_graphs = [nx.erdos_renyi_graph(64, 0.1) for _ in range(20)]
|
91
|
+
|
92
|
+
# Benchmark against planar graph dataset
|
93
|
+
results = benchmark_planar_results(generated_graphs)
|
94
|
+
print(f"Planar accuracy: {results['planar_acc']:.3f}")
|
95
|
+
print(f"Average metric ratio: {results['average_ratio']:.3f}")
|
96
|
+
|
97
|
+
# Benchmark against SBM dataset
|
98
|
+
sbm_results = benchmark_sbm_results(generated_graphs)
|
99
|
+
print(f"SBM accuracy: {sbm_results['sbm_acc']:.3f}")
|
100
|
+
|
101
|
+
# Benchmark against tree dataset
|
102
|
+
tree_results = benchmark_tree_results(generated_graphs)
|
103
|
+
print(f"Tree accuracy: {tree_results['tree_acc']:.3f}")
|
104
|
+
```
|
105
|
+
|
106
|
+
## Datasets
|
107
|
+
|
108
|
+
The package provides access to three standard benchmark datasets:
|
109
|
+
|
110
|
+
### Stochastic Block Model (SBM)
|
111
|
+
- **Size**: 200 graphs
|
112
|
+
- **Properties**: 2-5 communities, 20-40 nodes per community
|
113
|
+
- **Edge probabilities**: 0.3 intra-community, 0.05 inter-community
|
114
|
+
|
115
|
+
### Planar Graphs
|
116
|
+
- **Size**: 200 graphs with 64 nodes each
|
117
|
+
- **Generation**: Delaunay triangulation on random points in unit square
|
118
|
+
- **Properties**: Guaranteed planarity
|
119
|
+
|
120
|
+
### Tree Graphs
|
121
|
+
- **Size**: 200 graphs with 64 nodes each
|
122
|
+
- **Properties**: Connected acyclic graphs (trees)
|
123
|
+
|
124
|
+
## Evaluation Metrics
|
125
|
+
|
126
|
+
### Graph Statistics
|
127
|
+
- **Degree Distribution**: Maximum Mean Discrepancy (MMD) between degree histograms
|
128
|
+
- **Clustering Coefficient**: Local clustering coefficient comparison
|
129
|
+
- **Orbit Counts**: 4-node orbit statistics computed with the ORCA package
|
130
|
+
- **Spectral Properties**: Laplacian eigenvalue distribution analysis
|
131
|
+
- **Wavelet Coefficients**: Graph wavelet signature comparison
|
132
|
+
|
133
|
+
### Validity Metrics
|
134
|
+
- **Planar Accuracy**: Fraction of generated graphs that are planar
|
135
|
+
- **Tree Accuracy**: Fraction of generated graphs that are trees (connected and acyclic)
|
136
|
+
- **SBM Accuracy**: Fraction of generated graphs that are statistically consistent with a fitted stochastic block model
|
137
|
+
|
138
|
+
### Quality Scores
|
139
|
+
- **Uniqueness**: Fraction of non-isomorphic graphs in generated set
|
140
|
+
- **Novelty**: Fraction of generated graphs not isomorphic to training graphs
|
141
|
+
- **Validity-Uniqueness-Novelty (VUN)**: Combined score for overall quality
|
142
|
+
|
143
|
+
## Advanced Usage
|
144
|
+
|
145
|
+
### Custom Evaluation
|
146
|
+
|
147
|
+
```python
|
148
|
+
from synthetic_graph_benchmarks.dataset import Dataset
|
149
|
+
from synthetic_graph_benchmarks.spectre_utils import PlanarSamplingMetrics
|
150
|
+
|
151
|
+
# Load dataset manually
|
152
|
+
dataset = Dataset.load_planar()
|
153
|
+
print(f"Training graphs: {len(dataset.train_graphs)}")
|
154
|
+
print(f"Validation graphs: {len(dataset.val_graphs)}")
|
155
|
+
|
156
|
+
# Use metrics directly
|
157
|
+
metrics = PlanarSamplingMetrics(dataset)
|
158
|
+
test_metrics = metrics.forward(dataset.train_graphs, test=True)
|
159
|
+
results = metrics.forward(generated_graphs, ref_metrics={"test": test_metrics}, test=True)
|
160
|
+
```
|
161
|
+
|
162
|
+
### Accessing Individual Metrics
|
163
|
+
|
164
|
+
```python
|
165
|
+
# Get detailed breakdown of all metrics
|
166
|
+
results = benchmark_planar_results(generated_graphs)
|
167
|
+
|
168
|
+
# Individual metric values
|
169
|
+
print(f"Degree MMD: {results['degree']:.6f}")
|
170
|
+
print(f"Clustering MMD: {results['clustering']:.6f}")
|
171
|
+
print(f"Orbit MMD: {results['orbit']:.6f}")
|
172
|
+
print(f"Spectral MMD: {results['spectre']:.6f}")
|
173
|
+
print(f"Wavelet MMD: {results['wavelet']:.6f}")
|
174
|
+
|
175
|
+
# Ratios compared to training set
|
176
|
+
print(f"Degree ratio: {results['degree_ratio']:.3f}")
|
177
|
+
print(f"Average ratio: {results['average_ratio']:.3f}")
|
178
|
+
```
|
179
|
+
|
180
|
+
## Citing
|
181
|
+
|
182
|
+
If you use this package in your research, please cite the original papers:
|
183
|
+
|
184
|
+
```bibtex
|
185
|
+
@inproceedings{martinkus2022spectre,
|
186
|
+
title={SPECTRE: Spectral Conditioning Helps to Overcome the Expressivity Limits of One-shot Graph Generators},
|
187
|
+
author={Martinkus, Karolis and Loukas, Andreas and Perraudin, Nathanaël and Wattenhofer, Roger},
|
188
|
+
booktitle={International Conference on Machine Learning},
|
189
|
+
pages={15159--15202},
|
190
|
+
year={2022},
|
191
|
+
organization={PMLR}
|
192
|
+
}
|
193
|
+
|
194
|
+
@article{bergmeister2023efficient,
|
195
|
+
title={Efficient and Scalable Graph Generation through Iterative Local Expansion},
|
196
|
+
author={Bergmeister, Andreas and Martinkus, Karolis and Perraudin, Nathanaël and Wattenhofer, Roger},
|
197
|
+
journal={arXiv preprint arXiv:2312.11529},
|
198
|
+
year={2023}
|
199
|
+
}
|
200
|
+
```
|
201
|
+
|
202
|
+
## Dependencies
|
203
|
+
|
204
|
+
- Python ≥ 3.10
|
205
|
+
- NetworkX ≥ 3.4.2
|
206
|
+
- NumPy ≥ 2.2.6
|
207
|
+
- SciPy ≥ 1.15.3
|
208
|
+
- PyGSP ≥ 0.5.1
|
209
|
+
- scikit-learn ≥ 1.7.1
|
210
|
+
- ORCA-graphlets ≥ 0.1.4
|
211
|
+
- PyTorch ≥ 2.3.0
- Requests ≥ 2.32.4
|
212
|
+
|
213
|
+
## Contributing
|
214
|
+
|
215
|
+
Contributions are welcome! Please feel free to submit a Pull Request. For major changes, please open an issue first to discuss what you would like to change.
|
216
|
+
|
217
|
+
## License
|
218
|
+
|
219
|
+
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
|
220
|
+
|
221
|
+
## Acknowledgments
|
222
|
+
|
223
|
+
This package is based on evaluation frameworks developed by:
|
224
|
+
- Karolis Martinkus (SPECTRE paper)
|
225
|
+
- Andreas Bergmeister (Iterative Local Expansion paper)
|
226
|
+
- The original GRAN evaluation codebase
|
227
|
+
- NetworkX and PyGSP communities
|
@@ -0,0 +1,117 @@
|
|
1
|
+
# PyPI Publishing Guide
|
2
|
+
|
3
|
+
This guide explains how to build and publish the `synthetic-graph-benchmarks` package to PyPI.
|
4
|
+
|
5
|
+
## Prerequisites
|
6
|
+
|
7
|
+
1. Install build tools:
|
8
|
+
```bash
|
9
|
+
pip install build twine
|
10
|
+
```
|
11
|
+
|
12
|
+
2. Ensure you have PyPI credentials configured:
|
13
|
+
- Create accounts on [PyPI](https://pypi.org/) and [TestPyPI](https://test.pypi.org/)
|
14
|
+
- Generate API tokens for both
|
15
|
+
- Configure in `~/.pypirc` or use environment variables
|
16
|
+
|
17
|
+
## Building the Package
|
18
|
+
|
19
|
+
1. Clean previous builds:
|
20
|
+
```bash
|
21
|
+
rm -rf build/ dist/ *.egg-info/
|
22
|
+
```
|
23
|
+
|
24
|
+
2. Build the package:
|
25
|
+
```bash
|
26
|
+
python -m build
|
27
|
+
```
|
28
|
+
|
29
|
+
This creates both source distribution (.tar.gz) and wheel (.whl) files in the `dist/` directory.
|
30
|
+
|
31
|
+
## Testing the Build
|
32
|
+
|
33
|
+
1. Install locally to test:
|
34
|
+
```bash
|
35
|
+
pip install dist/synthetic_graph_benchmarks-*.whl
|
36
|
+
```
|
37
|
+
|
38
|
+
2. Run basic tests:
|
39
|
+
```bash
|
40
|
+
python -c "import synthetic_graph_benchmarks; print(synthetic_graph_benchmarks.__version__)"
|
41
|
+
```
|
42
|
+
|
43
|
+
## Publishing to TestPyPI (Recommended First)
|
44
|
+
|
45
|
+
1. Upload to TestPyPI:
|
46
|
+
```bash
|
47
|
+
python -m twine upload --repository testpypi dist/*
|
48
|
+
```
|
49
|
+
|
50
|
+
2. Test installation from TestPyPI:
|
51
|
+
```bash
|
52
|
+
pip install --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple/ synthetic-graph-benchmarks
|
53
|
+
```
|
54
|
+
|
55
|
+
## Publishing to PyPI
|
56
|
+
|
57
|
+
1. Upload to PyPI:
|
58
|
+
```bash
|
59
|
+
python -m twine upload dist/*
|
60
|
+
```
|
61
|
+
|
62
|
+
2. Verify the package is available:
|
63
|
+
```bash
|
64
|
+
pip install synthetic-graph-benchmarks
|
65
|
+
```
|
66
|
+
|
67
|
+
## Version Management
|
68
|
+
|
69
|
+
Before each release:
|
70
|
+
|
71
|
+
1. Update version in `pyproject.toml`
|
72
|
+
2. Update version in `src/synthetic_graph_benchmarks/__init__.py`
|
73
|
+
3. Update CHANGELOG if you have one
|
74
|
+
4. Create a git tag:
|
75
|
+
```bash
|
76
|
+
git tag v0.1.0
|
77
|
+
git push origin v0.1.0
|
78
|
+
```
|
79
|
+
|
80
|
+
## Automated Publishing with GitHub Actions
|
81
|
+
|
82
|
+
Consider setting up GitHub Actions for automated publishing. Create `.github/workflows/publish.yml`:
|
83
|
+
|
84
|
+
```yaml
|
85
|
+
name: Publish to PyPI
|
86
|
+
|
87
|
+
on:
|
88
|
+
release:
|
89
|
+
types: [published]
|
90
|
+
|
91
|
+
jobs:
|
92
|
+
publish:
|
93
|
+
runs-on: ubuntu-latest
|
94
|
+
steps:
|
95
|
+
- uses: actions/checkout@v3
|
96
|
+
- name: Set up Python
|
97
|
+
uses: actions/setup-python@v4
|
98
|
+
with:
|
99
|
+
python-version: '3.10'
|
100
|
+
- name: Install dependencies
|
101
|
+
run: |
|
102
|
+
python -m pip install --upgrade pip
|
103
|
+
pip install build twine
|
104
|
+
- name: Build package
|
105
|
+
run: python -m build
|
106
|
+
- name: Publish to PyPI
|
107
|
+
env:
|
108
|
+
TWINE_USERNAME: __token__
|
109
|
+
TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
|
110
|
+
run: twine upload dist/*
|
111
|
+
```
|
112
|
+
|
113
|
+
## Security Notes
|
114
|
+
|
115
|
+
- Never commit API tokens to version control
|
116
|
+
- Use GitHub Secrets for automated publishing
|
117
|
+
- Consider using trusted publishing (OIDC) for better security
|
@@ -0,0 +1,191 @@
|
|
1
|
+
# Synthetic Graph Benchmarks
|
2
|
+
|
3
|
+
[![PyPI version](https://badge.fury.io/py/synthetic-graph-benchmarks.svg)](https://badge.fury.io/py/synthetic-graph-benchmarks)
|
4
|
+
[![Python 3.10+](https://img.shields.io/badge/python-3.10+-blue.svg)](https://www.python.org/downloads/)
|
5
|
+
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
|
6
|
+
|
7
|
+
A Python package implementing standardized benchmarks for evaluating synthetic graph generation methods, based on the evaluation frameworks introduced in:
|
8
|
+
|
9
|
+
- [**SPECTRE: Spectral Conditioning Helps to Overcome the Expressivity Limits of One-shot Graph Generators**](https://arxiv.org/pdf/2204.01613) (ICML 2022)
|
10
|
+
- [**Efficient and Scalable Graph Generation through Iterative Local Expansion**](https://arxiv.org/html/2312.11529v4) (2023)
|
11
|
+
|
12
|
+
This package provides a unified interface for benchmarking graph generation algorithms against established datasets and metrics used in the graph generation literature.
|
13
|
+
|
14
|
+
## Features
|
15
|
+
|
16
|
+
- **Standardized Datasets**: Access to benchmark datasets including Stochastic Block Model (SBM), Planar graphs, and Tree graphs
|
17
|
+
- **Comprehensive Metrics**: Implementation of key evaluation metrics including:
|
18
|
+
- Degree distribution comparison (MMD)
|
19
|
+
- Clustering coefficient analysis
|
20
|
+
- Orbit count statistics (using ORCA)
|
21
|
+
- Spectral properties analysis
|
22
|
+
- Wavelet coefficient comparison
|
23
|
+
- **Validation Metrics**: Graph-type specific validation (planarity, tree properties, SBM likelihood)
|
24
|
+
- **Reproducible Evaluation**: Consistent benchmarking across different graph generation methods
|
25
|
+
- **Easy Integration**: Simple API for evaluating your own graph generation algorithms
|
26
|
+
|
27
|
+
## Installation
|
28
|
+
|
29
|
+
### From PyPI (recommended)
|
30
|
+
|
31
|
+
```bash
|
32
|
+
pip install synthetic-graph-benchmarks
|
33
|
+
```
|
34
|
+
|
35
|
+
### From Source
|
36
|
+
|
37
|
+
```bash
|
38
|
+
git clone https://github.com/peteole/synthetic_graph_benchmarks.git
|
39
|
+
cd synthetic_graph_benchmarks
|
40
|
+
pip install -e .
|
41
|
+
```
|
42
|
+
|
43
|
+
## Quick Start
|
44
|
+
|
45
|
+
```python
|
46
|
+
import networkx as nx
|
47
|
+
from synthetic_graph_benchmarks import (
|
48
|
+
benchmark_planar_results,
|
49
|
+
benchmark_sbm_results,
|
50
|
+
benchmark_tree_results
|
51
|
+
)
|
52
|
+
|
53
|
+
# Generate some example graphs (replace with your graph generation method)
|
54
|
+
generated_graphs = [nx.erdos_renyi_graph(64, 0.1) for _ in range(20)]
|
55
|
+
|
56
|
+
# Benchmark against planar graph dataset
|
57
|
+
results = benchmark_planar_results(generated_graphs)
|
58
|
+
print(f"Planar accuracy: {results['planar_acc']:.3f}")
|
59
|
+
print(f"Average metric ratio: {results['average_ratio']:.3f}")
|
60
|
+
|
61
|
+
# Benchmark against SBM dataset
|
62
|
+
sbm_results = benchmark_sbm_results(generated_graphs)
|
63
|
+
print(f"SBM accuracy: {sbm_results['sbm_acc']:.3f}")
|
64
|
+
|
65
|
+
# Benchmark against tree dataset
|
66
|
+
tree_results = benchmark_tree_results(generated_graphs)
|
67
|
+
print(f"Tree accuracy: {tree_results['tree_acc']:.3f}")
|
68
|
+
```
|
69
|
+
|
70
|
+
## Datasets
|
71
|
+
|
72
|
+
The package provides access to three standard benchmark datasets:
|
73
|
+
|
74
|
+
### Stochastic Block Model (SBM)
|
75
|
+
- **Size**: 200 graphs
|
76
|
+
- **Properties**: 2-5 communities, 20-40 nodes per community
|
77
|
+
- **Edge probabilities**: 0.3 intra-community, 0.05 inter-community
|
78
|
+
|
79
|
+
### Planar Graphs
|
80
|
+
- **Size**: 200 graphs with 64 nodes each
|
81
|
+
- **Generation**: Delaunay triangulation on random points in unit square
|
82
|
+
- **Properties**: Guaranteed planarity
|
83
|
+
|
84
|
+
### Tree Graphs
|
85
|
+
- **Size**: 200 graphs with 64 nodes each
|
86
|
+
- **Properties**: Connected acyclic graphs (trees)
|
87
|
+
|
88
|
+
## Evaluation Metrics
|
89
|
+
|
90
|
+
### Graph Statistics
|
91
|
+
- **Degree Distribution**: Maximum Mean Discrepancy (MMD) between degree histograms
|
92
|
+
- **Clustering Coefficient**: Local clustering coefficient comparison
|
93
|
+
- **Orbit Counts**: 4-node orbit statistics computed with the ORCA package
|
94
|
+
- **Spectral Properties**: Laplacian eigenvalue distribution analysis
|
95
|
+
- **Wavelet Coefficients**: Graph wavelet signature comparison
|
96
|
+
|
97
|
+
### Validity Metrics
|
98
|
+
- **Planar Accuracy**: Fraction of generated graphs that are planar
|
99
|
+
- **Tree Accuracy**: Fraction of generated graphs that are trees (connected and acyclic)
|
100
|
+
- **SBM Accuracy**: Fraction of generated graphs that are statistically consistent with a fitted stochastic block model
|
101
|
+
|
102
|
+
### Quality Scores
|
103
|
+
- **Uniqueness**: Fraction of non-isomorphic graphs in generated set
|
104
|
+
- **Novelty**: Fraction of generated graphs not isomorphic to training graphs
|
105
|
+
- **Validity-Uniqueness-Novelty (VUN)**: Combined score for overall quality
|
106
|
+
|
107
|
+
## Advanced Usage
|
108
|
+
|
109
|
+
### Custom Evaluation
|
110
|
+
|
111
|
+
```python
|
112
|
+
from synthetic_graph_benchmarks.dataset import Dataset
|
113
|
+
from synthetic_graph_benchmarks.spectre_utils import PlanarSamplingMetrics
|
114
|
+
|
115
|
+
# Load dataset manually
|
116
|
+
dataset = Dataset.load_planar()
|
117
|
+
print(f"Training graphs: {len(dataset.train_graphs)}")
|
118
|
+
print(f"Validation graphs: {len(dataset.val_graphs)}")
|
119
|
+
|
120
|
+
# Use metrics directly
|
121
|
+
metrics = PlanarSamplingMetrics(dataset)
|
122
|
+
test_metrics = metrics.forward(dataset.train_graphs, test=True)
|
123
|
+
results = metrics.forward(generated_graphs, ref_metrics={"test": test_metrics}, test=True)
|
124
|
+
```
|
125
|
+
|
126
|
+
### Accessing Individual Metrics
|
127
|
+
|
128
|
+
```python
|
129
|
+
# Get detailed breakdown of all metrics
|
130
|
+
results = benchmark_planar_results(generated_graphs)
|
131
|
+
|
132
|
+
# Individual metric values
|
133
|
+
print(f"Degree MMD: {results['degree']:.6f}")
|
134
|
+
print(f"Clustering MMD: {results['clustering']:.6f}")
|
135
|
+
print(f"Orbit MMD: {results['orbit']:.6f}")
|
136
|
+
print(f"Spectral MMD: {results['spectre']:.6f}")
|
137
|
+
print(f"Wavelet MMD: {results['wavelet']:.6f}")
|
138
|
+
|
139
|
+
# Ratios compared to training set
|
140
|
+
print(f"Degree ratio: {results['degree_ratio']:.3f}")
|
141
|
+
print(f"Average ratio: {results['average_ratio']:.3f}")
|
142
|
+
```
|
143
|
+
|
144
|
+
## Citing
|
145
|
+
|
146
|
+
If you use this package in your research, please cite the original papers:
|
147
|
+
|
148
|
+
```bibtex
|
149
|
+
@inproceedings{martinkus2022spectre,
|
150
|
+
title={SPECTRE: Spectral Conditioning Helps to Overcome the Expressivity Limits of One-shot Graph Generators},
|
151
|
+
author={Martinkus, Karolis and Loukas, Andreas and Perraudin, Nathanaël and Wattenhofer, Roger},
|
152
|
+
booktitle={International Conference on Machine Learning},
|
153
|
+
pages={15159--15202},
|
154
|
+
year={2022},
|
155
|
+
organization={PMLR}
|
156
|
+
}
|
157
|
+
|
158
|
+
@article{bergmeister2023efficient,
|
159
|
+
title={Efficient and Scalable Graph Generation through Iterative Local Expansion},
|
160
|
+
author={Bergmeister, Andreas and Martinkus, Karolis and Perraudin, Nathanaël and Wattenhofer, Roger},
|
161
|
+
journal={arXiv preprint arXiv:2312.11529},
|
162
|
+
year={2023}
|
163
|
+
}
|
164
|
+
```
|
165
|
+
|
166
|
+
## Dependencies
|
167
|
+
|
168
|
+
- Python ≥ 3.10
|
169
|
+
- NetworkX ≥ 3.4.2
|
170
|
+
- NumPy ≥ 2.2.6
|
171
|
+
- SciPy ≥ 1.15.3
|
172
|
+
- PyGSP ≥ 0.5.1
|
173
|
+
- scikit-learn ≥ 1.7.1
|
174
|
+
- ORCA-graphlets ≥ 0.1.4
|
175
|
+
- PyTorch ≥ 2.3.0
- Requests ≥ 2.32.4
|
176
|
+
|
177
|
+
## Contributing
|
178
|
+
|
179
|
+
Contributions are welcome! Please feel free to submit a Pull Request. For major changes, please open an issue first to discuss what you would like to change.
|
180
|
+
|
181
|
+
## License
|
182
|
+
|
183
|
+
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
|
184
|
+
|
185
|
+
## Acknowledgments
|
186
|
+
|
187
|
+
This package is based on evaluation frameworks developed by:
|
188
|
+
- Karolis Martinkus (SPECTRE paper)
|
189
|
+
- Andreas Bergmeister (Iterative Local Expansion paper)
|
190
|
+
- The original GRAN evaluation codebase
|
191
|
+
- NetworkX and PyGSP communities
|
@@ -0,0 +1,69 @@
|
|
1
|
+
{
|
2
|
+
"cells": [
|
3
|
+
{
|
4
|
+
"cell_type": "code",
|
5
|
+
"execution_count": 1,
|
6
|
+
"id": "82c456e9",
|
7
|
+
"metadata": {},
|
8
|
+
"outputs": [],
|
9
|
+
"source": [
|
10
|
+
"%load_ext autoreload\n",
|
11
|
+
"%autoreload 2"
|
12
|
+
]
|
13
|
+
},
|
14
|
+
{
|
15
|
+
"cell_type": "code",
|
16
|
+
"execution_count": 2,
|
17
|
+
"id": "2300b31f",
|
18
|
+
"metadata": {},
|
19
|
+
"outputs": [],
|
20
|
+
"source": [
|
21
|
+
"from synthetic_graph_benchmarks.dataset import Dataset\n",
|
22
|
+
"\n",
|
23
|
+
"ds_planar = Dataset.load_planar()"
|
24
|
+
]
|
25
|
+
},
|
26
|
+
{
|
27
|
+
"cell_type": "code",
|
28
|
+
"execution_count": 8,
|
29
|
+
"id": "c6854ac2",
|
30
|
+
"metadata": {},
|
31
|
+
"outputs": [
|
32
|
+
{
|
33
|
+
"data": {
|
34
|
+
"text/plain": [
|
35
|
+
"64"
|
36
|
+
]
|
37
|
+
},
|
38
|
+
"execution_count": 8,
|
39
|
+
"metadata": {},
|
40
|
+
"output_type": "execute_result"
|
41
|
+
}
|
42
|
+
],
|
43
|
+
"source": [
|
44
|
+
"ds_planar.train_graphs[1].number_of_nodes()"
|
45
|
+
]
|
46
|
+
}
|
47
|
+
],
|
48
|
+
"metadata": {
|
49
|
+
"kernelspec": {
|
50
|
+
"display_name": "synthetic-graph-benchmarks (3.12.4)",
|
51
|
+
"language": "python",
|
52
|
+
"name": "python3"
|
53
|
+
},
|
54
|
+
"language_info": {
|
55
|
+
"codemirror_mode": {
|
56
|
+
"name": "ipython",
|
57
|
+
"version": 3
|
58
|
+
},
|
59
|
+
"file_extension": ".py",
|
60
|
+
"mimetype": "text/x-python",
|
61
|
+
"name": "python",
|
62
|
+
"nbconvert_exporter": "python",
|
63
|
+
"pygments_lexer": "ipython3",
|
64
|
+
"version": "3.12.4"
|
65
|
+
}
|
66
|
+
},
|
67
|
+
"nbformat": 4,
|
68
|
+
"nbformat_minor": 5
|
69
|
+
}
|