topcup 1.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. topcup-1.0.1/LICENSE.md +41 -0
  2. topcup-1.0.1/PKG-INFO +301 -0
  3. topcup-1.0.1/README.md +276 -0
  4. topcup-1.0.1/pyproject.toml +44 -0
  5. topcup-1.0.1/setup.cfg +4 -0
  6. topcup-1.0.1/src/topcup/__init__.py +5 -0
  7. topcup-1.0.1/src/topcup/cli/__init__.py +0 -0
  8. topcup-1.0.1/src/topcup/cli/cli.py +45 -0
  9. topcup-1.0.1/src/topcup/cli/log.py +11 -0
  10. topcup-1.0.1/src/topcup/cli/main.py +504 -0
  11. topcup-1.0.1/src/topcup/data/__init__.py +0 -0
  12. topcup-1.0.1/src/topcup/data/augmentation.py +102 -0
  13. topcup-1.0.1/src/topcup/data/copick_dataset.py +289 -0
  14. topcup-1.0.1/src/topcup/data/custom_dataset.py +39 -0
  15. topcup-1.0.1/src/topcup/data/utils.py +76 -0
  16. topcup-1.0.1/src/topcup/loss/__init__.py +0 -0
  17. topcup-1.0.1/src/topcup/loss/dense_cross_entropy.py +25 -0
  18. topcup-1.0.1/src/topcup/model.py +305 -0
  19. topcup-1.0.1/src/topcup/modules/__init__.py +0 -0
  20. topcup-1.0.1/src/topcup/modules/unet.py +160 -0
  21. topcup-1.0.1/src/topcup/modules/utils.py +24 -0
  22. topcup-1.0.1/src/topcup/postprocess/__init__.py +0 -0
  23. topcup-1.0.1/src/topcup/postprocess/metric.py +230 -0
  24. topcup-1.0.1/src/topcup/postprocess/simple_pp.py +148 -0
  25. topcup-1.0.1/src/topcup/postprocess/utils.py +254 -0
  26. topcup-1.0.1/src/topcup/utils/__init__.py +0 -0
  27. topcup-1.0.1/src/topcup/utils/ema.py +66 -0
  28. topcup-1.0.1/src/topcup/utils/utils.py +336 -0
  29. topcup-1.0.1/src/topcup.egg-info/PKG-INFO +301 -0
  30. topcup-1.0.1/src/topcup.egg-info/SOURCES.txt +32 -0
  31. topcup-1.0.1/src/topcup.egg-info/dependency_links.txt +1 -0
  32. topcup-1.0.1/src/topcup.egg-info/entry_points.txt +2 -0
  33. topcup-1.0.1/src/topcup.egg-info/requires.txt +14 -0
  34. topcup-1.0.1/src/topcup.egg-info/top_level.txt +1 -0
@@ -0,0 +1,41 @@
1
+ # Legal
2
+
3
+ ## License for the topcup package
4
+
5
+ This package is licensed under the MIT License:
6
+
7
+ ```
8
+ MIT License
9
+
10
+ Copyright (c) 2025 Chan Zuckerberg Initiative
11
+
12
+ Permission is hereby granted, free of charge, to any person obtaining a copy
13
+ of this software and associated documentation files (the "Software"), to deal
14
+ in the Software without restriction, including without limitation the rights
15
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
16
+ copies of the Software, and to permit persons to whom the Software is
17
+ furnished to do so, subject to the following conditions:
18
+
19
+ The above copyright notice and this permission notice shall be included in all
20
+ copies or substantial portions of the Software.
21
+
22
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
25
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
26
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
27
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
28
+ SOFTWARE.
29
+ ```
30
+
31
+ ## License Notice for Dependencies
32
+
33
+ ```
34
+ This repository is licensed under the MIT License. It relies on third-party dependencies that are licensed under permissive open-source licenses. Specifically:
35
+
36
+ - monai is licensed under the Apache License 2.0.
37
+ - pytorch-lightning is licensed under the Apache License 2.0.
38
+
39
+ All dependencies use permissive open-source licenses that are compatible with this project's MIT License. No GPL or other copyleft licensed dependencies are included.
40
+ For specific licensing information about any dependency, please refer to the respective package documentation or repository.
41
+ ```
topcup-1.0.1/PKG-INFO ADDED
@@ -0,0 +1,301 @@
1
+ Metadata-Version: 2.4
2
+ Name: topcup
3
+ Version: 1.0.1
4
+ Summary: An implementation of 2024-2025 Kaggle/CZI cryoET ML challenge winning models
5
+ Author-email: Zhuowen Zhao <kevin.zhao@biohub.org>, Christof Henkel <chenkel@nvidia.com>, Eugene Khvedchenya <ekhvedchenia@nvidia.com>
6
+ Project-URL: Homepage, https://github.com/czimaginginstitute/czii_cryoet_challenge_models
7
+ Requires-Python: >=3.10
8
+ Description-Content-Type: text/markdown
9
+ License-File: LICENSE.md
10
+ Requires-Dist: copick
11
+ Requires-Dist: scikit-learn
12
+ Requires-Dist: zarr==2.18.2
13
+ Requires-Dist: numcodecs==0.11.0
14
+ Requires-Dist: pandas
15
+ Requires-Dist: einops
16
+ Requires-Dist: albumentations==1.4.21
17
+ Requires-Dist: opencv-python==4.12.0.88
18
+ Requires-Dist: timm==1.0.11
19
+ Requires-Dist: torch==2.4.*
20
+ Requires-Dist: torchvision==0.19.*
21
+ Requires-Dist: pytorch-lightning==2.4.*
22
+ Requires-Dist: monai==1.5.0
23
+ Requires-Dist: mrcfile
24
+ Dynamic: license-file
25
+
26
+ # TopCUP: Top CryoET U-Net Picker 🏆🏆🏆
27
+ A re-implementation of the 1st-place winning team's solution [kaggle-cryoet-1st-place-segmentation](https://github.com/ChristofHenkel/kaggle-cryoet-1st-place-segmentation/tree/main) in pytorch-lightning and copick.
28
+
29
+
30
+ ## Performance
31
+ We were able to train 3 models (resnet34 backbones) with 6, 12, and 24 tomograms respectively, and achieved an ensemble score of 0.774. This is comparable to the original 1st-place submission on the [kaggle-cryoet-leader-board](https://www.kaggle.com/competitions/czii-cryo-et-object-identification/leaderboard).
32
+ <p align="center">
33
+ <img src="assets/scores.png" alt="F4 score of each protein complex using different training set sizes">
34
+ </p>
35
+
36
+
37
+
38
+ ## Installation
39
+ ```
40
+ pip install git+https://github.com/czimaginginstitute/czii_cryoet_mlchallenge_winning_models.git
41
+ ```
42
+
43
+ Or cd into the root folder, then
44
+ ```
45
+ pip install -e .
46
+ ```
47
+
48
+ ## 🚀 Quickstart
49
+ You can explore and train TopCUP models using the provided example notebooks:
50
+ - [Quickstart notebook](https://github.com/czimaginginstitute/czii_cryoet_mlchallenge_winning_models/blob/main/notebooks/Quickstart_TopCUP.ipynb): Run inference with pretrained TopCUP models.
51
+ - [Training notebook](https://github.com/czimaginginstitute/czii_cryoet_mlchallenge_winning_models/blob/main/notebooks/Tutorial_training_TopCUP.ipynb): Learn how to train or fine-tune a TopCUP model using your own tomograms.
52
+
53
+
54
+ ## Copick configuration file
55
+ The copick data ingestion can automatically populate many important internal variables from the config file. In particular, the per-class parameters used during training and evaluation, such as `class_loss_weight`, `score_threshold`, and `score_weight`, are stored under the `metadata` key of each pickable object in the configuration file.
56
+
57
+ - `class_loss_weight`: weight of each class in the DenseCrossEntropy loss
58
+ - `score_threshold`: per-class threshold on the final probability; only picks scoring above this value are kept, which reduces false positives
59
+ - `score_weight`: weighting each class in the F beta score
60
+
61
+ An example of copick config file is shown below:
62
+ ```
63
+ {
64
+ "name": "Phantom Dataset",
65
+ "description": "CZII ML Challenge Training dataset",
66
+ "version": "1.0.1",
67
+ "pickable_objects": [
68
+ {
69
+ "name": "apo-ferritin",
70
+ "is_particle": true,
71
+ "pdb_id": "4V1W",
72
+ "label": 1,
73
+ "color": [ 0, 117, 220, 255],
74
+ "radius": 60,
75
+ "map_threshold": 0.0418,
76
+ "metadata": {
77
+ "score_weight": 1,
78
+ "score_threshold": 0.16,
79
+ "class_loss_weight": 256
80
+ }
81
+ },
82
+ {
83
+ "name": "beta-amylase",
84
+ "is_particle": true,
85
+ "pdb_id": "1FA2",
86
+ "label": 2,
87
+ "color": [153, 63, 0, 255],
88
+ "radius": 65,
89
+ "map_threshold": 0.035,
90
+ "metadata": {
91
+ "score_weight": 0,
92
+ "score_threshold": 0.25,
93
+ "class_loss_weight": 256
94
+ }
95
+ },
96
+ {
97
+ "name": "beta-galactosidase",
98
+ "is_particle": true,
99
+ "pdb_id": "6X1Q",
100
+ "label": 3,
101
+ "color": [ 76, 0, 92, 255],
102
+ "radius": 90,
103
+ "map_threshold": 0.0578,
104
+ "metadata": {
105
+ "score_weight": 2,
106
+ "score_threshold": 0.13,
107
+ "class_loss_weight": 256
108
+ }
109
+ },
110
+ {
111
+ "name": "ribosome",
112
+ "is_particle": true,
113
+ "pdb_id": "6EK0",
114
+ "label": 4,
115
+ "color": [ 0, 92, 49, 255],
116
+ "radius": 150,
117
+ "map_threshold": 0.0374,
118
+ "metadata": {
119
+ "score_weight": 1,
120
+ "score_threshold": 0.19,
121
+ "class_loss_weight": 256
122
+ }
123
+ },
124
+ {
125
+ "name": "thyroglobulin",
126
+ "is_particle": true,
127
+ "pdb_id": "6SCJ",
128
+ "label": 5,
129
+ "color": [ 43, 206, 72, 255],
130
+ "radius": 130,
131
+ "map_threshold": 0.0278,
132
+ "metadata": {
133
+ "score_weight": 2,
134
+ "score_threshold": 0.18,
135
+ "class_loss_weight": 256
136
+ }
137
+ },
138
+ {
139
+ "name": "virus-like-particle",
140
+ "is_particle": true,
141
+ "label": 6,
142
+ "color": [255, 204, 153, 255],
143
+ "radius": 135,
144
+ "map_threshold": 0.201,
145
+ "metadata": {
146
+ "score_weight": 1,
147
+ "score_threshold": 0.5,
148
+ "class_loss_weight": 256
149
+ }
150
+ }
151
+ ],
152
+ "config_type": "filesystem",
153
+ "overlay_root": "local:///PATH/TO/EXTRACTED/PROJECT/",
154
+ "static_root": "local:///PATH/TO/EXTRACTED/PROJECT/"
155
+ }
156
+ ```
157
+
158
+ ## Commands
159
+ After installation, use the command `topcup --help` to show all the available subcommands:
160
+ ```
161
+ Usage: topcup [OPTIONS] COMMAND [ARGS]...
162
+
163
+ topcup: a top crypet u-net picker
164
+
165
+ Options:
166
+ -v, --verbose Increase verbosity (-v, -vv).
167
+ --version Show the version and exit.
168
+ -h, --help Show this message and exit.
169
+
170
+ Commands:
171
+ inference
172
+ train
173
+ score
174
+ ```
175
+
176
+ ### Training from scratch
177
+ Use `topcup train --help` to see all the options for training. The code supports loading data via copick. An example training command is shown below.
178
+ ```
179
+ topcup train \
180
+ --copick_config COPICK_CONFIG_FILE \
181
+ --train_run_names TS_6_4,TS_6_6,TS_69_2,TS_73_6,TS_86_3,TS_99_9 \
182
+ --val_run_names TS_5_4 \
183
+ --tomo_type denoised \
184
+ --user_id COPICK_USER_ID \
185
+ --pixelsize 10 \
186
+ --batch_size 16 \
187
+ --n_aug 1112 \
188
+ --output_dir OUTPUT_PATH \
189
+ --logger_version 1 \
190
+ --epochs 100
191
+ ```
192
+
193
+ ### Re-training from a checkpoint for the same dataset
194
+ ```
195
+ topcup train \
196
+ --copick_config COPICK_CONFIG_FILE \
197
+ --train_run_names TS_6_4,TS_6_6,TS_69_2,TS_73_6,TS_86_3,TS_99_9 \
198
+ --val_run_names TS_5_4 \
199
+ --tomo_type denoised \
200
+ --user_id COPICK_USER_ID \
201
+ --pixelsize 10 \
202
+ --batch_size 16 \
203
+ --n_aug 1112 \
204
+ --output_dir OUTPUT_PATH \
205
+ --logger_version 1 \
206
+ --epochs 100 \
207
+ --pretrained_weight CHECKPOINT_PATH
208
+ ```
209
+
210
+ ### *Subset transfer learning: re-training from a checkpoint for a different dataset
211
+ **Subset transfer learning** involves loading a checkpoint from a pretrained model and fine-tuning it on a new dataset that includes only a subset of the original classes. To do this correctly, it’s important to know which classes and the corresponding `channel_id` the original model was trained on. This information can be accessed by loading the checkpoint and inspecting the `model.description` attribute. The `copick_config` used for fine-tuning should include the same pickable objects as the original training setup, with updated class weights and thresholds as needed for the new task.
212
+
213
+ ```
214
+ >>> from topcup.model import SegNet
215
+ >>> model = SegNet.load_from_checkpoint('CHECKPOINT_PATH')
216
+ >>> print(model.description)
217
+ SegNet model predicting 6 classes
218
+
219
+ Class details:
220
+ {
221
+ "apo-ferritin": {
222
+ "channel_id": 0,
223
+ "radius": 60.0,
224
+ "score_threshold": 0.16,
225
+ "score_weight": 1
226
+ },
227
+ "beta-amylase": {
228
+ "channel_id": 1,
229
+ "radius": 65.0,
230
+ "score_threshold": 0.25,
231
+ "score_weight": 0
232
+ },
233
+ "beta-galactosidase": {
234
+ "channel_id": 2,
235
+ "radius": 90.0,
236
+ "score_threshold": 0.13,
237
+ "score_weight": 2
238
+ },
239
+ "ribosome": {
240
+ "channel_id": 3,
241
+ "radius": 150.0,
242
+ "score_threshold": 0.19,
243
+ "score_weight": 1
244
+ },
245
+ "thyroglobulin": {
246
+ "channel_id": 4,
247
+ "radius": 130.0,
248
+ "score_threshold": 0.18,
249
+ "score_weight": 2
250
+ },
251
+ "virus-like-particle": {
252
+ "channel_id": 5,
253
+ "radius": 135.0,
254
+ "score_threshold": 0.5,
255
+ "score_weight": 1
256
+ }
257
+ }
258
+ ```
259
+
260
+
261
+ ### Inference
262
+ Use the command `topcup inference --help` to see all the options for the inference pipeline. Below is an example inference command using PyTorch checkpoints (either a single checkpoint file path or multiple folder paths, each containing multiple checkpoints); the `--pattern` option selects checkpoints by pattern matching.
263
+
264
+ ```
265
+ topcup inference \
266
+ --copick_config copick_config.json \
267
+ --run_names TS_100_4,TS_100_6,TS_100_7,TS_100_9 \
268
+ --tomo_type denoised \
269
+ --user_id COPICK_USER_ID \
270
+ --pretrained_weights FOLDER_PATH1/checkpoints/,FOLDER_PATH2/checkpoints/,FOLDER_PATH3/checkpoints/ \
271
+ --batch_size 16 \
272
+ --output_dir OUTPUT_PATH \
273
+ --pattern *v1.ckpt
274
+ ```
275
+
276
+ ### Score calculation
277
+ Use the command `topcup score --help` to see all the options for calculating the F-beta score of the predictions:
278
+ ```
279
+ Usage: topcup score [OPTIONS]
280
+
281
+ Options:
282
+ -c, --copick_config FILE copick config file path [required]
283
+ -g, --gt FILE Ground truth picks csv file path [required]
284
+ -s, --submission FILE Submission picks csv file path [required]
285
+ -h, --help Show this message and exit.
286
+ ```
287
+
288
+
289
+ ## 📚 Documentation
290
+
291
+ Coming soon.
292
+
293
+ ## 🤝 Contributor covenant code of conduct
294
+
295
+ This project adheres to the Contributor Covenant code of conduct. By participating, you are expected to uphold this code. Please report unacceptable behavior to opensource@chanzuckerberg.com.
296
+
297
+ Responsible Use: We are committed to advancing the responsible development and use of artificial intelligence. Please follow our [Acceptable Use Policy](https://virtualcellmodels.cziscience.com/acceptable-use-policy) when engaging with the model.
298
+
299
+ ## 🔒 Security
300
+
301
+ If you believe you have found a security issue, please responsibly disclose by contacting us at security@chanzuckerberg.com.
topcup-1.0.1/README.md ADDED
@@ -0,0 +1,276 @@
1
+ # TopCUP: Top CryoET U-Net Picker 🏆🏆🏆
2
+ A re-implementation of the 1st-place winning team's solution [kaggle-cryoet-1st-place-segmentation](https://github.com/ChristofHenkel/kaggle-cryoet-1st-place-segmentation/tree/main) in pytorch-lightning and copick.
3
+
4
+
5
+ ## Performance
6
+ We were able to train 3 models (resnet34 backbones) with 6, 12, and 24 tomograms respectively, and achieved an ensemble score of 0.774. This is comparable to the original 1st-place submission on the [kaggle-cryoet-leader-board](https://www.kaggle.com/competitions/czii-cryo-et-object-identification/leaderboard).
7
+ <p align="center">
8
+ <img src="assets/scores.png" alt="F4 score of each protein complex using different training set sizes">
9
+ </p>
10
+
11
+
12
+
13
+ ## Installation
14
+ ```
15
+ pip install git+https://github.com/czimaginginstitute/czii_cryoet_mlchallenge_winning_models.git
16
+ ```
17
+
18
+ Or cd into the root folder, then
19
+ ```
20
+ pip install -e .
21
+ ```
22
+
23
+ ## 🚀 Quickstart
24
+ You can explore and train TopCUP models using the provided example notebooks:
25
+ - [Quickstart notebook](https://github.com/czimaginginstitute/czii_cryoet_mlchallenge_winning_models/blob/main/notebooks/Quickstart_TopCUP.ipynb): Run inference with pretrained TopCUP models.
26
+ - [Training notebook](https://github.com/czimaginginstitute/czii_cryoet_mlchallenge_winning_models/blob/main/notebooks/Tutorial_training_TopCUP.ipynb): Learn how to train or fine-tune a TopCUP model using your own tomograms.
27
+
28
+
29
+ ## Copick configuration file
30
+ The copick data ingestion can automatically populate many important internal variables from the config file. In particular, the per-class parameters used during training and evaluation, such as `class_loss_weight`, `score_threshold`, and `score_weight`, are stored under the `metadata` key of each pickable object in the configuration file.
31
+
32
+ - `class_loss_weight`: weight of each class in the DenseCrossEntropy loss
33
+ - `score_threshold`: per-class threshold on the final probability; only picks scoring above this value are kept, which reduces false positives
34
+ - `score_weight`: weighting each class in the F beta score
35
+
36
+ An example of copick config file is shown below:
37
+ ```
38
+ {
39
+ "name": "Phantom Dataset",
40
+ "description": "CZII ML Challenge Training dataset",
41
+ "version": "1.0.1",
42
+ "pickable_objects": [
43
+ {
44
+ "name": "apo-ferritin",
45
+ "is_particle": true,
46
+ "pdb_id": "4V1W",
47
+ "label": 1,
48
+ "color": [ 0, 117, 220, 255],
49
+ "radius": 60,
50
+ "map_threshold": 0.0418,
51
+ "metadata": {
52
+ "score_weight": 1,
53
+ "score_threshold": 0.16,
54
+ "class_loss_weight": 256
55
+ }
56
+ },
57
+ {
58
+ "name": "beta-amylase",
59
+ "is_particle": true,
60
+ "pdb_id": "1FA2",
61
+ "label": 2,
62
+ "color": [153, 63, 0, 255],
63
+ "radius": 65,
64
+ "map_threshold": 0.035,
65
+ "metadata": {
66
+ "score_weight": 0,
67
+ "score_threshold": 0.25,
68
+ "class_loss_weight": 256
69
+ }
70
+ },
71
+ {
72
+ "name": "beta-galactosidase",
73
+ "is_particle": true,
74
+ "pdb_id": "6X1Q",
75
+ "label": 3,
76
+ "color": [ 76, 0, 92, 255],
77
+ "radius": 90,
78
+ "map_threshold": 0.0578,
79
+ "metadata": {
80
+ "score_weight": 2,
81
+ "score_threshold": 0.13,
82
+ "class_loss_weight": 256
83
+ }
84
+ },
85
+ {
86
+ "name": "ribosome",
87
+ "is_particle": true,
88
+ "pdb_id": "6EK0",
89
+ "label": 4,
90
+ "color": [ 0, 92, 49, 255],
91
+ "radius": 150,
92
+ "map_threshold": 0.0374,
93
+ "metadata": {
94
+ "score_weight": 1,
95
+ "score_threshold": 0.19,
96
+ "class_loss_weight": 256
97
+ }
98
+ },
99
+ {
100
+ "name": "thyroglobulin",
101
+ "is_particle": true,
102
+ "pdb_id": "6SCJ",
103
+ "label": 5,
104
+ "color": [ 43, 206, 72, 255],
105
+ "radius": 130,
106
+ "map_threshold": 0.0278,
107
+ "metadata": {
108
+ "score_weight": 2,
109
+ "score_threshold": 0.18,
110
+ "class_loss_weight": 256
111
+ }
112
+ },
113
+ {
114
+ "name": "virus-like-particle",
115
+ "is_particle": true,
116
+ "label": 6,
117
+ "color": [255, 204, 153, 255],
118
+ "radius": 135,
119
+ "map_threshold": 0.201,
120
+ "metadata": {
121
+ "score_weight": 1,
122
+ "score_threshold": 0.5,
123
+ "class_loss_weight": 256
124
+ }
125
+ }
126
+ ],
127
+ "config_type": "filesystem",
128
+ "overlay_root": "local:///PATH/TO/EXTRACTED/PROJECT/",
129
+ "static_root": "local:///PATH/TO/EXTRACTED/PROJECT/"
130
+ }
131
+ ```
132
+
133
+ ## Commands
134
+ After installation, use the command `topcup --help` to show all the available subcommands:
135
+ ```
136
+ Usage: topcup [OPTIONS] COMMAND [ARGS]...
137
+
138
+ topcup: a top crypet u-net picker
139
+
140
+ Options:
141
+ -v, --verbose Increase verbosity (-v, -vv).
142
+ --version Show the version and exit.
143
+ -h, --help Show this message and exit.
144
+
145
+ Commands:
146
+ inference
147
+ train
148
+ score
149
+ ```
150
+
151
+ ### Training from scratch
152
+ Use `topcup train --help` to see all the options for training. The code supports loading data via copick. An example training command is shown below.
153
+ ```
154
+ topcup train \
155
+ --copick_config COPICK_CONFIG_FILE \
156
+ --train_run_names TS_6_4,TS_6_6,TS_69_2,TS_73_6,TS_86_3,TS_99_9 \
157
+ --val_run_names TS_5_4 \
158
+ --tomo_type denoised \
159
+ --user_id COPICK_USER_ID \
160
+ --pixelsize 10 \
161
+ --batch_size 16 \
162
+ --n_aug 1112 \
163
+ --output_dir OUTPUT_PATH \
164
+ --logger_version 1 \
165
+ --epochs 100
166
+ ```
167
+
168
+ ### Re-training from a checkpoint for the same dataset
169
+ ```
170
+ topcup train \
171
+ --copick_config COPICK_CONFIG_FILE \
172
+ --train_run_names TS_6_4,TS_6_6,TS_69_2,TS_73_6,TS_86_3,TS_99_9 \
173
+ --val_run_names TS_5_4 \
174
+ --tomo_type denoised \
175
+ --user_id COPICK_USER_ID \
176
+ --pixelsize 10 \
177
+ --batch_size 16 \
178
+ --n_aug 1112 \
179
+ --output_dir OUTPUT_PATH \
180
+ --logger_version 1 \
181
+ --epochs 100 \
182
+ --pretrained_weight CHECKPOINT_PATH
183
+ ```
184
+
185
+ ### *Subset transfer learning: re-training from a checkpoint for a different dataset
186
+ **Subset transfer learning** involves loading a checkpoint from a pretrained model and fine-tuning it on a new dataset that includes only a subset of the original classes. To do this correctly, it’s important to know which classes and the corresponding `channel_id` the original model was trained on. This information can be accessed by loading the checkpoint and inspecting the `model.description` attribute. The `copick_config` used for fine-tuning should include the same pickable objects as the original training setup, with updated class weights and thresholds as needed for the new task.
187
+
188
+ ```
189
+ >>> from topcup.model import SegNet
190
+ >>> model = SegNet.load_from_checkpoint('CHECKPOINT_PATH')
191
+ >>> print(model.description)
192
+ SegNet model predicting 6 classes
193
+
194
+ Class details:
195
+ {
196
+ "apo-ferritin": {
197
+ "channel_id": 0,
198
+ "radius": 60.0,
199
+ "score_threshold": 0.16,
200
+ "score_weight": 1
201
+ },
202
+ "beta-amylase": {
203
+ "channel_id": 1,
204
+ "radius": 65.0,
205
+ "score_threshold": 0.25,
206
+ "score_weight": 0
207
+ },
208
+ "beta-galactosidase": {
209
+ "channel_id": 2,
210
+ "radius": 90.0,
211
+ "score_threshold": 0.13,
212
+ "score_weight": 2
213
+ },
214
+ "ribosome": {
215
+ "channel_id": 3,
216
+ "radius": 150.0,
217
+ "score_threshold": 0.19,
218
+ "score_weight": 1
219
+ },
220
+ "thyroglobulin": {
221
+ "channel_id": 4,
222
+ "radius": 130.0,
223
+ "score_threshold": 0.18,
224
+ "score_weight": 2
225
+ },
226
+ "virus-like-particle": {
227
+ "channel_id": 5,
228
+ "radius": 135.0,
229
+ "score_threshold": 0.5,
230
+ "score_weight": 1
231
+ }
232
+ }
233
+ ```
234
+
235
+
236
+ ### Inference
237
+ Use the command `topcup inference --help` to see all the options for the inference pipeline. Below is an example inference command using PyTorch checkpoints (either a single checkpoint file path or multiple folder paths, each containing multiple checkpoints); the `--pattern` option selects checkpoints by pattern matching.
238
+
239
+ ```
240
+ topcup inference \
241
+ --copick_config copick_config.json \
242
+ --run_names TS_100_4,TS_100_6,TS_100_7,TS_100_9 \
243
+ --tomo_type denoised \
244
+ --user_id COPICK_USER_ID \
245
+ --pretrained_weights FOLDER_PATH1/checkpoints/,FOLDER_PATH2/checkpoints/,FOLDER_PATH3/checkpoints/ \
246
+ --batch_size 16 \
247
+ --output_dir OUTPUT_PATH \
248
+ --pattern *v1.ckpt
249
+ ```
250
+
251
+ ### Score calculation
252
+ Use the command `topcup score --help` to see all the options for calculating the F-beta score of the predictions:
253
+ ```
254
+ Usage: topcup score [OPTIONS]
255
+
256
+ Options:
257
+ -c, --copick_config FILE copick config file path [required]
258
+ -g, --gt FILE Ground truth picks csv file path [required]
259
+ -s, --submission FILE Submission picks csv file path [required]
260
+ -h, --help Show this message and exit.
261
+ ```
262
+
263
+
264
+ ## 📚 Documentation
265
+
266
+ Coming soon.
267
+
268
+ ## 🤝 Contributor covenant code of conduct
269
+
270
+ This project adheres to the Contributor Covenant code of conduct. By participating, you are expected to uphold this code. Please report unacceptable behavior to opensource@chanzuckerberg.com.
271
+
272
+ Responsible Use: We are committed to advancing the responsible development and use of artificial intelligence. Please follow our [Acceptable Use Policy](https://virtualcellmodels.cziscience.com/acceptable-use-policy) when engaging with the model.
273
+
274
+ ## 🔒 Security
275
+
276
+ If you believe you have found a security issue, please responsibly disclose by contacting us at security@chanzuckerberg.com.
@@ -0,0 +1,44 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61.0"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "topcup"
7
+ version = "1.0.1"
8
+ description = "An implementation of 2024-2025 Kaggle/CZI cryoET ML challenge winning models"
9
+ authors = [
10
+ { name="Zhuowen Zhao", email="kevin.zhao@biohub.org" },
11
+ { name="Christof Henkel", email="chenkel@nvidia.com" },
12
+ { name="Eugene Khvedchenya", email="ekhvedchenia@nvidia.com" },
13
+ ]
14
+ readme = "README.md"
15
+ requires-python = ">=3.10"
16
+ dependencies = [
17
+ "copick",
18
+ "scikit-learn",
19
+ "zarr==2.18.2",
20
+ "numcodecs==0.11.0",
21
+ "pandas",
22
+ "einops",
23
+ "albumentations==1.4.21",
24
+ "opencv-python==4.12.0.88",
25
+ "timm==1.0.11",
26
+ "torch==2.4.*",
27
+ "torchvision==0.19.*",
28
+ "pytorch-lightning==2.4.*",
29
+ "monai==1.5.0",
30
+ "mrcfile"
31
+ ]
32
+
33
+ [project.urls]
34
+ Homepage = "https://github.com/czimaginginstitute/czii_cryoet_challenge_models"
35
+
36
+ [project.scripts]
37
+ topcup = "topcup.cli.cli:cli"
38
+
39
+ [tool.setuptools]
40
+ package-dir = {"" = "src"}
41
+
42
+ [tool.setuptools.packages.find]
43
+ where = ["src"]
44
+ include = ["topcup*"]
topcup-1.0.1/setup.cfg ADDED
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,5 @@
1
+ """
2
+ TopCUP: Top Cryoet U-net Picker
3
+ """
4
+
5
+ __version__ = '0.0.6'
File without changes