youvegotdata 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,103 @@
1
name: Publish to PyPI

on:
  release:
    types: [published]

  # Allow manual trigger for testing the workflow
  workflow_dispatch:

permissions:
  contents: read
  # Required for PyPI trusted publishing (OIDC).
  id-token: write

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

env:
  UV_SYSTEM_PYTHON: true

jobs:
  # ---------------------------------------------------------------------------
  # Build sdist and wheel
  # ---------------------------------------------------------------------------
  build:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - uses: actions/setup-python@v5
        with:
          python-version: "3.13"

      - uses: astral-sh/setup-uv@v6

      - name: Install build tools
        run: uv pip install build

      - name: Build sdist and wheel
        run: python -m build

      - name: List built artifacts
        run: ls -lh dist/

      - name: Upload build artifacts
        uses: actions/upload-artifact@v4
        with:
          name: dist
          path: dist/
          if-no-files-found: error

  # ---------------------------------------------------------------------------
  # Run the test suite against the built wheel (not the source tree)
  # ---------------------------------------------------------------------------
  test-built-package:
    needs: build
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest, macos-latest, windows-latest]
        python-version: ["3.11", "3.12", "3.13"]
    steps:
      - uses: actions/checkout@v4

      - uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - uses: astral-sh/setup-uv@v6

      - name: Download build artifacts
        uses: actions/download-artifact@v4
        with:
          name: dist
          path: dist/

      - name: Install wheel and test dependencies
        shell: bash
        run: |
          uv pip install dist/*.whl
          uv pip install pytest pytest-cov pytest-xdist pydantic

      - name: Run tests
        run: python -m pytest tests/ -v

  # ---------------------------------------------------------------------------
  # Publish to PyPI
  # ---------------------------------------------------------------------------
  publish-pypi:
    needs: [build, test-built-package]
    runs-on: ubuntu-latest
    environment: pypi
    # Gate publishing on an actual release: a manual workflow_dispatch run
    # still exercises build and tests, but must not upload to PyPI.
    if: github.event_name == 'release'
    steps:
      - name: Download build artifacts
        uses: actions/download-artifact@v4
        with:
          name: dist
          path: dist/

      - name: Publish to PyPI
        uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,60 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # Distribution / packaging
7
+ .Python
8
+ build/
9
+ develop-eggs/
10
+ dist/
11
+ downloads/
12
+ eggs/
13
+ .eggs/
14
+ lib/
15
+ lib64/
16
+ parts/
17
+ sdist/
18
+ var/
19
+ wheels/
20
+ share/python-wheels/
21
+ *.egg-info/
22
+ .installed.cfg
23
+ *.egg
24
+ MANIFEST
25
+
26
+ # Installer logs
27
+ pip-log.txt
28
+ pip-delete-this-directory.txt
29
+
30
+ # Unit test / coverage reports
31
+ htmlcov/
32
+ .tox/
33
+ .nox/
34
+ .coverage
35
+ .coverage.*
36
+ .cache
37
+ nosetests.xml
38
+ coverage.xml
39
+ *.cover
40
+ *.py,cover
41
+ .hypothesis/
42
+ .pytest_cache/
43
+ cover/
44
+
45
+ # Jupyter Notebook
46
+ .ipynb_checkpoints
47
+
48
+ # Environments/configuration
49
+ config.ini
50
+
51
+ # Data and Reference Files
52
+ *.npy
53
+ *.npz
54
+ *.pth
55
+
56
+ # Temporary files
57
+ *.bak*
58
+ *.sav*
59
+ *.tmp*
60
+ *.out*
@@ -0,0 +1,47 @@
1
+ Metadata-Version: 2.4
2
+ Name: youvegotdata
3
+ Version: 1.0.0
4
+ Summary: Send new file notifications
5
+ Author-email: Jim Fluke <james.fluke@colostate.edu>
6
+ Requires-Python: >=3.8
7
+ Description-Content-Type: text/markdown
8
+ Requires-Dist: pika
9
+ Provides-Extra: test
10
+ Requires-Dist: pytest; extra == "test"
11
+
12
+ # youvegotdata
13
+
14
+ Uses RabbitMQ to send new file notifications, with the ultimate purpose of
15
+ getting the file metadata into the Data Inventory Database.
16
+
17
+ The "producer" `youvegotdata.py` will usually be called by the CIRA data
18
+ ingest scripts when a new file is added to the CIRA data stores, and will send
19
+ a message through RabbitMQ to the consumers with the file's metadata.
20
+
21
+ Message "consumers" will be running to receive the file metadata and insert it
22
+ into the database. It is expected that multiple consumer processes will be
23
+ accepting messages in RabbitMQ's "fair dispatch" configuration. A given
24
+ notification will be received by one consumer.
25
+
26
+ ## Running youvegotdata.py
27
+ This must be run in a Python environment that includes `pika` - for connecting
28
+ to RabbitMQ - and other needed packages. The `environ-3.8.yml` file in this
29
+ repository can be used to create a workable conda environment. Setting one up
30
+ using `pip` will certainly also work. Python 3.8 is the minimum version needed
31
+ to run the script. Higher versions should work.
32
+
33
+ Copy the template-config.ini file to config.ini and edit the config.ini as
34
+ described inside that file.
35
+ Run the code with:
36
+ ```
37
+ python youvegotdata.py [-h] [-v] [-p PRODUCT] [-r VERSION] [-s START_TIME] [-e END_TIME] [-l LENGTH] [-c CHECKSUM] [-t CHECKSUM_TYPE] filepath
38
+ ```
39
+ Run this with the -h (--help) argument to see the available flagged arguments.
40
+
41
+ This will usually be run with just the `filepath` argument. An example is:
42
+ ```
43
+ python youvegotdata/youvegotdata.py /full/path/to/local/file/data_file.hdf
44
+ ```
45
+ If run from a local repository of this project.
46
+
47
+ The `filepath` file must exist on the local machine.
@@ -0,0 +1,36 @@
1
+ # youvegotdata
2
+
3
+ Uses RabbitMQ to send new file notifications, with the ultimate purpose of
4
+ getting the file metadata into the Data Inventory Database.
5
+
6
+ The "producer" `youvegotdata.py` will usually be called by the CIRA data
7
+ ingest scripts when a new file is added to the CIRA data stores, and will send
8
+ a message through RabbitMQ to the consumers with the file's metadata.
9
+
10
+ Message "consumers" will be running to receive the file metadata and insert it
11
+ into the database. It is expected that multiple consumer processes will be
12
+ accepting messages in RabbitMQ's "fair dispatch" configuration. A given
13
+ notification will be received by one consumer.
14
+
15
+ ## Running youvegotdata.py
16
+ This must be run in a Python environment that includes `pika` - for connecting
17
+ to RabbitMQ - and other needed packages. The `environ-3.8.yml` file in this
18
+ repository can be used to create a workable conda environment. Setting one up
19
+ using `pip` will certainly also work. Python 3.8 is the minimum version needed
20
+ to run the script. Higher versions should work.
21
+
22
+ Copy the template-config.ini file to config.ini and edit the config.ini as
23
+ described inside that file.
24
+ Run the code with:
25
+ ```
26
+ python youvegotdata.py [-h] [-v] [-p PRODUCT] [-r VERSION] [-s START_TIME] [-e END_TIME] [-l LENGTH] [-c CHECKSUM] [-t CHECKSUM_TYPE] filepath
27
+ ```
28
+ Run this with the -h (--help) argument to see the available flagged arguments.
29
+
30
+ This will usually be run with just the `filepath` argument. An example is:
31
+ ```
32
+ python youvegotdata/youvegotdata.py /full/path/to/local/file/data_file.hdf
33
+ ```
34
+ If run from a local repository of this project.
35
+
36
+ The `filepath` file must exist on the local machine.
@@ -0,0 +1,8 @@
1
+ name: python3.8
2
+ channels:
3
+ - conda-forge
4
+ dependencies:
5
+ - python=3.8
6
+ - pika
7
+ - pytest
8
+ - black
@@ -0,0 +1,21 @@
1
+ # https://packaging.python.org/en/latest/guides/writing-pyproject-toml/
2
+ # Choosing a build backend for your Python package
3
+ [build-system]
4
+ requires = ["setuptools", "setuptools-scm"]
5
+ build-backend = "setuptools.build_meta"
6
+
7
+ [project]
8
+ authors = [
9
+ {name = "Jim Fluke", email = "james.fluke@colostate.edu" }
10
+ ]
11
+ name = "youvegotdata" # REQUIRED
12
+ version = "1.0.0"
13
+ description = "Send new file notifications"
14
+ readme = "README.md"
15
+
16
+ requires-python = ">=3.8"
17
+
18
+ dependencies = ["pika"]
19
+
20
+ [project.optional-dependencies]
21
+ test = ["pytest"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,4 @@
1
+ # Copy to config.ini and customize with your values.
2
+
3
+ [Settings]
4
+ RMQ_HOST = <host of the RabbitMQ server>
File without changes
@@ -0,0 +1,307 @@
1
+ """Unit tests for youvegotdata.youvegotdata."""
2
+
3
+ import configparser
4
+ import io
5
+ import json
6
+ import logging
7
+ from unittest.mock import MagicMock, mock_open, patch
8
+
9
+ import pytest
10
+
11
+ from youvegotdata.youvegotdata import (
12
+ parse_mountinfo,
13
+ parse_mountinfo_alike,
14
+ produce_notification,
15
+ resolve_data_store,
16
+ )
17
+
18
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

# A realistic /proc/self/mountinfo line for a local ext4 filesystem.
# Format: mount_id parent_id major:minor root mount_point mount_options
# [optional-fields] - filesystem_type mount_source super_options
LOCAL_MOUNTINFO_LINE = (
    "23 1 8:1 / /data rw,relatime shared:1 - ext4 /dev/sda1 rw,errors=remount-ro"
)

# An NFS mount where mount_source contains a host:path pair.
NFS_MOUNTINFO_LINE = (
    "42 1 0:35 / /mnt/nfs rw,relatime shared:2 - nfs4 nfsserver:/exports rw,vers=4"
)

# The root mount (should be skipped by resolve_data_store).
ROOT_MOUNTINFO_LINE = (
    "1 0 8:0 / / rw,relatime shared:0 - ext4 /dev/sda rw"
)


def _lines(*lines):
    """Return a file-like object containing the given lines.

    Each argument becomes one newline-terminated line in the StringIO,
    mimicking reading a real mountinfo file.
    """
    return io.StringIO("\n".join(lines) + "\n")
43
+
44
+
45
+ # ---------------------------------------------------------------------------
46
+ # parse_mountinfo_alike
47
+ # ---------------------------------------------------------------------------
48
+
49
+
50
class TestParseMountinfoAlike:
    """Tests for parse_mountinfo_alike, the pure parser for mountinfo text."""

    def test_local_mount_parsed_correctly(self):
        # Every documented field of the returned entry dict is checked here.
        entries = parse_mountinfo_alike(_lines(LOCAL_MOUNTINFO_LINE))
        assert len(entries) == 1
        e = entries[0]
        assert e["mount_id"] == 23
        assert e["parent_id"] == 1
        assert e["major_minor"] == "8:1"
        assert e["root"] == "/"
        assert e["mount_point"] == "/data"
        assert e["mount_options"] == ["rw", "relatime"]
        assert e["filesystem_type"] == "ext4"
        assert e["mount_source"] == "/dev/sda1"
        assert "rw" in e["super_options"]
        assert e["raw_line"] == LOCAL_MOUNTINFO_LINE.strip()

    def test_nfs_mount_parsed_correctly(self):
        # mount_source keeps the full host:path pair; splitting it is the
        # caller's (resolve_data_store's) job.
        entries = parse_mountinfo_alike(_lines(NFS_MOUNTINFO_LINE))
        e = entries[0]
        assert e["mount_point"] == "/mnt/nfs"
        assert e["filesystem_type"] == "nfs4"
        assert e["mount_source"] == "nfsserver:/exports"

    def test_empty_input_returns_empty_list(self):
        entries = parse_mountinfo_alike(io.StringIO(""))
        assert entries == []

    def test_multiple_lines_parsed(self):
        entries = parse_mountinfo_alike(
            _lines(LOCAL_MOUNTINFO_LINE, NFS_MOUNTINFO_LINE, ROOT_MOUNTINFO_LINE)
        )
        assert len(entries) == 3

    def test_no_super_options(self):
        # A line where the last_part has only two fields (no super options).
        line = "10 1 8:2 / /tmp rw - tmpfs tmpfs"
        entries = parse_mountinfo_alike(io.StringIO(line + "\n"))
        assert len(entries) == 1
        assert entries[0]["super_options"] == []

    def test_mount_options_split_into_list(self):
        # The comma-separated options field must come back as a real list.
        entries = parse_mountinfo_alike(_lines(LOCAL_MOUNTINFO_LINE))
        opts = entries[0]["mount_options"]
        assert isinstance(opts, list)
        assert "rw" in opts
        assert "relatime" in opts
96
+
97
+
98
+ # ---------------------------------------------------------------------------
99
+ # parse_mountinfo
100
+ # ---------------------------------------------------------------------------
101
+
102
+
103
class TestParseMountinfo:
    """Tests for parse_mountinfo's file selection and fallback behavior."""

    def test_reads_proc_self_mountinfo(self):
        # The per-process file is preferred when it can be opened.
        data = LOCAL_MOUNTINFO_LINE + "\n"
        with patch("builtins.open", mock_open(read_data=data)) as mocked:
            entries = parse_mountinfo()
        mocked.assert_called_once_with("/proc/self/mountinfo", "r")
        assert len(entries) == 1

    def test_falls_back_to_proc_mountinfo_when_self_missing(self):
        data = LOCAL_MOUNTINFO_LINE + "\n"

        # Simulate /proc/self/mountinfo missing but /proc/mountinfo readable.
        def side_effect(path, mode):
            if path == "/proc/self/mountinfo":
                raise FileNotFoundError
            return mock_open(read_data=data)()

        with patch("builtins.open", side_effect=side_effect):
            entries = parse_mountinfo()
        assert len(entries) == 1

    def test_raises_when_both_files_missing(self):
        # With neither file available the FileNotFoundError must propagate.
        with patch("builtins.open", side_effect=FileNotFoundError):
            with pytest.raises(FileNotFoundError):
                parse_mountinfo()

    def test_fallback_logs_warning(self, caplog):
        data = LOCAL_MOUNTINFO_LINE + "\n"

        def side_effect(path, mode):
            if path == "/proc/self/mountinfo":
                raise FileNotFoundError
            return mock_open(read_data=data)()

        with caplog.at_level(logging.WARNING):
            with patch("builtins.open", side_effect=side_effect):
                parse_mountinfo()
        assert any("/proc/self/mountinfo" in r.message for r in caplog.records)
140
+
141
+
142
+ # ---------------------------------------------------------------------------
143
+ # resolve_data_store
144
+ # ---------------------------------------------------------------------------
145
+
146
+
147
class TestResolveDataStore:
    """Tests for resolve_data_store's mount-point matching rules."""

    def _mock_parse(self, *lines):
        """Return a list of mount entries parsed from the given lines."""
        return parse_mountinfo_alike(_lines(*lines))

    def test_local_mount_returns_device_and_filepath(self):
        # A plain device mount source has no host:path pair, so the filepath
        # is returned unchanged.
        mounts = self._mock_parse(LOCAL_MOUNTINFO_LINE)
        with patch(
            "youvegotdata.youvegotdata.parse_mountinfo", return_value=mounts
        ):
            data_store, fpath = resolve_data_store("/data/subdir/file.hdf")
        assert data_store == "/dev/sda1"
        assert fpath == "/data/subdir/file.hdf"

    def test_nfs_mount_returns_server_and_remote_path(self):
        mounts = self._mock_parse(NFS_MOUNTINFO_LINE)
        with patch(
            "youvegotdata.youvegotdata.parse_mountinfo", return_value=mounts
        ):
            data_store, fpath = resolve_data_store("/mnt/nfs/subdir/file.hdf")
        assert data_store == "nfsserver"
        # The mount_source path (/exports) should replace the mount point prefix
        assert fpath == "/exports/subdir/file.hdf"

    def test_no_matching_mount_returns_none(self):
        # Only the root mount, which is skipped.
        mounts = self._mock_parse(ROOT_MOUNTINFO_LINE)
        with patch(
            "youvegotdata.youvegotdata.parse_mountinfo", return_value=mounts
        ):
            data_store, fpath = resolve_data_store("/unrelated/file.hdf")
        assert data_store is None
        assert fpath is None

    def test_root_mount_is_skipped(self):
        # Even though "/" prefix-matches everything, it must be skipped.
        mounts = self._mock_parse(ROOT_MOUNTINFO_LINE, LOCAL_MOUNTINFO_LINE)
        with patch(
            "youvegotdata.youvegotdata.parse_mountinfo", return_value=mounts
        ):
            data_store, _ = resolve_data_store("/data/file.hdf")
        # /data mount should win, not the root mount
        assert data_store == "/dev/sda1"

    def test_longest_prefix_mount_wins(self):
        # /data and /data/archive are both valid prefixes; /data/archive is longer.
        archive_line = (
            "24 23 8:2 / /data/archive rw,relatime - ext4 /dev/sdb1 rw"
        )
        mounts = self._mock_parse(LOCAL_MOUNTINFO_LINE, archive_line)
        with patch(
            "youvegotdata.youvegotdata.parse_mountinfo", return_value=mounts
        ):
            data_store, fpath = resolve_data_store("/data/archive/file.hdf")
        assert data_store == "/dev/sdb1"
        assert fpath == "/data/archive/file.hdf"
203
+
204
+
205
+ # ---------------------------------------------------------------------------
206
+ # produce_notification
207
+ # ---------------------------------------------------------------------------
208
+
209
+
210
class TestProduceNotification:
    """Tests for produce_notification with pika fully mocked out."""

    def _make_config(self, host="rmq.example.com"):
        # Minimal ConfigParser matching the [Settings] section the code reads.
        config = configparser.ConfigParser()
        config["Settings"] = {"RMQ_HOST": host}
        return config

    def _run(self, **kwargs):
        """Run produce_notification with sensible defaults, mocking pika."""
        defaults = dict(
            config=self._make_config(),
            filepath="/data/file.hdf",
            product="VIIRS",
            version="1.0",
            start_time="2024-01-01T00:00:00",
            end_time="2024-01-01T01:00:00",
            length=1024,
            checksum="abc123",
            checksum_type="md5",
        )
        defaults.update(kwargs)

        mock_channel = MagicMock()
        mock_connection = MagicMock()
        mock_connection.channel.return_value = mock_channel

        # resolve_data_store is patched so no real mountinfo is consulted.
        with patch(
            "youvegotdata.youvegotdata.resolve_data_store",
            return_value=("/dev/sda1", "/data/file.hdf"),
        ):
            with patch(
                "youvegotdata.youvegotdata.pika.BlockingConnection",
                return_value=mock_connection,
            ) as mock_bc:
                produce_notification(**defaults)

        return mock_bc, mock_connection, mock_channel

    def test_connection_opened_with_correct_host(self):
        # The ConnectionParameters object is passed positionally.
        mock_bc, _, _ = self._run()
        call_args = mock_bc.call_args
        conn_params = call_args[0][0]
        assert conn_params.host == "rmq.example.com"

    def test_queue_declared_durable(self):
        _, _, mock_channel = self._run()
        mock_channel.queue_declare.assert_called_once_with(
            queue="file_notif_queue", durable=True
        )

    def test_message_published_to_correct_queue(self):
        # Default exchange ("") plus routing_key == queue name.
        _, _, mock_channel = self._run()
        mock_channel.basic_publish.assert_called_once()
        kwargs = mock_channel.basic_publish.call_args.kwargs
        assert kwargs["routing_key"] == "file_notif_queue"
        assert kwargs["exchange"] == ""

    def test_message_body_is_valid_json(self):
        _, _, mock_channel = self._run()
        body = mock_channel.basic_publish.call_args.kwargs["body"]
        msg = json.loads(body)
        assert isinstance(msg, dict)

    def test_message_contains_expected_fields(self):
        _, _, mock_channel = self._run()
        body = mock_channel.basic_publish.call_args.kwargs["body"]
        msg = json.loads(body)
        assert msg["data_store"] == "/dev/sda1"
        assert msg["filepath"] == "/data/file.hdf"
        assert msg["product"] == "VIIRS"
        assert msg["version"] == "1.0"
        assert msg["checksum"] == "abc123"
        assert msg["checksum_type"] == "md5"

    def test_optional_fields_default_to_none(self):
        # Omitted metadata must still appear in the message, as JSON null.
        _, _, mock_channel = self._run(
            start_time=None,
            end_time=None,
            length=None,
            checksum=None,
            checksum_type=None,
        )
        body = mock_channel.basic_publish.call_args.kwargs["body"]
        msg = json.loads(body)
        assert msg["start_time"] is None
        assert msg["end_time"] is None
        assert msg["length"] is None

    def test_connection_closed_after_publish(self):
        _, mock_connection, _ = self._run()
        mock_connection.close.assert_called_once()

    def test_message_delivery_mode_is_persistent(self):
        import pika as pika_mod

        _, _, mock_channel = self._run()
        props = mock_channel.basic_publish.call_args.kwargs["properties"]
        # BasicProperties stores delivery_mode as an integer; compare via .value
        assert props.delivery_mode == pika_mod.DeliveryMode.Persistent.value
@@ -0,0 +1,16 @@
1
import logging

# Resolve the installed package version from distribution metadata.
# importlib.metadata exists on Python >= 3.8; importlib_metadata is the
# 3.7 backport. Note PackageNotFoundError subclasses ModuleNotFoundError,
# so an uninstalled (source-tree) package also falls through to the
# debug message instead of raising.
try:
    import importlib.metadata

    __version__ = importlib.metadata.version(__package__ or __name__)
except ModuleNotFoundError:
    try:
        import importlib_metadata

        __version__ = importlib_metadata.version(__package__ or __name__)
    except ModuleNotFoundError:
        # Fixed: the two concatenated sentences previously ran together
        # without a separating space ("available.If running").
        logging.debug(
            "Could not set __version__ because importlib.metadata is not available. "
            "If running python 3.7, installing importlib-metadata will fix this issue"
        )
@@ -0,0 +1,273 @@
1
+ #!/usr/bin/env python
2
+ # coding: utf-8
3
+
4
+ # Stock modules
5
+ import os
6
+ import sys
7
+ import logging
8
+ import argparse
9
+ import pika
10
+ import json
11
+ import configparser
12
+
13
# Help/description text shown by argparse (see main()).
DESCRIPTION = """
Allows a data ingest process to send a new file notification to the Data
Inventory RabbitMQ server. The notification will ultimately be used to add the
file metadata to the Data Inventory DB.
"""

# Module-level logger; configured in main() via logging.basicConfig.
log = logging.getLogger(__name__)
20
+
21
def parse_mountinfo_alike(fobj):
    """Parse an open file-like object in /proc/self/mountinfo format.

    fobj: iterable of mountinfo-format lines.
    Returns a list of dicts, one per mount entry, with keys: mount_id,
    parent_id, major_minor, root, mount_point, mount_options (list),
    filesystem_type, mount_source, super_options (list), raw_line.
    Raises ValueError/IndexError on malformed (non-blank) lines.
    """
    mount_entries = []
    for line in fobj:
        # Each line in /proc/mountinfo has a specific format.
        # The fields are space-separated, but some fields can contain spaces.
        # The separator between the optional fields and the rest is ' - '.
        stripped = line.strip()
        if not stripped:
            # Fixed: a blank line used to crash with ValueError on int('');
            # tolerate and skip empty lines instead.
            continue
        parts = stripped.split(' - ')

        # Extract the first part (mandatory fields).
        first_part_fields = parts[0].split(' ')

        # Extract the last part (filesystem type, source, super options).
        last_part_fields = parts[1].split(' ') if len(parts) > 1 else []

        # Mandatory fields by position.
        mount_id = int(first_part_fields[0])
        parent_id = int(first_part_fields[1])
        major_minor = first_part_fields[2]
        root = first_part_fields[3]
        mount_point = first_part_fields[4]
        mount_options = first_part_fields[5].split(',')

        # Filesystem type, mount source, and super options are in the last part.
        filesystem_type = last_part_fields[0]
        mount_source = last_part_fields[1]
        super_options = last_part_fields[2].split(',') if len(last_part_fields) > 2 else []

        mount_entry = {
            "mount_id": mount_id,
            "parent_id": parent_id,
            "major_minor": major_minor,
            "root": root,
            "mount_point": mount_point,
            "mount_options": mount_options,
            "filesystem_type": filesystem_type,
            "mount_source": mount_source,
            "super_options": super_options,
            "raw_line": stripped,
        }
        mount_entries.append(mount_entry)

    return mount_entries
64
+
65
+
66
def parse_mountinfo():
    """Return mount entries parsed from /proc/self/mountinfo.

    Falls back to /proc/mountinfo when the per-process file is missing.
    Raises FileNotFoundError when neither file can be opened.
    """
    try:
        source = open("/proc/self/mountinfo", "r")
    except FileNotFoundError:
        log.warning("/proc/self/mountinfo not found. Trying /proc/mountinfo.")
        try:
            source = open("/proc/mountinfo", "r")
        except FileNotFoundError:
            log.error("Could not open /proc/self/mountinfo nor /proc/mountinfo")
            raise

    # Parse whichever file opened successfully; the context manager closes it.
    with source as fobj:
        return parse_mountinfo_alike(fobj)
83
+
84
+
85
def resolve_data_store(filepath):
    """
    Get the data store name and the absolute path from the data store.

    filepath: the filepath argument given to the program (local absolute path).
    Returns (data_store, fpath): the mount source (block device, or remote
    host for host:path sources such as NFS) holding the file, and the path
    rewritten relative to that source. Both are None when no non-root mount
    point matches.
    """
    # Read the mountinfo entries and find the most specific matching mount.
    data_store = None
    fpath = None
    mp_match_len = 0
    log.info("Currently mounted filesystems:")
    for mount in parse_mountinfo():
        log.debug(
            f"Source: {mount['mount_source']:<20} Mount Point: {mount['mount_point']:<20} FS Type: {mount['filesystem_type']:<10} Options: {mount['super_options']}"
        )
        mount_point = mount["mount_point"]
        if mount_point == "/":
            # Skip this - every path will match it
            continue
        # Fixed: match only at a path-component boundary so that e.g. the
        # mount point /data no longer claims a file under /database.
        if not (filepath == mount_point
                or filepath.startswith(mount_point.rstrip("/") + "/")):
            continue
        # Check all the mount points and use the one with the longest match
        if len(mount_point) > mp_match_len:
            mp_match_len = len(mount_point)
            dev_dir = mount["mount_source"].split(":")
            data_store = dev_dir[0]
            if len(dev_dir) == 2:
                # There is a path associated with the mount_source (host:path,
                # e.g. NFS). Replace the mount point prefix with this path.
                fpath = dev_dir[1] + filepath[mp_match_len:]
            else:
                fpath = filepath

    return data_store, fpath
118
+
119
+
120
def produce_notification(
    config,
    filepath,
    product,
    version,
    start_time=None,
    end_time=None,
    length=None,
    checksum=None,
    checksum_type=None,
):
    """
    Send a "Fair Dispatch" message via RabbitMQ.

    config: configparser.ConfigParser whose [Settings] section supplies RMQ_HOST.
    filepath: local path of the new file; translated to a data-store path
        via resolve_data_store before sending.
    product, version: file metadata (may be None).
    start_time, end_time, length, checksum, checksum_type: optional file
        metadata; serialized as JSON null when omitted.
    """

    # Get the data store name and the absolute path from the data store
    data_store, fpath = resolve_data_store(filepath)
    log.info(f"data_store: {data_store}, fpath: {fpath}")

    log.info(f'RMQ_HOST: {config["Settings"]["RMQ_HOST"]}')

    # Establish connection and create a channel on that connection
    connection = pika.BlockingConnection(
        pika.ConnectionParameters(host=config["Settings"]["RMQ_HOST"])
    )
    channel = connection.channel()

    # Ensure the durable file_notif_queue exists (survives broker restarts)
    channel.queue_declare(queue="file_notif_queue", durable=True)

    # Put the message data in a dictionary for conversion to JSON
    msg_dict = {
        "data_store": data_store,
        "filepath": fpath,
        "product": product,
        "version": version,
        "start_time": start_time,
        "end_time": end_time,
        "length": length,
        "checksum": checksum,
        "checksum_type": checksum_type,
    }

    msg_json = json.dumps(msg_dict)

    # Send the JSON formatted message on the default exchange, marked
    # persistent so it is written to disk by the broker.
    channel.basic_publish(
        exchange="",
        routing_key="file_notif_queue",
        body=msg_json,
        properties=pika.BasicProperties(delivery_mode=pika.DeliveryMode.Persistent),
    )
    log.debug(f" [x] Sent {msg_json}")

    # Close the connection to make sure the message actually gets sent - buffers
    # are flushed
    connection.close()
177
+
178
+
179
def main():
    """Command-line entry point: parse args, load config.ini, send one notification."""

    # Parse the arguments
    parser = argparse.ArgumentParser(f"{DESCRIPTION}python youvegotdata.py")

    # Add the positional argument(s?)
    parser.add_argument(
        "filepath", type=str, help="Send a notification for the file with this path."
    )

    # Add the flags
    parser.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        help="Verbose output - set log level to DEBUG",
    )

    parser.add_argument(
        "-p",
        "--product",
        default=None,
        help="The file's product",
    )

    parser.add_argument(
        "-r",
        "--version",
        default=None,
        help="The file's version",
    )

    parser.add_argument(
        "-s",
        "--start_time",
        default=None,
        help="The first date and time for which the file has data",
    )

    parser.add_argument(
        "-e",
        "--end_time",
        default=None,
        help="The last date and time for which the file has data",
    )

    parser.add_argument(
        "-l", "--length", default=None, help="The length(size) of the file"
    )

    parser.add_argument("-c", "--checksum", default=None, help="The file's checksum")

    parser.add_argument(
        "-t",
        "--checksum_type",
        default=None,
        help="The type of the checksum - its algorithm",
    )

    pargs = parser.parse_args()

    # Setup logging.
    logging.basicConfig(
        format="%(asctime)s %(levelname)-8s%(name)s: %(message)s",
        level="DEBUG" if pargs.verbose else "INFO",
    )

    # Reduce pika logging
    logging.getLogger("pika").setLevel(logging.WARNING)

    # Read the configuration file. Fixed: ConfigParser.read() never raises
    # FileNotFoundError - it silently skips missing files and returns the
    # list of files it actually parsed - so the previous try/except was dead
    # code and a missing config.ini proceeded with an empty config. Check
    # the return value instead and exit with a non-zero status.
    config = configparser.ConfigParser()
    if not config.read("config.ini"):
        log.error("config.ini not found. Please ensure the file exists.")
        sys.exit(1)

    log.info("Sending a new file notification")

    produce_notification(
        config,
        pargs.filepath,
        pargs.product,
        pargs.version,
        pargs.start_time,
        pargs.end_time,
        pargs.length,
        pargs.checksum,
        pargs.checksum_type,
    )


if __name__ == "__main__":
    main()
@@ -0,0 +1,47 @@
1
+ Metadata-Version: 2.4
2
+ Name: youvegotdata
3
+ Version: 1.0.0
4
+ Summary: Send new file notifications
5
+ Author-email: Jim Fluke <james.fluke@colostate.edu>
6
+ Requires-Python: >=3.8
7
+ Description-Content-Type: text/markdown
8
+ Requires-Dist: pika
9
+ Provides-Extra: test
10
+ Requires-Dist: pytest; extra == "test"
11
+
12
+ # youvegotdata
13
+
14
+ Uses RabbitMQ to send new file notifications, with the ultimate purpose of
15
+ getting the file metadata into the Data Inventory Database.
16
+
17
+ The "producer" `youvegotdata.py` will usually be called by the CIRA data
18
+ ingest scripts when a new file is added to the CIRA data stores, and will send
19
+ a message through RabbitMQ to the consumers with the file's metadata.
20
+
21
+ Message "consumers" will be running to receive the file metadata and insert it
22
+ into the database. It is expected that multiple consumer processes will be
23
+ accepting messages in RabbitMQ's "fair dispatch" configuration. A given
24
+ notification will be received by one consumer.
25
+
26
+ ## Running youvegotdata.py
27
+ This must be run in a Python environment that includes `pika` - for connecting
28
+ to RabbitMQ - and other needed packages. The `environ-3.8.yml` file in this
29
+ repository can be used to create a workable conda environment. Setting one up
30
+ using `pip` will certainly also work. Python 3.8 is the minimum version needed
31
+ to run the script. Higher versions should work.
32
+
33
+ Copy the template-config.ini file to config.ini and edit the config.ini as
34
+ described inside that file.
35
+ Run the code with:
36
+ ```
37
+ python youvegotdata.py [-h] [-v] [-p PRODUCT] [-r VERSION] [-s START_TIME] [-e END_TIME] [-l LENGTH] [-c CHECKSUM] [-t CHECKSUM_TYPE] filepath
38
+ ```
39
+ Run this with the -h (--help) argument to see the available flagged arguments.
40
+
41
+ This will usually be run with just the `filepath` argument. An example is:
42
+ ```
43
+ python youvegotdata/youvegotdata.py /full/path/to/local/file/data_file.hdf
44
+ ```
45
+ If run from a local repository of this project.
46
+
47
+ The `filepath` file must exist on the local machine.
@@ -0,0 +1,15 @@
1
+ .gitignore
2
+ README.md
3
+ environ-3.8.yml
4
+ pyproject.toml
5
+ template-config.ini
6
+ .github/workflows/package-and-publish.yaml
7
+ tests/__init__.py
8
+ tests/test_youvegotdata.py
9
+ youvegotdata/__init__.py
10
+ youvegotdata/youvegotdata.py
11
+ youvegotdata.egg-info/PKG-INFO
12
+ youvegotdata.egg-info/SOURCES.txt
13
+ youvegotdata.egg-info/dependency_links.txt
14
+ youvegotdata.egg-info/requires.txt
15
+ youvegotdata.egg-info/top_level.txt
@@ -0,0 +1,4 @@
1
+ pika
2
+
3
+ [test]
4
+ pytest
@@ -0,0 +1 @@
1
+ youvegotdata