youvegotdata 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,103 @@
1
name: Publish to PyPI

on:
  release:
    types: [published]

  # Allow manual trigger for testing the workflow
  workflow_dispatch:

permissions:
  contents: read
  # Required for PyPI trusted publishing (OIDC).
  id-token: write

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

env:
  UV_SYSTEM_PYTHON: true

jobs:
  # ---------------------------------------------------------------------------
  # Build sdist and wheel
  # ---------------------------------------------------------------------------
  build:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - uses: actions/setup-python@v5
        with:
          python-version: "3.13"

      - uses: astral-sh/setup-uv@v6

      - name: Install build tools
        run: uv pip install build

      - name: Build sdist and wheel
        run: python -m build

      - name: List built artifacts
        run: ls -lh dist/

      - name: Upload build artifacts
        uses: actions/upload-artifact@v4
        with:
          name: dist
          path: dist/
          if-no-files-found: error

  # ---------------------------------------------------------------------------
  # Run the test suite against the built wheel (not the source tree)
  # ---------------------------------------------------------------------------
  test-built-package:
    needs: build
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest, macos-latest, windows-latest]
        python-version: ["3.11", "3.12", "3.13"]
    steps:
      - uses: actions/checkout@v4

      - uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - uses: astral-sh/setup-uv@v6

      - name: Download build artifacts
        uses: actions/download-artifact@v4
        with:
          name: dist
          path: dist/

      - name: Install wheel and test dependencies
        shell: bash
        run: |
          uv pip install dist/*.whl
          uv pip install pytest pytest-cov pytest-xdist pydantic

      - name: Run tests
        run: python -m pytest tests/ -v

  # ---------------------------------------------------------------------------
  # Publish to PyPI
  # ---------------------------------------------------------------------------
  publish-pypi:
    needs: [build, test-built-package]
    runs-on: ubuntu-latest
    environment: pypi
    # Gate publishing on an actual release: a manual workflow_dispatch run
    # still exercises build and tests, but must not upload to PyPI.
    if: github.event_name == 'release'
    steps:
      - name: Download build artifacts
        uses: actions/download-artifact@v4
        with:
          name: dist
          path: dist/

      - name: Publish to PyPI
        uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,60 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # Distribution / packaging
7
+ .Python
8
+ build/
9
+ develop-eggs/
10
+ dist/
11
+ downloads/
12
+ eggs/
13
+ .eggs/
14
+ lib/
15
+ lib64/
16
+ parts/
17
+ sdist/
18
+ var/
19
+ wheels/
20
+ share/python-wheels/
21
+ *.egg-info/
22
+ .installed.cfg
23
+ *.egg
24
+ MANIFEST
25
+
26
+ # Installer logs
27
+ pip-log.txt
28
+ pip-delete-this-directory.txt
29
+
30
+ # Unit test / coverage reports
31
+ htmlcov/
32
+ .tox/
33
+ .nox/
34
+ .coverage
35
+ .coverage.*
36
+ .cache
37
+ nosetests.xml
38
+ coverage.xml
39
+ *.cover
40
+ *.py,cover
41
+ .hypothesis/
42
+ .pytest_cache/
43
+ cover/
44
+
45
+ # Jupyter Notebook
46
+ .ipynb_checkpoints
47
+
48
+ # Environments/configuration
49
+ config.ini
50
+
51
+ # Data and Reference Files
52
+ *.npy
53
+ *.npz
54
+ *.pth
55
+
56
+ # Temporary files
57
+ *.bak*
58
+ *.sav*
59
+ *.tmp*
60
+ *.out*
@@ -0,0 +1,47 @@
1
+ Metadata-Version: 2.4
2
+ Name: youvegotdata
3
+ Version: 1.0.0
4
+ Summary: Send new file notifications
5
+ Author-email: Jim Fluke <james.fluke@colostate.edu>
6
+ Requires-Python: >=3.8
7
+ Description-Content-Type: text/markdown
8
+ Requires-Dist: pika
9
+ Provides-Extra: test
10
+ Requires-Dist: pytest; extra == "test"
11
+
12
+ # youvegotdata
13
+
14
+ Uses RabbitMQ to send new file notifications, with the ultimate purpose of
15
+ getting the file metadata into the Data Inventory Database.
16
+
17
+ The "producer" `youvegotdata.py` will usually be called by the CIRA data
18
+ ingest scripts when a new file is added to the CIRA data stores, and will send
19
+ a message through RabbitMQ to the consumers with the file's metadata.
20
+
21
+ Message "consumers" will be running to receive the file metadata and insert it
22
+ into the database. It is expected that multiple consumer processes will be
23
+ accepting messages in RabbitMQ's "fair dispatch" configuration. A given
24
+ notification will be received by one consumer.
25
+
26
+ ## Running youvegotdata.py
27
+ This must be run in a Python environment that includes `pika` - for connecting
28
+ to RabbitMQ - and other needed packages. The `environ-3.8.yml` file in this
29
+ repository can be used to create a workable conda environment. Setting one up
30
+ using `pip` will certainly also work. Python 3.8 is the minimum version needed
31
+ to run the script. Higher versions should work.
32
+
33
+ Copy the template-config.ini file to config.ini and edit the config.ini as
34
+ described inside that file.
35
+ Run the code with:
36
+ ```
37
+ python youvegotdata.py [-h] [-v] [-p PRODUCT] [-r VERSION] [-s START_TIME] [-e END_TIME] [-l LENGTH] [-c CHECKSUM] [-t CHECKSUM_TYPE] filepath
38
+ ```
39
+ Run this with the -h (--help) argument to see the available flagged arguments.
40
+
41
+ This will usually be run with just the `filepath` argument. An example is:
42
+ ```
43
+ python youvegotdata/youvegotdata.py /full/path/to/local/file/data_file.hdf
44
+ ```
45
+ If run from a local repository of this project.
46
+
47
+ The `filepath` file must exist on the local machine.
@@ -0,0 +1,36 @@
1
+ # youvegotdata
2
+
3
+ Uses RabbitMQ to send new file notifications, with the ultimate purpose of
4
+ getting the file metadata into the Data Inventory Database.
5
+
6
+ The "producer" `youvegotdata.py` will usually be called by the CIRA data
7
+ ingest scripts when a new file is added to the CIRA data stores, and will send
8
+ a message through RabbitMQ to the consumers with the file's metadata.
9
+
10
+ Message "consumers" will be running to receive the file metadata and insert it
11
+ into the database. It is expected that multiple consumer processes will be
12
+ accepting messages in RabbitMQ's "fair dispatch" configuration. A given
13
+ notification will be received by one consumer.
14
+
15
+ ## Running youvegotdata.py
16
+ This must be run in a Python environment that includes `pika` - for connecting
17
+ to RabbitMQ - and other needed packages. The `environ-3.8.yml` file in this
18
+ repository can be used to create a workable conda environment. Setting one up
19
+ using `pip` will certainly also work. Python 3.8 is the minimum version needed
20
+ to run the script. Higher versions should work.
21
+
22
+ Copy the template-config.ini file to config.ini and edit the config.ini as
23
+ described inside that file.
24
+ Run the code with:
25
+ ```
26
+ python youvegotdata.py [-h] [-v] [-p PRODUCT] [-r VERSION] [-s START_TIME] [-e END_TIME] [-l LENGTH] [-c CHECKSUM] [-t CHECKSUM_TYPE] filepath
27
+ ```
28
+ Run this with the -h (--help) argument to see the available flagged arguments.
29
+
30
+ This will usually be run with just the `filepath` argument. An example is:
31
+ ```
32
+ python youvegotdata/youvegotdata.py /full/path/to/local/file/data_file.hdf
33
+ ```
34
+ If run from a local repository of this project.
35
+
36
+ The `filepath` file must exist on the local machine.
@@ -0,0 +1,8 @@
1
+ name: python3.8
2
+ channels:
3
+ - conda-forge
4
+ dependencies:
5
+ - python=3.8
6
+ - pika
7
+ - pytest
8
+ - black
@@ -0,0 +1,21 @@
1
+ # https://packaging.python.org/en/latest/guides/writing-pyproject-toml/
2
+ # Choosing a build backend for your Python package
3
+ [build-system]
4
+ requires = ["setuptools", "setuptools-scm"]
5
+ build-backend = "setuptools.build_meta"
6
+
7
+ [project]
8
+ authors = [
9
+ {name = "Jim Fluke", email = "james.fluke@colostate.edu" }
10
+ ]
11
+ name = "youvegotdata" # REQUIRED
12
+ version = "1.0.0"
13
+ description = "Send new file notifications"
14
+ readme = "README.md"
15
+
16
+ requires-python = ">=3.8"
17
+
18
+ dependencies = ["pika"]
19
+
20
+ [project.optional-dependencies]
21
+ test = ["pytest"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,4 @@
1
+ # Copy to config.ini and customize with your values.
2
+
3
+ [Settings]
4
+ RMQ_HOST = <host of the RabbitMQ server>
File without changes
@@ -0,0 +1,307 @@
1
+ """Unit tests for youvegotdata.youvegotdata."""
2
+
3
+ import configparser
4
+ import io
5
+ import json
6
+ import logging
7
+ from unittest.mock import MagicMock, mock_open, patch
8
+
9
+ import pytest
10
+
11
+ from youvegotdata.youvegotdata import (
12
+ parse_mountinfo,
13
+ parse_mountinfo_alike,
14
+ produce_notification,
15
+ resolve_data_store,
16
+ )
17
+
18
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

# A realistic /proc/self/mountinfo line for a local ext4 filesystem.
# Format: mount_id parent_id major:minor root mount_point mount_options
# [optional-fields] - filesystem_type mount_source super_options
LOCAL_MOUNTINFO_LINE = (
    "23 1 8:1 / /data rw,relatime shared:1 - ext4 /dev/sda1 rw,errors=remount-ro"
)

# An NFS mount where mount_source contains a host:path pair.
NFS_MOUNTINFO_LINE = (
    "42 1 0:35 / /mnt/nfs rw,relatime shared:2 - nfs4 nfsserver:/exports rw,vers=4"
)

# The root mount (should be skipped by resolve_data_store).
ROOT_MOUNTINFO_LINE = (
    "1 0 8:0 / / rw,relatime shared:0 - ext4 /dev/sda rw"
)


def _lines(*lines):
    """Return a file-like object containing the given lines.

    Each argument becomes one newline-terminated line in the StringIO,
    mimicking reading a real mountinfo file.
    """
    return io.StringIO("\n".join(lines) + "\n")
43
+
44
+
45
+ # ---------------------------------------------------------------------------
46
+ # parse_mountinfo_alike
47
+ # ---------------------------------------------------------------------------
48
+
49
+
50
class TestParseMountinfoAlike:
    """Tests for parse_mountinfo_alike, the pure parser for mountinfo text."""

    def test_local_mount_parsed_correctly(self):
        # Every documented field of the returned entry dict is checked here.
        entries = parse_mountinfo_alike(_lines(LOCAL_MOUNTINFO_LINE))
        assert len(entries) == 1
        e = entries[0]
        assert e["mount_id"] == 23
        assert e["parent_id"] == 1
        assert e["major_minor"] == "8:1"
        assert e["root"] == "/"
        assert e["mount_point"] == "/data"
        assert e["mount_options"] == ["rw", "relatime"]
        assert e["filesystem_type"] == "ext4"
        assert e["mount_source"] == "/dev/sda1"
        assert "rw" in e["super_options"]
        assert e["raw_line"] == LOCAL_MOUNTINFO_LINE.strip()

    def test_nfs_mount_parsed_correctly(self):
        # mount_source keeps the full host:path pair; splitting it is the
        # caller's (resolve_data_store's) job.
        entries = parse_mountinfo_alike(_lines(NFS_MOUNTINFO_LINE))
        e = entries[0]
        assert e["mount_point"] == "/mnt/nfs"
        assert e["filesystem_type"] == "nfs4"
        assert e["mount_source"] == "nfsserver:/exports"

    def test_empty_input_returns_empty_list(self):
        entries = parse_mountinfo_alike(io.StringIO(""))
        assert entries == []

    def test_multiple_lines_parsed(self):
        entries = parse_mountinfo_alike(
            _lines(LOCAL_MOUNTINFO_LINE, NFS_MOUNTINFO_LINE, ROOT_MOUNTINFO_LINE)
        )
        assert len(entries) == 3

    def test_no_super_options(self):
        # A line where the last_part has only two fields (no super options).
        line = "10 1 8:2 / /tmp rw - tmpfs tmpfs"
        entries = parse_mountinfo_alike(io.StringIO(line + "\n"))
        assert len(entries) == 1
        assert entries[0]["super_options"] == []

    def test_mount_options_split_into_list(self):
        # The comma-separated options field must come back as a real list.
        entries = parse_mountinfo_alike(_lines(LOCAL_MOUNTINFO_LINE))
        opts = entries[0]["mount_options"]
        assert isinstance(opts, list)
        assert "rw" in opts
        assert "relatime" in opts
96
+
97
+
98
+ # ---------------------------------------------------------------------------
99
+ # parse_mountinfo
100
+ # ---------------------------------------------------------------------------
101
+
102
+
103
class TestParseMountinfo:
    """Tests for parse_mountinfo's file selection and fallback behavior."""

    def test_reads_proc_self_mountinfo(self):
        # The per-process file is preferred when it can be opened.
        data = LOCAL_MOUNTINFO_LINE + "\n"
        with patch("builtins.open", mock_open(read_data=data)) as mocked:
            entries = parse_mountinfo()
        mocked.assert_called_once_with("/proc/self/mountinfo", "r")
        assert len(entries) == 1

    def test_falls_back_to_proc_mountinfo_when_self_missing(self):
        data = LOCAL_MOUNTINFO_LINE + "\n"

        # Simulate /proc/self/mountinfo missing but /proc/mountinfo readable.
        def side_effect(path, mode):
            if path == "/proc/self/mountinfo":
                raise FileNotFoundError
            return mock_open(read_data=data)()

        with patch("builtins.open", side_effect=side_effect):
            entries = parse_mountinfo()
        assert len(entries) == 1

    def test_raises_when_both_files_missing(self):
        # With neither file available the FileNotFoundError must propagate.
        with patch("builtins.open", side_effect=FileNotFoundError):
            with pytest.raises(FileNotFoundError):
                parse_mountinfo()

    def test_fallback_logs_warning(self, caplog):
        data = LOCAL_MOUNTINFO_LINE + "\n"

        def side_effect(path, mode):
            if path == "/proc/self/mountinfo":
                raise FileNotFoundError
            return mock_open(read_data=data)()

        with caplog.at_level(logging.WARNING):
            with patch("builtins.open", side_effect=side_effect):
                parse_mountinfo()
        assert any("/proc/self/mountinfo" in r.message for r in caplog.records)
140
+
141
+
142
+ # ---------------------------------------------------------------------------
143
+ # resolve_data_store
144
+ # ---------------------------------------------------------------------------
145
+
146
+
147
class TestResolveDataStore:
    """Tests for resolve_data_store's mount-point matching rules."""

    def _mock_parse(self, *lines):
        """Return a list of mount entries parsed from the given lines."""
        return parse_mountinfo_alike(_lines(*lines))

    def test_local_mount_returns_device_and_filepath(self):
        # A plain device mount source has no host:path pair, so the filepath
        # is returned unchanged.
        mounts = self._mock_parse(LOCAL_MOUNTINFO_LINE)
        with patch(
            "youvegotdata.youvegotdata.parse_mountinfo", return_value=mounts
        ):
            data_store, fpath = resolve_data_store("/data/subdir/file.hdf")
        assert data_store == "/dev/sda1"
        assert fpath == "/data/subdir/file.hdf"

    def test_nfs_mount_returns_server_and_remote_path(self):
        mounts = self._mock_parse(NFS_MOUNTINFO_LINE)
        with patch(
            "youvegotdata.youvegotdata.parse_mountinfo", return_value=mounts
        ):
            data_store, fpath = resolve_data_store("/mnt/nfs/subdir/file.hdf")
        assert data_store == "nfsserver"
        # The mount_source path (/exports) should replace the mount point prefix
        assert fpath == "/exports/subdir/file.hdf"

    def test_no_matching_mount_returns_none(self):
        # Only the root mount, which is skipped.
        mounts = self._mock_parse(ROOT_MOUNTINFO_LINE)
        with patch(
            "youvegotdata.youvegotdata.parse_mountinfo", return_value=mounts
        ):
            data_store, fpath = resolve_data_store("/unrelated/file.hdf")
        assert data_store is None
        assert fpath is None

    def test_root_mount_is_skipped(self):
        # Even though "/" prefix-matches everything, it must be skipped.
        mounts = self._mock_parse(ROOT_MOUNTINFO_LINE, LOCAL_MOUNTINFO_LINE)
        with patch(
            "youvegotdata.youvegotdata.parse_mountinfo", return_value=mounts
        ):
            data_store, _ = resolve_data_store("/data/file.hdf")
        # /data mount should win, not the root mount
        assert data_store == "/dev/sda1"

    def test_longest_prefix_mount_wins(self):
        # /data and /data/archive are both valid prefixes; /data/archive is longer.
        archive_line = (
            "24 23 8:2 / /data/archive rw,relatime - ext4 /dev/sdb1 rw"
        )
        mounts = self._mock_parse(LOCAL_MOUNTINFO_LINE, archive_line)
        with patch(
            "youvegotdata.youvegotdata.parse_mountinfo", return_value=mounts
        ):
            data_store, fpath = resolve_data_store("/data/archive/file.hdf")
        assert data_store == "/dev/sdb1"
        assert fpath == "/data/archive/file.hdf"
203
+
204
+
205
+ # ---------------------------------------------------------------------------
206
+ # produce_notification
207
+ # ---------------------------------------------------------------------------
208
+
209
+
210
class TestProduceNotification:
    """Tests for produce_notification with pika fully mocked out."""

    def _make_config(self, host="rmq.example.com"):
        # Minimal ConfigParser matching the [Settings] section the code reads.
        config = configparser.ConfigParser()
        config["Settings"] = {"RMQ_HOST": host}
        return config

    def _run(self, **kwargs):
        """Run produce_notification with sensible defaults, mocking pika."""
        defaults = dict(
            config=self._make_config(),
            filepath="/data/file.hdf",
            product="VIIRS",
            version="1.0",
            start_time="2024-01-01T00:00:00",
            end_time="2024-01-01T01:00:00",
            length=1024,
            checksum="abc123",
            checksum_type="md5",
        )
        defaults.update(kwargs)

        mock_channel = MagicMock()
        mock_connection = MagicMock()
        mock_connection.channel.return_value = mock_channel

        # resolve_data_store is patched so no real mountinfo is consulted.
        with patch(
            "youvegotdata.youvegotdata.resolve_data_store",
            return_value=("/dev/sda1", "/data/file.hdf"),
        ):
            with patch(
                "youvegotdata.youvegotdata.pika.BlockingConnection",
                return_value=mock_connection,
            ) as mock_bc:
                produce_notification(**defaults)

        return mock_bc, mock_connection, mock_channel

    def test_connection_opened_with_correct_host(self):
        # The ConnectionParameters object is passed positionally.
        mock_bc, _, _ = self._run()
        call_args = mock_bc.call_args
        conn_params = call_args[0][0]
        assert conn_params.host == "rmq.example.com"

    def test_queue_declared_durable(self):
        _, _, mock_channel = self._run()
        mock_channel.queue_declare.assert_called_once_with(
            queue="file_notif_queue", durable=True
        )

    def test_message_published_to_correct_queue(self):
        # Default exchange ("") plus routing_key == queue name.
        _, _, mock_channel = self._run()
        mock_channel.basic_publish.assert_called_once()
        kwargs = mock_channel.basic_publish.call_args.kwargs
        assert kwargs["routing_key"] == "file_notif_queue"
        assert kwargs["exchange"] == ""

    def test_message_body_is_valid_json(self):
        _, _, mock_channel = self._run()
        body = mock_channel.basic_publish.call_args.kwargs["body"]
        msg = json.loads(body)
        assert isinstance(msg, dict)

    def test_message_contains_expected_fields(self):
        _, _, mock_channel = self._run()
        body = mock_channel.basic_publish.call_args.kwargs["body"]
        msg = json.loads(body)
        assert msg["data_store"] == "/dev/sda1"
        assert msg["filepath"] == "/data/file.hdf"
        assert msg["product"] == "VIIRS"
        assert msg["version"] == "1.0"
        assert msg["checksum"] == "abc123"
        assert msg["checksum_type"] == "md5"

    def test_optional_fields_default_to_none(self):
        # Omitted metadata must still appear in the message, as JSON null.
        _, _, mock_channel = self._run(
            start_time=None,
            end_time=None,
            length=None,
            checksum=None,
            checksum_type=None,
        )
        body = mock_channel.basic_publish.call_args.kwargs["body"]
        msg = json.loads(body)
        assert msg["start_time"] is None
        assert msg["end_time"] is None
        assert msg["length"] is None

    def test_connection_closed_after_publish(self):
        _, mock_connection, _ = self._run()
        mock_connection.close.assert_called_once()

    def test_message_delivery_mode_is_persistent(self):
        import pika as pika_mod

        _, _, mock_channel = self._run()
        props = mock_channel.basic_publish.call_args.kwargs["properties"]
        # BasicProperties stores delivery_mode as an integer; compare via .value
        assert props.delivery_mode == pika_mod.DeliveryMode.Persistent.value
@@ -0,0 +1,16 @@
1
import logging

# Resolve the installed package version from distribution metadata.
# importlib.metadata exists on Python >= 3.8; importlib_metadata is the
# 3.7 backport. Note PackageNotFoundError subclasses ModuleNotFoundError,
# so an uninstalled (source-tree) package also falls through to the
# debug message instead of raising.
try:
    import importlib.metadata

    __version__ = importlib.metadata.version(__package__ or __name__)
except ModuleNotFoundError:
    try:
        import importlib_metadata

        __version__ = importlib_metadata.version(__package__ or __name__)
    except ModuleNotFoundError:
        # Fixed: the two concatenated sentences previously ran together
        # without a separating space ("available.If running").
        logging.debug(
            "Could not set __version__ because importlib.metadata is not available. "
            "If running python 3.7, installing importlib-metadata will fix this issue"
        )
@@ -0,0 +1,273 @@
1
+ #!/usr/bin/env python
2
+ # coding: utf-8
3
+
4
+ # Stock modules
5
+ import os
6
+ import sys
7
+ import logging
8
+ import argparse
9
+ import pika
10
+ import json
11
+ import configparser
12
+
13
# Help/description text shown by argparse (see main()).
DESCRIPTION = """
Allows a data ingest process to send a new file notification to the Data
Inventory RabbitMQ server. The notification will ultimately be used to add the
file metadata to the Data Inventory DB.
"""

# Module-level logger; configured in main() via logging.basicConfig.
log = logging.getLogger(__name__)
20
+
21
def parse_mountinfo_alike(fobj):
    """Parse an open file-like object in /proc/self/mountinfo format.

    fobj: iterable of mountinfo-format lines.
    Returns a list of dicts, one per mount entry, with keys: mount_id,
    parent_id, major_minor, root, mount_point, mount_options (list),
    filesystem_type, mount_source, super_options (list), raw_line.
    Raises ValueError/IndexError on malformed (non-blank) lines.
    """
    mount_entries = []
    for line in fobj:
        # Each line in /proc/mountinfo has a specific format.
        # The fields are space-separated, but some fields can contain spaces.
        # The separator between the optional fields and the rest is ' - '.
        stripped = line.strip()
        if not stripped:
            # Fixed: a blank line used to crash with ValueError on int('');
            # tolerate and skip empty lines instead.
            continue
        parts = stripped.split(' - ')

        # Extract the first part (mandatory fields).
        first_part_fields = parts[0].split(' ')

        # Extract the last part (filesystem type, source, super options).
        last_part_fields = parts[1].split(' ') if len(parts) > 1 else []

        # Mandatory fields by position.
        mount_id = int(first_part_fields[0])
        parent_id = int(first_part_fields[1])
        major_minor = first_part_fields[2]
        root = first_part_fields[3]
        mount_point = first_part_fields[4]
        mount_options = first_part_fields[5].split(',')

        # Filesystem type, mount source, and super options are in the last part.
        filesystem_type = last_part_fields[0]
        mount_source = last_part_fields[1]
        super_options = last_part_fields[2].split(',') if len(last_part_fields) > 2 else []

        mount_entry = {
            "mount_id": mount_id,
            "parent_id": parent_id,
            "major_minor": major_minor,
            "root": root,
            "mount_point": mount_point,
            "mount_options": mount_options,
            "filesystem_type": filesystem_type,
            "mount_source": mount_source,
            "super_options": super_options,
            "raw_line": stripped,
        }
        mount_entries.append(mount_entry)

    return mount_entries
64
+
65
+
66
def parse_mountinfo():
    """Return mount entries parsed from /proc/self/mountinfo.

    Falls back to /proc/mountinfo when the per-process file is missing.
    Raises FileNotFoundError when neither file can be opened.
    """
    try:
        source = open("/proc/self/mountinfo", "r")
    except FileNotFoundError:
        log.warning("/proc/self/mountinfo not found. Trying /proc/mountinfo.")
        try:
            source = open("/proc/mountinfo", "r")
        except FileNotFoundError:
            log.error("Could not open /proc/self/mountinfo nor /proc/mountinfo")
            raise

    # Parse whichever file opened successfully; the context manager closes it.
    with source as fobj:
        return parse_mountinfo_alike(fobj)
83
+
84
+
85
def resolve_data_store(filepath):
    """
    Get the data store name and the absolute path from the data store.

    filepath: the filepath argument given to the program (local absolute path).
    Returns (data_store, fpath): the mount source (block device, or remote
    host for host:path sources such as NFS) holding the file, and the path
    rewritten relative to that source. Both are None when no non-root mount
    point matches.
    """
    # Read the mountinfo entries and find the most specific matching mount.
    data_store = None
    fpath = None
    mp_match_len = 0
    log.info("Currently mounted filesystems:")
    for mount in parse_mountinfo():
        log.debug(
            f"Source: {mount['mount_source']:<20} Mount Point: {mount['mount_point']:<20} FS Type: {mount['filesystem_type']:<10} Options: {mount['super_options']}"
        )
        mount_point = mount["mount_point"]
        if mount_point == "/":
            # Skip this - every path will match it
            continue
        # Fixed: match only at a path-component boundary so that e.g. the
        # mount point /data no longer claims a file under /database.
        if not (filepath == mount_point
                or filepath.startswith(mount_point.rstrip("/") + "/")):
            continue
        # Check all the mount points and use the one with the longest match
        if len(mount_point) > mp_match_len:
            mp_match_len = len(mount_point)
            dev_dir = mount["mount_source"].split(":")
            data_store = dev_dir[0]
            if len(dev_dir) == 2:
                # There is a path associated with the mount_source (host:path,
                # e.g. NFS). Replace the mount point prefix with this path.
                fpath = dev_dir[1] + filepath[mp_match_len:]
            else:
                fpath = filepath

    return data_store, fpath
118
+
119
+
120
def produce_notification(
    config,
    filepath,
    product,
    version,
    start_time=None,
    end_time=None,
    length=None,
    checksum=None,
    checksum_type=None,
):
    """
    Send a "Fair Dispatch" message via RabbitMQ.

    config: configparser.ConfigParser whose [Settings] section supplies RMQ_HOST.
    filepath: local path of the new file; translated to a data-store path
        via resolve_data_store before sending.
    product, version: file metadata (may be None).
    start_time, end_time, length, checksum, checksum_type: optional file
        metadata; serialized as JSON null when omitted.
    """

    # Get the data store name and the absolute path from the data store
    data_store, fpath = resolve_data_store(filepath)
    log.info(f"data_store: {data_store}, fpath: {fpath}")

    log.info(f'RMQ_HOST: {config["Settings"]["RMQ_HOST"]}')

    # Establish connection and create a channel on that connection
    connection = pika.BlockingConnection(
        pika.ConnectionParameters(host=config["Settings"]["RMQ_HOST"])
    )
    channel = connection.channel()

    # Ensure the durable file_notif_queue exists (survives broker restarts)
    channel.queue_declare(queue="file_notif_queue", durable=True)

    # Put the message data in a dictionary for conversion to JSON
    msg_dict = {
        "data_store": data_store,
        "filepath": fpath,
        "product": product,
        "version": version,
        "start_time": start_time,
        "end_time": end_time,
        "length": length,
        "checksum": checksum,
        "checksum_type": checksum_type,
    }

    msg_json = json.dumps(msg_dict)

    # Send the JSON formatted message on the default exchange, marked
    # persistent so it is written to disk by the broker.
    channel.basic_publish(
        exchange="",
        routing_key="file_notif_queue",
        body=msg_json,
        properties=pika.BasicProperties(delivery_mode=pika.DeliveryMode.Persistent),
    )
    log.debug(f" [x] Sent {msg_json}")

    # Close the connection to make sure the message actually gets sent - buffers
    # are flushed
    connection.close()
177
+
178
+
179
def main():
    """Command-line entry point: parse args, load config.ini, send one notification."""

    # Parse the arguments
    parser = argparse.ArgumentParser(f"{DESCRIPTION}python youvegotdata.py")

    # Add the positional argument(s?)
    parser.add_argument(
        "filepath", type=str, help="Send a notification for the file with this path."
    )

    # Add the flags
    parser.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        help="Verbose output - set log level to DEBUG",
    )

    parser.add_argument(
        "-p",
        "--product",
        default=None,
        help="The file's product",
    )

    parser.add_argument(
        "-r",
        "--version",
        default=None,
        help="The file's version",
    )

    parser.add_argument(
        "-s",
        "--start_time",
        default=None,
        help="The first date and time for which the file has data",
    )

    parser.add_argument(
        "-e",
        "--end_time",
        default=None,
        help="The last date and time for which the file has data",
    )

    parser.add_argument(
        "-l", "--length", default=None, help="The length(size) of the file"
    )

    parser.add_argument("-c", "--checksum", default=None, help="The file's checksum")

    parser.add_argument(
        "-t",
        "--checksum_type",
        default=None,
        help="The type of the checksum - its algorithm",
    )

    pargs = parser.parse_args()

    # Setup logging.
    logging.basicConfig(
        format="%(asctime)s %(levelname)-8s%(name)s: %(message)s",
        level="DEBUG" if pargs.verbose else "INFO",
    )

    # Reduce pika logging
    logging.getLogger("pika").setLevel(logging.WARNING)

    # Read the configuration file. Fixed: ConfigParser.read() never raises
    # FileNotFoundError - it silently skips missing files and returns the
    # list of files it actually parsed - so the previous try/except was dead
    # code and a missing config.ini proceeded with an empty config. Check
    # the return value instead and exit with a non-zero status.
    config = configparser.ConfigParser()
    if not config.read("config.ini"):
        log.error("config.ini not found. Please ensure the file exists.")
        sys.exit(1)

    log.info("Sending a new file notification")

    produce_notification(
        config,
        pargs.filepath,
        pargs.product,
        pargs.version,
        pargs.start_time,
        pargs.end_time,
        pargs.length,
        pargs.checksum,
        pargs.checksum_type,
    )


if __name__ == "__main__":
    main()
@@ -0,0 +1,47 @@
1
+ Metadata-Version: 2.4
2
+ Name: youvegotdata
3
+ Version: 1.0.0
4
+ Summary: Send new file notifications
5
+ Author-email: Jim Fluke <james.fluke@colostate.edu>
6
+ Requires-Python: >=3.8
7
+ Description-Content-Type: text/markdown
8
+ Requires-Dist: pika
9
+ Provides-Extra: test
10
+ Requires-Dist: pytest; extra == "test"
11
+
12
+ # youvegotdata
13
+
14
+ Uses RabbitMQ to send new file notifications, with the ultimate purpose of
15
+ getting the file metadata into the Data Inventory Database.
16
+
17
+ The "producer" `youvegotdata.py` will usually be called by the CIRA data
18
+ ingest scripts when a new file is added to the CIRA data stores, and will send
19
+ a message through RabbitMQ to the consumers with the file's metadata.
20
+
21
+ Message "consumers" will be running to receive the file metadata and insert it
22
+ into the database. It is expected that multiple consumer processes will be
23
+ accepting messages in RabbitMQ's "fair dispatch" configuration. A given
24
+ notification will be received by one consumer.
25
+
26
+ ## Running youvegotdata.py
27
+ This must be run in a Python environment that includes `pika` - for connecting
28
+ to RabbitMQ - and other needed packages. The `environ-3.8.yml` file in this
29
+ repository can be used to create a workable conda environment. Setting one up
30
+ using `pip` will certainly also work. Python 3.8 is the minimum version needed
31
+ to run the script. Higher versions should work.
32
+
33
+ Copy the template-config.ini file to config.ini and edit the config.ini as
34
+ described inside that file.
35
+ Run the code with:
36
+ ```
37
+ python youvegotdata.py [-h] [-v] [-p PRODUCT] [-r VERSION] [-s START_TIME] [-e END_TIME] [-l LENGTH] [-c CHECKSUM] [-t CHECKSUM_TYPE] filepath
38
+ ```
39
+ Run this with the -h (--help) argument to see the available flagged arguments.
40
+
41
+ This will usually be run with just the `filepath` argument. An example is:
42
+ ```
43
+ python youvegotdata/youvegotdata.py /full/path/to/local/file/data_file.hdf
44
+ ```
45
+ If run from a local repository of this project.
46
+
47
+ The `filepath` file must exist on the local machine.
@@ -0,0 +1,15 @@
1
+ .gitignore
2
+ README.md
3
+ environ-3.8.yml
4
+ pyproject.toml
5
+ template-config.ini
6
+ .github/workflows/package-and-publish.yaml
7
+ tests/__init__.py
8
+ tests/test_youvegotdata.py
9
+ youvegotdata/__init__.py
10
+ youvegotdata/youvegotdata.py
11
+ youvegotdata.egg-info/PKG-INFO
12
+ youvegotdata.egg-info/SOURCES.txt
13
+ youvegotdata.egg-info/dependency_links.txt
14
+ youvegotdata.egg-info/requires.txt
15
+ youvegotdata.egg-info/top_level.txt
@@ -0,0 +1,4 @@
1
+ pika
2
+
3
+ [test]
4
+ pytest
@@ -0,0 +1 @@
1
+ youvegotdata