From 58d15a6cee050da865f41416729bee418cda5f79 Mon Sep 17 00:00:00 2001 From: Jeremy McCormick Date: Wed, 5 Feb 2025 18:36:33 -0600 Subject: [PATCH 1/4] Add initial version --- .github/workflows/build.yaml | 49 +++++++++ .github/workflows/do_not_merge.yaml | 41 ++++++++ .github/workflows/formatting.yaml | 11 ++ .github/workflows/lint.yaml | 16 +++ .github/workflows/mypy.yaml | 11 ++ .github/workflows/rebase_checker.yaml | 7 ++ .github/workflows/yamllint.yaml | 11 ++ .gitignore | 3 + .pre-commit-config.yaml | 28 ++++++ .yamllint.yaml | 20 ++++ pyproject.toml | 139 ++++++++++++++++++++++++++ python/lsst/__init__.py | 0 python/lsst/sdm_tools/__init__.py | 22 ++++ python/lsst/sdm_tools/cli.py | 36 +++++++ python/lsst/sdm_tools/mypy.ini | 23 +++++ requirements.txt | 2 + setup.cfg | 5 + types.txt | 3 + 18 files changed, 427 insertions(+) create mode 100644 .github/workflows/build.yaml create mode 100644 .github/workflows/do_not_merge.yaml create mode 100644 .github/workflows/formatting.yaml create mode 100644 .github/workflows/lint.yaml create mode 100644 .github/workflows/mypy.yaml create mode 100644 .github/workflows/rebase_checker.yaml create mode 100644 .github/workflows/yamllint.yaml create mode 100644 .pre-commit-config.yaml create mode 100644 .yamllint.yaml create mode 100644 pyproject.toml create mode 100644 python/lsst/__init__.py create mode 100644 python/lsst/sdm_tools/__init__.py create mode 100644 python/lsst/sdm_tools/cli.py create mode 100644 python/lsst/sdm_tools/mypy.ini create mode 100644 requirements.txt create mode 100644 setup.cfg create mode 100644 types.txt diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml new file mode 100644 index 0000000..5c70977 --- /dev/null +++ b/.github/workflows/build.yaml @@ -0,0 +1,49 @@ +name: Build and test + +on: + push: + branches: + - main + tags: + - "*" + pull_request: + +jobs: + build: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.11", "3.12", "3.13"] + + steps: + - 
uses: actions/checkout@v4 + with: + # Need to clone everything for the git tags. + fetch-depth: 0 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + cache: "pip" + + - name: Install prereqs for setuptools + run: pip install wheel + + - name: Install dependencies + run: | + python -m pip install --upgrade pip uv + uv pip install --system -r requirements.txt + + - name: Install pytest packages + run: uv pip install --system pytest pytest-xdist pytest-cov + + - name: List installed packages + run: uv pip list + + - name: Build and install + run: uv pip install --system --no-deps -e . + + - name: Run tests + run: | + sdm-tools --help diff --git a/.github/workflows/do_not_merge.yaml b/.github/workflows/do_not_merge.yaml new file mode 100644 index 0000000..e3fae1a --- /dev/null +++ b/.github/workflows/do_not_merge.yaml @@ -0,0 +1,41 @@ +name: "Check commits can be merged" +on: + push: + branches: + - main + pull_request: + +jobs: + do-not-merge-checker: + runs-on: ubuntu-latest + + steps: + - name: Check that there are no commits that should not be merged + uses: gsactions/commit-message-checker@v2 + with: + excludeDescription: "true" # optional: this excludes the description body of a pull request + excludeTitle: "true" # optional: this excludes the title of a pull request + checkAllCommitMessages: "true" # optional: this checks all commits associated with a pull request + accessToken: ${{ secrets.GITHUB_TOKEN }} # github access token is only required if checkAllCommitMessages is true + # Check for message indicating that there is a commit that should + # not be merged. + pattern: ^(?!DO NOT MERGE) + flags: "i" + error: | + "This step failed because there is a commit containing the text + 'DO NOT MERGE'. Remove this commit from the branch before merging + or change the commit summary." 
+ + - uses: actions/checkout@v4 + + - name: Check requirements.txt for branches + shell: bash + run: | + FILE="requirements.txt" + MATCH=tickets/DM- + if grep -q $MATCH $FILE + then + echo "Ticket branches found in $FILE:" + grep -n $MATCH $FILE + exit 1 + fi diff --git a/.github/workflows/formatting.yaml b/.github/workflows/formatting.yaml new file mode 100644 index 0000000..27f34a6 --- /dev/null +++ b/.github/workflows/formatting.yaml @@ -0,0 +1,11 @@ +name: Check Python formatting + +on: + push: + branches: + - main + pull_request: + +jobs: + call-workflow: + uses: lsst/rubin_workflows/.github/workflows/formatting.yaml@main diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml new file mode 100644 index 0000000..3f5e9b0 --- /dev/null +++ b/.github/workflows/lint.yaml @@ -0,0 +1,16 @@ +name: Lint Python code + +on: + push: + branches: + - main + pull_request: + +jobs: + call-workflow: + uses: lsst/rubin_workflows/.github/workflows/lint.yaml@main + ruff: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: chartboost/ruff-action@v1 diff --git a/.github/workflows/mypy.yaml b/.github/workflows/mypy.yaml new file mode 100644 index 0000000..0849ea4 --- /dev/null +++ b/.github/workflows/mypy.yaml @@ -0,0 +1,11 @@ +name: Run mypy + +on: + push: + branches: + - main + pull_request: + +jobs: + call-workflow: + uses: lsst/rubin_workflows/.github/workflows/mypy.yaml@main diff --git a/.github/workflows/rebase_checker.yaml b/.github/workflows/rebase_checker.yaml new file mode 100644 index 0000000..65516d9 --- /dev/null +++ b/.github/workflows/rebase_checker.yaml @@ -0,0 +1,7 @@ +name: Check that 'main' is not merged into the development branch + +on: pull_request + +jobs: + call-workflow: + uses: lsst/rubin_workflows/.github/workflows/rebase_checker.yaml@main diff --git a/.github/workflows/yamllint.yaml b/.github/workflows/yamllint.yaml new file mode 100644 index 0000000..d0cdd57 --- /dev/null +++ b/.github/workflows/yamllint.yaml @@ -0,0 
+1,11 @@ +name: Lint YAML files + +on: + push: + branches: + - main + pull_request: + +jobs: + call-workflow: + uses: lsst/rubin_workflows/.github/workflows/yamllint.yaml@main diff --git a/.gitignore b/.gitignore index 15201ac..6abe941 100644 --- a/.gitignore +++ b/.gitignore @@ -169,3 +169,6 @@ cython_debug/ # PyPI configuration file .pypirc + +# lsst version file +python/lsst/sdm_tools/version.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..808a440 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,28 @@ +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.6.0 + hooks: + - id: check-yaml + args: + - "--unsafe" + - id: end-of-file-fixer + - id: trailing-whitespace + - repo: https://github.com/psf/black-pre-commit-mirror + rev: 24.4.2 + hooks: + - id: black + # It is recommended to specify the latest version of Python + # supported by your project here, or alternatively use + # pre-commit's default_language_version, see + # https://pre-commit.com/#top_level-default_language_version + language_version: python3.12 + - repo: https://github.com/pycqa/isort + rev: 5.13.2 + hooks: + - id: isort + name: isort (python) + - repo: https://github.com/astral-sh/ruff-pre-commit + # Ruff version. 
+ rev: v0.4.8 + hooks: + - id: ruff diff --git a/.yamllint.yaml b/.yamllint.yaml new file mode 100644 index 0000000..10c9e71 --- /dev/null +++ b/.yamllint.yaml @@ -0,0 +1,20 @@ +extends: default + +rules: + document-start: + present: false + ignore: | + /tests/ + /examples/ + line-length: + max: 132 + allow-non-breakable-words: true + allow-non-breakable-inline-mappings: true + ignore: | + /.github/workflows/lint.yaml + truthy: + # "on" as a key in workflows confuses things + ignore: | + /.github/workflows/ + indentation: + indent-sequences: consistent diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..1165e7a --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,139 @@ +[build-system] +requires = ["setuptools", "lsst-versions >= 1.3.0"] +build-backend = "setuptools.build_meta" + +[project] +name = "sdm_tools" +description = "Tools for working with the Science Data Model (SDM) schemas of the Rubin Observatory" +license = {text = "GNU General Public License v3 or later (GPLv3+)"} +readme = "README.md" +authors = [ + {name="Rubin Observatory Data Management", email="dm-admin@lists.lsst.org"}, +] +classifiers = [ + "Intended Audience :: Science/Research", + "License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)", + "Operating System :: OS Independent", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "Topic :: Scientific/Engineering :: Astronomy" +] +keywords = ["lsst"] +dependencies = [ + "lsst-felis", + "click" +] +requires-python = ">=3.11.0" +dynamic = ["version"] + +[project.urls] +Source = "https://github.com/lsst-dm/sdm_tools" + +[project.optional-dependencies] +test = [ + "pytest >= 3.2" +] + +[tool.setuptools.packages.find] +where = ["python"] + +[tool.setuptools] +zip-safe = true +license-files = ["COPYRIGHT", "LICENSE"] + +[tool.setuptools.package-data] +"sdm_tools" = ["py.typed"] + 
+[tool.setuptools.dynamic] +version = { attr = "lsst_versions.get_lsst_version" } + +[project.scripts] +sdm-tools = "lsst.sdm_tools.cli:cli" + +[tool.black] +line-length = 110 +target-version = ["py311"] + +[tool.isort] +profile = "black" +line_length = 110 + +[tool.lsst_versions] +write_to = "python/lsst/sdm_tools/version.py" + +[tool.ruff] +line-length = 110 +target-version = "py311" +exclude = [ + "__init__.py", + "lex.py", + "yacc.py", +] + +[tool.ruff.lint] +ignore = [ + "D100", + "D102", + "D104", + "D105", + "D107", + "D200", + "D203", + "D205", + "D213", + "D400", + "D413", + "N802", + "N803", + "N806", + "N812", + "N815", + "N816", + "N999", + "UP007", # Allow UNION in type annotation +] +select = [ + "E", # pycodestyle + "F", # pyflakes + "N", # pep8-naming + "W", # pycodestyle + "D", # pydocstyle + "UP", # pyupgrade +] + +[tool.pydocstyle] +convention = "numpy" +# Our coding style does not require docstrings for magic methods (D105) +# Our docstyle documents __init__ at the class level (D107) +# We allow methods to inherit docstrings and this is not compatible with D102. +# Docstring at the very first line is not required +# D200, D205 and D400 all complain if the first sentence of the docstring does +# not fit on one line. We do not require docstrings in __init__ files (D104). +add-ignore = ["D107", "D105", "D102", "D100", "D200", "D205", "D400", "D104"] + +[tool.ruff.lint.pycodestyle] +max-doc-length = 79 + +[tool.ruff.lint.pydocstyle] +convention = "numpy" + +[tool.numpydoc_validation] +checks = [ + "all", # All except the rules listed below. + "SA01", # See Also section. + "SA04", # We don't use descriptions with See Also. + "EX01", # Example section. + "SS06", # Summary can go into second line. + "GL01", # Summary text can start on same line as """ + "GL08", # Do not require docstring. + "ES01", # No extended summary required. + "SS05", # pydocstyle is better at finding infinitive verb. + "PR04", # Sphinx does not require parameter type. 
+] +exclude = [ + "^test_.*", # Do not test docstrings in test code. + '^__init__$', + '\._[a-zA-Z_]+$', # Private methods. +] diff --git a/python/lsst/__init__.py b/python/lsst/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/python/lsst/sdm_tools/__init__.py b/python/lsst/sdm_tools/__init__.py new file mode 100644 index 0000000..1346885 --- /dev/null +++ b/python/lsst/sdm_tools/__init__.py @@ -0,0 +1,22 @@ +# This file is part of sdm-tools. +# +# Developed for the LSST Data Management System. +# This product includes software developed by the LSST Project +# (https://www.lsst.org). +# See the COPYRIGHT file at the top-level directory of this distribution +# for details of code ownership. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +from .version import * diff --git a/python/lsst/sdm_tools/cli.py b/python/lsst/sdm_tools/cli.py new file mode 100644 index 0000000..1b8bb0f --- /dev/null +++ b/python/lsst/sdm_tools/cli.py @@ -0,0 +1,36 @@ +# This file is part of sdm-tools. +# +# Developed for the LSST Data Management System. +# This product includes software developed by the LSST Project +# (https://www.lsst.org). +# See the COPYRIGHT file at the top-level directory of this distribution +# for details of code ownership. 
+# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +import click + +from . import __version__ + + +@click.group() +@click.version_option(__version__) +@click.pass_context +def cli(ctx: click.Context) -> None: + """SDM Tools Command Line Interface""" + ctx.ensure_object(dict) + + +if __name__ == "__main__": + cli() diff --git a/python/lsst/sdm_tools/mypy.ini b/python/lsst/sdm_tools/mypy.ini new file mode 100644 index 0000000..796f8b5 --- /dev/null +++ b/python/lsst/sdm_tools/mypy.ini @@ -0,0 +1,23 @@ +[mypy] +ignore_errors = False +warn_unused_configs = True +warn_redundant_casts = True +ignore_missing_imports = False +disallow_untyped_defs = True +disallow_incomplete_defs = True +plugins = pydantic.mypy + +[mypy-sqlalchemy.*] +ignore_missing_imports = True + +[mypy-deepdiff.*] +ignore_missing_imports = True + +[mypy-felis.*] +ignore_missing_imports = False +ignore_errors = False +disallow_untyped_defs = True +disallow_incomplete_defs = True +strict_equality = True +warn_unreachable = True +warn_unused_ignores = True diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..2a7ca93 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,2 @@ +lsst-felis +click diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..a0396e9 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,5 @@ +[flake8] +max-line-length = 110 +max-doc-length = 79 +ignore = E203, W503, N802, N803, N806, N812, N815, 
N816 +exclude = __init__.py diff --git a/types.txt b/types.txt new file mode 100644 index 0000000..e47ca69 --- /dev/null +++ b/types.txt @@ -0,0 +1,3 @@ +types-PyYAML +types-click +types-Deprecated From 328ade14b80545d72f7dc64f3b02ccbd74da8268 Mon Sep 17 00:00:00 2001 From: Jeremy McCormick Date: Fri, 7 Feb 2025 15:08:57 -0600 Subject: [PATCH 2/4] Add cli command for building and packaging datalink metadata This was ported from: https://github.com/lsst/sdm_schemas/blob/main/datalink/build_datalink_metadata.py --- python/lsst/__init__.py | 20 +++ .../lsst/sdm_tools/build_datalink_metadata.py | 131 ++++++++++++++++++ python/lsst/sdm_tools/cli.py | 47 ++++++- 3 files changed, 197 insertions(+), 1 deletion(-) create mode 100644 python/lsst/sdm_tools/build_datalink_metadata.py diff --git a/python/lsst/__init__.py b/python/lsst/__init__.py index e69de29..d82548c 100644 --- a/python/lsst/__init__.py +++ b/python/lsst/__init__.py @@ -0,0 +1,20 @@ +# This file is part of sdm_tools. +# +# Developed for the LSST Data Management System. +# This product includes software developed by the LSST Project +# (https://www.lsst.org). +# See the COPYRIGHT file at the top-level directory of this distribution +# for details of code ownership. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
diff --git a/python/lsst/sdm_tools/build_datalink_metadata.py b/python/lsst/sdm_tools/build_datalink_metadata.py new file mode 100644 index 0000000..cfb02cc --- /dev/null +++ b/python/lsst/sdm_tools/build_datalink_metadata.py @@ -0,0 +1,131 @@ +"""From the Felis source files, build YAML metadata used by DataLink. + +Currently, this only determines principal column names. In the future, once +a new key has been added to Felis, it will include other column lists, and +possibly additional metadata. +""" + +# This file is part of sdm_tools. +# +# Developed for the LSST Data Management System. +# This product includes software developed by the LSST Project +# (https://www.lsst.org). +# See the COPYRIGHT file at the top-level directory of this distribution +# for details of code ownership. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +from __future__ import annotations + +import sys +from collections import defaultdict +from pathlib import Path +from typing import Any + +import yaml + + +def filter_columns(table: dict[str, Any], filter_key: str) -> list[str]: + """Find the columns for a table with a given key. + + This respects the TAP v1.1 convention for ordering of columns. All + columns without ``tap:column_index`` set will be sorted after all those + with it set, in the order in which they appeared in the Felis file. + + Parameters + ---------- + table : Dict[`str`, Any] + Felis definition of a table. 
+ filter_key : `str` + Felis key to use to find columns of interest. For example, use + ``tap:principal`` to find principal columns. + + Returns + ------- + columns : List[`str`] + List of filtered columns in sorted order. + """ + principal = [] + unknown_column_index = 100000000 + for column in table["columns"]: + if column.get(filter_key): + column_index = column.get("tap:column_index", unknown_column_index) + unknown_column_index += 1 + principal.append((column["name"], column_index)) + return [c[0] for c in sorted(principal, key=lambda c: c[1])] + + +def build_columns(felis: dict[str, Any], column_properties: list[str]) -> dict[str, dict[str, list[str]]]: + """Find the list of tables with a particular Felis property. + + Parameters + ---------- + felis : Dict[`str`, Any] + The parsed Felis YAML. + column_properties : `str` + The column properties to search for. + """ + schema = felis["name"] + output: dict[str, dict[str, list[str]]] = defaultdict(dict) + for table in felis["tables"]: + name = table["name"] + full_name = f"{schema}.{name}" + for column_property in column_properties: + columns = filter_columns(table, column_property) + output[full_name][column_property] = columns + return output + + +def process_files(files: list[Path], output_path: Path | None = None) -> None: + """Process a set of Felis input files and print output to standard out. + + Parameters + ---------- + files : List[`pathlib.Path`] + List of input files. + + Output + ------ + The YAML version of the output format will look like this: + + .. code-block:: yaml + + tables: + dp02_dc2_catalogs.ForcedSourceOnDiaObject: + tap:principal: + - band + - ccdVisitId + """ + tables = {} + for input_file in files: + with input_file.open("r") as fh: + felis = yaml.safe_load(fh) + tables.update(build_columns(felis, ["tap:principal"])) + + # Dump the result to the output stream. 
+ if output_path is None: + print(yaml.dump({"tables": tables}), file=sys.stdout) + else: + + with output_path.open("w") as output: + print(yaml.dump({"tables": tables}), file=output) + + +def main() -> None: + """Script entry point.""" + process_files([Path(f) for f in sys.argv[1:]]) + + +if __name__ == "__main__": + main() diff --git a/python/lsst/sdm_tools/cli.py b/python/lsst/sdm_tools/cli.py index 1b8bb0f..47ee678 100644 --- a/python/lsst/sdm_tools/cli.py +++ b/python/lsst/sdm_tools/cli.py @@ -1,4 +1,4 @@ -# This file is part of sdm-tools. +# This file is part of sdm_tools. # # Developed for the LSST Data Management System. # This product includes software developed by the LSST Project @@ -19,9 +19,15 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . +import zipfile +from pathlib import Path + import click from . import __version__ +from . import build_datalink_metadata as _build_datalink_metadata + +__all__ = ["cli"] @click.group() @@ -32,5 +38,44 @@ def cli(ctx: click.Context) -> None: ctx.ensure_object(dict) +@cli.command("build-datalink-metadata", help="Build Datalink metadata from Felis YAML files") +@click.argument("files", type=click.Path(exists=True), nargs=-1, required=True) +@click.option( + "--resource-dir", + type=click.Path(exists=True, file_okay=False), + default=".", + help="Directory to search for and write resources (DEFAULT: current directory)", +) +@click.option( + "--zip-dir", + type=click.Path(exists=True, file_okay=False), + default=".", + help="Directory to write zip files (DEFAULT: current directory)", +) +@click.pass_context +def build_datalink_metadata(ctx: click.Context, files: list[str], resource_dir: str, zip_dir: str) -> None: + """Build Datalink Metadata + + Build a collection of configuration files for datalinker that specify the + principal and minimal columns for tables. 
This temporarily only does + tap:principal and we hand-maintain a columns-minimal.yaml file until we can + include a new key in the Felis input files. + """ + resource_path = Path(resource_dir) + + paths = [Path(file) for file in files] + _build_datalink_metadata.process_files(paths, Path(resource_path / "columns-principal.yaml")) + + zip_path = Path(zip_dir) + with zipfile.ZipFile(zip_path / "datalink-columns.zip", "w") as columns_zip: + for yaml_file in resource_path.glob("columns-*.yaml"): + columns_zip.write(yaml_file, yaml_file.name) + with zipfile.ZipFile(zip_path / "datalink-snippets.zip", "w") as snippets_zip: + for snippet_file in resource_path.glob("*.json"): + snippets_zip.write(snippet_file, snippet_file.name) + for snippet_file in resource_path.glob("*.xml"): + snippets_zip.write(snippet_file, snippet_file.name) + + if __name__ == "__main__": cli() From 4b0742198d85ec65c53c351e2a8b06c2a7e84101 Mon Sep 17 00:00:00 2001 From: Jeremy McCormick Date: Thu, 13 Feb 2025 14:39:05 -0600 Subject: [PATCH 3/4] Add deepdiff dependency --- pyproject.toml | 3 ++- requirements.txt | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 1165e7a..952fb25 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,7 +23,8 @@ classifiers = [ keywords = ["lsst"] dependencies = [ "lsst-felis", - "click" + "click", + "deepdiff", ] requires-python = ">=3.11.0" dynamic = ["version"] diff --git a/requirements.txt b/requirements.txt index 2a7ca93..1e45e25 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,3 @@ lsst-felis click +deepdiff \ No newline at end of file From 5e43df2dfa6b870acdb76bfa65748edc4d1bdcbf Mon Sep 17 00:00:00 2001 From: Jeremy McCormick Date: Thu, 13 Feb 2025 14:40:00 -0600 Subject: [PATCH 4/4] WIP on band column checker --- python/lsst/sdm_tools/band_column_checker.py | 179 +++++++++++++++++++ python/lsst/sdm_tools/cli.py | 65 ++++++- 2 files changed, 243 insertions(+), 1 deletion(-) create mode 
100644 python/lsst/sdm_tools/band_column_checker.py diff --git a/python/lsst/sdm_tools/band_column_checker.py b/python/lsst/sdm_tools/band_column_checker.py new file mode 100644 index 0000000..1753965 --- /dev/null +++ b/python/lsst/sdm_tools/band_column_checker.py @@ -0,0 +1,179 @@ +"""Check consistency of band column definitions.""" + +import logging +import pprint +import re + +from deepdiff.diff import DeepDiff +from typing import Any + +from felis.datamodel import Schema, Table + +logger = logging.getLogger("band_column_checker") +logger.setLevel(logging.INFO) +logging.basicConfig(level=logging.INFO) + +BANDS = set(["u", "g", "r", "i", "z", "y"]) + + +class BandColumnChecker: + """Check consistency of band column definitions. + + Parameters + ---------- + files : list[str] + List of schema files to check. + table_names : list[str] + List of table names to check. If empty, all tables are checked. + """ + + def __init__(self, files: list[str], table_names: list[str]) -> None: + self.files = files + self.table_names = table_names + if len(self.table_names) > 0: + logger.debug(f"Checking tables: {self.table_names}") + self.schemas: dict[str, Schema] = {} + for file in files: + with open(file) as schema_file: + logger.debug(f"Reading schema file: {file}") + schema = Schema.from_stream(schema_file) + if schema.name in self.schemas: + raise ValueError(f"Duplicate schema name: {schema.name}") + self.schemas[schema.name] = schema + self._band_dict = self._create_band_dict() + + @property + def band_columns(self) -> dict[str, Any]: + """Return the band columns by schema and table.""" + return self._band_dict + + def _should_check_table(self, table_or_name: str | Table) -> bool: + """Check if the table or table name should be included in checks.""" + table_name = table_or_name if isinstance(table_or_name, str) else table_or_name.name + return len(self.table_names) == 0 or table_name in self.table_names + + def _create_band_dict(self) -> dict[str, Any]: + """Create a 
dictionary of band columns by schema and table.""" + band_columns: dict[str, Any] = {} + for schema_name, schema in self.schemas.items(): + band_columns[schema_name] = {} + for table in schema.tables: + if self._should_check_table(table): + band_columns[schema.name][table.name] = {} + for column in table.columns: + for band in BANDS: + if column.name.startswith(f"{band}_"): + if band not in band_columns[schema.name][table.name]: + band_columns[schema.name][table.name][band] = [] + band_columns[schema.name][table.name][band].append( + column.name.removeprefix(f"{band}_") + ) + if len(band_columns[schema.name][table.name]) == 0: + del band_columns[schema.name][table.name] + else: + logger.debug(f"Skipping table: {table.name}") + return band_columns + + def _create_table_band_dict(self, table: Table) -> dict[str, Any]: + """Create a dictionary of band columns for a single table.""" + band_columns: dict[str, Any] = {} + for column in table.columns: + for band in BANDS: + if column.name.startswith(f"{band}_"): + if band not in band_columns: + band_columns[band] = [] + raw_data = column.model_dump(exclude_none=True) + raw_data["name"] = raw_data["name"].removeprefix(f"{band}_") + raw_data["description"] = self._clean_column_description(raw_data["description"], band) + del raw_data["id"] # Remove the id field as they are always different + band_columns[band].append(raw_data) + return band_columns + + def _check_column_count(self) -> None: + """Check that number of columns in each band is the same.""" + for schema_name, columns in self.band_columns.items(): + for table_name, bands in columns.items(): + column_counts = {band: len(column_names) for band, column_names in bands.items()} + logger.info(f"'{schema_name}'.'{table_name}' column counts: %s", column_counts) + if len(set(column_counts.values())) > 1: + logger.error( + f"Inconsistent number of band columns in " + f"'{schema_name}'.'{table_name}': {column_counts}" + ) + + def _check_column_names(self) -> None: + 
"""Check that the names of the band columns are the same.""" + for schema_name, tables in self.band_columns.items(): + logger.info("Checking column names for schema: %s", schema_name) + for table_name, bands in tables.items(): + # Check that each band has the same column names + reference_band, reference_columns = next(iter(bands.items())) + reference_set = set(reference_columns) + for band, column_names in bands.items(): + column_set = set(column_names) + if column_set == reference_set: + continue + logger.error( + f"In '{schema_name}'.'{table_name}', " + f"inconsistencies found between '{reference_band}' and '{band}'" + ) + logger.error( + f" In '{reference_band}' but not in '{band}': {sorted(reference_set - column_set)}" + ) + logger.error( + f" In '{band}' but not in '{reference_band}': {sorted(column_set - reference_set)}" + ) + logger.info("All band columns have the same names") + + def dump(self) -> None: + pprint.pprint(self.band_columns) + + def print(self) -> None: + """Print out band columns by schema and table.""" + for schema_name, columns in self.band_columns.items(): + print(f"Schema: {schema_name}") + for table_name, bands in columns.items(): + print(f" Table: {table_name}") + for band, column_names in bands.items(): + print(f" Band: {band}") + for column_name in column_names: + print(f" Column: {column_name}") + + def _clean_column_description(self, description: str, band: str) -> str: + """Replace band names in the column description with a placeholder.""" + description = re.sub(rf"\b{band}-", "[BAND]-", description) + description = re.sub(rf"\b{band}_", "[BAND]_", description) + return description + + def _diff(self) -> None: + for schema_name, schema in self.schemas.items(): + logger.debug(f"Running diff on schema: {schema_name}") + for table in schema.tables: + if self._should_check_table(table): + logger.debug(f"Running diff on table: {table.name}") + band_columns: dict[str, Any] = self._create_table_band_dict(table) + if len(band_columns) > 
1: + self._diff_single_table(schema_name, table.name, band_columns) + else: # Skip table + logger.debug(f"Skipping table: {table.name}") + + def _diff_single_table(self, schema_name: str, table_name: str, band_columns: dict[str, Any]) -> None: + """Run diff comparison on the band columns for a single table.""" + reference_band, reference_columns = next(iter(band_columns.items())) + for compare_band, compare_columns in band_columns.items(): + if compare_band == reference_band: + continue + logger.debug(f"Comparing '{reference_band}' and '{compare_band}'") + diff = DeepDiff(reference_columns, compare_columns, ignore_order=True) + if len(diff) > 0: + logger.warning( + f"In '{schema_name}'.'{table_name}', differences found between " + f"'{reference_band}' and '{compare_band}': " + f"{diff}" + ) + + def check(self) -> None: + """Check consistency of band column definitions.""" + self._check_column_count() + self._check_column_names() + self._diff() diff --git a/python/lsst/sdm_tools/cli.py b/python/lsst/sdm_tools/cli.py index 47ee678..bb587b6 100644 --- a/python/lsst/sdm_tools/cli.py +++ b/python/lsst/sdm_tools/cli.py @@ -23,19 +23,42 @@ from pathlib import Path import click +import logging from . import __version__ from . 
import build_datalink_metadata as _build_datalink_metadata +from .band_column_checker import BandColumnChecker __all__ = ["cli"] +logger = logging.getLogger("sdm_tools") + +loglevel_choices = list(logging._nameToLevel.keys()) + @click.group() @click.version_option(__version__) +@click.option( + "--log-level", + type=click.Choice(loglevel_choices), + envvar="SDM_TOOLS_LOGLEVEL", + help="SDM Tools log level", + default=logging.getLevelName(logging.INFO), +) +@click.option( + "--log-file", + type=click.Path(), + envvar="SDM_TOOLS_LOGFILE", + help="SDM Tools log file path", +) @click.pass_context -def cli(ctx: click.Context) -> None: +def cli(ctx: click.Context, log_level: str, log_file: str | None) -> None: """SDM Tools Command Line Interface""" ctx.ensure_object(dict) + if log_file: + logging.basicConfig(filename=log_file, level=log_level) + else: + logging.basicConfig(level=log_level) @cli.command("build-datalink-metadata", help="Build Datalink metadata from Felis YAML files") @@ -77,5 +100,45 @@ def build_datalink_metadata(ctx: click.Context, files: list[str], resource_dir: snippets_zip.write(snippet_file, snippet_file.name) + +def _parse_comma_separated(ctx, param, value): + if value: + return value.split(",") + return [] + + +@cli.command("check-band-columns", help="Check consistency of band column definitions") +@click.argument("files", type=click.Path(exists=True), nargs=-1, required=True) +@click.option("--print", "-p", "print_columns", is_flag=True, help="Print out the band columns") +@click.option("--dump", "-d", "dump", is_flag=True, help="Dump the raw band column data") +@click.option( + "--tables", + "-t", + "table_names", + callback=_parse_comma_separated, + help="Names of tables to check (comma-separated)", +) +@click.pass_context +def check_band_columns( + ctx: click.Context, + files: list[str], + print_columns: bool = False, + dump: bool = False, + table_names: list[str] = [], +) -> None: + """Check consistency of band column definitions + + Check that
band-prefixed columns (``u_``, ``g_``, etc.) are defined + consistently across bands within each table of the given Felis schema + files, reporting any differences in column counts, column names, and + full column definitions between bands. + """ + checker = BandColumnChecker(files, table_names) + if dump: + checker.dump() + if print_columns: + checker.print() + checker.check() + + if __name__ == "__main__": cli()