def filter_columns(table: dict[str, Any], filter_key: str) -> list[str]:
    """Find the columns for a table with a given key.

    This respects the TAP v1.1 convention for ordering of columns. All
    columns without ``tap:column_index`` set will be sorted after all those
    with it set, in the order in which they appeared in the Felis file.

    Parameters
    ----------
    table : Dict[`str`, Any]
        Felis definition of a table.
    filter_key : `str`
        Felis key to use to find columns of interest. For example, use
        ``tap:principal`` to find principal columns.

    Returns
    -------
    columns : List[`str`]
        List of filtered columns in sorted order.
    """
    selected: list[tuple[str, int]] = []
    # Synthetic sort key handed to columns that lack tap:column_index.  It
    # starts far above any plausible real index and grows per selected
    # column, so unindexed columns sort after indexed ones while keeping
    # their original file order.
    next_unset_index = 100000000
    for column in table["columns"]:
        if column.get(filter_key):
            index = column.get("tap:column_index", next_unset_index)
            next_unset_index += 1
            selected.append((column["name"], index))
    return [name for name, _ in sorted(selected, key=lambda item: item[1])]


def build_columns(felis: dict[str, Any], column_properties: list[str]) -> dict[str, dict[str, list[str]]]:
    """Find the list of tables with a particular Felis property.

    Parameters
    ----------
    felis : Dict[`str`, Any]
        The parsed Felis YAML.
    column_properties : List[`str`]
        The column properties to search for.

    Returns
    -------
    output : Dict[`str`, Dict[`str`, List[`str`]]]
        Mapping from fully-qualified table name (``schema.table``) to a
        mapping of each requested property to its sorted column names.
    """
    schema = felis["name"]
    output: dict[str, dict[str, list[str]]] = defaultdict(dict)
    for table in felis["tables"]:
        full_name = f"{schema}.{table['name']}"
        for column_property in column_properties:
            output[full_name][column_property] = filter_columns(table, column_property)
    return output
def process_files(files: list[Path], output_path: Path | None = None) -> None:
    """Process a set of Felis input files and write the combined metadata.

    Parameters
    ----------
    files : List[`pathlib.Path`]
        List of input files.
    output_path : `pathlib.Path`, optional
        File to write the YAML output to. If `None` (the default), the
        output is printed to standard out.

    Notes
    -----
    The YAML version of the output format will look like this:

    .. code-block:: yaml

        tables:
          dp02_dc2_catalogs.ForcedSourceOnDiaObject:
            tap:principal:
            - band
            - ccdVisitId
    """
    tables = {}
    for input_file in files:
        with input_file.open("r") as fh:
            felis = yaml.safe_load(fh)
        # Later files override earlier ones on duplicate table names.
        tables.update(build_columns(felis, ["tap:principal"]))

    # Dump the result once, then send it to the chosen output stream.
    document = yaml.dump({"tables": tables})
    if output_path is None:
        print(document, file=sys.stdout)
    else:
        with output_path.open("w") as output:
            print(document, file=output)


def main() -> None:
    """Script entry point: treat all command-line arguments as input files."""
    process_files([Path(f) for f in sys.argv[1:]])
+ """ + resource_path = Path(resource_dir) + + paths = [Path(file) for file in files] + _build_datalink_metadata.process_files(paths, Path(resource_path / "columns-principal.yaml")) + + zip_path = Path(zip_dir) + with zipfile.ZipFile(zip_path / "datalink-columns.zip", "w") as columns_zip: + for yaml_file in resource_path.glob("columns-*.yaml"): + columns_zip.write(yaml_file, yaml_file.name) + with zipfile.ZipFile(zip_path / "datalink-snippets.zip", "w") as snippets_zip: + for snippet_file in resource_path.glob("*.json"): + snippets_zip.write(snippet_file, snippet_file.name) + for snippet_file in resource_path.glob("*.xml"): + snippets_zip.write(snippet_file, snippet_file.name) + + if __name__ == "__main__": cli()