Skip to content

Commit

Permalink
Add cli command for building and packaging datalink metadata
Browse files Browse the repository at this point in the history
  • Loading branch information
JeremyMcCormick committed Feb 13, 2025
1 parent 7a3918e commit 5649e01
Show file tree
Hide file tree
Showing 2 changed files with 155 additions and 0 deletions.
110 changes: 110 additions & 0 deletions python/lsst/sdm_tools/build_datalink_metadata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
"""From the Felis source files, build YAML metadata used by DataLink.
Currently, this only determines principal column names. In the future, once
a new key has been added to Felis, it will include other column lists, and
possibly additional metadata.
"""

from __future__ import annotations

import sys
from collections import defaultdict
from pathlib import Path
from typing import Any

import yaml


def filter_columns(table: dict[str, Any], filter_key: str) -> list[str]:
    """Find the columns for a table with a given key.

    This respects the TAP v1.1 convention for ordering of columns. All
    columns without ``tap:column_index`` set will be sorted after all those
    with it set, in the order in which they appeared in the Felis file.
    Columns sharing the same ``tap:column_index`` also keep their file order.

    Parameters
    ----------
    table : Dict[`str`, Any]
        Felis definition of a table. Must contain a ``columns`` list.
    filter_key : `str`
        Felis key to use to find columns of interest. For example, use
        ``tap:principal`` to find principal columns. A column is selected
        when its value for this key is truthy.

    Returns
    -------
    columns : List[`str`]
        List of filtered columns in sorted order.

    Raises
    ------
    KeyError
        If the table has no ``columns`` entry or a selected column has no
        ``name``.
    """
    selected = []
    for position, column in enumerate(table["columns"]):
        if not column.get(filter_key):
            continue
        column_index = column.get("tap:column_index")
        # Use a two-part sort key rather than a large sentinel index: group 0
        # holds explicitly indexed columns, group 1 the unindexed ones in file
        # order. An explicit null index is treated the same as a missing one.
        if column_index is None:
            sort_key = (1, position)
        else:
            sort_key = (0, column_index)
        selected.append((sort_key, column["name"]))
    # Sort on the key only; the sort is stable, so ties keep file order.
    selected.sort(key=lambda entry: entry[0])
    return [name for _, name in selected]


def build_columns(felis: dict[str, Any], column_properties: list[str]) -> dict[str, dict[str, list[str]]]:
    """Collect, for every table, the columns carrying each requested property.

    Parameters
    ----------
    felis : Dict[`str`, Any]
        The parsed Felis YAML. Its ``name`` is used as the schema name and
        its ``tables`` list supplies the table definitions.
    column_properties : List[`str`]
        The column properties to search for.

    Returns
    -------
    tables : Dict[`str`, Dict[`str`, List[`str`]]]
        Mapping from ``schema.table`` to a mapping from each requested
        property to the column names returned by `filter_columns`.
    """
    schema_name = felis["name"]
    per_table: dict[str, dict[str, list[str]]] = defaultdict(dict)
    for table_def in felis["tables"]:
        qualified_name = f"{schema_name}.{table_def['name']}"
        # An entry for the table only comes into existence once a property is
        # stored, so an empty property list yields an empty result.
        for prop in column_properties:
            per_table[qualified_name][prop] = filter_columns(table_def, prop)
    return per_table


def process_files(files: list[Path], output_path: Path | None = None) -> None:
    """Process a set of Felis input files and write the combined metadata.

    Parameters
    ----------
    files : List[`pathlib.Path`]
        List of input files.
    output_path : `pathlib.Path`, optional
        File to write the YAML output to. If `None` (the default), the
        output is printed to standard output instead.

    Notes
    -----
    If several input files define the same ``schema.table`` name, the entry
    from the later file replaces the earlier one.

    The YAML version of the output format will look like this:

    .. code-block:: yaml

       tables:
         dp02_dc2_catalogs.ForcedSourceOnDiaObject:
           tap:principal:
             - band
             - ccdVisitId
    """
    tables: dict[str, dict[str, list[str]]] = {}
    for input_file in files:
        # Read as UTF-8 explicitly instead of relying on the platform's
        # locale encoding, which would misread non-ASCII text on some systems.
        with input_file.open("r", encoding="utf-8") as fh:
            felis = yaml.safe_load(fh)
        tables.update(build_columns(felis, ["tap:principal"]))

    # Render before opening the destination so that a serialization failure
    # does not leave a truncated output file behind.
    rendered = yaml.dump({"tables": tables})
    if output_path is None:
        print(rendered, file=sys.stdout)
    else:
        with output_path.open("w", encoding="utf-8") as output:
            print(rendered, file=output)


def main() -> None:
    """Run `process_files` on the paths given as command-line arguments.

    No output path is passed, so the resulting YAML goes to standard output.
    """
    input_paths = list(map(Path, sys.argv[1:]))
    process_files(input_paths)


# Only run the conversion when this module is executed as a script, so that
# importing it (as the CLI module does) has no side effects.
if __name__ == "__main__":
    main()
45 changes: 45 additions & 0 deletions python/lsst/sdm_tools/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,15 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

import zipfile
from pathlib import Path

import click

from . import __version__
from . import build_datalink_metadata as _build_datalink_metadata

__all__ = ["cli"]


@click.group()
Expand All @@ -32,5 +38,44 @@ def cli(ctx: click.Context) -> None:
ctx.ensure_object(dict)


@cli.command("build-datalink-metadata", help="Build Datalink metadata from Felis YAML files")
@click.argument("files", type=click.Path(exists=True), nargs=-1, required=True)
@click.option(
    "--resource-dir",
    type=click.Path(exists=True, file_okay=False),
    default=".",
    help="Directory to search for and write resources (DEFAULT: current directory)",
)
@click.option(
    "--zip-dir",
    type=click.Path(exists=True, file_okay=False),
    default=".",
    help="Directory to write zip files (DEFAULT: current directory)",
)
@click.pass_context
def build_datalink_metadata(ctx: click.Context, files: list[str], resource_dir: str, zip_dir: str) -> None:
    """Generate and package the datalinker column and snippet resources.

    The principal-column list is regenerated from the given Felis files and
    written to ``columns-principal.yaml`` in the resource directory. For now
    only ``tap:principal`` is derived; ``columns-minimal.yaml`` is maintained
    by hand until Felis gains a key for it.

    Two archives are then written to the zip directory:

    * ``datalink-columns.zip`` with every ``columns-*.yaml`` file found in
      the resource directory.
    * ``datalink-snippets.zip`` with every ``*.json`` and ``*.xml`` file
      found in the resource directory.

    Archive members are stored under their base file name only.
    """
    resources = Path(resource_dir)
    archives = Path(zip_dir)

    felis_paths = list(map(Path, files))
    _build_datalink_metadata.process_files(felis_paths, resources / "columns-principal.yaml")

    with zipfile.ZipFile(archives / "datalink-columns.zip", "w") as columns_zip:
        for member in resources.glob("columns-*.yaml"):
            columns_zip.write(member, member.name)

    with zipfile.ZipFile(archives / "datalink-snippets.zip", "w") as snippets_zip:
        # JSON snippets are added first, then XML ones.
        for pattern in ("*.json", "*.xml"):
            for member in resources.glob(pattern):
                snippets_zip.write(member, member.name)


# Invoke the click command group when this module is executed directly.
if __name__ == "__main__":
    cli()

0 comments on commit 5649e01

Please sign in to comment.