-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add cli command for building and packaging datalink metadata
This was ported from: https://github.com/lsst/sdm_schemas/blob/main/datalink/build_datalink_metadata.py
- Loading branch information
1 parent
7a3918e
commit 5649e01
Showing
2 changed files
with
155 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,110 @@ | ||
"""From the Felis source files, build YAML metadata used by DataLink. | ||
Currently, this only determines principal column names. In the future, once | ||
a new key has been added to Felis, it will include other column lists, and | ||
possibly additional metadata. | ||
""" | ||
|
||
from __future__ import annotations | ||
|
||
import sys | ||
from collections import defaultdict | ||
from pathlib import Path | ||
from typing import Any | ||
|
||
import yaml | ||
|
||
|
||
def filter_columns(table: dict[str, Any], filter_key: str) -> list[str]:
    """Return the names of a table's columns that have a given key set.

    Columns are ordered following the TAP v1.1 convention: ascending by
    ``tap:column_index``. Columns that carry no ``tap:column_index`` are
    placed after every column that has one, keeping the order in which they
    appear in the Felis file.

    Parameters
    ----------
    table : dict[`str`, Any]
        Felis definition of a table. Its ``columns`` entry is iterated.
    filter_key : `str`
        Felis key used to select columns. A column is selected when the
        value stored under this key is truthy. For example, use
        ``tap:principal`` to find principal columns.

    Returns
    -------
    columns : list[`str`]
        Names of the selected columns in sorted order.
    """
    # Stand-in sort key for columns without an explicit index. It starts far
    # above any index expected in practice and grows with every selected
    # column, so later un-indexed columns always sort after earlier ones.
    fallback_index = 100000000
    ranked = []
    for entry in table["columns"]:
        if not entry.get(filter_key):
            continue
        position = entry.get("tap:column_index", fallback_index)
        fallback_index += 1
        ranked.append((position, entry["name"]))

    # Sort on the index only; the sort is stable, so columns sharing an index
    # keep their file order.
    ranked.sort(key=lambda pair: pair[0])
    return [name for _, name in ranked]
|
||
|
||
def build_columns(felis: dict[str, Any], column_properties: list[str]) -> dict[str, dict[str, list[str]]]:
    """Build the per-table column lists for a set of Felis properties.

    Parameters
    ----------
    felis : dict[`str`, Any]
        The parsed Felis YAML. Its ``name`` entry is used as the schema name
        and its ``tables`` entry is iterated.
    column_properties : list[`str`]
        The column properties to search for, e.g. ``["tap:principal"]``.

    Returns
    -------
    tables : dict[`str`, dict[`str`, list[`str`]]]
        Mapping from ``<schema>.<table>`` to a mapping from each requested
        property to the ordered list of column names that have it set. A
        table only gets an entry once at least one property has been
        processed for it, so an empty ``column_properties`` yields an empty
        result.
    """
    schema = felis["name"]
    # A plain dict (not a defaultdict) is returned on purpose: callers that
    # look up an unknown table get a KeyError instead of silently creating an
    # entry, and the result serializes as an ordinary YAML mapping.
    output: dict[str, dict[str, list[str]]] = {}
    for table in felis["tables"]:
        name = table["name"]
        full_name = f"{schema}.{name}"
        for column_property in column_properties:
            columns = filter_columns(table, column_property)
            output.setdefault(full_name, {})[column_property] = columns
    return output
|
||
|
||
def process_files(files: list[Path], output_path: Path | None = None) -> None:
    """Process a set of Felis input files and emit DataLink metadata as YAML.

    Parameters
    ----------
    files : list[`pathlib.Path`]
        List of input files. If two files define the same
        ``<schema>.<table>`` name, the entry from the later file replaces
        the earlier one.
    output_path : `pathlib.Path` or `None`, optional
        File to write the YAML to. If `None` (the default), the YAML is
        printed to standard output.

    Notes
    -----
    The YAML version of the output format will look like this:

    .. code-block:: yaml

       tables:
         dp02_dc2_catalogs.ForcedSourceOnDiaObject:
           tap:principal:
             - band
             - ccdVisitId
    """
    tables: dict[str, dict[str, list[str]]] = {}
    for input_file in files:
        # Read as UTF-8 explicitly instead of relying on the platform's
        # locale default, which would make parsing of non-ASCII text
        # machine-dependent.
        with input_file.open("r", encoding="utf-8") as fh:
            felis = yaml.safe_load(fh)
        tables.update(build_columns(felis, ["tap:principal"]))

    # Render before touching the output file so that a serialization failure
    # cannot leave a truncated file behind.
    rendered = yaml.dump({"tables": tables})

    # Dump the result to the output stream.
    if output_path is None:
        print(rendered, file=sys.stdout)
    else:
        with output_path.open("w", encoding="utf-8") as output:
            print(rendered, file=output)
|
||
|
||
def main() -> None:
    """Script entry point.

    Every command-line argument is taken as the path of a Felis input file.
    No output path is passed on, so the generated YAML is printed to
    standard output.
    """
    input_paths = list(map(Path, sys.argv[1:]))
    process_files(input_paths)


# Run the command-line entry point only when executed as a script.
if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters