Skip to content
This repository has been archived by the owner on May 9, 2024. It is now read-only.

[Date Conversion] Enables conversion of date to int #472

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions omniscidb/Analyzer/Analyzer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,8 @@ const hdk::ir::Type* analyze_type_info(hdk::ir::OpType op,
auto& ctx = left_type->ctx();
const hdk::ir::Type* result_type;
const hdk::ir::Type* common_type;
LOG(ERROR) << "analyze_type_info - left type: " << left_type->toString()
<< " right: " << right_type->toString();
*new_left_type = left_type;
*new_right_type = right_type;
if (hdk::ir::isLogic(op)) {
Expand Down Expand Up @@ -306,6 +308,8 @@ const hdk::ir::Type* analyze_type_info(hdk::ir::OpType op,
} else {
throw std::runtime_error("invalid binary operator type.");
}
LOG(ERROR) << "new left: " << (*new_left_type)->toString()
<< " right: " << (*new_right_type)->toString();
result_type =
result_type->withNullable(left_type->nullable() || right_type->nullable());
return result_type;
Expand Down
4 changes: 4 additions & 0 deletions omniscidb/ArrowStorage/ArrowStorage.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -345,6 +345,7 @@ TableInfoPtr ArrowStorage::createTable(const std::string& table_name,
TableInfoPtr res;
int table_id;
mapd_unique_lock<mapd_shared_mutex> data_lock(data_mutex_);
LOG(ERROR) << "Table ------- " << table_name << " -------";
size_t next_col_idx = 0;
{
mapd_unique_lock<mapd_shared_mutex> dict_lock(dict_mutex_);
Expand Down Expand Up @@ -395,8 +396,10 @@ TableInfoPtr ArrowStorage::createTable(const std::string& table_name,
type = elem_type;
}
}
LOG(ERROR) << "adding col info " << type->toString();
auto col_info = addColumnInfo(
db_id_, table_id, columnId(next_col_idx++), col.name, type, false);
LOG(ERROR) << "added col info " << col_info->toString();
}
addRowidColumn(db_id_, table_id, columnId(next_col_idx++));
}
Expand All @@ -418,6 +421,7 @@ TableInfoPtr ArrowStorage::createTable(const std::string& table_name,
table.fragment_size = options.fragment_size;
table.schema = schema;
}
LOG(ERROR) << "table info " << res->toString() << " schema: " << schema->ToString(true);

return res;
}
Expand Down
1 change: 1 addition & 0 deletions omniscidb/IR/Expr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,7 @@ ExprPtr Expr::decompress() const {
return makeExpr<UOper>(new_type, contains_agg_, OpType::kCast, shared_from_this());
} else if (type_->id() == Type::kDate && type_->size() != 8) {
auto date_type = static_cast<const DateType*>(type_);
LOG(ERROR) << "decompress type: " << type_->toString();
return makeExpr<UOper>(type_->ctx().date64(TimeUnit::kSecond, date_type->nullable()),
contains_agg_,
OpType::kCast,
Expand Down
5 changes: 3 additions & 2 deletions omniscidb/QueryBuilder/QueryBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -864,15 +864,16 @@ BuilderExpr BuilderExpr::cast(const Type* new_type) const {
return {builder_, expr_->cast(new_type), "", true};
}
} else if (expr_->type()->isDate()) {
if (new_type->isDate() || new_type->isTimestamp()) {
LOG(ERROR) << "Conversion date: " << expr_->type() << " new_type: " << new_type;
if (new_type->isInteger() || new_type->isDate() || new_type->isTimestamp()) {
return {builder_, expr_->cast(new_type), "", true};
}
} else if (expr_->type()->isTime()) {
if (new_type->isTime()) {
return {builder_, expr_->cast(new_type), "", true};
}
} else if (expr_->type()->isTimestamp()) {
if (new_type->isNumber() || new_type->isDate() || new_type->isTimestamp()) {
if (new_type->isInteger() || new_type->isDate() || new_type->isTimestamp()) {
return {builder_, expr_->cast(new_type), "", true};
}
}
Expand Down
1 change: 1 addition & 0 deletions omniscidb/QueryEngine/ArrowResultSet.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ namespace {

const hdk::ir::Type* type_from_arrow_field(hdk::ir::Context& ctx,
const arrow::Field& field) {
LOG(ERROR) << "type_from_arrow called: " << field.type()->ToString();
switch (field.type()->id()) {
case arrow::Type::INT8:
return ctx.int8(field.nullable());
Expand Down
2 changes: 2 additions & 0 deletions omniscidb/QueryEngine/RelAlgDagBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ const hdk::ir::Type* buildType(hdk::ir::Context& ctx,
return ctx.timestamp(precisionToTimeUnit(precision), nullable);
}
if (type_name == std::string("DATE")) {
LOG(ERROR) << "type_name: " << type_name;
return ctx.date64(hdk::ir::TimeUnit::kSecond, nullable);
}
if (type_name == std::string("TIME")) {
Expand Down Expand Up @@ -2306,6 +2307,7 @@ class RelAlgDispatcher {
}

std::vector<TargetMetaInfo> parseTupleType(const rapidjson::Value& tuple_type_arr) {
LOG(ERROR) << "ParseTuple? ";
CHECK(tuple_type_arr.IsArray());
std::vector<TargetMetaInfo> tuple_type;
for (auto tuple_type_arr_it = tuple_type_arr.Begin();
Expand Down
16 changes: 16 additions & 0 deletions omniscidb/QueryEngine/RelAlgExecutor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,20 @@ ExecutionResult RelAlgExecutor::executeRelAlgQueryNoRetry(const CompilationOptio
executor_->setupCaching(data_provider_, col_descs, phys_table_ids);

ScopeGuard restore_metainfo_cache = [this] { executor_->clearMetaInfoCache(); };

auto schema_provider = executor_->getSchemaProvider();
auto dbs = schema_provider->listDatabases();
// Current JSON format supports a single database only. To support result
// sets in SQL queries, we add tables from the ResultSetRegistry using
// negative table ids.
auto tables = schema_provider->listTables(dbs[0]);
auto more_tables = schema_provider->listTables(dbs[0]);
for (auto table : more_tables) {
LOG(ERROR) << table->toString();
for (auto column : schema_provider->listColumns(dbs[0], table->table_id)) {
LOG(ERROR) << column->toString();
}
}
hdk::QueryExecutionSequence query_seq(ra, executor_->getConfigPtr());
if (just_explain_plan) {
std::stringstream ss;
Expand Down Expand Up @@ -260,6 +274,7 @@ ExecutionResult RelAlgExecutor::executeRelAlgQueryNoRetry(const CompilationOptio
for (auto& subquery : getSubqueries()) {
auto subquery_ra = subquery->node();
CHECK(subquery_ra);
LOG(ERROR) << "subq node: " << subquery_ra->toString();
if (subquery_ra->hasContextData()) {
continue;
}
Expand All @@ -268,6 +283,7 @@ ExecutionResult RelAlgExecutor::executeRelAlgQueryNoRetry(const CompilationOptio
hdk::QueryExecutionSequence subquery_seq(subquery_ra, executor_->getConfigPtr());
ra_executor.execute(subquery_seq, co, eo, 0);
}
LOG(ERROR) << "seq front: " << query_seq.steps().front()->toString();

auto shared_res = execute(query_seq, co, eo, queue_time_ms);
return std::move(*shared_res);
Expand Down
1 change: 1 addition & 0 deletions omniscidb/QueryEngine/RelAlgTranslator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ std::pair<Datum, bool> datum_from_scalar_tv(const ScalarTargetValue* scalar_tv,
case hdk::ir::Type::kTimestamp: {
const auto ival = boost::get<int64_t>(scalar_tv);
CHECK(ival);
LOG(ERROR) << "translator: " << type->toString() << " scalar val: " << scalar_tv;
if (*ival == inline_int_null_value(type)) {
is_null_const = true;
} else {
Expand Down
2 changes: 1 addition & 1 deletion omniscidb/ResultSetRegistry/ResultSetRegistry.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ ResultSetTableTokenPtr ResultSetRegistry::put(ResultSetTable table) {
addRowidColumn(db_id_, table_id, columnId(first_rs->colCount()));

// TODO: lazily compute row count and try to avoid global write
// locks for that
// locks for that
auto table_data = std::make_unique<TableData>();
size_t row_count = 0;
for (auto& rs : table.results()) {
Expand Down
24 changes: 24 additions & 0 deletions omniscidb/Shared/DateTimeParser.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,30 @@ int64_t dateTimeParse(std::string_view const s, hdk::ir::TimeUnit unit) {
}
}

/**
 * Converts an arithmetic value to another arithmetic type, throwing instead of
 * silently returning a different value.
 *
 * This helper is intentionally NOT placed in an unnamed namespace. This file is
 * a header, and non-static templates defined in it (such as
 * dateTimeParse<R, TYPE>()) call numeric_cast(). With internal linkage every
 * translation unit would get its own distinct numeric_cast, so those callers
 * would refer to a different entity in each translation unit, which violates
 * the one-definition rule. A function template may be defined in several
 * translation units, so no `inline` keyword is required here.
 *
 * @tparam To   Destination arithmetic type (must be given explicitly).
 * @tparam From Source arithmetic type (deduced from the argument).
 * @param v     Value to convert.
 * @return      v converted to To.
 * @throws std::runtime_error if converting the result back to From does not
 *         compare equal to v, or if the sign bit of the result differs from
 *         the sign bit of v.
 */
template <
    class To,
    class From,
    std::enable_if_t<std::is_arithmetic_v<To> && std::is_arithmetic_v<From>, bool> = true>
To numeric_cast(From v) {
  auto r = static_cast<To>(v);
  // The round trip catches truncation; the sign-bit comparison catches values
  // that survive the round trip but flip sign (e.g. signed -1 -> unsigned).
  if (static_cast<From>(r) != v || std::signbit(r) != std::signbit(v)) {
    throw std::runtime_error("numeric_cast<>() failed");
  }
  return r;
}

/**
 * Parses a date/time string and returns the parsed value converted to R.
 *
 * @tparam R    Arithmetic result type; the parsed value is passed through
 *              numeric_cast<R>(), which throws if the value does not fit.
 * @tparam TYPE Type id forwarded to dateTimeParseOptional<TYPE>().
 * @param s     Text to parse.
 * @param unit  Time unit forwarded to dateTimeParseOptional<TYPE>().
 * @return      The parsed value as R.
 * @throws std::runtime_error if dateTimeParseOptional<TYPE>() yields no value
 *         for s.
 */
template <typename R, hdk::ir::Type::Id TYPE>
R dateTimeParse(std::string_view const s, hdk::ir::TimeUnit unit) {
  auto const parsed = dateTimeParseOptional<TYPE>(s, unit);
  if (!parsed) {
    throw std::runtime_error(
        cat("Invalid date/time (", std::to_string(TYPE), ") string (", s, ')'));
  }
  return numeric_cast<R>(*parsed);
}

template <hdk::ir::Type::Id TYPE>
int64_t dateTimeParse(std::string_view const s, int dim) {
hdk::ir::TimeUnit unit;
Expand Down
13 changes: 13 additions & 0 deletions omniscidb/Tests/ExecutionSequenceTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,19 @@ class ExecutionSequenceTest : public ::testing::Test {

ExecutionResult runQuery(std::unique_ptr<QueryDag> dag, bool just_explain = false) {
auto ra_executor = RelAlgExecutor(getExecutor(), getStorage(), std::move(dag));
auto schema_provider = getStorage();
auto dbs = schema_provider->listDatabases();
// Current JSON format supports a single database only. To support result
// sets in SQL queries, we add tables from the ResultSetRegistry using
// negative table ids.
auto tables = schema_provider->listTables(dbs[0]);
auto more_tables = schema_provider->listTables(dbs[0]);
for (auto table : more_tables) {
LOG(ERROR) << table->toString();
for (auto column : schema_provider->listColumns(dbs[0], table->table_id)) {
LOG(ERROR) << column->toString();
}
}
auto eo = ExecutionOptions::fromConfig(config());
eo.just_explain = just_explain;
eo.allow_loop_joins = true;
Expand Down
72 changes: 72 additions & 0 deletions omniscidb/Tests/QueryBuilderTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -346,6 +346,19 @@ class QueryBuilderTest : public TestSuite {
{"id": 3, "arr1":[1, null], "arr2" : [null, 5.0, null]}
{"id": 4, "arr1":[1, 2], "arr2" : [4.0, 5.0, 6.0]})___");

createTable("test_date",
{{"col_bi", ctx().int64()},
{"col_i", ctx().int32()},
{"col_f", ctx().fp32()},
{"col_d", ctx().fp64()},
{"col_dec", ctx().decimal64(10, 2)},
{"col_b", ctx().boolean()},
{"col_str", ctx().text()},
{"col_date", ctx().date32(hdk::ir::TimeUnit::kDay)}});
insertCsvValues("test_date",
"1,1,0.75,0.13444545,50.02,false,some_text,2000-01-01\n"
"2,2,30.82,0.461,7.05,true,a_text,2015-03-18\n");

createTable("sort",
{{"x", ctx().int32()}, {"y", ctx().int32()}, {"z", ctx().int32()}});
insertCsvValues("sort",
Expand All @@ -362,12 +375,28 @@ class QueryBuilderTest : public TestSuite {

createTable("withNull", {{"a", ctx().int64()}});
insertCsvValues("withNull", "1\nNULL");

LOG(ERROR) << "===========================getStorage()===========================";
auto schema_provider = getStorage();
auto dbs = schema_provider->listDatabases();
auto tables = schema_provider->listTables(dbs[0]);
auto more_tables = schema_provider->listTables(dbs[0]);
for (auto table : more_tables) {
LOG(ERROR) << table->toString();
for (auto column : schema_provider->listColumns(dbs[0], table->table_id)) {
LOG(ERROR) << " " << column->toString();
}
}
}

static void TearDownTestSuite() {
dropTable("test1");
dropTable("test2");
dropTable("test3");
dropTable("test_str");
dropTable("test_varr");
dropTable("test_arr");
dropTable("test_date");
dropTable("sort");
dropTable("ambiguous");
dropTable("join1");
Expand Down Expand Up @@ -518,6 +547,49 @@ TEST_F(QueryBuilderTest, Arithmetics) {
compare_res_data(res, std::vector<int64_t>({0, NULL_BIGINT}));
}

// Projects test_date.col_date twice: first cast to int32, then as the raw
// column, and compares each result against the expected int32 values.
TEST_F(QueryBuilderTest, DateToInt) {
  QueryBuilder builder(ctx(), schema_mgr_, configPtr());
  auto date_scan = builder.scan("test_date");

  // Query 1: col_date explicitly cast to int32.
  auto cast_dag = date_scan.proj(date_scan.ref("col_date").cast("int32")).finalize();
  auto res = runQuery(std::move(cast_dag));
  LOG(ERROR) << "res: " << res.toString();
  LOG(ERROR) << "res: " << toArrow(res)->ToString();
  compare_res_data(res, std::vector<int32_t>({946684800, 1426636800}));

  // Query 2: col_date projected without a cast.
  auto plain_dag = date_scan.proj("col_date").finalize();
  res = runQuery(std::move(plain_dag));
  LOG(ERROR) << "second res: " << res.toString();
  LOG(ERROR) << "second res: " << toArrow(res)->ToString();
  const int32_t first_date =
      dateTimeParse<int32_t, hdk::ir::Type::kDate>("2000-01-01", TimeUnit::kDay);
  const int32_t second_date =
      dateTimeParse<int32_t, hdk::ir::Type::kDate>("2015-03-18", TimeUnit::kDay);
  compare_res_data(res, std::vector<int32_t>({first_date, second_date}));
}

// Projects test_date.col_date without a cast and checks the values, then runs
// a scan of the whole table and only logs the result (no assertion on it).
TEST_F(QueryBuilderTest, DateToInt2) {
  QueryBuilder builder(ctx(), schema_mgr_, configPtr());
  auto date_scan = builder.scan("test_date");

  // Query 1: the date column alone.
  auto column_dag = date_scan.proj("col_date").finalize();
  auto res = runQuery(std::move(column_dag));
  LOG(ERROR) << "second res: " << res.toString();
  LOG(ERROR) << "second res: " << toArrow(res)->ToString();
  const int32_t first_date =
      dateTimeParse<int32_t, hdk::ir::Type::kDate>("2000-01-01", TimeUnit::kDay);
  const int32_t second_date =
      dateTimeParse<int32_t, hdk::ir::Type::kDate>("2015-03-18", TimeUnit::kDay);
  compare_res_data(res, std::vector<int32_t>({first_date, second_date}));

  // Query 2: the unprojected scan; the result is only printed.
  auto table_dag = date_scan.finalize();
  res = runQuery(std::move(table_dag));
  LOG(ERROR) << "second res: " << res.toString();
  LOG(ERROR) << "second res: " << toArrow(res)->ToString();
}

TEST_F(QueryBuilderTest, Arithmetics2) {
QueryBuilder builder(ctx(), schema_mgr_, configPtr());

Expand Down