Skip to content

Commit

Permalink
all
Browse files Browse the repository at this point in the history
  • Loading branch information
lzyy2024 committed Jan 19, 2025
1 parent 1e22fd4 commit 7343f2a
Show file tree
Hide file tree
Showing 8 changed files with 206 additions and 9 deletions.
2 changes: 1 addition & 1 deletion be/src/vec/functions/function_compress.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ class FunctionUncompress : public IFunction {
}
};

void register_function_uuid_transforms(SimpleFunctionFactory& factory) {
// Registers the FunctionCompress and FunctionUncompress implementations with
// the given function factory.
void register_function_compress(SimpleFunctionFactory& factory) {
factory.register_function<FunctionCompress>();
factory.register_function<FunctionUncompress>();
}
Expand Down
2 changes: 2 additions & 0 deletions be/src/vec/functions/simple_function_factory.h
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ void register_function_ip(SimpleFunctionFactory& factory);
void register_function_multi_match(SimpleFunctionFactory& factory);
void register_function_split_by_regexp(SimpleFunctionFactory& factory);
void register_function_assert_true(SimpleFunctionFactory& factory);
void register_function_compress(SimpleFunctionFactory& factory);
void register_function_bit_test(SimpleFunctionFactory& factory);

class SimpleFunctionFactory {
Expand Down Expand Up @@ -301,6 +302,7 @@ class SimpleFunctionFactory {
register_function_split_by_regexp(instance);
register_function_assert_true(instance);
register_function_bit_test(instance);
register_function_compress(instance);
});
return instance;
}
Expand Down
55 changes: 49 additions & 6 deletions be/test/vec/function/function_string_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -784,8 +784,7 @@ TEST(function_string_test, function_string_reverse_test) {
std::string("🎓‍👩🎓‍👨")}, // 毕业生emoji序列
{{std::string("ASCII 👨‍👨‍👧‍👦 UNICODE")},
std::string("EDOCINU 👦‍👧‍👨‍👨 IICSA")}, // 混合ASCII和UNICODE字符
{{std::string("💻 Programming 💾")},
std::string("💾 gnimmargorP 💻")}, // 编程相关emoji
{{std::string("💻 Programming 💾")}, std::string("💾 gnimmargorP 💻")}, // 编程相关emoji
{{std::string("0010110")}, std::string("0110100")}, // 二进制序列
{{std::string("readme.md")}, std::string("dm.emdaer")}, // 包含点的文件名
{{std::string("[email protected]")},
Expand Down Expand Up @@ -828,7 +827,7 @@ TEST(function_string_test, function_string_length_test) {
std::int32_t(2)}, // ñ,为拉丁字母n with tilde,UTF-8中占用2字节
{{std::string("\u65E5\u672C\u8A9E")}, std::int32_t(9)}, // 日本语,每个字符通常3个字节
{{std::string("Hello, 世界!")}, std::int32_t(16)}, // 混合ASCII和非ASCII字符
{{std::string("😀😃😄😁")}, std::int32_t(16)}, // Emoji,每个通常4个字节
{{std::string("😀😃😄😁")}, std::int32_t(16)}, // Emoji,每个通常4个字节
{{std::string("Quick brown 狐 jumps over a lazy 狗.")}, std::int32_t(38)}, // 混合字符串
{{std::string("Löwe 老虎 Léopard")}, std::int32_t(21)}, // 欧洲文字和中文的混合
{{std::string("Café 美丽")}, std::int32_t(12)}, // 带重音的字符
Expand Down Expand Up @@ -1480,8 +1479,7 @@ TEST(function_string_test, function_from_base64_test) {
{{std::string("SEVMTE8sIV4l")}, std::string("HELLO,!^%")},
{{std::string("__123hehe1")}, Null()},
{{std::string("")}, std::string("")},
{{std::string("5ZWK5ZOI5ZOI5ZOI8J+YhCDjgILigJTigJQh")},
std::string("啊哈哈哈😄 。——!")},
{{std::string("5ZWK5ZOI5ZOI5ZOI8J+YhCDjgILigJTigJQh")}, std::string("啊哈哈哈😄 。——!")},
{{std::string("ò&ø")}, Null()},
{{std::string("TVl0ZXN0U1RS")}, std::string("MYtestSTR")},
{{Null()}, Null()},
Expand Down Expand Up @@ -3340,9 +3338,54 @@ TEST(function_string_test, function_rpad_test) {
{{Null(), std::int32_t(0), Null()}, Null()},
};

TEST(function_string_test, function_compress_test) {
// Checks the "compress" and "uncompress" string functions against fixed
// input/expected-output pairs. The two scopes below use mirrored tables: each
// expected value of the "compress" table is fed back in as an "uncompress" input.
// NOTE(review): rows here pass bare string literals, whereas other tests in this
// file build rows as {{std::string(...)}, std::string(...)} -- confirm DataSet
// accepts this shape.
// NOTE(review): the expected values look like hex text whose first four bytes
// hold the input length (0x0D = 13 for "Hello, world!") -- confirm against the
// encoding the functions actually produce.
TEST(function_string_test, function_compress_uncompress_test) {
{
std::string func_name = "compress";
InputTypeSet input_types = {TypeIndex::String};

// Compress several different strings
DataSet data_set = {
{{Null()}, Null()},
// Example 1: compress an ordinary string
{"Hello, world!", "0x0D000000789CF348CDC9C9D75128CF2FCA49510400205E048A"},
// Example 2: compress an empty string
{"", "0x"},
// Example 3: compress a string containing special characters
{"String with special characters! @#$%^&*()",
"0x29000000789C0B2E29CACC4B5728CF2CC950282E484DCE4CCC5148CE482C4A4C2E492D2A565"
"4705056518D53D3D2D004003C2A0D81"},
// Example 4: compress a string with leading and trailing spaces
{" This is a string with leading and trailing spaces ",
"0x37000000789C15C7510A00101045D1ADDC45D9C00B3125C94CD93EEAFC1C2075731EE1B16D3"
"68E456754951FCD426CD9F8F1A55C1DB8054B12C9"}};

static_cast<void>(
check_function<DataTypeString, true>(func_name, input_types, data_set));
}

{
std::string func_name = "uncompress";
InputTypeSet input_types = {TypeIndex::String};

// Decompress several compressed strings
DataSet data_set = {
{{Null()}, Null()},
// Example 1: decompress the compressed form of 'Hello, world!'
{"0x0D000000789CF348CDC9C9D75128CF2FCA49510400205E048A", "Hello, world!"},
// Example 2: decompress the compressed form of the empty string
{"0x", ""},
// Example 3: decompress a string containing special characters
{"0x29000000789C0B2E29CACC4B5728CF2CC950282E484DCE4CCC5148CE482C4A4C2E492D2A565"
"4705056518D53D3D2D004003C2A0D81",
"String with special characters! @#$%^&*()"},
// Example 4: decompress a string with leading and trailing spaces
{"0x37000000789C15C7510A00101045D1ADDC45D9C00B3125C94CD93EEAFC1C2075731EE1B16D3"
"68E456754951FCD426CD9F8F1A55C1DB8054B12C9",
" This is a string with leading and trailing spaces "}};

static_cast<void>(
check_function<DataTypeString, true>(func_name, input_types, data_set));
}
}

check_function_all_arg_comb<DataTypeString, true>(func_name, input_types, data_set);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -479,6 +479,8 @@
import org.apache.doris.nereids.trees.expressions.functions.scalar.YearsAdd;
import org.apache.doris.nereids.trees.expressions.functions.scalar.YearsDiff;
import org.apache.doris.nereids.trees.expressions.functions.scalar.YearsSub;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Compress;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Uncompress;

import com.google.common.collect.ImmutableList;

Expand Down Expand Up @@ -974,7 +976,9 @@ public class BuiltinScalarFunctions implements FunctionHelper {
scalar(YearsSub.class, "years_sub"),
scalar(MultiMatch.class, "multi_match"),
scalar(SessionUser.class, "session_user"),
scalar(LastQueryId.class, "last_query_id"));
scalar(LastQueryId.class, "last_query_id"),
scalar(Compress.class, "compress"),
scalar(Uncompress.class, "uncompress"));

public static final BuiltinScalarFunctions INSTANCE = new BuiltinScalarFunctions();

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.doris.nereids.trees.expressions.functions.scalar;

import org.apache.doris.catalog.FunctionSignature;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.functions.AlwaysNullable;
import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature;
import org.apache.doris.nereids.trees.expressions.shape.UnaryExpression;
import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
import org.apache.doris.nereids.types.BinaryType;
import org.apache.doris.nereids.types.StringType;

import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;

import java.util.List;

/**
* ScalarFunction 'compress'.
*/
/**
 * Nereids expression node for the scalar function 'compress'.
 *
 * <p>The node has exactly one child. Its only declared signature maps a
 * string argument to a string result, and the class is tagged
 * {@link AlwaysNullable}.
 */
public class Compress extends ScalarFunction
        implements UnaryExpression, ExplicitlyCastableSignature, AlwaysNullable {

    /** The single supported signature: string in, string out. */
    public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
            FunctionSignature.ret(StringType.INSTANCE).args(StringType.INSTANCE));

    /**
     * Builds the node from its single argument.
     *
     * @param arg the child expression passed to 'compress'
     */
    public Compress(Expression arg) {
        super("compress", arg);
    }

    /**
     * Creates a copy of this node on top of a replacement child list.
     *
     * @param children the new children; must contain exactly one element
     * @return a new {@code Compress} wrapping that element
     */
    @Override
    public Compress withChildren(List<Expression> children) {
        Preconditions.checkArgument(children.size() == 1);
        Expression onlyChild = children.get(0);
        return new Compress(onlyChild);
    }

    /** Returns the shared {@link #SIGNATURES} list. */
    @Override
    public List<FunctionSignature> getSignatures() {
        return SIGNATURES;
    }

    /** Dispatches to {@code visitCompress} on the given visitor. */
    @Override
    public <R, C> R accept(ExpressionVisitor<R, C> visitor, C context) {
        R visited = visitor.visitCompress(this, context);
        return visited;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.doris.nereids.trees.expressions.functions.scalar;

import org.apache.doris.catalog.FunctionSignature;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.functions.AlwaysNullable;
import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature;
import org.apache.doris.nereids.trees.expressions.shape.UnaryExpression;
import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
import org.apache.doris.nereids.types.StringType;
import org.apache.doris.nereids.types.BinaryType;

import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;

import java.util.List;

/**
* ScalarFunction 'uncompress'.
*/
/**
 * Nereids expression node for the scalar function 'uncompress'.
 *
 * <p>The node has exactly one child. Its only declared signature maps a
 * string argument to a string result, and the class is tagged
 * {@link AlwaysNullable}.
 */
public class Uncompress extends ScalarFunction
        implements UnaryExpression, ExplicitlyCastableSignature, AlwaysNullable {

    /** The single supported signature: string in, string out. */
    public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
            FunctionSignature.ret(StringType.INSTANCE).args(StringType.INSTANCE));

    /**
     * Builds the node from its single argument.
     *
     * @param arg the child expression passed to 'uncompress'
     */
    public Uncompress(Expression arg) {
        super("uncompress", arg);
    }

    /**
     * Creates a copy of this node on top of a replacement child list.
     *
     * @param children the new children; must contain exactly one element
     * @return a new {@code Uncompress} wrapping that element
     */
    @Override
    public Uncompress withChildren(List<Expression> children) {
        Preconditions.checkArgument(children.size() == 1);
        Expression onlyChild = children.get(0);
        return new Uncompress(onlyChild);
    }

    /** Returns the shared {@link #SIGNATURES} list. */
    @Override
    public List<FunctionSignature> getSignatures() {
        return SIGNATURES;
    }

    /** Dispatches to {@code visitUncompress} on the given visitor. */
    @Override
    public <R, C> R accept(ExpressionVisitor<R, C> visitor, C context) {
        R visited = visitor.visitUncompress(this, context);
        return visited;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -478,6 +478,8 @@
import org.apache.doris.nereids.trees.expressions.functions.scalar.YearsSub;
import org.apache.doris.nereids.trees.expressions.functions.udf.AliasUdf;
import org.apache.doris.nereids.trees.expressions.functions.udf.JavaUdf;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Compress;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Uncompress;

/** ScalarFunctionVisitor. */
public interface ScalarFunctionVisitor<R, C> {
Expand Down Expand Up @@ -2328,4 +2330,12 @@ default R visitMultiMatch(MultiMatch multiMatch, C context) {
/** Visits a LastQueryId node; the default forwards to visitScalarFunction. */
default R visitLastQueryId(LastQueryId queryId, C context) {
return visitScalarFunction(queryId, context);
}

/** Visits a Compress node; the default forwards to visitScalarFunction. */
default R visitCompress(Compress compress, C context) {
return visitScalarFunction(compress, context);
}

/** Visits an Uncompress node; the default forwards to visitScalarFunction. */
default R visitUncompress(Uncompress uncompress, C context) {
return visitScalarFunction(uncompress, context);
}
}
4 changes: 3 additions & 1 deletion gensrc/script/doris_builtins_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -1737,7 +1737,9 @@
[['random_bytes'], 'STRING', ['INT'], ''],

[['overlay'], 'STRING', ['STRING', 'INT', 'INT', 'STRING'], ''],
[['strcmp'], 'INT', ['STRING', 'STRING'], 'DEPEND_ON_ARGUMENT']
[['strcmp'], 'INT', ['STRING', 'STRING'], 'DEPEND_ON_ARGUMENT'],
[['compress'], 'STRING', ['STRING'], 'ALWAYS_NULLABLE'],
[['uncompress'], 'STRING', ['STRING'], 'ALWAYS_NULLABLE']
],


Expand Down

0 comments on commit 7343f2a

Please sign in to comment.