Skip to content

Commit

Permalink
Update yEnc module to rapidyenc 1.1.1 (#114)
Browse files Browse the repository at this point in the history
* Update rapidyenc
CRC32 now uses rapidyenc's functions instead of crcutil

* Update rapidyenc

* Add CRC32 util tests for lengths greater than 32 bits

* Fix RISC-V compile failure
  • Loading branch information
animetosho authored May 10, 2024
1 parent 3ed5229 commit 04386bd
Show file tree
Hide file tree
Showing 41 changed files with 7,332 additions and 5,659 deletions.
35 changes: 33 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,8 +84,12 @@ def build_extension(self, ext: Extension):
# Determine compiler flags
gcc_arm_neon_flags = []
gcc_arm_crc_flags = []
gcc_arm_crc_pmull_flags = []
gcc_vpclmulqdq_flags = []
gcc_vbmi2_flags = []
gcc_avx10_flags = []
gcc_rvv_flags = []
gcc_rvzbkc_flags = []
gcc_macros = []
if self.compiler.compiler_type == "msvc":
# LTCG not enabled due to issues seen with code generation where
Expand Down Expand Up @@ -132,9 +136,11 @@ def build_extension(self, ext: Extension):
IS_AARCH64 = False
if autoconf_check(self.compiler, flag_check="-march=armv8-a+crc"):
gcc_arm_crc_flags.append("-march=armv8-a+crc")
gcc_arm_crc_pmull_flags.append("-march=armv8-a+crc+crypto")
# Resolve problems on armv7, see issue #56
if not IS_AARCH64:
gcc_arm_crc_flags.append("-fno-lto")
gcc_arm_crc_pmull_flags.append("-fno-lto")
if not IS_AARCH64 and autoconf_check(self.compiler, flag_check="-mfpu=neon"):
gcc_arm_neon_flags.append("-mfpu=neon")
# Resolve problems on armv7, see issue #56
Expand Down Expand Up @@ -165,6 +171,16 @@ def build_extension(self, ext: Extension):
"-mlzcnt",
]

if IS_X86 and autoconf_check(self.compiler, flag_check="-mno-evex512"):
gcc_avx10_flags = ["-mno-evex512"]

if machine.startswith("riscv"):
arch_flag = "-march=rv" + ("32" if machine.startswith("riscv32") else "64") + "gc"
if autoconf_check(self.compiler, flag_check=arch_flag+"v"):
gcc_rvv_flags = [arch_flag+"v"]
if autoconf_check(self.compiler, flag_check=arch_flag+"_zbkc"):
gcc_rvzbkc_flags = [arch_flag+"_zbkc"]

srcdeps_crc_common = ["src/yencode/common.h", "src/yencode/crc_common.h", "src/yencode/crc.h"]
srcdeps_dec_common = ["src/yencode/common.h", "src/yencode/decoder_common.h", "src/yencode/decoder.h"]
srcdeps_enc_common = ["src/yencode/common.h", "src/yencode/encoder_common.h", "src/yencode/encoder.h"]
Expand Down Expand Up @@ -240,13 +256,13 @@ def build_extension(self, ext: Extension):
{
"sources": ["src/yencode/encoder_vbmi2.cc"],
"depends": srcdeps_enc_common + ["encoder_avx_base.h"],
"gcc_x86_flags": gcc_vbmi2_flags,
"gcc_x86_flags": gcc_vbmi2_flags + gcc_avx10_flags,
"msvc_x86_flags": ["/arch:AVX512"],
},
{
"sources": ["src/yencode/decoder_vbmi2.cc"],
"depends": srcdeps_dec_common + ["decoder_avx2_base.h"],
"gcc_x86_flags": gcc_vbmi2_flags,
"gcc_x86_flags": gcc_vbmi2_flags + gcc_avx10_flags,
"msvc_x86_flags": ["/arch:AVX512"],
},
{
Expand All @@ -264,6 +280,21 @@ def build_extension(self, ext: Extension):
"depends": srcdeps_crc_common,
"gcc_arm_flags": gcc_arm_crc_flags,
},
{
"sources": ["src/yencode/crc_arm_pmull.cc"],
"depends": srcdeps_crc_common,
"gcc_arm_flags": gcc_arm_crc_pmull_flags,
},
{
"sources": ["src/yencode/encoder_rvv.cc", "src/yencode/decoder_rvv.cc"],
"depends": srcdeps_enc_common + srcdeps_dec_common,
"gcc_rv_flags": gcc_rvv_flags,
},
{
"sources": ["src/yencode/crc_riscv.cc"],
"depends": srcdeps_crc_common,
"gcc_rv_flags": gcc_rvzbkc_flags,
},
{
"sources": [
"src/crcutil-1.0/code/crc32c_sse4.cc",
Expand Down
40 changes: 19 additions & 21 deletions src/crc32.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,68 +17,66 @@
*/

#include "crc32.h"
#include "crcutil-1.0/examples/interface.h"

extern crcutil_interface::CRC *crc;
#include "yencode/crc.h"

PyObject* crc32_combine(PyObject *self, PyObject *args) {
crcutil_interface::UINT64 crc1, crc2;
unsigned long crc1, crc2;
unsigned long long length;

if(!PyArg_ParseTuple(args, "KKK:crc32_combine", &crc1, &crc2, &length)) {
if(!PyArg_ParseTuple(args, "kkK:crc32_combine", &crc1, &crc2, &length)) {
return NULL;
}

crc->Concatenate(crc2, 0, length, &crc1);
crc1 = RapidYenc::crc32_combine(crc1, crc2, length);

return PyLong_FromUnsignedLong((uint32_t) crc1);
return PyLong_FromUnsignedLong(crc1);
}

PyObject* crc32_multiply(PyObject *self, PyObject *args) {
crcutil_interface::UINT64 crc1, crc2;
unsigned long crc1, crc2;

if(!PyArg_ParseTuple(args, "KK:crc32_multiply", &crc1, &crc2)) {
if(!PyArg_ParseTuple(args, "kk:crc32_multiply", &crc1, &crc2)) {
return NULL;
}

crc->Multiply(crc2, &crc1);
crc1 = RapidYenc::crc32_multiply(crc1, crc2);

return PyLong_FromUnsignedLong((uint32_t)crc1);
return PyLong_FromUnsignedLong(crc1);
}

PyObject* crc32_zero_unpad(PyObject *self, PyObject *args) {
crcutil_interface::UINT64 crc1;
unsigned long crc1;
unsigned long long length;

if(!PyArg_ParseTuple(args, "KK:crc32_zero_unpad", &crc1, &length)) {
if(!PyArg_ParseTuple(args, "kK:crc32_zero_unpad", &crc1, &length)) {
return NULL;
}

crc->ZeroUnpad(length, &crc1);
crc1 = RapidYenc::crc32_unzero(crc1, length);

return PyLong_FromUnsignedLong((uint32_t) crc1);
return PyLong_FromUnsignedLong(crc1);
}

PyObject* crc32_xpown(PyObject* self, PyObject* arg) {
crcutil_interface::UINT64 n = PyLong_AsUnsignedLongLong(arg) % 0xffffffff;
long long n = PyLong_AsLongLong(arg);

if (PyErr_Occurred()) {
return NULL;
}

crc->XpowN(&n);
unsigned long result = RapidYenc::crc32_2pow(n);

return PyLong_FromUnsignedLong(n);
return PyLong_FromUnsignedLong(result);
}

PyObject* crc32_xpow8n(PyObject* self, PyObject* arg) {
crcutil_interface::UINT64 n = PyLong_AsUnsignedLongLong(arg) % 0xffffffff;
unsigned long long n = PyLong_AsUnsignedLongLong(arg);

if (PyErr_Occurred()) {
return NULL;
}

crc->Xpow8N(&n);
unsigned long result = RapidYenc::crc32_256pow(n);

return PyLong_FromUnsignedLong(n);
return PyLong_FromUnsignedLong(result);
}
40 changes: 37 additions & 3 deletions src/sabctools.cc
Original file line number Diff line number Diff line change
Expand Up @@ -99,12 +99,46 @@ static struct PyModuleDef sabctools_definition = {
sabctools_methods
};

/**
 * Returns a short label for the SIMD instruction set reported by the decoder.
 *
 * The ISA level is read once from RapidYenc::decode_isa_level() and mapped to
 * a label for whichever platform this file was compiled for. The result is
 * always a string literal; it is "" when no platform-specific label applies.
 */
static const char* simd_detected(void) {
    const int isa = RapidYenc::decode_isa_level();

#ifdef PLATFORM_X86
    // Ordered from most to least capable: the first threshold met wins.
    static const struct {
        int threshold;
        const char* label;
    } x86_tiers[] = {
        { ISA_LEVEL_VBMI2,       "AVX512VL+VBMI2" },
        { ISA_LEVEL_AVX3,        "AVX512VL" },
        { ISA_LEVEL_AVX2,        "AVX2" },
        { ISA_LEVEL_AVX,         "AVX" },
        { ISA_LEVEL_SSE4_POPCNT, "SSE4.1+POPCNT" },
        { ISA_LEVEL_SSE41,       "SSE4.1" },
        { ISA_LEVEL_SSSE3,       "SSSE3" },
        { (ISA_LEVEL_SSE2 | ISA_FEATURE_POPCNT | ISA_FEATURE_LZCNT), "SSE2+ABM" },
    };
    for (const auto& tier : x86_tiers) {
        if (isa >= tier.threshold)
            return tier.label;
    }
    // Nothing above matched: report the SSE2 label on x86 builds.
    return "SSE2";
#endif

#ifdef PLATFORM_ARM
    if (isa >= ISA_LEVEL_NEON)
        return "NEON";
#endif

#ifdef __riscv
    if (isa >= ISA_LEVEL_RVV)
        return "RVV";
#endif

    // No label applies for this platform / ISA level.
    return "";
}

PyMODINIT_FUNC PyInit_sabctools(void) {
// Initialize and add version / SIMD information
Py_Initialize();
encoder_init();
decoder_init();
crc_init();
RapidYenc::encoder_init();
RapidYenc::decoder_init();
RapidYenc::crc32_init();
openssl_init();
sparse_init();

Expand Down
10 changes: 5 additions & 5 deletions src/yenc.cc
Original file line number Diff line number Diff line change
Expand Up @@ -215,9 +215,9 @@ PyObject* yenc_decode(PyObject* self, PyObject* Py_memoryview_obj) {
Py_BEGIN_ALLOW_THREADS;

// send to decoder
YencDecoderState state = YDEC_STATE_CRLF;
output_len = do_decode(1, (unsigned char*) start_loc, (unsigned char*) dest_loc, yenc_data_length, &state);
crc = do_crc32(dest_loc, output_len, crc);
RapidYenc::YencDecoderState state = RapidYenc::YDEC_STATE_CRLF;
output_len = RapidYenc::decode(1, start_loc, dest_loc, yenc_data_length, &state);
crc = RapidYenc::crc32(dest_loc, output_len, crc);

// Return GIL to perform Python modifications
Py_END_ALLOW_THREADS;
Expand Down Expand Up @@ -289,8 +289,8 @@ PyObject* yenc_encode(PyObject* self, PyObject* Py_input_string)

// Encode result
int column = 0;
output_len = do_encode(YENC_LINESIZE, &column, (unsigned char*)input_buffer, (unsigned char*)output_buffer, input_len, 1);
crc = do_crc32(input_buffer, input_len, 0);
output_len = RapidYenc::encode(YENC_LINESIZE, &column, input_buffer, output_buffer, input_len, 1);
crc = RapidYenc::crc32(input_buffer, input_len, 0);

// Restore GIL so we can build Python strings
Py_END_ALLOW_THREADS;
Expand Down
Loading

0 comments on commit 04386bd

Please sign in to comment.