Skip to content

Commit

Permalink
Use prepended length for bit-packed hybrid bool columns
Browse files (browse the repository at this point in the history)
  • Loading branch information
Johan Levin committed Feb 19, 2025
1 parent cad751e commit eb6b9d2
Show file tree
Hide file tree
Showing 5 changed files with 81 additions and 2 deletions.
3 changes: 2 additions & 1 deletion src/datapage.js
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@ export function readDataPage(bytes, daph, schemaPath, { type }) {
const bitWidth = type === 'BOOLEAN' ? 1 : view.getUint8(reader.offset++)
if (bitWidth) {
dataPage = new Array(nValues)
readRleBitPackedHybrid(reader, bitWidth, view.byteLength - reader.offset, dataPage)
const encodedLength = type === 'BOOLEAN' ? 0 : view.byteLength - reader.offset
readRleBitPackedHybrid(reader, bitWidth, encodedLength, dataPage)
} else {
dataPage = new Uint8Array(nValues) // nValues zeroes
}
Expand Down
2 changes: 1 addition & 1 deletion src/encoding.js
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ export function bitWidth(value) {
*/
export function readRleBitPackedHybrid(reader, width, length, output) {
if (!length) {
// length = reader.view.getUint32(reader.offset, true)
length = reader.view.getUint32(reader.offset, true)
reader.offset += 4
}
let seen = 0
Expand Down
17 changes: 17 additions & 0 deletions test/files/boolean_rle.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
[
[1],
[1],
[1],
[1],
[1],
[null],
[null],
[null],
[null],
[null],
[0],
[0],
[0],
[0],
[0]
]
61 changes: 61 additions & 0 deletions test/files/boolean_rle.metadata.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
{
"created_by": "Polars",
"key_value_metadata": [
{
"key": "ARROW:schema",
"value": "/////3YAAAAEAAAA8v///xQAAAAEAAEAAAAKAAsACAAKAAQA+P///wwAAAAIAAgAAAAEAAEAAAAEAAAA7P///ywAAAAgAAAAGAAAAAEGAAAQABIABAAQABEACAAAAAwAAAAAAPz///8EAAQADQAAAEJvb2xlYW5Db2x1bW4A"
}
],
"metadata_length": 308,
"num_rows": 15,
"row_groups": [
{
"columns": [
{
"column_index_length": 17,
"column_index_offset": 89,
"crypto_metadata": 17,
"file_offset": 47,
"meta_data": {
"codec": "SNAPPY",
"data_page_offset": 4,
"encodings": [
"RLE"
],
"num_values": 15,
"path_in_schema": [
"BooleanColumn"
],
"statistics": {
"max_value": true,
"min_value": false,
"null_count": 5
},
"total_compressed_size": 43,
"total_uncompressed_size": 41,
"type": "BOOLEAN"
},
"offset_index_length": 10,
"offset_index_offset": 106
}
],
"file_offset": 4,
"num_rows": 15,
"ordinal": 0,
"total_byte_size": 41,
"total_compressed_size": 43
}
],
"schema": [
{
"name": "root",
"num_children": 1
},
{
"name": "BooleanColumn",
"repetition_type": "OPTIONAL",
"type": "BOOLEAN"
}
],
"version": 1
}
Binary file added test/files/boolean_rle.parquet
Binary file not shown.

0 comments on commit eb6b9d2

Please sign in to comment.