Skip to content

Commit

Permalink
Fix issue #59
Browse files Browse the repository at this point in the history
  • Loading branch information
platypii committed Feb 8, 2025
1 parent 5675560 commit 2e72cd2
Show file tree
Hide file tree
Showing 7 changed files with 202 additions and 8 deletions.
1 change: 1 addition & 0 deletions eslint.config.js
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ export default [
object: true,
array: false,
}],
'prefer-exponentiation-operator': 'error',
'prefer-promise-reject-errors': 'error',
quotes: ['error', 'single'],
'require-await': 'warn',
Expand Down
16 changes: 11 additions & 5 deletions src/convert.js
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ export function convert(data, schemaElement, utf8 = true) {
const ctype = schemaElement.converted_type
if (ctype === 'DECIMAL') {
const scale = schemaElement.scale || 0
const factor = Math.pow(10, -scale)
const factor = 10 ** -scale
const arr = new Array(data.length)
for (let i = 0; i < arr.length; i++) {
if (data[0] instanceof Uint8Array) {
Expand Down Expand Up @@ -123,11 +123,17 @@ export function convert(data, schemaElement, utf8 = true) {
* @returns {number}
*/
export function parseDecimal(bytes) {
  // Interpret `bytes` as a big-endian two's complement integer and return it
  // as a JS number (the unscaled decimal value; callers apply the scale).
  //
  // Accumulate with arithmetic instead of `<<` / `|`: bitwise operators
  // coerce their operands to 32-bit integers, which corrupts any decimal
  // wider than 4 bytes. Each byte must be folded in exactly once.
  // Note: results beyond Number.MAX_SAFE_INTEGER lose precision.
  let value = 0
  for (const byte of bytes) {
    value = value * 256 + byte
  }

  // Two's complement sign handling: when the top bit of the most significant
  // byte is set, the unsigned magnitude is >= 2^(bits-1), so subtract 2^bits.
  // For empty input bits is 0 and 2 ** -1 is 0.5, so 0 is returned unchanged.
  const bits = bytes.length * 8
  if (value >= 2 ** (bits - 1)) {
    value -= 2 ** bits
  }

  return value
}

Expand All @@ -152,7 +158,7 @@ export function parseFloat16(bytes) {
const sign = int16 >> 15 ? -1 : 1
const exp = int16 >> 10 & 0x1f
const frac = int16 & 0x3ff
if (exp === 0) return sign * Math.pow(2, -14) * (frac / 1024) // subnormals
if (exp === 0) return sign * 2 ** -14 * (frac / 1024) // subnormals
if (exp === 0x1f) return frac ? NaN : sign * Infinity
return sign * Math.pow(2, exp - 15) * (1 + frac / 1024)
return sign * 2 ** (exp - 15) * (1 + frac / 1024)
}
2 changes: 1 addition & 1 deletion src/metadata.js
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,7 @@ export function convertMetadata(value, schema) {
if (type === 'INT64' && logical_type?.type === 'TIMESTAMP') return new Date(Number(view.getBigInt64(0, true)))
if (type === 'INT32' && view.byteLength === 4) return view.getInt32(0, true)
if (type === 'INT64' && view.byteLength === 8) return view.getBigInt64(0, true)
if (converted_type === 'DECIMAL') return parseDecimal(value) * Math.pow(10, -(schema.scale || 0))
if (converted_type === 'DECIMAL') return parseDecimal(value) * 10 ** -(schema.scale || 0)
if (logical_type?.type === 'FLOAT16') return parseFloat16(value)
if (type === 'FIXED_LEN_BYTE_ARRAY') return value
// assert(false)
Expand Down
38 changes: 36 additions & 2 deletions test/convert.test.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { describe, expect, it } from 'vitest'
import { convert, parseFloat16 } from '../src/convert.js'
import { convert, parseDecimal, parseFloat16 } from '../src/convert.js'

/**
* @import {SchemaElement} from '../src/types.js'
Expand Down Expand Up @@ -71,6 +71,13 @@ describe('convert function', () => {
expect(convert(data, schemaElement)).toEqual([100, 200])
})

// Regression test for issue #59: a 6-byte (48-bit) decimal value.
// Read big-endian, the bytes are 0x125389979C00 = 20150000000000 unscaled;
// with scale 10 that corresponds to the decimal 2015.
it('converts byte array from issue #59 to DECIMAL', () => {
const data = [new Uint8Array([18, 83, 137, 151, 156, 0])]
/** @type {SchemaElement} */
const schemaElement = { name, converted_type: 'DECIMAL', scale: 10, precision: 14 }
expect(convert(data, schemaElement)).toEqual([2015])
})

it('converts epoch time to DATE', () => {
const data = [1, 2] // days since epoch
/** @type {SchemaElement} */
Expand Down Expand Up @@ -180,6 +187,33 @@ describe('parseFloat16', () => {

it('convert float16 subnormal number', () => {
  // Expected value is 2^-14 * (frac / 1024) with frac = 1023, i.e. the
  // subnormal formula with all fraction bits set (presumably the bytes are
  // read little-endian as 0x03ff — confirm against parseFloat16).
  // A single matcher only: toBeCloseTo does not return a chainable value.
  expect(parseFloat16(new Uint8Array([0xff, 0x03])))
    .toBeCloseTo(2 ** -14 * (1023 / 1024), 5)
})
})

// Unit tests for parseDecimal: big-endian byte arrays decoded as signed integers.
describe('parseDecimal', () => {
  it('should return 0 for an empty Uint8Array', () => {
    const empty = new Uint8Array()
    expect(parseDecimal(empty)).toBe(0)
  })

  it('should parse a single byte', () => {
    const oneByte = new Uint8Array([42])
    expect(parseDecimal(oneByte)).toBe(42)
  })

  it('should parse two bytes in big-endian order', () => {
    // most significant byte first: 1 * 256 + 0
    const twoBytes = new Uint8Array([1, 0])
    expect(parseDecimal(twoBytes)).toBe(256)
  })

  it('should parse three bytes', () => {
    // 1 * 65536 + 2 * 256 + 3
    const threeBytes = new Uint8Array([1, 2, 3])
    expect(parseDecimal(threeBytes)).toBe(66051)
  })

  it('should parse -1 as a 32-bit number', () => {
    // all bits set is -1 in two's complement
    const allOnes = new Uint8Array([255, 255, 255, 255])
    expect(parseDecimal(allOnes)).toBe(-1)
  })
})
22 changes: 22 additions & 0 deletions test/files/decimal-column.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
[
[
40,
2015
],
[
74,
2015
],
[
140,
2015
],
[
152,
2015
],
[
190,
2015
]
]
131 changes: 131 additions & 0 deletions test/files/decimal-column.metadata.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
{
"version": 2,
"schema": [
{
"repetition_type": "REQUIRED",
"name": "schema",
"num_children": 2
},
{
"type": "INT64",
"repetition_type": "OPTIONAL",
"name": "mid"
},
{
"type": "FIXED_LEN_BYTE_ARRAY",
"type_length": 6,
"repetition_type": "OPTIONAL",
"name": "value",
"converted_type": "DECIMAL",
"scale": 10,
"precision": 14,
"logical_type": {
"type": "DECIMAL",
"scale": 10,
"precision": 14
}
}
],
"num_rows": 5,
"row_groups": [
{
"columns": [
{
"file_offset": 0,
"meta_data": {
"type": "INT64",
"encodings": [
"PLAIN",
"RLE",
"RLE_DICTIONARY"
],
"path_in_schema": [
"mid"
],
"codec": "SNAPPY",
"num_values": 5,
"total_uncompressed_size": 126,
"total_compressed_size": 120,
"data_page_offset": 50,
"dictionary_page_offset": 4,
"statistics": {
"max": 190,
"min": 40,
"null_count": 0,
"max_value": 190,
"min_value": 40
},
"encoding_stats": [
{
"page_type": "DICTIONARY_PAGE",
"encoding": "PLAIN",
"count": 1
},
{
"page_type": "DATA_PAGE",
"encoding": "RLE_DICTIONARY",
"count": 1
}
]
}
},
{
"file_offset": 0,
"meta_data": {
"type": "FIXED_LEN_BYTE_ARRAY",
"encodings": [
"PLAIN",
"RLE",
"RLE_DICTIONARY"
],
"path_in_schema": [
"value"
],
"codec": "SNAPPY",
"num_values": 5,
"total_uncompressed_size": 82,
"total_compressed_size": 86,
"data_page_offset": 146,
"dictionary_page_offset": 124,
"statistics": {
"max": 2015,
"min": 2015,
"null_count": 0,
"max_value": 2015,
"min_value": 2015
},
"encoding_stats": [
{
"page_type": "DICTIONARY_PAGE",
"encoding": "PLAIN",
"count": 1
},
{
"page_type": "DATA_PAGE",
"encoding": "RLE_DICTIONARY",
"count": 1
}
]
}
}
],
"total_byte_size": 208,
"num_rows": 5,
"file_offset": 4,
"total_compressed_size": 206,
"ordinal": 0
}
],
"key_value_metadata": [
{
"key": "pandas",
"value": "{\"index_columns\": [{\"kind\": \"range\", \"name\": null, \"start\": 0, \"stop\": 5, \"step\": 1}], \"column_indexes\": [{\"name\": null, \"field_name\": null, \"pandas_type\": \"unicode\", \"numpy_type\": \"object\", \"metadata\": {\"encoding\": \"UTF-8\"}}], \"columns\": [{\"name\": \"mid\", \"field_name\": \"mid\", \"pandas_type\": \"int64\", \"numpy_type\": \"int64\", \"metadata\": null}, {\"name\": \"value\", \"field_name\": \"value\", \"pandas_type\": \"decimal\", \"numpy_type\": \"object\", \"metadata\": {\"precision\": 14, \"scale\": 10}}], \"creator\": {\"library\": \"pyarrow\", \"version\": \"19.0.0\"}, \"pandas_version\": \"2.2.3\"}"
},
{
"key": "ARROW:schema",
"value": "/////xgDAAAQAAAAAAAKAA4ABgAFAAgACgAAAAABBAAQAAAAAAAKAAwAAAAEAAgACgAAAGACAAAEAAAAAQAAAAQAAACA/f//QAIAAAQAAAAyAgAAeyJpbmRleF9jb2x1bW5zIjogW3sia2luZCI6ICJyYW5nZSIsICJuYW1lIjogbnVsbCwgInN0YXJ0IjogMCwgInN0b3AiOiA1LCAic3RlcCI6IDF9XSwgImNvbHVtbl9pbmRleGVzIjogW3sibmFtZSI6IG51bGwsICJmaWVsZF9uYW1lIjogbnVsbCwgInBhbmRhc190eXBlIjogInVuaWNvZGUiLCAibnVtcHlfdHlwZSI6ICJvYmplY3QiLCAibWV0YWRhdGEiOiB7ImVuY29kaW5nIjogIlVURi04In19XSwgImNvbHVtbnMiOiBbeyJuYW1lIjogIm1pZCIsICJmaWVsZF9uYW1lIjogIm1pZCIsICJwYW5kYXNfdHlwZSI6ICJpbnQ2NCIsICJudW1weV90eXBlIjogImludDY0IiwgIm1ldGFkYXRhIjogbnVsbH0sIHsibmFtZSI6ICJ2YWx1ZSIsICJmaWVsZF9uYW1lIjogInZhbHVlIiwgInBhbmRhc190eXBlIjogImRlY2ltYWwiLCAibnVtcHlfdHlwZSI6ICJvYmplY3QiLCAibWV0YWRhdGEiOiB7InByZWNpc2lvbiI6IDE0LCAic2NhbGUiOiAxMH19XSwgImNyZWF0b3IiOiB7ImxpYnJhcnkiOiAicHlhcnJvdyIsICJ2ZXJzaW9uIjogIjE5LjAuMCJ9LCAicGFuZGFzX3ZlcnNpb24iOiAiMi4yLjMifQAABgAAAHBhbmRhcwAAAgAAAFAAAAAEAAAAyP///wAAAQcQAAAAIAAAAAQAAAAAAAAABQAAAHZhbHVlAAAACAAMAAQACAAIAAAADgAAAAoAAAAQABQACAAGAAcADAAAABAAEAAAAAAAAQIQAAAAHAAAAAQAAAAAAAAAAwAAAG1pZAAIAAwACAAHAAgAAAAAAAABQAAAAAAAAAA="
}
],
"created_by": "parquet-cpp-arrow version 19.0.0",
"metadata_length": 1959
}
Binary file added test/files/decimal-column.parquet
Binary file not shown.

0 comments on commit 2e72cd2

Please sign in to comment.