Skip to content

Commit

Permalink
Include metadata length in metadata
Browse files (browse the repository at this point in the history)
  • Loading branch information
platypii committed Jan 12, 2024
1 parent b01bfa8 commit 03727d0
Show file tree
Hide file tree
Showing 4 changed files with 12 additions and 8 deletions.
6 changes: 3 additions & 3 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,14 @@
"typecheck": "tsc"
},
"devDependencies": {
"@types/node": "20.10.8",
"@types/node": "20.11.0",
"@typescript-eslint/eslint-plugin": "6.18.1",
"@vitest/coverage-v8": "1.1.3",
"@vitest/coverage-v8": "1.2.0",
"eslint": "8.56.0",
"eslint-plugin-import": "2.29.1",
"eslint-plugin-jsdoc": "48.0.2",
"http-server": "14.1.1",
"typescript": "5.3.3",
"vitest": "1.1.3"
"vitest": "1.2.0"
}
}
7 changes: 4 additions & 3 deletions src/metadata.js
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,11 @@ export function parquetMetadata(arrayBuffer) {
// Metadata length is a 4-byte little-endian uint32 stored immediately before the trailing PAR1 magic
const metadataLengthOffset = view.byteLength - 8
const metadataLength = view.getUint32(view.byteLength - 8, true)
if (metadataLength <= 0 || metadataLength > metadataLengthOffset) {
throw new Error('parquet file invalid metadata length')
if (metadataLength <= 0) {
throw new Error('parquet invalid metadata length')
}
if (metadataLength > view.byteLength - 8) {
throw new Error('parquet file metadata length exceeds file size')
throw new Error('parquet metadata length exceeds buffer size')
}

const metadataOffset = metadataLengthOffset - metadataLength
Expand Down Expand Up @@ -98,5 +98,6 @@ export function parquetMetadata(arrayBuffer) {
row_groups,
key_value_metadata,
created_by,
metadata_length: metadataLength,
}
}
1 change: 1 addition & 0 deletions src/types.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ export interface FileMetaData {
row_groups: RowGroup[]
key_value_metadata?: KeyValue[]
created_by?: string
metadata_length: number
}

export interface SchemaElement {
Expand Down
6 changes: 4 additions & 2 deletions test/metadata.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ describe('parquetMetadata', () => {
// Parquet v1 from DuckDB
const expectedMetadata = {
version: 1,
created_by: 'DuckDB',
metadata_length: 149,
schema: [
{ repetition_type: 0, name: 'duckdb_schema', num_children: 1 },
{ type: 6, repetition_type: 1, name: 'ADDRTYPE', converted_type: 0 },
Expand Down Expand Up @@ -55,7 +57,6 @@ describe('parquetMetadata', () => {
num_rows: 10,
},
],
created_by: 'DuckDB',
}

const casted = toJson(result)
Expand All @@ -69,6 +70,8 @@ describe('parquetMetadata', () => {
// Parquet v2 from pandas with 2 row groups
const expectedMetadata = {
version: 2,
created_by: 'parquet-cpp-arrow version 14.0.2',
metadata_length: 1602,
schema: [
{
repetition_type: 0,
Expand Down Expand Up @@ -153,7 +156,6 @@ describe('parquetMetadata', () => {
// value: base64
},
],
created_by: 'parquet-cpp-arrow version 14.0.2',
}

const casted = toJson(result)
Expand Down

0 comments on commit 03727d0

Please sign in to comment.