Skip to content

Commit

Permalink
GH-3115: Fix int96 read issue in complex type (#3118)
Browse files Browse the repository at this point in the history
  • Loading branch information
pratyush-sharma-2025 authored Jan 30, 2025
1 parent be5ada2 commit bb4f867
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -939,7 +939,7 @@ public void end() {
// 2-level lists and the result is checked to see if it matches the requested
// element type. This should always convert assuming 2-level lists because
// 2-level and 3-level can't be mixed.
private static final AvroSchemaConverter CONVERTER = new AvroSchemaConverter(true);
private static final AvroSchemaConverter CONVERTER = new AvroSchemaConverter(true, true);

/**
* Returns whether the given type is the element type of a list or is a
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,20 +90,21 @@ public class AvroSchemaConverter {
private final Set<String> pathsToInt96;

public AvroSchemaConverter() {
this(ADD_LIST_ELEMENT_RECORDS_DEFAULT);
this(ADD_LIST_ELEMENT_RECORDS_DEFAULT, READ_INT96_AS_FIXED_DEFAULT);
}

/**
* Constructor used by {@link AvroRecordConverter#isElementType}, which always
* uses the 2-level list conversion.
* uses the 2-level list conversion and reads INT96 as a 12-byte array.
*
* @param assumeRepeatedIsListElement whether to assume 2-level lists
* @param readInt96AsFixed whether to read Parquet INT96 as a 12-byte array
*/
AvroSchemaConverter(boolean assumeRepeatedIsListElement) {
AvroSchemaConverter(boolean assumeRepeatedIsListElement, boolean readInt96AsFixed) {
this.assumeRepeatedIsListElement = assumeRepeatedIsListElement;
this.writeOldListStructure = WRITE_OLD_LIST_STRUCTURE_DEFAULT;
this.writeParquetUUID = WRITE_PARQUET_UUID_DEFAULT;
this.readInt96AsFixed = READ_INT96_AS_FIXED_DEFAULT;
this.readInt96AsFixed = readInt96AsFixed;
this.pathsToInt96 = Collections.emptySet();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
*/
package org.apache.parquet.avro;

import static org.apache.parquet.avro.AvroReadSupport.READ_INT96_AS_FIXED;
import static org.apache.parquet.avro.AvroTestUtil.array;
import static org.apache.parquet.avro.AvroTestUtil.field;
import static org.apache.parquet.avro.AvroTestUtil.instance;
Expand Down Expand Up @@ -1136,6 +1137,25 @@ public void testIsElementTypeRequiredRepeatedRecord() {
avroSchema.getFields().get(0).schema()));
}

@Test
public void testIsElementTypeInt96Element() {
  // Enable READ_INT96_AS_FIXED so the schema converter built from this
  // configuration accepts the INT96 column below.
  Configuration conf = new Configuration();
  conf.setBoolean(READ_INT96_AS_FIXED, true);

  // A LIST-annotated group whose repeated group holds a single optional INT96 field.
  String schemaText = "message SchemaWithInt96 {\n"
      + " optional group list (LIST) {\n"
      + " repeated group list {\n"
      + " optional int96 a_timestamp;\n"
      + " }\n"
      + " }\n"
      + "}";
  MessageType parquetSchema = MessageTypeParser.parseMessageType(schemaText);

  // Convert to Avro, then unwrap the first field's schema down to the list's element schema.
  Schema avroSchema = new AvroSchemaConverter(conf).convert(parquetSchema);
  Schema listSchema = AvroSchemaConverter.getNonNull(avroSchema.getFields().get(0).schema());
  Schema elementSchema = listSchema.getElementType();

  // The repeated group must be recognized as the element type of the Avro list.
  Assert.assertTrue(AvroRecordConverter.isElementType(
      parquetSchema.getType("list").asGroupType().getType("list"), elementSchema));
}

@Test
public void testIsElementTypeOptionalRepeatedRecord() {
// Test `_tuple` style naming
Expand Down

0 comments on commit bb4f867

Please sign in to comment.