Skip to content

Commit

Permalink
Merge branch 'master' of github.com:apache/parquet-java into feature-…
Browse files Browse the repository at this point in the history
…apache-parquet-2417-geospatial
  • Loading branch information
zhangfengcdt committed Feb 7, 2025
2 parents 6f1d586 + fb6f0be commit e4e3cae
Show file tree
Hide file tree
Showing 57 changed files with 1,751 additions and 500 deletions.
1 change: 0 additions & 1 deletion .github/ISSUE_TEMPLATE/bug_report.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@ body:
- Avro
- Pig
- Protobuf
- Scala
- Thrift
- CLI
- Benchmark
Expand Down
3 changes: 1 addition & 2 deletions .github/ISSUE_TEMPLATE/feature_request.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,8 @@ body:
- Avro
- Pig
- Protobuf
- Scala
- Thrift
- CLI
- Benchmark
validations:
required: false
required: false
3 changes: 1 addition & 2 deletions .github/ISSUE_TEMPLATE/usage_question.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -62,9 +62,8 @@ body:
- Avro
- Pig
- Protobuf
- Scala
- Thrift
- CLI
- Benchmark
validations:
required: false
required: false
56 changes: 0 additions & 56 deletions .github/workflows/ci-hadoop2.yml

This file was deleted.

19 changes: 0 additions & 19 deletions NOTICE
Original file line number Diff line number Diff line change
Expand Up @@ -7,25 +7,6 @@ The Apache Software Foundation (http://www.apache.org/).

--------------------------------------------------------------------------------

This product includes parquet-tools, initially developed at ARRIS, Inc. with
the following copyright notice:

Copyright 2013 ARRIS, Inc.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

--------------------------------------------------------------------------------

This product includes parquet-protobuf, initially developed by Lukas Nalezenc
with the following copyright notice:

Expand Down
11 changes: 6 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ Parquet is a very active project, and new features are being added quickly. Here
* Column stats
* Delta encoding
* Index pages
* Scala DSL (deprecated)
* Java Vector API support (experimental)

## Java Vector API support
Expand Down Expand Up @@ -166,29 +167,29 @@ The build runs in [GitHub Actions](https://github.com/apache/parquet-java/action

## Add Parquet as a dependency in Maven

The current release is version `1.14.4`.
The current release is version `1.15.0`.

```xml
<dependencies>
<dependency>
<groupId>org.apache.parquet</groupId>
<artifactId>parquet-common</artifactId>
<version>1.14.4</version>
<version>1.15.0</version>
</dependency>
<dependency>
<groupId>org.apache.parquet</groupId>
<artifactId>parquet-encoding</artifactId>
<version>1.14.4</version>
<version>1.15.0</version>
</dependency>
<dependency>
<groupId>org.apache.parquet</groupId>
<artifactId>parquet-column</artifactId>
<version>1.14.4</version>
<version>1.15.0</version>
</dependency>
<dependency>
<groupId>org.apache.parquet</groupId>
<artifactId>parquet-hadoop</artifactId>
<version>1.14.4</version>
<version>1.15.0</version>
</dependency>
</dependencies>
```
Expand Down
2 changes: 1 addition & 1 deletion parquet-arrow/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
<groupId>org.apache.parquet</groupId>
<artifactId>parquet</artifactId>
<relativePath>../pom.xml</relativePath>
<version>1.15.0-SNAPSHOT</version>
<version>1.16.0-SNAPSHOT</version>
</parent>

<modelVersion>4.0.0</modelVersion>
Expand Down
2 changes: 1 addition & 1 deletion parquet-avro/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
<groupId>org.apache.parquet</groupId>
<artifactId>parquet</artifactId>
<relativePath>../pom.xml</relativePath>
<version>1.15.0-SNAPSHOT</version>
<version>1.16.0-SNAPSHOT</version>
</parent>

<modelVersion>4.0.0</modelVersion>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -194,13 +194,13 @@ private static void addLogicalTypeConversion(SpecificData model, Schema schema,
model.addLogicalTypeConversion(conversion);
}
}

for (Schema.Field field : schema.getFields()) {
addLogicalTypeConversion(model, field.schema(), seenSchemas);
}
} catch (NoSuchFieldException e) {
// Avro classes without logical types (denoted by the "conversions" field)
}

for (Schema.Field field : schema.getFields()) {
addLogicalTypeConversion(model, field.schema(), seenSchemas);
}
}
break;
case MAP:
Expand Down Expand Up @@ -939,7 +939,7 @@ public void end() {
// 2-level lists and the result is checked to see if it matches the requested
// element type. This should always convert assuming 2-level lists because
// 2-level and 3-level can't be mixed.
private static final AvroSchemaConverter CONVERTER = new AvroSchemaConverter(true);
private static final AvroSchemaConverter CONVERTER = new AvroSchemaConverter(true, true);

/**
* Returns whether the given type is the element type of a list or is a
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,20 +90,21 @@ public class AvroSchemaConverter {
private final Set<String> pathsToInt96;

public AvroSchemaConverter() {
this(ADD_LIST_ELEMENT_RECORDS_DEFAULT);
this(ADD_LIST_ELEMENT_RECORDS_DEFAULT, READ_INT96_AS_FIXED_DEFAULT);
}

/**
* Constructor used by {@link AvroRecordConverter#isElementType}, which always
* uses the 2-level list conversion.
* uses the 2-level list conversion and reads INT96 as a 12-byte array.
*
* @param assumeRepeatedIsListElement whether to assume 2-level lists
* @param readInt96AsFixed whether to read Parquet INT96 as a 12-byte array
*/
AvroSchemaConverter(boolean assumeRepeatedIsListElement) {
AvroSchemaConverter(boolean assumeRepeatedIsListElement, boolean readInt96AsFixed) {
this.assumeRepeatedIsListElement = assumeRepeatedIsListElement;
this.writeOldListStructure = WRITE_OLD_LIST_STRUCTURE_DEFAULT;
this.writeParquetUUID = WRITE_PARQUET_UUID_DEFAULT;
this.readInt96AsFixed = READ_INT96_AS_FIXED_DEFAULT;
this.readInt96AsFixed = readInt96AsFixed;
this.pathsToInt96 = Collections.emptySet();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
*/
package org.apache.parquet.avro;

import static org.apache.parquet.avro.AvroReadSupport.READ_INT96_AS_FIXED;
import static org.apache.parquet.avro.AvroTestUtil.array;
import static org.apache.parquet.avro.AvroTestUtil.field;
import static org.apache.parquet.avro.AvroTestUtil.instance;
Expand Down Expand Up @@ -1136,6 +1137,25 @@ public void testIsElementTypeRequiredRepeatedRecord() {
avroSchema.getFields().get(0).schema()));
}

/**
 * Checks that {@link AvroRecordConverter#isElementType} accepts a repeated group holding an
 * INT96 field as the element type of the Avro array schema produced when
 * {@code READ_INT96_AS_FIXED} is enabled on the converter's configuration.
 */
@Test
public void testIsElementTypeInt96Element() {
  Configuration conf = new Configuration();
  conf.setBoolean(READ_INT96_AS_FIXED, true);

  String schemaText = "message SchemaWithInt96 {\n"
      + " optional group list (LIST) {\n"
      + " repeated group list {\n"
      + " optional int96 a_timestamp;\n"
      + " }\n"
      + " }\n"
      + "}";
  MessageType parquetSchema = MessageTypeParser.parseMessageType(schemaText);
  Schema avroSchema = new AvroSchemaConverter(conf).convert(parquetSchema);

  // The first Avro field is the list; unwrap it with getNonNull to reach the array's element schema.
  Schema listSchema = AvroSchemaConverter.getNonNull(avroSchema.getFields().get(0).schema());
  Schema elementSchema = listSchema.getElementType();

  // The Parquet side of the comparison is the inner repeated group named "list".
  Assert.assertTrue(AvroRecordConverter.isElementType(
      parquetSchema.getType("list").asGroupType().getType("list"), elementSchema));
}

@Test
public void testIsElementTypeOptionalRepeatedRecord() {
// Test `_tuple` style naming
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,14 +88,22 @@ public void testModelForGenericRecord() {
public void testModelForSpecificRecordWithLogicalTypesWithDeprecatedAvro1_8() {
Mockito.when(AvroRecordConverter.getRuntimeAvroVersion()).thenReturn("1.8.2");

// Test that model is generated correctly
final SpecificData model = AvroRecordConverter.getModelForSchema(LogicalTypesTestDeprecated.SCHEMA$);
// Test that model is generated correctly when record contains both top-level and nested logical types
SpecificData model = AvroRecordConverter.getModelForSchema(LogicalTypesTestDeprecated.SCHEMA$);
// Expect exactly three conversions: Instant, LocalDate and LocalTime
Collection<Conversion<?>> conversions = model.getConversions();
assertEquals(conversions.size(), 3);
assertEquals(3, conversions.size());
assertNotNull(model.getConversionByClass(Instant.class));
assertNotNull(model.getConversionByClass(LocalDate.class));
assertNotNull(model.getConversionByClass(LocalTime.class));

// Test that model is generated correctly when record contains only nested logical types
model = AvroRecordConverter.getModelForSchema(NestedOnlyLogicalTypesDeprecated.SCHEMA$);
// Expect exactly two conversions, both coming from the nested record: LocalDate and LocalTime
conversions = model.getConversions();
assertEquals(2, conversions.size());
assertNotNull(model.getConversionByClass(LocalDate.class));
assertNotNull(model.getConversionByClass(LocalTime.class));
}

@Test
Expand Down Expand Up @@ -147,6 +155,7 @@ public static org.apache.avro.Schema getClassSchema() {
};
}

// An Avro class generated from Avro 1.8 that contains both nested and top-level logical type fields
@org.apache.avro.specific.AvroGenerated
public abstract static class LogicalTypesTestDeprecated extends org.apache.avro.specific.SpecificRecordBase
implements org.apache.avro.specific.SpecificRecord {
Expand Down Expand Up @@ -179,4 +188,26 @@ public static org.apache.avro.Schema getClassSchema() {
new org.apache.avro.data.TimeConversions.TimestampMillisConversion(), null, null
};
}

// An Avro class generated from Avro 1.8 that contains only nested logical type fields.
// Test fixture: the record's single field, "local_date_time", uses the schema of
// LocalDateTimeTestDeprecated, and this class itself declares no conversions field.
@org.apache.avro.specific.AvroGenerated
public abstract static class NestedOnlyLogicalTypesDeprecated extends org.apache.avro.specific.SpecificRecordBase
implements org.apache.avro.specific.SpecificRecord {
// Record schema with one field (no default value) whose type is the nested record schema.
public static final org.apache.avro.Schema SCHEMA$ = SchemaBuilder.builder()
.record("NestedOnlyLogicalTypesDeprecated")
.namespace("org.apache.parquet.avro.TestAvroRecordConverter")
.fields()
.name("local_date_time")
.type(LocalDateTimeTestDeprecated.getClassSchema())
.noDefault()
.endRecord();

// Returns the schema constant declared above.
public static org.apache.avro.Schema getClassSchema() {
return SCHEMA$;
}

// NOTE(review): presumably looked up by name (reflectively) by AvroRecordConverter.getModelForSchema,
// mirroring Avro-generated classes; confirm before renaming or changing modifiers.
private static SpecificData MODEL$ = new SpecificData();

// No top-level conversions field, since logical types are all nested
}
}
2 changes: 1 addition & 1 deletion parquet-benchmarks/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
<groupId>org.apache.parquet</groupId>
<artifactId>parquet</artifactId>
<relativePath>../pom.xml</relativePath>
<version>1.15.0-SNAPSHOT</version>
<version>1.16.0-SNAPSHOT</version>
</parent>

<modelVersion>4.0.0</modelVersion>
Expand Down
2 changes: 2 additions & 0 deletions parquet-cli/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,8 @@ Usage: parquet [options] [command] [command options]
Scan all records from a file
rewrite
Rewrite one or more Parquet files to a new Parquet file
size-stats
Print size statistics for a Parquet file
Examples:
Expand Down
12 changes: 9 additions & 3 deletions parquet-cli/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
<groupId>org.apache.parquet</groupId>
<artifactId>parquet</artifactId>
<relativePath>../pom.xml</relativePath>
<version>1.15.0-SNAPSHOT</version>
<version>1.16.0-SNAPSHOT</version>
</parent>

<modelVersion>4.0.0</modelVersion>
Expand Down Expand Up @@ -190,6 +190,12 @@
<artifactId>hadoop-common</artifactId>
<scope>${deps.scope}</scope>
</dependency>
<dependency>
<groupId>org.apache.thrift</groupId>
<artifactId>libthrift</artifactId>
<version>${format.thrift.version}</version>
<scope>${deps.scope}</scope>
</dependency>
<dependency>
<groupId>com.google.code.findbugs</groupId>
<artifactId>jsr305</artifactId>
Expand All @@ -205,13 +211,13 @@
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.16.1</version>
<version>2.18.0</version>
<scope>${deps.scope}</scope>
</dependency>
<dependency>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
<version>1.3.3</version>
<version>1.3.4</version>
<scope>${deps.scope}</scope>
</dependency>
</dependencies>
Expand Down
2 changes: 2 additions & 0 deletions parquet-cli/src/main/java/org/apache/parquet/cli/Main.java
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
import org.apache.parquet.cli.commands.ShowDictionaryCommand;
import org.apache.parquet.cli.commands.ShowFooterCommand;
import org.apache.parquet.cli.commands.ShowPagesCommand;
import org.apache.parquet.cli.commands.ShowSizeStatisticsCommand;
import org.apache.parquet.cli.commands.ToAvroCommand;
import org.apache.parquet.cli.commands.TransCompressionCommand;
import org.slf4j.Logger;
Expand Down Expand Up @@ -105,6 +106,7 @@ public class Main extends Configured implements Tool {
jc.addCommand("bloom-filter", new ShowBloomFilterCommand(console));
jc.addCommand("scan", new ScanCommand(console));
jc.addCommand("rewrite", new RewriteCommand(console));
jc.addCommand("size-stats", new ShowSizeStatisticsCommand(console));
}

@Override
Expand Down
Loading

0 comments on commit e4e3cae

Please sign in to comment.