diff --git a/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoMessageConverter.java b/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoMessageConverter.java index d446598f06..ff9e4ca33a 100644 --- a/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoMessageConverter.java +++ b/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoMessageConverter.java @@ -763,6 +763,12 @@ public ProtoUInt32ValueConverter(ParentValueContainer parent) { this.parent = parent; } + @Override + public void addInt(int value) { + parent.add(UInt32Value.of(value)); + } + + // This is left for backward compatibility with the old implementation which used int64 for uint32 @Override public void addLong(long value) { parent.add(UInt32Value.of(Math.toIntExact(value))); diff --git a/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoSchemaConverter.java b/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoSchemaConverter.java index 74bb4235a2..ff27b263e9 100644 --- a/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoSchemaConverter.java +++ b/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoSchemaConverter.java @@ -260,7 +260,7 @@ private Builder>, GroupBuilder> addF return builder.primitive(INT32, getRepetition(descriptor)); } if (messageType.equals(UInt32Value.getDescriptor())) { - return builder.primitive(INT64, getRepetition(descriptor)); + return builder.primitive(INT32, getRepetition(descriptor)); } if (messageType.equals(BytesValue.getDescriptor())) { return builder.primitive(BINARY, getRepetition(descriptor)); diff --git a/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoWriteSupport.java b/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoWriteSupport.java index 637f6fda91..b9fe4bf9e1 100644 --- a/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoWriteSupport.java +++ b/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoWriteSupport.java @@ -763,7 +763,7 @@ void writeRawValue(Object value) { class UInt32ValueWriter extends FieldWriter { @Override void writeRawValue(Object value) { - recordConsumer.addLong(((UInt32Value) value).getValue()); + recordConsumer.addInteger(((UInt32Value) value).getValue()); } } diff --git a/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoInputOutputFormatTest.java b/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoInputOutputFormatTest.java index 57ad4d4f08..ca59d3db5e 100644 --- a/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoInputOutputFormatTest.java +++ b/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoInputOutputFormatTest.java @@ -678,6 +678,34 @@ public void testProto3WrappedMessageClass() throws Exception { assertEquals(msgNonEmpty, result.get(1)); } + @Test + public void testProto3Uint32Behaviour() throws Exception { + + TestProto3.SchemaConverterAllDatatypes intMin = TestProto3.SchemaConverterAllDatatypes.newBuilder() + .setOptionalUInt32(Integer.MIN_VALUE) + .build(); + assertEquals(intMin.toString(), "optionalUInt32: 2147483648\n"); + TestProto3.SchemaConverterAllDatatypes uintMin = TestProto3.SchemaConverterAllDatatypes.newBuilder() + .setOptionalUInt32(-1) + .build(); + assertEquals(uintMin.toString(), "optionalUInt32: 4294967295\n"); + TestProto3.SchemaConverterAllDatatypes uintMax = TestProto3.SchemaConverterAllDatatypes.newBuilder() + .setOptionalUInt32(Integer.MAX_VALUE) + .build(); + assertEquals(uintMax.toString(), "optionalUInt32: 2147483647\n"); + + Configuration conf = new Configuration(); + Path outputPath = new WriteUsingMR(conf).write(intMin, uintMin, uintMax); + ReadUsingMR readUsingMR = new ReadUsingMR(conf); + String customClass = TestProto3.SchemaConverterAllDatatypes.class.getName(); + ProtoReadSupport.setProtobufClass(readUsingMR.getConfiguration(), customClass); + List result = readUsingMR.read(outputPath); + + assertEquals(result.get(0), intMin); + assertEquals(result.get(1), uintMin); + assertEquals(result.get(2), uintMax); + } + /** * Runs job that writes input to file and then job reading data back. */ diff --git a/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoSchemaConverterTest.java b/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoSchemaConverterTest.java index 5240be5a36..e9f08f33f7 100644 --- a/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoSchemaConverterTest.java +++ b/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoSchemaConverterTest.java @@ -400,7 +400,7 @@ public void testProto3ConvertWrappedMessageUnwrapped() throws Exception { + " optional int64 wrappedInt64 = 3;\n" + " optional int64 wrappedUInt64 = 4;\n" + " optional int32 wrappedInt32 = 5;\n" - + " optional int64 wrappedUInt32 = 6;\n" + + " optional int32 wrappedUInt32 = 6;\n" + " optional boolean wrappedBool = 7;\n" + " optional binary wrappedString (UTF8) = 8;\n" + " optional binary wrappedBytes = 9;\n" diff --git a/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoWriteSupportTest.java b/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoWriteSupportTest.java index 360da8b741..e80524d14b 100644 --- a/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoWriteSupportTest.java +++ b/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoWriteSupportTest.java @@ -22,22 +22,7 @@ import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; -import com.google.protobuf.BoolValue; -import com.google.protobuf.ByteString; -import com.google.protobuf.BytesValue; -import com.google.protobuf.Descriptors; -import com.google.protobuf.DoubleValue; -import com.google.protobuf.DynamicMessage; -import com.google.protobuf.FloatValue; -import com.google.protobuf.Int32Value; -import com.google.protobuf.Int64Value; -import com.google.protobuf.Message; -import com.google.protobuf.MessageOrBuilder; -import com.google.protobuf.StringValue; -import com.google.protobuf.Timestamp; -import com.google.protobuf.UInt32Value; -import com.google.protobuf.UInt64Value; -import com.google.protobuf.Value; +import com.google.protobuf.*; import com.google.protobuf.util.Timestamps; import java.io.IOException; import java.time.LocalDate; @@ -1372,6 +1357,40 @@ public void testProto3WrappedMessageUnwrappedRoundTrip() throws Exception { gotBackFirst.getWrappedBytes().getValue()); } + @Test + public void testProto3WrappedMessageUnwrappedRoundTripUint32() throws Exception { + + TestProto3.WrappedMessage msgMin = TestProto3.WrappedMessage.newBuilder() + .setWrappedUInt32(UInt32Value.of(Integer.MIN_VALUE)) + .build(); + assertEquals(TextFormat.shortDebugString(msgMin), "wrappedUInt32 { value: 2147483648 }"); + + TestProto3.WrappedMessage msgMax = TestProto3.WrappedMessage.newBuilder() + .setWrappedUInt32(UInt32Value.of(Integer.MAX_VALUE)) + .build(); + assertEquals(TextFormat.shortDebugString(msgMax), "wrappedUInt32 { value: 2147483647 }"); + + TestProto3.WrappedMessage msgMinusOne = TestProto3.WrappedMessage.newBuilder() + .setWrappedUInt32(UInt32Value.of(-1)) + .build(); + assertEquals(TextFormat.shortDebugString(msgMinusOne), "wrappedUInt32 { value: 4294967295 }"); + + Path tmpFilePath = TestUtils.someTemporaryFilePath(); + ParquetWriter writer = ProtoParquetWriter.builder(tmpFilePath) + .withMessage(TestProto3.WrappedMessage.class) + .config(ProtoWriteSupport.PB_UNWRAP_PROTO_WRAPPERS, "true") + .build(); + writer.write(msgMin); + writer.write(msgMax); + writer.write(msgMinusOne); + writer.close(); + List gotBack = TestUtils.readMessages(tmpFilePath, TestProto3.WrappedMessage.class); + + assertEquals(msgMin, gotBack.get(0)); + assertEquals(msgMax, gotBack.get(1)); + assertEquals(msgMinusOne, gotBack.get(2)); + } + @Test public void testProto3WrappedMessageWithNullsRoundTrip() throws Exception { TestProto3.WrappedMessage.Builder msg = TestProto3.WrappedMessage.newBuilder();