Skip to content

Commit

Permalink
Merge pull request #444 from rubanm/rubanm/base64_reader_fix
Browse files Browse the repository at this point in the history
throw DecodeException in Base64Codec
  • Loading branch information
rubanm committed May 26, 2015
2 parents a7df8e5 + aed1c1e commit 8189113
Show file tree
Hide file tree
Showing 2 changed files with 120 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

import java.util.Arrays;

import com.twitter.elephantbird.mapreduce.io.DecodeException;

/** A very fast and memory efficient class to encode and decode to and from BASE64 in full accordance
* with RFC 2045.<br><br>
* On Windows XP sp1 with 1.4.2_04 and later ;), this encoder and decoder is about 10 times faster
Expand Down Expand Up @@ -157,7 +159,7 @@ public final static byte[] encodeToByte(byte[] sArr, int sOff, int sLen, boolean
* @return The decoded array of bytes. May be of length 0. Will be <code>null</code> if the number of legal characters
* (including '=') isn't divisible by 4. (I.e. definitely corrupted).
*/
public final static byte[] decode(byte[] sArr)
public final static byte[] decode(byte[] sArr) throws DecodeException
{
    // Convenience overload: decode the whole array, i.e. offset 0 and the array's full length.
    // Note that sArr.length is read here, before the range-based overload is entered, so a
    // null sArr fails with a NullPointerException at this point.
    final int wholeLength = sArr.length;
    return decode(sArr, 0, wholeLength);
}
Expand All @@ -170,8 +172,16 @@ public final static byte[] decode(byte[] sArr)
* @return The decoded array of bytes. May be of length 0. Will be <code>null</code> if the number of legal characters
* (including '=') isn't divisible by 4. (I.e. definitely corrupted).
*/
public final static byte[] decode(byte[] sArr, int sOff, int sLen)
public final static byte[] decode(byte[] sArr, int sOff, int sLen) throws DecodeException
{
    // Delegate the real work to doDecode and translate any unchecked failure it raises
    // into the checked DecodeException, keeping the original exception as the cause.
    final byte[] decoded;
    try {
        decoded = doDecode(sArr, sOff, sLen);
    } catch (RuntimeException cause) {
        throw new DecodeException(cause);
    }
    return decoded;
}

private final static byte[] doDecode(byte[] sArr, int sOff, int sLen) {
// Count illegal characters (including '\r', '\n') to know what size the returned array will be,
// so we don't have to reallocate & copy it later.
int sepCnt = 0; // Number of separator characters. (Actually illegal characters, but that's a bonus...)
Expand Down Expand Up @@ -225,8 +235,16 @@ public final static byte[] decode(byte[] sArr, int sOff, int sLen)
* @param sArr The source array. Length 0 will return an empty array. <code>null</code> will throw an exception.
* @return The decoded array of bytes. May be of length 0.
*/
public final static byte[] decodeFast(byte[] sArr, int sLen)
public final static byte[] decodeFast(byte[] sArr, int sLen) throws DecodeException
{
    // Delegate the real work to doDecodeFast and translate any unchecked failure it raises
    // into the checked DecodeException, keeping the original exception as the cause.
    final byte[] decoded;
    try {
        decoded = doDecodeFast(sArr, sLen);
    } catch (RuntimeException cause) {
        throw new DecodeException(cause);
    }
    return decoded;
}

private final static byte[] doDecodeFast(byte[] sArr, int sLen) {
// Check special case
if (sLen == 0)
return new byte[0];
Expand Down Expand Up @@ -280,4 +298,4 @@ public final static byte[] decodeFast(byte[] sArr, int sLen)
return dArr;
}

}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
package com.twitter.elephantbird.mapreduce.input;

import static org.junit.Assert.*;

import com.twitter.elephantbird.mapreduce.io.DecodeException;
import org.junit.Test;
import java.nio.charset.Charset;

/**
 * Unit tests for the {@code decode} and {@code decodeFast} entry points of {@code Base64Codec}.
 *
 * <p>Every test works from one fixed plain-text sentence and its Base64 encoding (produced
 * without line separators). The tests check round-tripping, the handling of illegal characters
 * placed before, after, or between encoded payloads, and that bad arguments surface as
 * {@link DecodeException}.
 */
public class TestBase64Codec {

  private static final String TEST_STRING = "The quick brown fox jumps over the lazy dog.";
  private static final Charset UTF8 = Charset.forName("UTF-8");
  /** UTF-8 bytes of {@link #TEST_STRING}; the expected result of a successful decode. */
  private static final byte[] PLAIN = TEST_STRING.getBytes(UTF8);
  /** Base64 encoding of {@link #PLAIN}, encoded with the line-separator flag set to false. */
  private static final byte[] ENCODED = Base64Codec.encodeToByte(PLAIN, false);

  /** Returns a new array holding the bytes of {@code a} followed by the bytes of {@code b}. */
  private static byte[] concat(byte[] a, byte[] b) {
    byte[] c = new byte[a.length + b.length];
    System.arraycopy(a, 0, c, 0, a.length);
    System.arraycopy(b, 0, c, a.length, b.length);
    return c;
  }

  /** A clean encoded payload decodes back to the original bytes. */
  @Test
  public void testDecode() throws DecodeException {
    assertArrayEquals(PLAIN, Base64Codec.decode(ENCODED));
  }

  /** Illegal characters between two payloads change the decoded length. */
  @Test
  public void testDecodeIllegal() throws DecodeException {
    byte[] illegal = "%$%".getBytes(UTF8);
    byte[] merged = concat(ENCODED, concat(illegal, ENCODED));
    // illegal characters in the middle are not ignored
    assertFalse(concat(PLAIN, PLAIN).length == Base64Codec.decode(merged).length);
  }

  /** Illegal characters in front of the payload do not affect the decoded bytes. */
  @Test
  public void testDecodeIllegalLeading() throws DecodeException {
    byte[] leading = "%$%".getBytes(UTF8);
    byte[] merged = concat(leading, ENCODED);
    assertArrayEquals(PLAIN, Base64Codec.decode(merged));
  }

  /** Illegal characters after the payload do not affect the decoded bytes. */
  @Test
  public void testDecodeIllegalTrailing() throws DecodeException {
    byte[] trailing = "%$%".getBytes(UTF8);
    byte[] merged = concat(ENCODED, trailing);
    assertArrayEquals(PLAIN, Base64Codec.decode(merged));
  }

  /** A length that runs one byte past the end of the array is reported as a DecodeException. */
  @Test(expected=DecodeException.class)
  public void testDecodeInvalidLength() throws DecodeException {
    Base64Codec.decode(ENCODED, 0, ENCODED.length + 1); // incorrect length
  }

  // tests for the fast decode version:

  /** A clean encoded payload decodes back to the original bytes via the fast path. */
  @Test
  public void testDecodeFast() throws DecodeException {
    assertArrayEquals(PLAIN, Base64Codec.decodeFast(ENCODED, ENCODED.length));
  }

  /** Illegal characters between two payloads change the length decoded by the fast path. */
  @Test
  public void testDecodeFastIllegal() throws DecodeException {
    byte[] illegal = "%$%".getBytes(UTF8);
    byte[] merged = concat(ENCODED, concat(illegal, ENCODED));
    // illegal characters in the middle are not ignored
    // (this must go through decodeFast; it previously called decode, duplicating
    // testDecodeIllegal and leaving the fast path untested for this input)
    assertFalse(
        concat(PLAIN, PLAIN).length == Base64Codec.decodeFast(merged, merged.length).length);
  }

  /** Illegal characters in front of the payload do not affect the fast-path result. */
  @Test
  public void testDecodeFastIllegalLeading() throws DecodeException {
    byte[] leading = "%$%".getBytes(UTF8);
    byte[] merged = concat(leading, ENCODED);
    assertArrayEquals(PLAIN, Base64Codec.decodeFast(merged, merged.length));
  }

  /** Illegal characters after the payload do not affect the fast-path result. */
  @Test
  public void testDecodeFastIllegalTrailing() throws DecodeException {
    byte[] trailing = "%$%".getBytes(UTF8);
    byte[] merged = concat(ENCODED, trailing);
    assertArrayEquals(PLAIN, Base64Codec.decodeFast(merged, merged.length));
  }

  /** A zero length yields an empty result. */
  @Test
  public void testDecodeFastZeroLength() throws DecodeException {
    assertEquals(0, Base64Codec.decodeFast(new byte[0], 0).length);
  }

  /** A null source with a non-zero length is reported as a DecodeException. */
  @Test(expected=DecodeException.class)
  public void testDecodeFastNullWithNonZeroLength() throws DecodeException {
    Base64Codec.decodeFast(null, 100);
  }

  /** A length that runs one byte past the end of the array is reported as a DecodeException. */
  @Test(expected=DecodeException.class)
  public void testDecodeFastInvalidLength() throws DecodeException {
    Base64Codec.decodeFast(ENCODED, ENCODED.length + 1); // incorrect length
  }
}

0 comments on commit 8189113

Please sign in to comment.