Merge pull request #444 from rubanm/rubanm/base64_reader_fix

throw DecodeException in Base64Codec
twitter · May 26, 2015 · 8189113 · 8189113
2 parents a7df8e5 + aed1c1e
commit 8189113
Show file tree

Hide file tree

Showing 2 changed files with 120 additions and 4 deletions.
diff --git a/core/src/main/java/com/twitter/elephantbird/mapreduce/input/Base64Codec.java b/core/src/main/java/com/twitter/elephantbird/mapreduce/input/Base64Codec.java
@@ -2,6 +2,8 @@
 
 import java.util.Arrays;
 
+import com.twitter.elephantbird.mapreduce.io.DecodeException;
+
 /** A very fast and memory efficient class to encode and decode to and from BASE64 in full accordance
  * with RFC 2045.<br><br>
  * On Windows XP sp1 with 1.4.2_04 and later ;), this encoder and decoder is about 10 times faster
@@ -157,7 +159,7 @@ public final static byte[] encodeToByte(byte[] sArr, int sOff, int sLen, boolean
    * @return The decoded array of bytes. May be of length 0. Will be <code>null</code> if the legal characters
    * (including '=') isn't divideable by 4. (I.e. definitely corrupted).
    */
-  public final static byte[] decode(byte[] sArr)
+  public final static byte[] decode(byte[] sArr) throws DecodeException
   {
     return decode(sArr, 0, sArr.length);
   }
@@ -170,8 +172,16 @@ public final static byte[] decode(byte[] sArr)
    * @return The decoded array of bytes. May be of length 0. Will be <code>null</code> if the legal characters
    * (including '=') isn't divideable by 4. (I.e. definitely corrupted).
    */
-  public final static byte[] decode(byte[] sArr, int sOff, int sLen)
+  public final static byte[] decode(byte[] sArr, int sOff, int sLen) throws DecodeException
   {
+    try {
+      return doDecode(sArr, sOff, sLen);
+    } catch (RuntimeException e) {
+      throw new DecodeException(e);
+    }
+  }
+
+  private final static byte[] doDecode(byte[] sArr, int sOff, int sLen) {
     // Count illegal characters (including '\r', '\n') to know what size the returned array will be,
     // so we don't have to reallocate & copy it later.
     int sepCnt = 0; // Number of separator characters. (Actually illegal characters, but that's a bonus...)
@@ -225,8 +235,16 @@ public final static byte[] decode(byte[] sArr, int sOff, int sLen)
    * @param sArr The source array. Length 0 will return an empty array. <code>null</code> will throw an exception.
    * @return The decoded array of bytes. May be of length 0.
    */
-  public final static byte[] decodeFast(byte[] sArr, int sLen)
+  public final static byte[] decodeFast(byte[] sArr, int sLen) throws DecodeException
   {
+    try {
+      return doDecodeFast(sArr, sLen);
+    } catch (RuntimeException e) {
+      throw new DecodeException(e);
+    }
+  }
+
+  private final static byte[] doDecodeFast(byte[] sArr, int sLen) {
     // Check special case
     if (sLen == 0)
       return new byte[0];
@@ -280,4 +298,4 @@ public final static byte[] decodeFast(byte[] sArr, int sLen)
     return dArr;
   }
 
-}
+}
diff --git a/core/src/test/java/com/twitter/elephantbird/mapreduce/input/TestBase64Codec.java b/core/src/test/java/com/twitter/elephantbird/mapreduce/input/TestBase64Codec.java
@@ -0,0 +1,98 @@
+package com.twitter.elephantbird.mapreduce.input;
+
+import static org.junit.Assert.*;
+
+import com.twitter.elephantbird.mapreduce.io.DecodeException;
+import org.junit.Test;
+import java.nio.charset.Charset;
+
+public class TestBase64Codec {
+
+  private static final String TEST_STRING = "The quick brown fox jumps over the lazy dog.";
+  private static final Charset UTF8 = Charset.forName("UTF-8");
+  private static final byte[] PLAIN = TEST_STRING.getBytes(UTF8);
+  private static final byte[] ENCODED = Base64Codec.encodeToByte(PLAIN, false);
+
+  private static byte[] concat(byte[] a, byte[] b) {
+    byte[] c = new byte[a.length + b.length];
+    System.arraycopy(a, 0, c, 0, a.length);
+    System.arraycopy(b, 0, c, a.length, b.length);
+    return c;
+  }
+
+  @Test
+  public void testDecode() throws DecodeException {
+    assertArrayEquals(PLAIN, Base64Codec.decode(ENCODED));
+  }
+
+  @Test
+  public void testDecodeIllegal() throws DecodeException {
+    byte[] illegal = "%$%".getBytes(UTF8);
+    byte[] merged = concat(ENCODED, concat(illegal, ENCODED));
+    // illegal characters in the middle are not ignored
+    assertFalse(concat(PLAIN, PLAIN).length == Base64Codec.decode(merged).length);
+  }
+
+  @Test
+  public void testDecodeIllegalLeading() throws DecodeException {
+    byte[] leading = "%$%".getBytes(UTF8);
+    byte[] merged = concat(leading, ENCODED);
+    assertArrayEquals(PLAIN, Base64Codec.decode(merged));
+  }
+
+  @Test
+  public void testDecodeIllegalTrailing() throws DecodeException {
+    byte[] trailing = "%$%".getBytes(UTF8);
+    byte[] merged = concat(ENCODED, trailing);
+    assertArrayEquals(PLAIN, Base64Codec.decode(merged));
+  }
+
+  @Test(expected=DecodeException.class)
+  public void testDecodeInvalidLength() throws DecodeException {
+    Base64Codec.decode(ENCODED, 0, ENCODED.length + 1); // incorrect length
+  }
+
+  // tests for the fast decode version:
+
+  @Test
+  public void testDecodeFast() throws DecodeException {
+    assertArrayEquals(PLAIN, Base64Codec.decodeFast(ENCODED, ENCODED.length));
+  }
+
+  @Test
+  public void testDecodeFastIllegal() throws DecodeException {
+    byte[] illegal = "%$%".getBytes(UTF8);
+    byte[] merged = concat(ENCODED, concat(illegal, ENCODED));
+    // must exercise decodeFast (not decode): illegal characters in the middle are not ignored
+    assertFalse(concat(PLAIN, PLAIN).length == Base64Codec.decodeFast(merged, merged.length).length);
+  }
+
+  @Test
+  public void testDecodeFastIllegalLeading() throws DecodeException {
+    byte[] leading = "%$%".getBytes(UTF8);
+    byte[] merged = concat(leading, ENCODED);
+    assertArrayEquals(PLAIN, Base64Codec.decodeFast(merged, merged.length));
+  }
+
+  @Test
+  public void testDecodeFastIllegalTrailing() throws DecodeException {
+    byte[] trailing = "%$%".getBytes(UTF8);
+    byte[] merged = concat(ENCODED, trailing);
+    assertArrayEquals(PLAIN, Base64Codec.decodeFast(merged, merged.length));
+  }
+
+  @Test
+  public void testDecodeFastZeroLength() throws DecodeException {
+    assertEquals(0, Base64Codec.decodeFast(new byte[0], 0).length);
+  }
+
+  @Test(expected=DecodeException.class)
+  public void testDecodeFastNullWithNonZeroLength() throws DecodeException {
+    Base64Codec.decodeFast(null, 100);
+  }
+
+  @Test(expected=DecodeException.class)
+  public void testDecodeFastInvalidLength() throws DecodeException {
+    Base64Codec.decodeFast(ENCODED, ENCODED.length + 1); // incorrect length
+  }
+}