Skip to content

Commit

Permalink
Decode yEnc filename using UTF8 with Latin1 as fallback
Browse files Browse the repository at this point in the history
  • Loading branch information
Safihre committed Feb 18, 2023
1 parent 36bc90d commit 3da250f
Show file tree
Hide file tree
Showing 4 changed files with 130 additions and 114 deletions.
2 changes: 1 addition & 1 deletion src/sabctools.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,6 @@
#include <string.h>

/* Version information */
#define SABCTOOLS_VERSION "6.0.0"
#define SABCTOOLS_VERSION "6.1.0"

PyMODINIT_FUNC PyInit_sabctools(void);
8 changes: 7 additions & 1 deletion src/yenc.cc
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,13 @@ PyObject* yenc_decode(PyObject* self, PyObject* Py_bytesarray_obj) {
// Extract filename
cur_char = start_loc;
for (; *cur_char != YENC_LF && *cur_char != YENC_CR && *cur_char != YENC_ZERO && cur_char < end_loc; cur_char++);
Py_output_filename = PyUnicode_DecodeLatin1(start_loc, cur_char - start_loc, NULL);
Py_output_filename = PyUnicode_DecodeUTF8(start_loc, cur_char - start_loc, NULL);

// In case it's invalid UTF8, we try the latin1 fallback
if (!Py_output_filename) {
PyErr_Clear();
Py_output_filename = PyUnicode_DecodeLatin1(start_loc, cur_char - start_loc, NULL);
}

// Check for =ypart, so we know where to start with decoding
start_loc = my_memstr(cur_char, end_loc - cur_char, "=ypart ", 1);
Expand Down
230 changes: 118 additions & 112 deletions tests/test_decoder.py
Original file line number Diff line number Diff line change
@@ -1,112 +1,118 @@
import sys
import pytest
import glob
from tests.testsupport import *


def test_regular():
    """Assert `python_yenc` and `sabctools_yenc_wrapper` agree on both regular sample files."""
    for sample_name in ("test_regular.txt", "test_regular_2.txt"):
        raw_article = read_plain_yenc_file(sample_name)
        # python_yenc is called first, preserving the original left-to-right evaluation;
        # both helpers receive the very same buffer object.
        reference = python_yenc(raw_article)
        assert reference == sabctools_yenc_wrapper(raw_article)


def test_partial():
    """Decode `test_partial.txt` and pin the returned filename, CRC value and payload length."""
    expected_name = "90E2Sdvsmds0801dvsmds90E.part06.rar"
    expected_length = 549

    raw_article = read_plain_yenc_file("test_partial.txt")
    payload, name, crc_value = sabctools_yenc_wrapper(raw_article)

    assert name == expected_name
    assert crc_value is None
    assert len(payload) == expected_length


def test_special_chars():
    """Assert `python_yenc` and `sabctools_yenc_wrapper` agree on `test_special_chars.txt`."""
    raw_article = read_plain_yenc_file("test_special_chars.txt")
    # NOTE(review): the original comment says only the data and the filename are compared;
    # the assertion compares whatever both helpers return - confirm against testsupport.
    reference = python_yenc(raw_article)
    assert reference == sabctools_yenc_wrapper(raw_article)


def test_bad_crc():
    """Assert `python_yenc` and `sabctools_yenc_wrapper` agree on `test_badcrc.txt`."""
    raw_article = read_plain_yenc_file("test_badcrc.txt")
    # NOTE(review): the original comment says only the data and the filename are compared;
    # the assertion compares whatever both helpers return - confirm against testsupport.
    reference = python_yenc(raw_article)
    assert reference == sabctools_yenc_wrapper(raw_article)


def test_bad_crc_end():
    """Decoding `test_bad_crc_end.txt` must raise ValueError mentioning "Invalid CRC in footer"."""
    raw_article = read_plain_yenc_file("test_bad_crc_end.txt")
    # `match` runs re.search over str(exception); the pattern has no regex
    # metacharacters, so this is the same substring check as before.
    with pytest.raises(ValueError, match="Invalid CRC in footer"):
        sabctools_yenc_wrapper(raw_article)


def test_no_filename():
    """Decoding `test_no_name.txt` must raise ValueError mentioning "Could not find yEnc filename"."""
    raw_article = read_plain_yenc_file("test_no_name.txt")
    # `match` runs re.search over str(exception); the pattern has no regex
    # metacharacters, so this is the same substring check as before.
    with pytest.raises(ValueError, match="Could not find yEnc filename"):
        sabctools_yenc_wrapper(raw_article)


def test_end_after_filename():
    """Decoding `test_end_after_filename.txt` must raise ValueError (message is not checked)."""
    truncated_article = read_plain_yenc_file("test_end_after_filename.txt")
    with pytest.raises(ValueError):
        sabctools_yenc_wrapper(truncated_article)


def test_bad_size():
    """`sabctools.yenc_decode` on an empty bytearray must raise ValueError with "Invalid data length"."""
    # `match` runs re.search over str(exception); the pattern has no regex
    # metacharacters, so this is the same substring check as before.
    with pytest.raises(ValueError, match="Invalid data length"):
        sabctools.yenc_decode(bytearray())


def test_ref_counts():
    """Check reference counts after successful and failing decodes.

    Keep in mind that sys.getrefcount itself adds another reference!
    No intermediate containers are introduced here on purpose: any extra
    object holding these values would change the counts being asserted.
    """
    # Successful decode
    article = read_plain_yenc_file("test_regular.txt")
    decoded, name, crc_value = sabctools_yenc_wrapper(article)
    # article and decoded point to the same data, hence the count of 3 for both
    assert sys.getrefcount(article) == 3
    assert sys.getrefcount(decoded) == 3
    assert sys.getrefcount(name) == 2
    assert sys.getrefcount(crc_value) == 2

    # Failure straight away: the count must be the same before and after
    bogus_input = bytearray(b"1234")
    assert sys.getrefcount(bogus_input) == 2
    with pytest.raises(ValueError):
        sabctools.yenc_decode(bogus_input)
    assert sys.getrefcount(bogus_input) == 2

    # Failure further into processing (same local name is rebound, as in the original)
    article = read_plain_yenc_file("test_bad_crc_end.txt")
    with pytest.raises(ValueError):
        sabctools_yenc_wrapper(article)
    assert sys.getrefcount(article) == 2


def test_bad_filename_pickle():
    """Decode the pickled `split_filename` sample and pin filename, CRC value and payload length.

    Per the original note, the old yEnc implementation failed on this sample in a different way.
    """
    expected_name = "Low.Winter.Sun.US.S01E01.720p.BluRay.x264-DEMAND.part04.rar"
    expected_length = 384126

    raw_article = read_pickle("tests/yencfiles/split_filename")
    payload, name, crc_value = sabctools_yenc_wrapper(raw_article)

    assert name == expected_name
    assert crc_value is None
    assert len(payload) == expected_length


def test_crc_pickles():
    """Assert both decoders agree on every pickled sample matching `tests/yencfiles/crc_*`."""
    for pickle_path in glob.glob("tests/yencfiles/crc_*"):
        raw_article = read_pickle(pickle_path)
        # python_yenc is called first, preserving the original left-to-right evaluation.
        reference = python_yenc(raw_article)
        assert reference == sabctools_yenc_wrapper(raw_article)


def test_huge_file_pickles():
    """Assert both decoders agree on every pickled sample matching `tests/yencfiles/huge_file*`."""
    for pickle_path in glob.glob("tests/yencfiles/huge_file*"):
        raw_article = read_pickle(pickle_path)
        # python_yenc is called first, preserving the original left-to-right evaluation.
        reference = python_yenc(raw_article)
        assert reference == sabctools_yenc_wrapper(raw_article)


def test_small_file_pickles():
    """Assert both decoders agree on every pickled sample matching `tests/yencfiles/small_file*`."""
    for pickle_path in glob.glob("tests/yencfiles/small_file*"):
        raw_article = read_pickle(pickle_path)
        # python_yenc is called first, preserving the original left-to-right evaluation.
        reference = python_yenc(raw_article)
        assert reference == sabctools_yenc_wrapper(raw_article)
import sys
import pytest
import glob
from tests.testsupport import *


def test_regular():
    """Assert `python_yenc` and `sabctools_yenc_wrapper` agree on both regular sample files."""
    for sample_name in ("test_regular.txt", "test_regular_2.txt"):
        raw_article = read_plain_yenc_file(sample_name)
        # python_yenc is called first, preserving the original left-to-right evaluation;
        # both helpers receive the very same buffer object.
        reference = python_yenc(raw_article)
        assert reference == sabctools_yenc_wrapper(raw_article)


def test_partial():
    """Decode `test_partial.txt` and pin the returned filename, CRC value and payload length."""
    expected_name = "90E2Sdvsmds0801dvsmds90E.part06.rar"
    expected_length = 549

    raw_article = read_plain_yenc_file("test_partial.txt")
    payload, name, crc_value = sabctools_yenc_wrapper(raw_article)

    assert name == expected_name
    assert crc_value is None
    assert len(payload) == expected_length


def test_special_chars():
    """Check decoding of articles whose filename contains special characters.

    Two inputs are exercised:

    * `test_special_chars.txt`, compared between `python_yenc` and
      `sabctools_yenc_wrapper`;
    * an inline article whose `name=` field carries UTF-8 encoded bytes,
      compared the same way and additionally checked against the expected
      decoded filename.
    """
    data_plain = read_plain_yenc_file("test_special_chars.txt")
    # python_yenc is evaluated first (left operand), on the same buffer object
    # that is then handed to the wrapper.
    assert python_yenc(data_plain) == sabctools_yenc_wrapper(data_plain)

    # The name= field holds the UTF-8 byte sequences for U+4F60 U+597D U+4E16 U+754C.
    data_plain = bytearray(
        b"=ybegin part=1 total=1 line=128 size=6 name=Hi Kingdom \xe4\xbd\xa0\xe5\xa5\xbd\xe4\xb8\x96\xe7\x95\x8c.txt\r\n=ypart begin=1 end=6\r\nr\x8f\x96\x96\x994\r\n=yend size=6 part=1 pcrc32=31963516 crc32=31963516\r\n"
    )
    expected = python_yenc(data_plain)
    result = sabctools_yenc_wrapper(data_plain)
    assert expected == result

    # Parity alone would still pass if both decoders mis-decoded the name in the
    # same way, so pin the UTF-8 decoded filename explicitly.
    _, filename, _ = result
    assert filename == "Hi Kingdom \u4f60\u597d\u4e16\u754c.txt"


def test_bad_crc():
    """Assert `python_yenc` and `sabctools_yenc_wrapper` agree on `test_badcrc.txt`."""
    raw_article = read_plain_yenc_file("test_badcrc.txt")
    # NOTE(review): the original comment says only the data and the filename are compared;
    # the assertion compares whatever both helpers return - confirm against testsupport.
    reference = python_yenc(raw_article)
    assert reference == sabctools_yenc_wrapper(raw_article)


def test_bad_crc_end():
    """Decoding `test_bad_crc_end.txt` must raise ValueError mentioning "Invalid CRC in footer"."""
    raw_article = read_plain_yenc_file("test_bad_crc_end.txt")
    # `match` runs re.search over str(exception); the pattern has no regex
    # metacharacters, so this is the same substring check as before.
    with pytest.raises(ValueError, match="Invalid CRC in footer"):
        sabctools_yenc_wrapper(raw_article)


def test_no_filename():
    """Decoding `test_no_name.txt` must raise ValueError mentioning "Could not find yEnc filename"."""
    raw_article = read_plain_yenc_file("test_no_name.txt")
    # `match` runs re.search over str(exception); the pattern has no regex
    # metacharacters, so this is the same substring check as before.
    with pytest.raises(ValueError, match="Could not find yEnc filename"):
        sabctools_yenc_wrapper(raw_article)


def test_end_after_filename():
    """Decoding `test_end_after_filename.txt` must raise ValueError (message is not checked)."""
    truncated_article = read_plain_yenc_file("test_end_after_filename.txt")
    with pytest.raises(ValueError):
        sabctools_yenc_wrapper(truncated_article)


def test_bad_size():
    """`sabctools.yenc_decode` on an empty bytearray must raise ValueError with "Invalid data length"."""
    # `match` runs re.search over str(exception); the pattern has no regex
    # metacharacters, so this is the same substring check as before.
    with pytest.raises(ValueError, match="Invalid data length"):
        sabctools.yenc_decode(bytearray())


def test_ref_counts():
    """Check reference counts after successful and failing decodes.

    Keep in mind that sys.getrefcount itself adds another reference!
    No intermediate containers are introduced here on purpose: any extra
    object holding these values would change the counts being asserted.
    """
    # Successful decode
    article = read_plain_yenc_file("test_regular.txt")
    decoded, name, crc_value = sabctools_yenc_wrapper(article)
    # article and decoded point to the same data, hence the count of 3 for both
    assert sys.getrefcount(article) == 3
    assert sys.getrefcount(decoded) == 3
    assert sys.getrefcount(name) == 2
    assert sys.getrefcount(crc_value) == 2

    # Failure straight away: the count must be the same before and after
    bogus_input = bytearray(b"1234")
    assert sys.getrefcount(bogus_input) == 2
    with pytest.raises(ValueError):
        sabctools.yenc_decode(bogus_input)
    assert sys.getrefcount(bogus_input) == 2

    # Failure further into processing (same local name is rebound, as in the original)
    article = read_plain_yenc_file("test_bad_crc_end.txt")
    with pytest.raises(ValueError):
        sabctools_yenc_wrapper(article)
    assert sys.getrefcount(article) == 2


def test_bad_filename_pickle():
    """Decode the pickled `split_filename` sample and pin filename, CRC value and payload length.

    Per the original note, the old yEnc implementation failed on this sample in a different way.
    """
    expected_name = "Low.Winter.Sun.US.S01E01.720p.BluRay.x264-DEMAND.part04.rar"
    expected_length = 384126

    raw_article = read_pickle("tests/yencfiles/split_filename")
    payload, name, crc_value = sabctools_yenc_wrapper(raw_article)

    assert name == expected_name
    assert crc_value is None
    assert len(payload) == expected_length


def test_crc_pickles():
    """Assert both decoders agree on every pickled sample matching `tests/yencfiles/crc_*`."""
    for pickle_path in glob.glob("tests/yencfiles/crc_*"):
        raw_article = read_pickle(pickle_path)
        # python_yenc is called first, preserving the original left-to-right evaluation.
        reference = python_yenc(raw_article)
        assert reference == sabctools_yenc_wrapper(raw_article)


def test_huge_file_pickles():
    """Assert both decoders agree on every pickled sample matching `tests/yencfiles/huge_file*`."""
    for pickle_path in glob.glob("tests/yencfiles/huge_file*"):
        raw_article = read_pickle(pickle_path)
        # python_yenc is called first, preserving the original left-to-right evaluation.
        reference = python_yenc(raw_article)
        assert reference == sabctools_yenc_wrapper(raw_article)


def test_small_file_pickles():
    """Assert both decoders agree on every pickled sample matching `tests/yencfiles/small_file*`."""
    for pickle_path in glob.glob("tests/yencfiles/small_file*"):
        raw_article = read_pickle(pickle_path)
        # python_yenc is called first, preserving the original left-to-right evaluation.
        reference = python_yenc(raw_article)
        assert reference == sabctools_yenc_wrapper(raw_article)
4 changes: 4 additions & 0 deletions tests/yencfiles/test_special_utf8_chars.bin
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
=ybegin part=1 total=1 line=128 size=6 name=Hi Kingdom 你好世界.txt
=ypart begin=1 end=6
r����4
=yend size=6 part=1 pcrc32=31963516 crc32=31963516

0 comments on commit 3da250f

Please sign in to comment.