Skip to content

Commit

Permalink
Create sparse files on Windows and read yEnc begin and end (#89)
Browse files Browse the repository at this point in the history
* Add reading of yenc start and end

* Update output based on feedback

* Add method for creating sparse files

* Use same platform test as elsewhere

* Add test that file is sparse

* Only close file on Windows

* Init msvcrt module once and simplify calls

* Use CallMethod when no arguments are needed

* Add some error checking

* On Windows still set the file length even if it couldn't be made sparse

* Remove length check since it depends on filesystem

* Truncate already sets error

* Simplify is_sparse check by using os.stat to check if allocated space is less than file size

* Only set file length if making sparse succeeds

* Seek back to the original position after setting file length

* Test file position is unchanged

* Set version to 7.0.0

---------

Co-authored-by: Safihre <[email protected]>
  • Loading branch information
mnightingale and Safihre authored Apr 17, 2023
1 parent 5d1a12c commit c0f3b40
Show file tree
Hide file tree
Showing 11 changed files with 242 additions and 154 deletions.
6 changes: 6 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,12 @@ def build_extension(self, ext: Extension):
"gcc_flags": ["-Wno-unused-parameter"],
"include_dirs": ["src/crcutil-1.0/code", "src/crcutil-1.0/examples"],
},
{
"sources": [
"src/sparse.cc",
],
"gcc_flags": ["-Wno-unused-parameter"],
},
]:
args = {
"sources": source_files["sources"],
Expand Down
8 changes: 8 additions & 0 deletions src/sabctools.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include "yenc.h"
#include "unlocked_ssl.h"
#include "crc32.h"
#include "sparse.h"

/* Function and exception declarations */
PyMODINIT_FUNC PyInit_sabctools(void);
Expand Down Expand Up @@ -74,6 +75,12 @@ static PyMethodDef sabctools_methods[] = {
METH_O,
"crc32_xpow8n(n)"
},
{
"sparse",
sparse,
METH_VARARGS,
"sparse(handle, length)"
},
{NULL, NULL, 0, NULL}
};

Expand All @@ -92,6 +99,7 @@ PyMODINIT_FUNC PyInit_sabctools(void) {
decoder_init();
crc_init();
openssl_init();
sparse_init();

PyObject* m = PyModule_Create(&sabctools_definition);
PyModule_AddStringConstant(m, "version", SABCTOOLS_VERSION);
Expand Down
2 changes: 1 addition & 1 deletion src/sabctools.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,6 @@
#include <string.h>

/* Version information */
#define SABCTOOLS_VERSION "6.1.2"
#define SABCTOOLS_VERSION "7.0.0"

PyMODINIT_FUNC PyInit_sabctools(void);
6 changes: 4 additions & 2 deletions src/sabctools.pyi
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
from typing import Tuple, Optional
from typing import Tuple, Optional, IO
from ssl import SSLSocket

__version__: str
openssl_linked: bool
simd: str

def yenc_decode(raw_data: bytearray) -> Tuple[str, Optional[int]]: ...
def yenc_decode(raw_data: bytearray) -> Tuple[str, int, int, Optional[int]]: ...
def yenc_encode(input_string: bytes) -> Tuple[bytes, int]: ...

def unlocked_ssl_recv_into(ssl_socket: SSLSocket, buffer: memoryview) -> int: ...
Expand All @@ -15,3 +15,5 @@ def crc32_multiply(crc1: int, crc2: int) -> int: ...
def crc32_xpow8n(n: int) -> int: ...
def crc32_xpown(n: int) -> int: ...
def crc32_zero_unpad(crc1: int, length: int) -> int: ...

def sparse(file: IO, length: int) -> None: ...
103 changes: 103 additions & 0 deletions src/sparse.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
/*
* Copyright 2007-2023 The SABnzbd-Team <[email protected]>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/

#include "sparse.h"

PyObject *Py_msvcrt_module = NULL;
PyObject *get_osfhandle_string = NULL;

/*
 * One-time setup for sparse(): on Windows builds, import the Python "msvcrt"
 * module and create the "get_osfhandle" method-name string so that sparse()
 * does not have to do either on every call.
 *
 * This function cannot report failure (it returns void and is called from
 * module initialisation), so any Python exception raised here is cleared and
 * the globals are left NULL. sparse() checks for a NULL module and raises
 * "msvcrt module not loaded." in that case.
 *
 * On other platforms this is a no-op.
 */
void sparse_init()
{
#if defined(_WIN32) || defined(__CYGWIN__)
    // Already initialised: do not leak the references taken earlier
    if (Py_msvcrt_module != NULL && get_osfhandle_string != NULL)
    {
        return;
    }

    Py_XDECREF(Py_msvcrt_module);
    Py_msvcrt_module = PyImport_ImportModule("msvcrt");
    if (Py_msvcrt_module == NULL)
    {
        // Do not leave an exception pending while the module is still being initialised
        PyErr_Clear();
        return;
    }

    Py_XDECREF(get_osfhandle_string);
    get_osfhandle_string = PyUnicode_FromString("get_osfhandle");
    if (get_osfhandle_string == NULL)
    {
        // Keep both globals consistent so sparse() only has to test the module pointer
        PyErr_Clear();
        Py_CLEAR(Py_msvcrt_module);
    }
#endif
}

/*
 * sparse(file, length)
 *
 * Set the size of an open Python file object to `length` bytes without
 * writing any data, leaving the file position unchanged.
 *
 * Windows:  the OS handle is obtained through file.fileno() and
 *           msvcrt.get_osfhandle(), and the file is flagged as sparse with
 *           FSCTL_SET_SPARSE. Only when that succeeds is the size set to
 *           `length` (measured from the start of the file, like truncate()),
 *           after which the original file position is restored. A file that
 *           cannot be made sparse is left untouched and no error is raised.
 * Elsewhere: calls file.truncate(length).
 *
 * Returns None on success. On failure returns NULL with a Python exception
 * set: SystemError when the handle cannot be obtained, an OSError built from
 * the Windows error code when resizing fails, or whatever file.truncate()
 * raised.
 */
PyObject *sparse(PyObject *self, PyObject *args)
{
    PyObject *Py_file;
    long long length;

    PyObject *Py_file_fileno = NULL;
    PyObject *Py_file_handle = NULL;
    PyObject *Py_file_truncate = NULL;

#if defined(_WIN32) || defined(__CYGWIN__)
    // Declared up front (and assigned later) so that no "goto error" jumps
    // across the initialisation of a variable, which is ill-formed in C++
    long long raw_handle;
    HANDLE handle;
    DWORD bytesReturned;
    LARGE_INTEGER li_zero;
    LARGE_INTEGER li_size;
    LARGE_INTEGER li_start;
#endif

    if (!PyArg_ParseTuple(args, "OL:sparse", &Py_file, &length))
    {
        return NULL;
    }

#if defined(_WIN32) || defined(__CYGWIN__)
    // Get the windows file handle and set file attributes to sparse

    if (Py_msvcrt_module == NULL || get_osfhandle_string == NULL)
    {
        PyErr_SetString(PyExc_SystemError, "msvcrt module not loaded.");
        goto error;
    }

    if (!(Py_file_fileno = PyObject_CallMethod(Py_file, "fileno", NULL)))
    {
        PyErr_SetString(PyExc_SystemError, "Error calling fileno function.");
        goto error;
    }

    if (!(Py_file_handle = PyObject_CallMethodObjArgs(Py_msvcrt_module, get_osfhandle_string, Py_file_fileno, NULL)))
    {
        PyErr_SetString(PyExc_SystemError, "Failed calling get_osfhandle function.");
        goto error;
    }

    // -1 is also the error return value, so the error indicator decides
    raw_handle = PyLong_AsLongLong(Py_file_handle);
    if (raw_handle == -1 && PyErr_Occurred())
    {
        goto error;
    }
    handle = reinterpret_cast<HANDLE>(raw_handle);

    // Creating a sparse file may fail but that's OK
    if (DeviceIoControl(handle, FSCTL_SET_SPARSE, nullptr, 0, nullptr, 0, &bytesReturned, nullptr))
    {
        li_zero.QuadPart = 0;
        li_size.QuadPart = length;
        li_start.QuadPart = 0;

        // 1. remember the current position (move by 0 from FILE_CURRENT)
        // 2. move to the absolute offset `length`; FILE_BEGIN rather than
        //    FILE_END so the resulting size is `length` even when the file
        //    already has content, matching truncate(length) on other platforms
        // 3. make that offset the end of the file
        // 4. seek back to the remembered position
        if (!SetFilePointerEx(handle, li_zero, &li_start, FILE_CURRENT)
            || !SetFilePointerEx(handle, li_size, nullptr, FILE_BEGIN)
            || !SetEndOfFile(handle)
            || !SetFilePointerEx(handle, li_start, nullptr, FILE_BEGIN))
        {
            PyErr_SetFromWindowsErr(0);
            goto error;
        }
    }
#else
    // Call file.truncate(length); it sets the Python exception itself on failure

    if (!(Py_file_truncate = PyObject_CallMethod(Py_file, "truncate", "(L)", length)))
    {
        goto error;
    }
#endif

    Py_XDECREF(Py_file_fileno);
    Py_XDECREF(Py_file_handle);
    Py_XDECREF(Py_file_truncate);
    Py_RETURN_NONE;

error:
    Py_XDECREF(Py_file_fileno);
    Py_XDECREF(Py_file_handle);
    Py_XDECREF(Py_file_truncate);
    return NULL;
}
31 changes: 31 additions & 0 deletions src/sparse.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
/*
* Copyright 2007-2023 The SABnzbd-Team <[email protected]>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/

#ifndef SABCTOOLS_SPARSE_H
#define SABCTOOLS_SPARSE_H

#include <Python.h>

#if defined(_WIN32) || defined(__CYGWIN__)
#include <Windows.h>
#endif

void sparse_init();
PyObject *sparse(PyObject *, PyObject *);

#endif //SABCTOOLS_SPARSE_H
26 changes: 23 additions & 3 deletions src/yenc.cc
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,6 @@ PyObject* yenc_decode(PyObject* self, PyObject* Py_bytesarray_obj) {
Py_buffer Py_buffer_obj;
PyObject *Py_output_filename = NULL;
PyObject *Py_output_crc = NULL;
int data_length;

// Used buffers
char *cur_char = NULL;
Expand All @@ -72,6 +71,9 @@ PyObject* yenc_decode(PyObject* self, PyObject* Py_bytesarray_obj) {
uint32_t crc_yenc = 0;
size_t yenc_data_length;
size_t output_len;
unsigned long long part_begin = 0;
unsigned long long part_end = 0;
unsigned long long part_size = 0;
const char* crc_pos;

// Verify it's a bytearray
Expand Down Expand Up @@ -131,9 +133,27 @@ PyObject* yenc_decode(PyObject* self, PyObject* Py_bytesarray_obj) {
Py_output_filename = PyUnicode_DecodeLatin1(start_loc, cur_char - start_loc, NULL);
}

// Check for =ypart, so we know where to start with decoding
// Check for =ypart in order to get begin/end
start_loc = my_memstr(cur_char, end_loc - cur_char, "=ypart ", 1);
if (start_loc) {
// Should be right after the "=ypart"
start_loc = my_memstr(start_loc, end_loc - start_loc, "begin=", 1);
if (start_loc) {
part_begin = atoll(start_loc);
}
start_loc = my_memstr(start_loc, end_loc - start_loc, "end=", 1);
if (start_loc) {
part_end = atoll(start_loc);
}

// Get the size and sanity check the values
part_size = part_end - part_begin + 1;
if(part_end > part_begin && part_size > 0 && part_size <= 10*1024*1024) {
part_begin = part_begin - 1;
} else {
part_size = part_end = part_begin = 0;
}

// Move to end of this line
cur_char = start_loc;
for (; *cur_char != YENC_LF && *cur_char != YENC_CR && *cur_char != YENC_ZERO && cur_char < end_loc; cur_char++);
Expand Down Expand Up @@ -195,7 +215,7 @@ PyObject* yenc_decode(PyObject* self, PyObject* Py_bytesarray_obj) {
}

// Build output
retval = Py_BuildValue("(S, N)", Py_output_filename, Py_output_crc);
retval = Py_BuildValue("(S, K, K, N)", Py_output_filename, part_begin, part_size, Py_output_crc);

finish:
Py_XDECREF(Py_output_filename);
Expand Down
17 changes: 6 additions & 11 deletions tests/test_decoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,10 @@ def test_regular():

def test_partial():
data_plain = read_plain_yenc_file("test_partial.yenc")
decoded_data, filename, crc_correct = sabctools_yenc_wrapper(data_plain)
decoded_data, filename, begin, size, crc_correct = sabctools_yenc_wrapper(data_plain)
assert filename == "90E2Sdvsmds0801dvsmds90E.part06.rar"
assert begin == 15360000
assert size == 384000
assert crc_correct is None
assert len(decoded_data) == 549

Expand Down Expand Up @@ -65,11 +67,13 @@ def test_ref_counts():
"""Note that sys.getrefcount itself adds another reference!"""
# Test regular case
data_plain = read_plain_yenc_file("test_regular.yenc")
data_out, filename, crc_correct = sabctools_yenc_wrapper(data_plain)
data_out, filename, begin, end, crc_correct = sabctools_yenc_wrapper(data_plain)
# data_plain and data_out point to the same data!
assert sys.getrefcount(data_plain) == 3
assert sys.getrefcount(data_out) == 3
assert sys.getrefcount(filename) == 2
assert sys.getrefcount(begin) == 2
assert sys.getrefcount(end) == 2
assert sys.getrefcount(crc_correct) == 2

# Test simple error case
Expand All @@ -86,15 +90,6 @@ def test_ref_counts():
assert sys.getrefcount(data_plain) == 2


def test_bad_filename_pickle():
# This one fails in the old yEnc in different way
data_plain = read_pickle("tests/yencfiles/split_filename.pickle")
decoded_data, filename, crc_correct = sabctools_yenc_wrapper(data_plain)
assert filename == "Low.Winter.Sun.US.S01E01.720p.BluRay.x264-DEMAND.part04.rar"
assert crc_correct is None
assert len(decoded_data) == 384126


def test_crc_pickles():
all_crc_fails = glob.glob("tests/yencfiles/crc_*")
for fname in all_crc_fails:
Expand Down
45 changes: 45 additions & 0 deletions tests/test_sparse.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import os
import subprocess
import sys
import tempfile
import pytest
from typing import IO

from tests.testsupport import *


def test_sparse():
    """sparse() gives a fresh temporary file the requested size and it is sparse."""
    # delete=False because is_sparse() closes the file on Windows; removal is done by hand
    tmp = tempfile.NamedTemporaryFile(delete=False)
    try:
        sabctools.sparse(tmp, 100)
        reported_size = os.path.getsize(tmp.name)
        assert reported_size == 100
        assert is_sparse(tmp) is True
    finally:
        tmp.close()
        os.unlink(tmp.name)

@pytest.mark.parametrize(
    "length,position",
    [(1024, offset) for offset in (0, 512, 4096)],
)
def test_sparse_position_expected(length, position):
    """sparse() leaves the file position where it was before the call.

    The offsets cover the start of the file, a point inside the requested
    length and a point beyond it.
    """
    with tempfile.TemporaryFile() as handle:
        handle.seek(position)
        sabctools.sparse(handle, length)
        position_after = handle.tell()
        assert position_after == position

def is_sparse(file: IO) -> bool:
    """Report whether the file behind *file* is sparse.

    On Windows the file is closed first and ``fsutil sparse queryflag`` is
    asked about ``file.name``. On other platforms the file is left open and
    is considered sparse when the space allocated for it (``st_blocks`` in
    512-byte units) is smaller than its size.
    """
    if sys.platform != "win32":
        allocated_bytes = os.stat(file.name).st_blocks * 512
        return allocated_bytes < os.path.getsize(file.name)

    file.close()
    query = subprocess.run(
        ["fsutil", "sparse", "queryflag", file.name],
        capture_output=True,
    )
    return b"This file is set as sparse" in query.stdout
19 changes: 15 additions & 4 deletions tests/testsupport.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,9 +65,9 @@ def read_pickle(filename):
return bytearray(b"".join(data_chunks))


def sabctools_yenc_wrapper(data: bytearray) -> Tuple[bytearray, str, Optional[int]]:
filename, crc_correct = sabctools.yenc_decode(data)
return data, correct_unknown_encoding(filename), crc_correct
def sabctools_yenc_wrapper(data: bytearray) -> Tuple[bytearray, str, int, int, Optional[int]]:
filename, begin, size, crc_correct = sabctools.yenc_decode(data)
return data, correct_unknown_encoding(filename), begin, size, crc_correct


def python_yenc(data_plain):
Expand Down Expand Up @@ -101,7 +101,18 @@ def python_yenc(data_plain):
translate_table = bytes.maketrans(from_bytes, to_bytes)
decoded_data = flat_yenc_data.translate(translate_table)

return decoded_data, ybegin["name"], binascii.crc32(decoded_data)
# Detect begin and ending
begin = end = size = 0
if ypart:
if begin := ypart.get("begin"):
begin = int(begin)
if end := ypart.get("end"):
end = int(end)
if end and begin:
size = end - begin + 1
begin -= 1

return decoded_data, ybegin["name"], begin, size, binascii.crc32(decoded_data)


def parse_yenc_data(data):
Expand Down
133 changes: 0 additions & 133 deletions tests/yencfiles/split_filename.pickle

This file was deleted.

0 comments on commit c0f3b40

Please sign in to comment.