diff options
author | Mike Gilbert <floppym@gentoo.org> | 2024-03-01 10:45:58 -0500 |
---|---|---|
committer | Mike Gilbert <floppym@gentoo.org> | 2024-03-15 16:05:34 -0400 |
commit | 23529ee81964665107400e87fc3d49c256e915c0 (patch) | |
tree | 73a18240064bc913dd2c35216981d50d6850e062 /src | |
parent | bin/ebuild: Discard merge-wait from FEATURES (diff) | |
download | portage-23529ee81964665107400e87fc3d49c256e915c0.tar.gz portage-23529ee81964665107400e87fc3d49c256e915c0.tar.bz2 portage-23529ee81964665107400e87fc3d49c256e915c0.zip |
Replace linux_reflink extension module
Python 3.8 added support for copy_file_range in the os module,
so we can just call that directly.
Also, we can use the FICLONE ioctl for fast file clones on supported
filesystems (btrfs).
Signed-off-by: Mike Gilbert <floppym@gentoo.org>
Diffstat (limited to 'src')
-rw-r--r-- | src/meson.build | 20 | ||||
-rw-r--r-- | src/portage_util_file_copy_reflink_linux.c | 396 |
2 files changed, 0 insertions, 416 deletions
diff --git a/src/meson.build b/src/meson.build index 6a36724ce..0220e8d56 100644 --- a/src/meson.build +++ b/src/meson.build @@ -19,23 +19,3 @@ run_command( capture : false, check : true ) - -if host_machine.system() == 'linux' - reflink_ext = py.extension_module( - 'reflink_linux', - 'portage_util_file_copy_reflink_linux.c', - dependencies : py.dependency(), - subdir : 'portage' / 'util' / 'file_copy', - install : true - ) - - run_command( - [ - 'ln', '-srnf', - reflink_ext.full_path(), - meson.project_source_root() / 'lib' / 'portage' / 'util' / 'file_copy/' - ], - capture : false, - check : true - ) -endif diff --git a/src/portage_util_file_copy_reflink_linux.c b/src/portage_util_file_copy_reflink_linux.c deleted file mode 100644 index e98db3db8..000000000 --- a/src/portage_util_file_copy_reflink_linux.c +++ /dev/null @@ -1,396 +0,0 @@ -/* Copyright 2017-2023 Gentoo Authors - * Distributed under the terms of the GNU General Public License v2 - */ - -#include <Python.h> -#include <errno.h> -#include <stdlib.h> -#include <ctype.h> -#include <sys/sendfile.h> -#include <sys/stat.h> -#include <sys/syscall.h> -#include <sys/types.h> -#include <unistd.h> - -static PyObject * _reflink_linux_file_copy(PyObject *, PyObject *); - -static PyMethodDef reflink_linuxMethods[] = { - { - .ml_name = "file_copy", - .ml_meth = _reflink_linux_file_copy, - .ml_flags = METH_VARARGS, - .ml_doc = "Copy between two file descriptors with reflink and sparse file support." - }, - {NULL, NULL, 0, NULL} -}; - -static struct PyModuleDef moduledef = { - PyModuleDef_HEAD_INIT, - .m_name = "reflink_linux", - .m_doc = "Module for reflink_linux copy operations", - .m_size = -1, - .m_methods = reflink_linuxMethods, -}; - -PyMODINIT_FUNC -PyInit_reflink_linux(void) -{ - PyObject *m; - m = PyModule_Create(&moduledef); - return m; -} - - -/** - * cfr_wrapper - A copy_file_range syscall wrapper function, having a - * function signature that is compatible with sf_wrapper. - * @fd_out: output file descriptor - * @fd_in: input file descriptor - * @off_out: must point to a buffer that specifies the starting offset - * where bytes will be copied to fd_out, and this buffer is adjusted by - * the number of bytes copied. - * @len: number of bytes to copy between the file descriptors - * - * Bytes are copied from fd_in starting from *off_out, and the file - * offset of fd_in is not changed. Effects on the file offset of - * fd_out are undefined. - * - * Return: Number of bytes written to out_fd on success, -1 on failure - * (errno is set appropriately). - */ -static ssize_t -cfr_wrapper(int fd_out, int fd_in, off_t *off_out, size_t len) -{ -#ifdef __NR_copy_file_range - off_t off_in = *off_out; - return syscall(__NR_copy_file_range, fd_in, &off_in, fd_out, - off_out, len, 0); -#else - /* This is how it fails at runtime when the syscall is not supported. */ - errno = ENOSYS; - return -1; -#endif -} - -/** - * sf_wrapper - A sendfile wrapper function, having a function signature - * that is compatible with cfr_wrapper. - * @fd_out: output file descriptor - * @fd_in: input file descriptor - * @off_out: must point to a buffer that specifies the starting offset - * where bytes will be copied to fd_out, and this buffer is adjusted by - * the number of bytes copied. - * @len: number of bytes to copy between the file descriptors - * - * Bytes are copied from fd_in starting from *off_out, and the file - * offset of fd_in is not changed. Effects on the file offset of - * fd_out are undefined. - * - * Return: Number of bytes written to out_fd on success, -1 on failure - * (errno is set appropriately). - */ -static ssize_t -sf_wrapper(int fd_out, int fd_in, off_t *off_out, size_t len) -{ - ssize_t ret; - off_t off_in = *off_out; - /* The sendfile docs do not specify behavior of the output file - * offset, therefore it must be adjusted with lseek. - */ - if (lseek(fd_out, *off_out, SEEK_SET) < 0) - return -1; - ret = sendfile(fd_out, fd_in, &off_in, len); - if (ret > 0) - *off_out += ret; - return ret; -} - - -/** - * do_lseek_data - Adjust file offsets to the next location containing - * data, creating sparse empty blocks in the output file as needed. - * @fd_in: input file descriptor - * @fd_out: output file descriptor - * @off_out: offset of the output file - * - * Use lseek SEEK_DATA to adjust off_out to the next location from fd_in - * containing data (creates sparse empty blocks when appropriate). Effects - * on file offsets are undefined. - * - * Return: On success, the number of bytes to copy before the next hole, - * and -1 on failure (errno is set appropriately). Returns 0 when fd_in - * reaches EOF. - */ -static off_t -do_lseek_data(int fd_out, int fd_in, off_t *off_out) { -#ifdef SEEK_DATA - /* Use lseek SEEK_DATA/SEEK_HOLE for sparse file support, - * as suggested in the copy_file_range man page. - */ - off_t offset_data, offset_hole; - - offset_data = lseek(fd_in, *off_out, SEEK_DATA); - if (offset_data < 0) { - if (errno == ENXIO) { - /* EOF - If the file ends with a hole, then use lseek SEEK_END - * to find the end offset, and create sparse empty blocks in - * the output file. It's the caller's responsibility to - * truncate the file. - */ - offset_hole = lseek(fd_in, 0, SEEK_END); - if (offset_hole < 0) { - return -1; - } else if (offset_hole != *off_out) { - if (lseek(fd_out, offset_hole, SEEK_SET) < 0) { - return -1; - } - *off_out = offset_hole; - } - return 0; - } - return -1; - } - - /* Create sparse empty blocks in the output file, up - * until the next location that will contain data. - */ - if (offset_data != *off_out) { - if (lseek(fd_out, offset_data, SEEK_SET) < 0) { - return -1; - } - *off_out = offset_data; - } - - /* Locate the next hole, so that we know when to - * stop copying. There is an implicit hole at the - * end of the file. This should never result in ENXIO - * after SEEK_DATA has succeeded above. - */ - offset_hole = lseek(fd_in, offset_data, SEEK_HOLE); - if (offset_hole < 0) { - return -1; - } - - return offset_hole - offset_data; -#else - /* This is how it fails at runtime when lseek SEEK_DATA is not supported. */ - errno = EINVAL; - return -1; -#endif -} - - -/** - * _reflink_linux_file_copy - Copy between two file descriptors, with - * reflink and sparse file support. - * @fd_in: input file descriptor - * @fd_out: output file descriptor - * - * When supported, this uses copy_file_range for reflink support, - * and lseek SEEK_DATA for sparse file support. It has graceful - * fallbacks when support is unavailable for copy_file_range, lseek - * SEEK_DATA, or sendfile operations. When all else fails, it uses - * a plain read/write loop that works in any kernel version. - * - * If a syscall is interrupted by a signal, then the function will - * automatically resume copying a the appropriate location which is - * tracked internally by the offset_out variable. - * - * Return: The length of the output file on success. Raise OSError - * on failure. - */ -static PyObject * -_reflink_linux_file_copy(PyObject *self, PyObject *args) -{ - int eintr_retry, error, fd_in, fd_out, stat_in_acquired, stat_out_acquired; - int lseek_works, sendfile_works; - off_t offset_out, len; - ssize_t buf_bytes, buf_offset, copyfunc_ret; - struct stat stat_in, stat_out; - char* buf; - ssize_t (*copyfunc)(int, int, off_t *, size_t); - - if (!PyArg_ParseTuple(args, "ii", &fd_in, &fd_out)) - return NULL; - - eintr_retry = 1; - error = 0; - offset_out = 0; - stat_in_acquired = 0; - stat_out_acquired = 0; - buf = NULL; - buf_bytes = 0; - buf_offset = 0; - copyfunc = cfr_wrapper; - lseek_works = 1; - sendfile_works = 1; - - while (eintr_retry) { - - Py_BEGIN_ALLOW_THREADS - - /* Linux 3.1 and later support SEEK_DATA (for sparse file support). - * This code uses copy_file_range if possible, and falls back to - * sendfile for cross-device or when the copy_file_range syscall - * is not available (less than Linux 4.5). This will fail for - * Linux less than 3.1, which does not support the lseek SEEK_DATA - * parameter. - */ - if (sendfile_works && lseek_works) { - error = 0; - - while (1) { - len = do_lseek_data(fd_out, fd_in, &offset_out); - if (!len) { - /* EOF */ - break; - } else if (len < 0) { - error = errno; - if ((errno == EINVAL || errno == EOPNOTSUPP) && !offset_out) { - lseek_works = 0; - } - break; - } - - copyfunc_ret = copyfunc(fd_out, - fd_in, - &offset_out, - len); - - if (copyfunc_ret <= 0) { - error = errno; - if ((errno == EXDEV || errno == ENOSYS || errno == EOPNOTSUPP || copyfunc_ret == 0) && - copyfunc == cfr_wrapper) { - /* Use sendfile instead of copy_file_range for - * cross-device copies, or when the copy_file_range - * syscall is not available (less than Linux 4.5), - * or when copy_file_range copies zero bytes. - */ - error = 0; - copyfunc = sf_wrapper; - copyfunc_ret = copyfunc(fd_out, - fd_in, - &offset_out, - len); - - if (copyfunc_ret < 0) { - error = errno; - /* On Linux, if lseek succeeded above, then - * sendfile should have worked here too, so - * don't bother to fallback for EINVAL here. - */ - break; - } - } else { - break; - } - } - } - } - - /* Less than Linux 3.1 does not support SEEK_DATA or copy_file_range, - * so just use sendfile for in-kernel copy. This will fail for Linux - * versions from 2.6.0 to 2.6.32, because sendfile does not support - * writing to regular files. - */ - if (sendfile_works && !lseek_works) { - error = 0; - - if (!stat_in_acquired && fstat(fd_in, &stat_in) < 0) { - error = errno; - } else { - stat_in_acquired = 1; - - while (offset_out < stat_in.st_size) { - copyfunc_ret = sf_wrapper(fd_out, - fd_in, - &offset_out, - stat_in.st_size - offset_out); - - if (copyfunc_ret < 0) { - error = errno; - if (errno == EINVAL && !offset_out) { - sendfile_works = 0; - } - break; - } - } - } - } - - /* This implementation will work on any kernel. */ - if (!sendfile_works) { - error = 0; - - if (!stat_out_acquired && fstat(fd_in, &stat_out) < 0) { - error = errno; - } else { - stat_out_acquired = 1; - if (buf == NULL) - buf = malloc(stat_out.st_blksize); - if (buf == NULL) { - error = errno; - - /* For the read call, the fd_in file offset must be exactly - * equal to offset_out + buf_bytes, where buf_bytes is the - * amount of buffered data that has not been written to - * to the output file yet. Use lseek to ensure correct state, - * in case an EINTR retry caused it to get out of sync - * somewhow. - */ - } else if (lseek(fd_in, offset_out + buf_bytes, SEEK_SET) < 0) { - error = errno; - } else { - while (1) { - /* Some bytes may still be buffered from the - * previous iteration of the outer loop. - */ - if (!buf_bytes) { - buf_offset = 0; - buf_bytes = read(fd_in, buf, stat_out.st_blksize); - - if (!buf_bytes) { - /* EOF */ - break; - - } else if (buf_bytes < 0) { - error = errno; - buf_bytes = 0; - break; - } - } - - copyfunc_ret = write(fd_out, - buf + buf_offset, - buf_bytes); - - if (copyfunc_ret < 0) { - error = errno; - break; - } - - buf_bytes -= copyfunc_ret; - buf_offset += copyfunc_ret; - offset_out += copyfunc_ret; - } - } - } - } - - if (!error && ftruncate(fd_out, offset_out) < 0) - error = errno; - - Py_END_ALLOW_THREADS - - if (!(error == EINTR && PyErr_CheckSignals() == 0)) - eintr_retry = 0; - } - - if (buf != NULL) - free(buf); - - if (error) - return PyErr_SetFromErrno(PyExc_OSError); - - return Py_BuildValue("i", offset_out); -} |