From 4c9ad572a923d329d8ad3ef52f1f1542f3e04bc5 Mon Sep 17 00:00:00 2001 From: Norman Feske Date: Thu, 23 Jan 2025 19:12:29 +0100 Subject: [PATCH] fixup "blit: SIMD-based back2front copy" (slow: read sequentially) Issue #5428 --- repos/os/include/blit/internal/slow.h | 124 ++++++++++---------------- 1 file changed, 45 insertions(+), 79 deletions(-) diff --git a/repos/os/include/blit/internal/slow.h b/repos/os/include/blit/internal/slow.h index 27c34c7cd7a..ab18b206e83 100644 --- a/repos/os/include/blit/internal/slow.h +++ b/repos/os/include/blit/internal/slow.h @@ -52,17 +52,23 @@ namespace Blit { struct Slow; - static inline void _sample_line(uint32_t const *src, uint32_t *dst, - unsigned len, int const step) + static inline void _write_line(uint32_t const *src, uint32_t *dst, + unsigned len, int dst_step) { - for (; len--; src += step) - *dst++ = *src; + for (; len--; dst += dst_step) + *dst = *src++; } - static inline void _copy_line(uint32_t const *src, uint32_t *dst, unsigned len) + static inline void _write_lines(uint32_t const *src, unsigned src_w, + uint32_t *dst, + unsigned w, unsigned h, int dx, int dy) { - _sample_line(src, dst, len, 1); - } + for (unsigned lines = h*16; lines; lines--) { + _write_line(src, dst, 16*w, dx); + src += 16*src_w; + dst += dy; + } + }; } @@ -86,107 +92,67 @@ struct Blit::Slow }; -void Blit::Slow::B2f::r0(uint32_t *dst, unsigned const line_w, - uint32_t const *src, unsigned const w, unsigned const h) +void Blit::Slow::B2f::r0(uint32_t *dst, unsigned line_w, + uint32_t const *src, unsigned w, unsigned h) { - for (unsigned lines = h*16; lines; lines--) { - _copy_line(src, dst, 16*w); - src += 16*line_w; - dst += 16*line_w; - } + _write_lines(src, line_w, dst, w, h, 1, 16*line_w); } -void Blit::Slow::B2f::r90(uint32_t *dst, unsigned const dst_w, - uint32_t const *src, unsigned const src_w, - unsigned const w, unsigned const h) +void Blit::Slow::B2f::r90(uint32_t *dst, unsigned dst_w, + uint32_t const *src, unsigned src_w, + unsigned w, unsigned h) { - src += (16*h - 1)*16*src_w; - - for (unsigned i = 16*w; i; i--) { - _sample_line(src, dst, 16*h, -16*src_w); - src++; - dst += 16*dst_w; - } + _write_lines(src, src_w, dst + 16*h - 1, w, h, 16*dst_w, -1); } -void Blit::Slow::B2f::r180(uint32_t *dst, unsigned const line_w, - uint32_t const *src, unsigned const w, unsigned const h) +void Blit::Slow::B2f::r180(uint32_t *dst, unsigned line_w, + uint32_t const *src, unsigned w, unsigned h) { - src += 16*h*16*line_w + 16*w - 1; - - for (unsigned i = h*16; i; i--) { - src -= 16*line_w; - _sample_line(src, dst, 16*w, -1); - dst += 16*line_w; - } + dst += 16*w - 1 + (16*h - 1)*16*line_w; + _write_lines(src, line_w, dst, w, h, -1, -16*line_w); } -void Blit::Slow::B2f::r270(uint32_t *dst, unsigned const dst_w, - uint32_t const *src, unsigned const src_w, - unsigned const w, const unsigned h) +void Blit::Slow::B2f::r270(uint32_t *dst, unsigned dst_w, + uint32_t const *src, unsigned src_w, + unsigned w, unsigned h) { - src += 16*w; - - for (unsigned i = 16*w; i; i--) { - src--; - _sample_line(src, dst, 16*h, 16*src_w); - dst += 16*dst_w; - } + dst += 16*dst_w*(16*w - 1); + _write_lines(src, src_w, dst, w, h, -16*dst_w, 1); } -void Blit::Slow::B2f_flip::r0(uint32_t *dst, unsigned const line_w, - uint32_t const *src, unsigned const w, unsigned const h) +void Blit::Slow::B2f_flip::r0(uint32_t *dst, unsigned line_w, + uint32_t const *src, unsigned w, unsigned h) { - src += 16*w - 1; - - for (unsigned lines = h*16; lines; lines--) { - _sample_line(src, dst, 16*w, -1); - src += 16*line_w; - dst += 16*line_w; - } + _write_lines(src, line_w, dst + 16*w - 1, w, h, -1, 16*line_w); } -void Blit::Slow::B2f_flip::r90(uint32_t *dst, unsigned const dst_w, - uint32_t const *src, unsigned const src_w, - unsigned const w, unsigned const h) +void Blit::Slow::B2f_flip::r90(uint32_t *dst, unsigned dst_w, + uint32_t const *src, unsigned src_w, + unsigned w, unsigned h) { - for (unsigned i = 16*w; i; i--) { - _sample_line(src, dst, 16*h, 16*src_w); - src++; - dst += 16*dst_w; - } + _write_lines(src, src_w, dst, w, h, 16*dst_w, 1); } -void Blit::Slow::B2f_flip::r180(uint32_t *dst, unsigned const line_w, - uint32_t const *src, unsigned const w, unsigned const h) +void Blit::Slow::B2f_flip::r180(uint32_t *dst, unsigned line_w, + uint32_t const *src, unsigned w, unsigned h) { - src += 16*h*16*line_w; - - for (unsigned lines = h*16; lines; lines--) { - src -= 16*line_w; - _copy_line(src, dst, 16*w); - dst += 16*line_w; - } + dst += (16*h - 1)*16*line_w; + _write_lines(src, line_w, dst, w, h, 1, -16*line_w); } -void Blit::Slow::B2f_flip::r270(uint32_t *dst, unsigned const dst_w, - uint32_t const *src, unsigned const src_w, - unsigned const w, const unsigned h) +void Blit::Slow::B2f_flip::r270(uint32_t *dst, unsigned dst_w, + uint32_t const *src, unsigned src_w, + unsigned w, unsigned h) { - src += (16*h - 1)*16*src_w + 16*w; - - for (unsigned i = 16*w; i; i--) { - src--; - _sample_line(src, dst, 16*h, -16*src_w); - dst += 16*dst_w; - } + dst += 16*h - 1 + 16*dst_w*(16*w - 1); + _write_lines(src, src_w, dst, w, h, -16*dst_w, -1); } #endif /* _INCLUDE__BLIT__INTERNAL__SLOW_H_ */