Skip to content

Commit

Permalink
fixup "blit: SIMD-based back2front copy" (slow: read sequentially)
Browse files Browse the repository at this point in the history
  • Loading branch information
nfeske committed Jan 23, 2025
1 parent 61a712a commit 4c9ad57
Showing 1 changed file with 45 additions and 79 deletions.
124 changes: 45 additions & 79 deletions repos/os/include/blit/internal/slow.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,17 +52,23 @@ namespace Blit {

struct Slow;

/*
 * Diff view of one helper: the pre-commit and post-commit variants appear
 * back to back without +/- markers.
 *
 * Old variant '_sample_line': stores 'len' pixels to consecutive 'dst'
 * positions while advancing 'src' by 'step' after each pixel.
 */
static inline void _sample_line(uint32_t const *src, uint32_t *dst,
unsigned len, int const step)
/*
 * New variant '_write_line': reads 'len' pixels from consecutive 'src'
 * positions and advances 'dst' by 'dst_step' after each pixel.
 */
static inline void _write_line(uint32_t const *src, uint32_t *dst,
unsigned len, int dst_step)
{
/* old loop: strided read, sequential write */
for (; len--; src += step)
*dst++ = *src;
/*
 * new loop: sequential read, strided write; the step is applied once more
 * after the final store, the resulting pointer is not dereferenced
 */
for (; len--; dst += dst_step)
*dst = *src++;
}

/*
 * Diff view: old helper '_copy_line' followed by its replacement
 * '_write_lines', shown without +/- markers.
 *
 * Old variant '_copy_line': forwards to '_sample_line' with a step of 1.
 */
static inline void _copy_line(uint32_t const *src, uint32_t *dst, unsigned len)
/*
 * New variant '_write_lines': processes h*16 lines of 16*w pixels each.
 * Line r, pixel c is read from src[r*16*src_w + c] and stored at
 * dst[r*dy + c*dx].
 */
static inline void _write_lines(uint32_t const *src, unsigned src_w,
uint32_t *dst,
unsigned w, unsigned h, int dx, int dy)
{
/* old body of '_copy_line' */
_sample_line(src, dst, len, 1);
}
/* new body of '_write_lines' */
for (unsigned lines = h*16; lines; lines--) {
_write_line(src, dst, 16*w, dx);
/* advance to the next source line and to the start of the next destination line */
src += 16*src_w;
dst += dy;
}
/* NOTE(review): the ';' after the closing brace is an empty declaration and can be dropped */
};
}


Expand All @@ -86,107 +92,67 @@ struct Blit::Slow
};


/*
 * B2f::r0, old and new variant shown together (diff view without +/-
 * markers). Both copy h*16 lines of 16*w pixels, using a stride of
 * 16*line_w for 'src' and 'dst' alike: dst[r*16*line_w + c] receives
 * src[r*16*line_w + c].
 */
void Blit::Slow::B2f::r0(uint32_t *dst, unsigned const line_w,
uint32_t const *src, unsigned const w, unsigned const h)
/* new signature: same parameters, 'const' qualifiers on the by-value arguments dropped */
void Blit::Slow::B2f::r0(uint32_t *dst, unsigned line_w,
uint32_t const *src, unsigned w, unsigned h)
{
/* old body */
for (unsigned lines = h*16; lines; lines--) {
_copy_line(src, dst, 16*w);
src += 16*line_w;
dst += 16*line_w;
}
/* new body: per-pixel destination step 1, per-line destination step 16*line_w */
_write_lines(src, line_w, dst, w, h, 1, 16*line_w);
}


/*
 * B2f::r90, old and new variant shown together (diff view without +/-
 * markers). In both, the pixel at src[r*16*src_w + c] ends up at
 * dst[(16*h - 1 - r) + c*16*dst_w], for r < 16*h and c < 16*w.
 */
void Blit::Slow::B2f::r90(uint32_t *dst, unsigned const dst_w,
uint32_t const *src, unsigned const src_w,
unsigned const w, unsigned const h)
/* new signature: same parameters, 'const' qualifiers on the by-value arguments dropped */
void Blit::Slow::B2f::r90(uint32_t *dst, unsigned dst_w,
uint32_t const *src, unsigned src_w,
unsigned w, unsigned h)
{
/* old body: start at the last source line and sample upwards, one source column per destination line */
src += (16*h - 1)*16*src_w;

for (unsigned i = 16*w; i; i--) {
_sample_line(src, dst, 16*h, -16*src_w);
src++;
dst += 16*dst_w;
}
/* new body: start at destination offset 16*h - 1, step 16*dst_w per pixel and -1 per source line */
_write_lines(src, src_w, dst + 16*h - 1, w, h, 16*dst_w, -1);
}


/*
 * B2f::r180, old and new variant shown together (diff view without +/-
 * markers). In both, the pixel at src[r*16*line_w + c] ends up at
 * dst[(16*h - 1 - r)*16*line_w + (16*w - 1 - c)].
 */
void Blit::Slow::B2f::r180(uint32_t *dst, unsigned const line_w,
uint32_t const *src, unsigned const w, unsigned const h)
/* new signature: same parameters, 'const' qualifiers on the by-value arguments dropped */
void Blit::Slow::B2f::r180(uint32_t *dst, unsigned line_w,
uint32_t const *src, unsigned w, unsigned h)
{
/* old body: walk the source lines bottom-up, reading each line backwards */
src += 16*h*16*line_w + 16*w - 1;

for (unsigned i = h*16; i; i--) {
src -= 16*line_w;
_sample_line(src, dst, 16*w, -1);
dst += 16*line_w;
}
/*
 * new body: start at the last pixel of the last destination line and move
 * backwards. NOTE(review): 'line_w' is unsigned, so '-16*line_w' is negated
 * as unsigned and then converted to the 'int dy' parameter.
 */
dst += 16*w - 1 + (16*h - 1)*16*line_w;
_write_lines(src, line_w, dst, w, h, -1, -16*line_w);
}


/*
 * B2f::r270, old and new variant shown together (diff view without +/-
 * markers). In both, the pixel at src[r*16*src_w + c] ends up at
 * dst[(16*w - 1 - c)*16*dst_w + r].
 */
void Blit::Slow::B2f::r270(uint32_t *dst, unsigned const dst_w,
uint32_t const *src, unsigned const src_w,
unsigned const w, const unsigned h)
/* new signature: same parameters, 'const' qualifiers on the by-value arguments dropped */
void Blit::Slow::B2f::r270(uint32_t *dst, unsigned dst_w,
uint32_t const *src, unsigned src_w,
unsigned w, unsigned h)
{
/* old body: walk the source columns right to left, sampling each column downwards */
src += 16*w;

for (unsigned i = 16*w; i; i--) {
src--;
_sample_line(src, dst, 16*h, 16*src_w);
dst += 16*dst_w;
}
/*
 * new body: start at the beginning of destination line 16*w - 1, step one
 * line back per pixel and one pixel forward per source line.
 * NOTE(review): 'dst_w' is unsigned, so '-16*dst_w' is negated as unsigned
 * and then converted to the 'int dx' parameter.
 */
dst += 16*dst_w*(16*w - 1);
_write_lines(src, src_w, dst, w, h, -16*dst_w, 1);
}


/*
 * B2f_flip::r0, old and new variant shown together (diff view without +/-
 * markers). In both, the pixel at src[r*16*line_w + c] ends up at
 * dst[r*16*line_w + (16*w - 1 - c)].
 */
void Blit::Slow::B2f_flip::r0(uint32_t *dst, unsigned const line_w,
uint32_t const *src, unsigned const w, unsigned const h)
/* new signature: same parameters, 'const' qualifiers on the by-value arguments dropped */
void Blit::Slow::B2f_flip::r0(uint32_t *dst, unsigned line_w,
uint32_t const *src, unsigned w, unsigned h)
{
/* old body: read each source line backwards, starting at its last pixel */
src += 16*w - 1;

for (unsigned lines = h*16; lines; lines--) {
_sample_line(src, dst, 16*w, -1);
src += 16*line_w;
dst += 16*line_w;
}
/* new body: write each destination line backwards, starting at its last pixel */
_write_lines(src, line_w, dst + 16*w - 1, w, h, -1, 16*line_w);
}


/*
 * B2f_flip::r90, old and new variant shown together (diff view without +/-
 * markers). In both, the pixel at src[r*16*src_w + c] ends up at
 * dst[c*16*dst_w + r].
 */
void Blit::Slow::B2f_flip::r90(uint32_t *dst, unsigned const dst_w,
uint32_t const *src, unsigned const src_w,
unsigned const w, unsigned const h)
/* new signature: same parameters, 'const' qualifiers on the by-value arguments dropped */
void Blit::Slow::B2f_flip::r90(uint32_t *dst, unsigned dst_w,
uint32_t const *src, unsigned src_w,
unsigned w, unsigned h)
{
/* old body: sample one source column downwards per destination line */
for (unsigned i = 16*w; i; i--) {
_sample_line(src, dst, 16*h, 16*src_w);
src++;
dst += 16*dst_w;
}
/* new body: step 16*dst_w per pixel and 1 per source line */
_write_lines(src, src_w, dst, w, h, 16*dst_w, 1);
}


/*
 * B2f_flip::r180, old and new variant shown together (diff view without
 * +/- markers). In both, the pixel at src[r*16*line_w + c] ends up at
 * dst[(16*h - 1 - r)*16*line_w + c].
 */
void Blit::Slow::B2f_flip::r180(uint32_t *dst, unsigned const line_w,
uint32_t const *src, unsigned const w, unsigned const h)
/* new signature: same parameters, 'const' qualifiers on the by-value arguments dropped */
void Blit::Slow::B2f_flip::r180(uint32_t *dst, unsigned line_w,
uint32_t const *src, unsigned w, unsigned h)
{
/* old body: walk the source lines bottom-up, copying each line forwards */
src += 16*h*16*line_w;

for (unsigned lines = h*16; lines; lines--) {
src -= 16*line_w;
_copy_line(src, dst, 16*w);
dst += 16*line_w;
}
/*
 * new body: start at the last destination line and move one line back per
 * source line. NOTE(review): 'line_w' is unsigned, so '-16*line_w' is
 * negated as unsigned and then converted to the 'int dy' parameter.
 */
dst += (16*h - 1)*16*line_w;
_write_lines(src, line_w, dst, w, h, 1, -16*line_w);
}


/*
 * B2f_flip::r270, old and new variant shown together (diff view without
 * +/- markers). In both, the pixel at src[r*16*src_w + c] ends up at
 * dst[(16*w - 1 - c)*16*dst_w + (16*h - 1 - r)].
 */
void Blit::Slow::B2f_flip::r270(uint32_t *dst, unsigned const dst_w,
uint32_t const *src, unsigned const src_w,
unsigned const w, const unsigned h)
/* new signature: same parameters, 'const' qualifiers on the by-value arguments dropped */
void Blit::Slow::B2f_flip::r270(uint32_t *dst, unsigned dst_w,
uint32_t const *src, unsigned src_w,
unsigned w, unsigned h)
{
/* old body: walk the source columns right to left, sampling each column upwards from the last line */
src += (16*h - 1)*16*src_w + 16*w;

for (unsigned i = 16*w; i; i--) {
src--;
_sample_line(src, dst, 16*h, -16*src_w);
dst += 16*dst_w;
}
/*
 * new body: start at offset 16*h - 1 within destination line 16*w - 1 and
 * move backwards in both directions. NOTE(review): 'dst_w' is unsigned, so
 * '-16*dst_w' is negated as unsigned and then converted to the 'int dx'
 * parameter.
 */
dst += 16*h - 1 + 16*dst_w*(16*w - 1);
_write_lines(src, src_w, dst, w, h, -16*dst_w, -1);
}

#endif /* _INCLUDE__BLIT__INTERNAL__SLOW_H_ */

0 comments on commit 4c9ad57

Please sign in to comment.