uterm: video: add vectorized blending
Vectorized blending allows pushing multiple blending-requests to the video hardware at once. This can speed up rendering on 2D devices a lot. However, the gl-rendering is really just for completeness and shouldn't be used if performance matters. Signed-off-by: David Herrmann <dh.herrmann@googlemail.com>
This commit is contained in:
parent
107581b7e4
commit
3386aa955b
14
src/uterm.h
14
src/uterm.h
@ -179,6 +179,18 @@ struct uterm_video_buffer {
|
||||
uint8_t *data;
|
||||
};
|
||||
|
||||
/*
 * One entry of a vectorized blend call.
 *
 * Describes a single source buffer together with the target position and
 * the foreground/background colors that are mixed according to the
 * buffer contents.
 */
struct uterm_video_blend_req {
	/* source buffer that is blended onto the target */
	const struct uterm_video_buffer *buf;
	/* target position of the upper-left corner of @buf */
	unsigned int x, y;
	/* foreground color (red, green, blue) */
	uint8_t fr, fg, fb;
	/* background color (red, green, blue) */
	uint8_t br, bg, bb;
};
|
||||
|
||||
/*
 * Callback type for video events. It receives the video object, a
 * hotplug argument and an opaque data pointer and returns nothing.
 */
typedef void (*uterm_video_cb) (struct uterm_video *video,
				struct uterm_video_hotplug *arg,
				void *data);
|
||||
@ -207,6 +219,8 @@ int uterm_screen_blend(struct uterm_screen *screen,
|
||||
unsigned int x, unsigned int y,
|
||||
uint8_t fr, uint8_t fg, uint8_t fb,
|
||||
uint8_t br, uint8_t bg, uint8_t bb);
|
||||
/*
 * Vectorized variant of uterm_screen_blend(): submits an array of @num
 * blend requests starting at @req in a single call.
 * Returns 0 on success or a negative errno-style code.
 */
int uterm_screen_blendv(struct uterm_screen *screen,
			const struct uterm_video_blend_req *req,
			size_t num);
|
||||
int uterm_screen_fill(struct uterm_screen *screen,
|
||||
uint8_t r, uint8_t g, uint8_t b,
|
||||
unsigned int x, unsigned int y,
|
||||
|
@ -63,6 +63,8 @@ struct display_ops {
|
||||
unsigned int x, unsigned int y,
|
||||
uint8_t fr, uint8_t fg, uint8_t fb,
|
||||
uint8_t br, uint8_t bg, uint8_t bb);
|
||||
int (*blendv) (struct uterm_display *disp,
|
||||
const struct uterm_video_blend_req *req, size_t num);
|
||||
int (*fill) (struct uterm_display *disp,
|
||||
uint8_t r, uint8_t g, uint8_t b, unsigned int x,
|
||||
unsigned int y, unsigned int width, unsigned int height);
|
||||
|
@ -157,6 +157,16 @@ int uterm_screen_blend(struct uterm_screen *screen,
|
||||
buf, x, y, fr, fg, fb, br, bg, bb);
|
||||
}
|
||||
|
||||
int uterm_screen_blendv(struct uterm_screen *screen,
|
||||
const struct uterm_video_blend_req *req, size_t num)
|
||||
{
|
||||
if (!screen)
|
||||
return -EINVAL;
|
||||
|
||||
return VIDEO_CALL(screen->disp->ops->blendv, -EOPNOTSUPP,
|
||||
screen->disp, req, num);
|
||||
}
|
||||
|
||||
int uterm_screen_fill(struct uterm_screen *screen,
|
||||
uint8_t r, uint8_t g, uint8_t b,
|
||||
unsigned int x, unsigned int y,
|
||||
|
@ -754,6 +754,26 @@ static int display_blend(struct uterm_display *disp,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int display_blendv(struct uterm_display *disp,
|
||||
const struct uterm_video_blend_req *req, size_t num)
|
||||
{
|
||||
int ret;
|
||||
unsigned int i;
|
||||
|
||||
if (!disp || !req)
|
||||
return -EINVAL;
|
||||
|
||||
for (i = 0; i < num; ++i, ++req) {
|
||||
ret = display_blend(disp, req->buf, req->x, req->y,
|
||||
req->fr, req->fg, req->fb,
|
||||
req->br, req->bg, req->bb);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int display_fill(struct uterm_display *disp,
|
||||
uint8_t r, uint8_t g, uint8_t b,
|
||||
unsigned int x, unsigned int y,
|
||||
@ -1241,6 +1261,7 @@ const struct display_ops drm_display_ops = {
|
||||
.swap = display_swap,
|
||||
.blit = display_blit,
|
||||
.blend = display_blend,
|
||||
.blendv = display_blendv,
|
||||
.fill = display_fill,
|
||||
};
|
||||
|
||||
|
@ -484,6 +484,74 @@ static int display_blend(struct uterm_display *disp,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int display_blendv(struct uterm_display *disp,
|
||||
const struct uterm_video_blend_req *req, size_t num)
|
||||
{
|
||||
unsigned int tmp;
|
||||
uint8_t *dst, *src;
|
||||
struct dumb_rb *rb;
|
||||
unsigned int width, height, i, j;
|
||||
unsigned int sw, sh;
|
||||
unsigned int r, g, b;
|
||||
|
||||
if (!disp->video || !display_is_online(disp))
|
||||
return -EINVAL;
|
||||
if (!req || !video_is_awake(disp->video))
|
||||
return -EINVAL;
|
||||
|
||||
rb = &disp->dumb.rb[disp->dumb.current_rb ^ 1];
|
||||
sw = disp->current_mode->dumb.info.hdisplay;
|
||||
sh = disp->current_mode->dumb.info.vdisplay;
|
||||
|
||||
for (j = 0; j < num; ++j, ++req) {
|
||||
if (req->buf->format != UTERM_FORMAT_GREY)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
tmp = req->x + req->buf->width;
|
||||
if (tmp < req->x || req->x >= sw)
|
||||
return -EINVAL;
|
||||
if (tmp > sw)
|
||||
width = sw - req->x;
|
||||
else
|
||||
width = req->buf->width;
|
||||
|
||||
tmp = req->y + req->buf->height;
|
||||
if (tmp < req->y || req->y >= sh)
|
||||
return -EINVAL;
|
||||
if (tmp > sh)
|
||||
height = sh - req->y;
|
||||
else
|
||||
height = req->buf->height;
|
||||
|
||||
dst = rb->map;
|
||||
dst = &dst[req->y * rb->stride + req->x * 4];
|
||||
src = req->buf->data;
|
||||
|
||||
while (height--) {
|
||||
for (i = 0; i < width; ++i) {
|
||||
/* Division by 256 instead of 255 increases
|
||||
* speed by like 20% on slower machines.
|
||||
* Downside is, full white is 254/254/254
|
||||
* instead of 255/255/255. */
|
||||
r = req->fr * src[i] +
|
||||
req->br * (255 - src[i]);
|
||||
r /= 256;
|
||||
g = req->fg * src[i] +
|
||||
req->bg * (255 - src[i]);
|
||||
g /= 256;
|
||||
b = req->fb * src[i] +
|
||||
req->bb * (255 - src[i]);
|
||||
b /= 256;
|
||||
((uint32_t*)dst)[i] = (r << 16) | (g << 8) | b;
|
||||
}
|
||||
dst += rb->stride;
|
||||
src += req->buf->stride;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int display_fill(struct uterm_display *disp,
|
||||
uint8_t r, uint8_t g, uint8_t b,
|
||||
unsigned int x, unsigned int y,
|
||||
@ -843,6 +911,7 @@ const struct display_ops dumb_display_ops = {
|
||||
.swap = display_swap,
|
||||
.blit = display_blit,
|
||||
.blend = display_blend,
|
||||
.blendv = display_blendv,
|
||||
.fill = display_fill,
|
||||
};
|
||||
|
||||
|
@ -598,6 +598,113 @@ static int display_blend(struct uterm_display *disp,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int display_blendv(struct uterm_display *disp,
|
||||
const struct uterm_video_blend_req *req, size_t num)
|
||||
{
|
||||
unsigned int tmp;
|
||||
uint8_t *dst, *src;
|
||||
unsigned int width, height, i, j;
|
||||
unsigned int r, g, b;
|
||||
uint32_t val;
|
||||
|
||||
if (!disp->video || !(disp->flags & DISPLAY_ONLINE))
|
||||
return -EINVAL;
|
||||
if (!req || !video_is_awake(disp->video))
|
||||
return -EINVAL;
|
||||
|
||||
for (j = 0; j < num; ++j, ++req) {
|
||||
if (req->buf->format != UTERM_FORMAT_GREY)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
tmp = req->x + req->buf->width;
|
||||
if (tmp < req->x || req->x >= disp->fbdev.xres)
|
||||
return -EINVAL;
|
||||
if (tmp > disp->fbdev.xres)
|
||||
width = disp->fbdev.xres - req->x;
|
||||
else
|
||||
width = req->buf->width;
|
||||
|
||||
tmp = req->y + req->buf->height;
|
||||
if (tmp < req->y || req->y >= disp->fbdev.yres)
|
||||
return -EINVAL;
|
||||
if (tmp > disp->fbdev.yres)
|
||||
height = disp->fbdev.yres - req->y;
|
||||
else
|
||||
height = req->buf->height;
|
||||
|
||||
if (!(disp->flags & DISPLAY_DBUF) || disp->fbdev.bufid)
|
||||
dst = disp->fbdev.map;
|
||||
else
|
||||
dst = &disp->fbdev.map[disp->fbdev.yres * disp->fbdev.stride];
|
||||
dst = &dst[req->y * disp->fbdev.stride + req->x * disp->fbdev.Bpp];
|
||||
src = req->buf->data;
|
||||
|
||||
/* Division by 256 instead of 255 increases
|
||||
* speed by like 20% on slower machines.
|
||||
* Downside is, full white is 254/254/254
|
||||
* instead of 255/255/255. */
|
||||
if (disp->fbdev.xrgb32) {
|
||||
while (height--) {
|
||||
for (i = 0; i < width; ++i) {
|
||||
r = req->fr * src[i] +
|
||||
req->br * (255 - src[i]);
|
||||
r /= 256;
|
||||
g = req->fg * src[i] +
|
||||
req->bg * (255 - src[i]);
|
||||
g /= 256;
|
||||
b = req->fb * src[i] +
|
||||
req->bb * (255 - src[i]);
|
||||
b /= 256;
|
||||
val = (r << 16) | (g << 8) | b;
|
||||
((uint32_t*)dst)[i] = val;
|
||||
}
|
||||
dst += disp->fbdev.stride;
|
||||
src += req->buf->stride;
|
||||
}
|
||||
} else if (disp->fbdev.Bpp == 2) {
|
||||
while (height--) {
|
||||
for (i = 0; i < width; ++i) {
|
||||
r = req->fr * src[i] +
|
||||
req->br * (255 - src[i]);
|
||||
r /= 256;
|
||||
g = req->fg * src[i] +
|
||||
req->bg * (255 - src[i]);
|
||||
g /= 256;
|
||||
b = req->fb * src[i] +
|
||||
req->bb * (255 - src[i]);
|
||||
b /= 256;
|
||||
val = (r << 16) | (g << 8) | b;
|
||||
((uint16_t*)dst)[i] = xrgb32_to_device(disp, val);
|
||||
}
|
||||
dst += disp->fbdev.stride;
|
||||
src += req->buf->stride;
|
||||
}
|
||||
} else if (disp->fbdev.Bpp == 4) {
|
||||
while (height--) {
|
||||
for (i = 0; i < width; ++i) {
|
||||
r = req->fr * src[i] +
|
||||
req->br * (255 - src[i]);
|
||||
r /= 256;
|
||||
g = req->fg * src[i] +
|
||||
req->bg * (255 - src[i]);
|
||||
g /= 256;
|
||||
b = req->fb * src[i] +
|
||||
req->bb * (255 - src[i]);
|
||||
b /= 256;
|
||||
val = (r << 16) | (g << 8) | b;
|
||||
((uint32_t*)dst)[i] = xrgb32_to_device(disp, val);
|
||||
}
|
||||
dst += disp->fbdev.stride;
|
||||
src += req->buf->stride;
|
||||
}
|
||||
} else {
|
||||
log_warning("invalid Bpp");
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int display_fill(struct uterm_display *disp,
|
||||
uint8_t r, uint8_t g, uint8_t b,
|
||||
unsigned int x, unsigned int y,
|
||||
@ -760,6 +867,7 @@ const struct display_ops fbdev_display_ops = {
|
||||
.swap = display_swap,
|
||||
.blit = display_blit,
|
||||
.blend = display_blend,
|
||||
.blendv = display_blendv,
|
||||
.fill = display_fill,
|
||||
};
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user