uterm: video: add vectorized blending

Vectorized blending allows pushing multiple blending requests to the video
hardware at once. This can speed up rendering on 2D devices considerably.
However, the GL rendering is really just there for completeness and should
not be used if performance matters.

Signed-off-by: David Herrmann <dh.herrmann@googlemail.com>
This commit is contained in:
David Herrmann 2012-08-31 00:00:07 +02:00
parent 107581b7e4
commit 3386aa955b
6 changed files with 224 additions and 0 deletions

View File

@ -179,6 +179,18 @@ struct uterm_video_buffer {
uint8_t *data;
};
/*
 * A single entry of a vectorized blend call (see uterm_screen_blendv()).
 *
 * The visible software backends treat every byte of @buf as a weight:
 * a value of 255 selects the f* color, 0 selects the b* color and
 * everything in between mixes the two per channel.
 */
struct uterm_video_blend_req {
	/* source buffer that provides the per-pixel weights */
	const struct uterm_video_buffer *buf;
	/* position of the buffer's top-left corner on the target */
	unsigned int x, y;
	/* red/green/blue of the color used where the weight is high */
	uint8_t fr, fg, fb;
	/* red/green/blue of the color used where the weight is low */
	uint8_t br, bg, bb;
};
/*
 * Callback function type that receives the video object, a
 * uterm_video_hotplug argument and a void pointer.
 * NOTE(review): @data is presumably the user pointer supplied when the
 * callback is registered -- registration is not visible here, confirm.
 */
typedef void (*uterm_video_cb) (struct uterm_video *video,
struct uterm_video_hotplug *arg,
void *data);
@ -207,6 +219,8 @@ int uterm_screen_blend(struct uterm_screen *screen,
unsigned int x, unsigned int y,
uint8_t fr, uint8_t fg, uint8_t fb,
uint8_t br, uint8_t bg, uint8_t bb);
/*
 * Vectorized variant of uterm_screen_blend(): hands the @num blend requests
 * in the array @req to the display backend of @screen in one call.
 *
 * Returns -EINVAL if @screen is NULL, otherwise whatever the backend's
 * blendv operation returns. The dispatcher is given -EOPNOTSUPP as its
 * fallback value (presumably used when the backend has no blendv
 * operation -- confirm against VIDEO_CALL).
 */
int uterm_screen_blendv(struct uterm_screen *screen,
const struct uterm_video_blend_req *req, size_t num);
int uterm_screen_fill(struct uterm_screen *screen,
uint8_t r, uint8_t g, uint8_t b,
unsigned int x, unsigned int y,

View File

@ -63,6 +63,8 @@ struct display_ops {
unsigned int x, unsigned int y,
uint8_t fr, uint8_t fg, uint8_t fb,
uint8_t br, uint8_t bg, uint8_t bb);
/* vectorized blend: handle the @num requests starting at @req in one call */
int (*blendv) (struct uterm_display *disp,
const struct uterm_video_blend_req *req, size_t num);
int (*fill) (struct uterm_display *disp,
uint8_t r, uint8_t g, uint8_t b, unsigned int x,
unsigned int y, unsigned int width, unsigned int height);

View File

@ -157,6 +157,16 @@ int uterm_screen_blend(struct uterm_screen *screen,
buf, x, y, fr, fg, fb, br, bg, bb);
}
int uterm_screen_blendv(struct uterm_screen *screen,
const struct uterm_video_blend_req *req, size_t num)
{
if (!screen)
return -EINVAL;
return VIDEO_CALL(screen->disp->ops->blendv, -EOPNOTSUPP,
screen->disp, req, num);
}
int uterm_screen_fill(struct uterm_screen *screen,
uint8_t r, uint8_t g, uint8_t b,
unsigned int x, unsigned int y,

View File

@ -754,6 +754,26 @@ static int display_blend(struct uterm_display *disp,
return 0;
}
static int display_blendv(struct uterm_display *disp,
const struct uterm_video_blend_req *req, size_t num)
{
int ret;
unsigned int i;
if (!disp || !req)
return -EINVAL;
for (i = 0; i < num; ++i, ++req) {
ret = display_blend(disp, req->buf, req->x, req->y,
req->fr, req->fg, req->fb,
req->br, req->bg, req->bb);
if (ret)
return ret;
}
return 0;
}
static int display_fill(struct uterm_display *disp,
uint8_t r, uint8_t g, uint8_t b,
unsigned int x, unsigned int y,
@ -1241,6 +1261,7 @@ const struct display_ops drm_display_ops = {
.swap = display_swap,
.blit = display_blit,
.blend = display_blend,
.blendv = display_blendv,
.fill = display_fill,
};

View File

@ -484,6 +484,74 @@ static int display_blend(struct uterm_display *disp,
return 0;
}
/*
 * Vectorized software blend into the render buffer that is not the
 * current one (index current_rb ^ 1).
 *
 * Each request must reference a UTERM_FORMAT_GREY buffer. Every source byte
 * is used as a weight between the request's f* color (weight 255) and its
 * b* color (weight 0); the result is stored as one 32-bit pixel with red in
 * bits 16-23, green in bits 8-15 and blue in bits 0-7. Buffers that extend
 * past the right or bottom edge of the current mode are clipped.
 *
 * @disp: display to draw on
 * @req:  array of blend requests
 * @num:  number of entries in @req
 *
 * Returns 0 on success, -EINVAL if the display is not usable, @req or a
 * request's buffer is NULL, or a request starts outside the screen, and
 * -EOPNOTSUPP for a non-grey buffer. Requests are processed in order, so
 * requests preceding a failing one have already been drawn.
 */
static int display_blendv(struct uterm_display *disp,
			  const struct uterm_video_blend_req *req, size_t num)
{
	unsigned int tmp;
	uint8_t *dst, *src;
	struct dumb_rb *rb;
	unsigned int width, height, i;
	unsigned int sw, sh;
	unsigned int r, g, b;
	size_t j;

	if (!disp->video || !display_is_online(disp))
		return -EINVAL;
	if (!req || !video_is_awake(disp->video))
		return -EINVAL;

	rb = &disp->dumb.rb[disp->dumb.current_rb ^ 1];
	sw = disp->current_mode->dumb.info.hdisplay;
	sh = disp->current_mode->dumb.info.vdisplay;

	/* j has the type of @num so it cannot wrap before reaching it */
	for (j = 0; j < num; ++j, ++req) {
		/* reject a request without a buffer instead of crashing */
		if (!req->buf)
			return -EINVAL;
		if (req->buf->format != UTERM_FORMAT_GREY)
			return -EOPNOTSUPP;

		/* clip horizontally; "tmp < x" catches unsigned overflow */
		tmp = req->x + req->buf->width;
		if (tmp < req->x || req->x >= sw)
			return -EINVAL;
		if (tmp > sw)
			width = sw - req->x;
		else
			width = req->buf->width;

		/* clip vertically in the same way */
		tmp = req->y + req->buf->height;
		if (tmp < req->y || req->y >= sh)
			return -EINVAL;
		if (tmp > sh)
			height = sh - req->y;
		else
			height = req->buf->height;

		/* destination pixels are 4 bytes wide */
		dst = rb->map;
		dst = &dst[req->y * rb->stride + req->x * 4];
		src = req->buf->data;

		while (height--) {
			for (i = 0; i < width; ++i) {
				/* Division by 256 instead of 255 increases
				 * speed by like 20% on slower machines.
				 * Downside is, full white is 254/254/254
				 * instead of 255/255/255. */
				r = req->fr * src[i] +
				    req->br * (255 - src[i]);
				r /= 256;
				g = req->fg * src[i] +
				    req->bg * (255 - src[i]);
				g /= 256;
				b = req->fb * src[i] +
				    req->bb * (255 - src[i]);
				b /= 256;
				((uint32_t*)dst)[i] = (r << 16) | (g << 8) | b;
			}
			dst += rb->stride;
			src += req->buf->stride;
		}
	}

	return 0;
}
static int display_fill(struct uterm_display *disp,
uint8_t r, uint8_t g, uint8_t b,
unsigned int x, unsigned int y,
@ -843,6 +911,7 @@ const struct display_ops dumb_display_ops = {
.swap = display_swap,
.blit = display_blit,
.blend = display_blend,
.blendv = display_blendv,
.fill = display_fill,
};

View File

@ -598,6 +598,113 @@ static int display_blend(struct uterm_display *disp,
return 0;
}
static int display_blendv(struct uterm_display *disp,
const struct uterm_video_blend_req *req, size_t num)
{
unsigned int tmp;
uint8_t *dst, *src;
unsigned int width, height, i, j;
unsigned int r, g, b;
uint32_t val;
if (!disp->video || !(disp->flags & DISPLAY_ONLINE))
return -EINVAL;
if (!req || !video_is_awake(disp->video))
return -EINVAL;
for (j = 0; j < num; ++j, ++req) {
if (req->buf->format != UTERM_FORMAT_GREY)
return -EOPNOTSUPP;
tmp = req->x + req->buf->width;
if (tmp < req->x || req->x >= disp->fbdev.xres)
return -EINVAL;
if (tmp > disp->fbdev.xres)
width = disp->fbdev.xres - req->x;
else
width = req->buf->width;
tmp = req->y + req->buf->height;
if (tmp < req->y || req->y >= disp->fbdev.yres)
return -EINVAL;
if (tmp > disp->fbdev.yres)
height = disp->fbdev.yres - req->y;
else
height = req->buf->height;
if (!(disp->flags & DISPLAY_DBUF) || disp->fbdev.bufid)
dst = disp->fbdev.map;
else
dst = &disp->fbdev.map[disp->fbdev.yres * disp->fbdev.stride];
dst = &dst[req->y * disp->fbdev.stride + req->x * disp->fbdev.Bpp];
src = req->buf->data;
/* Division by 256 instead of 255 increases
* speed by like 20% on slower machines.
* Downside is, full white is 254/254/254
* instead of 255/255/255. */
if (disp->fbdev.xrgb32) {
while (height--) {
for (i = 0; i < width; ++i) {
r = req->fr * src[i] +
req->br * (255 - src[i]);
r /= 256;
g = req->fg * src[i] +
req->bg * (255 - src[i]);
g /= 256;
b = req->fb * src[i] +
req->bb * (255 - src[i]);
b /= 256;
val = (r << 16) | (g << 8) | b;
((uint32_t*)dst)[i] = val;
}
dst += disp->fbdev.stride;
src += req->buf->stride;
}
} else if (disp->fbdev.Bpp == 2) {
while (height--) {
for (i = 0; i < width; ++i) {
r = req->fr * src[i] +
req->br * (255 - src[i]);
r /= 256;
g = req->fg * src[i] +
req->bg * (255 - src[i]);
g /= 256;
b = req->fb * src[i] +
req->bb * (255 - src[i]);
b /= 256;
val = (r << 16) | (g << 8) | b;
((uint16_t*)dst)[i] = xrgb32_to_device(disp, val);
}
dst += disp->fbdev.stride;
src += req->buf->stride;
}
} else if (disp->fbdev.Bpp == 4) {
while (height--) {
for (i = 0; i < width; ++i) {
r = req->fr * src[i] +
req->br * (255 - src[i]);
r /= 256;
g = req->fg * src[i] +
req->bg * (255 - src[i]);
g /= 256;
b = req->fb * src[i] +
req->bb * (255 - src[i]);
b /= 256;
val = (r << 16) | (g << 8) | b;
((uint32_t*)dst)[i] = xrgb32_to_device(disp, val);
}
dst += disp->fbdev.stride;
src += req->buf->stride;
}
} else {
log_warning("invalid Bpp");
}
}
return 0;
}
static int display_fill(struct uterm_display *disp,
uint8_t r, uint8_t g, uint8_t b,
unsigned int x, unsigned int y,
@ -760,6 +867,7 @@ const struct display_ops fbdev_display_ops = {
.swap = display_swap,
.blit = display_blit,
.blend = display_blend,
.blendv = display_blendv,
.fill = display_fill,
};