mirror of
https://github.com/gryf/wmaker.git
synced 2026-03-19 09:13:33 +01:00
wrlib: alpha combine speed improvement
This patch improves the alpha combine function by using integer arithmetic instead of floating point. That function is used, for example, in the switch panel to merge the transparency mask. The change is practically indistinguishable to the human eye for a single-pass blend, but the performance gain is huge.

I have been benchmarking wrlib and even implemented AVX2 support, but the gain from AVX2 is not worth its added complexity, whereas using int in this specific function is a really good trade-off.

Here are the results:

Alpha Blending Performance Test
Image size: 1024x768 (786432 pixels)
Iterations: 100
AVX2 support: YES

=== RGBA Source Test ===
Original (float): 2.540 ms/frame (393.8 FPS)
Optimized (int): 1.983 ms/frame (504.2 FPS) [1.3x speedup]
AVX2 optimized: 1.843 ms/frame (542.6 FPS) [1.4x speedup]

By using int, the alpha blending in that use case is 28% faster (504.2 vs. 393.8 FPS).
This commit is contained in:
committed by
Carlos R. Mafra
parent
474b23344a
commit
ef1a504898
@@ -25,47 +25,47 @@
|
||||
|
||||
|
||||
void RCombineAlpha(unsigned char *d, unsigned char *s, int s_has_alpha,
|
||||
int width, int height, int dwi, int swi, int opacity) {
|
||||
int x, y;
|
||||
int t, sa;
|
||||
int alpha;
|
||||
float ratio, cratio;
|
||||
int width, int height, int dwi, int swi, int opacity) {
|
||||
int x, y;
|
||||
unsigned char *dst = d;
|
||||
unsigned char *src = s;
|
||||
|
||||
for (y=0; y<height; y++) {
|
||||
for (x=0; x<width; x++) {
|
||||
sa=s_has_alpha?*(s+3):255;
|
||||
for (y = 0; y < height; y++) {
|
||||
for (x = 0; x < width; x++) {
|
||||
int sa = s_has_alpha ? src[3] : 255;
|
||||
int t, alpha;
|
||||
|
||||
if (opacity!=255) {
|
||||
t = sa * opacity + 0x80;
|
||||
sa = ((t>>8)+t)>>8;
|
||||
}
|
||||
if (opacity != 255) {
|
||||
t = sa * opacity + 0x80;
|
||||
sa = ((t >> 8) + t) >> 8;
|
||||
}
|
||||
|
||||
t = *(d+3) * (255-sa) + 0x80;
|
||||
alpha = sa + (((t>>8)+t)>>8);
|
||||
t = dst[3] * (255 - sa) + 0x80;
|
||||
alpha = sa + (((t >> 8) + t) >> 8);
|
||||
|
||||
if (sa==0 || alpha==0) {
|
||||
ratio = 0;
|
||||
cratio = 1.0;
|
||||
} else if(sa == alpha) {
|
||||
ratio = 1.0;
|
||||
cratio = 0;
|
||||
} else {
|
||||
ratio = (float)sa / alpha;
|
||||
cratio = 1.0F - ratio;
|
||||
}
|
||||
if (alpha == 0) {
|
||||
dst[3] = 0;
|
||||
} else if (sa == alpha) {
|
||||
dst[0] = src[0];
|
||||
dst[1] = src[1];
|
||||
dst[2] = src[2];
|
||||
dst[3] = alpha;
|
||||
} else if (sa == 0) {
|
||||
dst[3] = alpha;
|
||||
} else {
|
||||
int ratio = (sa << 8) / alpha;
|
||||
int inv_ratio = 256 - ratio;
|
||||
|
||||
*d = (int)*d * cratio + (int)*s * ratio;
|
||||
s++; d++;
|
||||
*d = (int)*d * cratio + (int)*s * ratio;
|
||||
s++; d++;
|
||||
*d = (int)*d * cratio + (int)*s * ratio;
|
||||
s++; d++;
|
||||
*d = alpha;
|
||||
d++;
|
||||
dst[0] = (dst[0] * inv_ratio + src[0] * ratio) >> 8;
|
||||
dst[1] = (dst[1] * inv_ratio + src[1] * ratio) >> 8;
|
||||
dst[2] = (dst[2] * inv_ratio + src[2] * ratio) >> 8;
|
||||
dst[3] = alpha;
|
||||
}
|
||||
|
||||
if (s_has_alpha) s++;
|
||||
}
|
||||
d+=dwi;
|
||||
s+=swi;
|
||||
}
|
||||
}
|
||||
dst += 4;
|
||||
src += s_has_alpha ? 4 : 3;
|
||||
}
|
||||
dst += dwi;
|
||||
src += swi;
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user