1
0
mirror of https://github.com/gryf/wmaker.git synced 2025-12-19 04:20:27 +01:00

optimized code for 8bpp/PseudoColor

This commit is contained in:
kojima
2000-01-19 17:47:35 +00:00
parent ec5f3a294e
commit 0780ab82fe
6 changed files with 331 additions and 144 deletions

View File

@@ -1,4 +1,4 @@
# Makefile.in generated automatically by automake 1.4 from Makefile.am # Makefile.in generated automatically by automake 1.4a from Makefile.am
# Copyright (C) 1994, 1995-8, 1999 Free Software Foundation, Inc. # Copyright (C) 1994, 1995-8, 1999 Free Software Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation # This Makefile.in is free software; the Free Software Foundation
@@ -46,9 +46,10 @@ AUTOMAKE = @AUTOMAKE@
AUTOHEADER = @AUTOHEADER@ AUTOHEADER = @AUTOHEADER@
INSTALL = @INSTALL@ INSTALL = @INSTALL@
INSTALL_PROGRAM = @INSTALL_PROGRAM@ $(AM_INSTALL_PROGRAM_FLAGS) INSTALL_PROGRAM = @INSTALL_PROGRAM@
INSTALL_DATA = @INSTALL_DATA@ INSTALL_DATA = @INSTALL_DATA@
INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_SCRIPT = @INSTALL_SCRIPT@
INSTALL_STRIP_FLAG =
transform = @program_transform_name@ transform = @program_transform_name@
NORMAL_INSTALL = : NORMAL_INSTALL = :
@@ -375,7 +376,7 @@ uninstall: uninstall-am
all-am: Makefile $(LTLIBRARIES) $(PROGRAMS) $(SCRIPTS) $(HEADERS) all-am: Makefile $(LTLIBRARIES) $(PROGRAMS) $(SCRIPTS) $(HEADERS)
all-redirect: all-am all-redirect: all-am
install-strip: install-strip:
$(MAKE) $(AM_MAKEFLAGS) AM_INSTALL_PROGRAM_FLAGS=-s install $(MAKE) $(AM_MAKEFLAGS) INSTALL_STRIP_FLAG=-s install
installdirs: installdirs:
$(mkinstalldirs) $(DESTDIR)$(libdir) $(DESTDIR)$(bindir) \ $(mkinstalldirs) $(DESTDIR)$(libdir) $(DESTDIR)$(bindir) \
$(DESTDIR)$(includedir) $(DESTDIR)$(includedir)

View File

@@ -67,8 +67,7 @@ static RContextAttributes DEFAULT_CONTEXT_ATTRIBS = {
* allocate colors according to colors_per_channel * allocate colors according to colors_per_channel
* *
* best/default: * best/default:
* if there's a std colormap defined * if there's a std colormap defined then use it
then use it
* *
* else * else
* create a std colormap and set it * create a std colormap and set it
@@ -113,6 +112,17 @@ allocateStandardPseudoColor(RContext *ctx, XStandardColormap *stdcmap)
return False; return False;
} }
ctx->pixels = malloc(sizeof(unsigned long)*ctx->ncolors);
if (!ctx->pixels) {
free(ctx->colors);
ctx->colors = NULL;
RErrorCode = RERR_NOMEMORY;
return False;
}
#define calc(max,mult) (((i / stdcmap->mult) % \ #define calc(max,mult) (((i / stdcmap->mult) % \
(stdcmap->max + 1)) * 65535) / stdcmap->max (stdcmap->max + 1)) * 65535) / stdcmap->max
@@ -122,6 +132,8 @@ allocateStandardPseudoColor(RContext *ctx, XStandardColormap *stdcmap)
ctx->colors[i].red = calc(red_max, red_mult); ctx->colors[i].red = calc(red_max, red_mult);
ctx->colors[i].green = calc(green_max, green_mult); ctx->colors[i].green = calc(green_max, green_mult);
ctx->colors[i].blue = calc(blue_max, blue_mult); ctx->colors[i].blue = calc(blue_max, blue_mult);
ctx->pixels[i] = ctx->colors[i].pixel;
} }
#undef calc #undef calc
@@ -177,6 +189,14 @@ allocatePseudoColor(RContext *ctx)
RErrorCode = RERR_NOMEMORY; RErrorCode = RERR_NOMEMORY;
return False; return False;
} }
ctx->pixels = malloc(sizeof(unsigned long)*ncolors);
if (!ctx->pixels) {
free(colors);
RErrorCode = RERR_NOMEMORY;
return False;
}
i=0; i=0;
if ((ctx->attribs->flags & RC_GammaCorrection) && ctx->attribs->rgamma > 0 if ((ctx->attribs->flags & RC_GammaCorrection) && ctx->attribs->rgamma > 0
@@ -276,6 +296,11 @@ allocatePseudoColor(RContext *ctx)
ctx->colors = colors; ctx->colors = colors;
ctx->ncolors = ncolors; ctx->ncolors = ncolors;
/* fill the pixels shortcut array */
for (i = 0; i < ncolors; i++) {
ctx->pixels[i] = ctx->colors[i].pixel;
}
return True; return True;
} }

View File

@@ -45,17 +45,17 @@ extern Pixmap R_CreateXImageMappedPixmap(RContext *context, RXImage *ximage);
extern int x86_check_mmx(); extern int x86_check_mmx();
extern void x86_TrueColor_32_to_16(unsigned char *image, extern void x86_mmx_TrueColor_32_to_16(unsigned char *image,
unsigned short *ximage, unsigned short *ximage,
short *err, short *nerr, short *err, short *nerr,
short *rtable, short *gtable, short *rtable, short *gtable,
short *btable, short *btable,
int dr, int dg, int db, int dr, int dg, int db,
unsigned int roffs, unsigned int roffs,
unsigned int goffs, unsigned int goffs,
unsigned int boffs, unsigned int boffs,
int width, int height, int width, int height,
int line_offset); int line_offset);
@@ -311,14 +311,14 @@ image2TrueColor(RContext *ctx, RImage *image)
memset(err, 0, 8*(image->width+3)); memset(err, 0, 8*(image->width+3));
memset(nerr, 0, 8*(image->width+3)); memset(nerr, 0, 8*(image->width+3));
x86_TrueColor_32_to_16(image->data, x86_mmx_TrueColor_32_to_16(image->data,
(unsigned short*)ximg->image->data, (unsigned short*)ximg->image->data,
err+8, nerr+8, err+8, nerr+8,
rtable, gtable, btable, rtable, gtable, btable,
dr, dg, db, dr, dg, db,
roffs, goffs, boffs, roffs, goffs, boffs,
image->width, image->height, image->width, image->height,
ximg->image->bytes_per_line - 2*image->width); ximg->image->bytes_per_line - 2*image->width);
free(err); free(err);
free(nerr); free(nerr);
@@ -361,6 +361,82 @@ image2TrueColor(RContext *ctx, RImage *image)
/***************************************************************************/ /***************************************************************************/
/* Clamp v to the 0..0xff range of an 8 bit colour component. */
static int
clampComponent(int v)
{
    if (v < 0)
        return 0;
    if (v > 0xff)
        return 0xff;
    return v;
}


/*
 * Dither `image' (3 or 4 bytes per pixel, the 4th byte is skipped) down
 * to one byte per pixel, written into ximg->image->data.
 *
 * ximg, image  - destination and source; one output byte is produced per
 *                source pixel and each output row is advanced by
 *                ximg->image->bytes_per_line.
 * err, nerr    - error rows for the current and the next scanline, three
 *                cells per pixel.  Cells up to index 3*image->width + 2
 *                are written in both rows, so each pointer must have at
 *                least 3*(image->width + 1) usable cells.  `err' is read
 *                before it is written and must hold the incoming error
 *                (normally zeros); `nerr' is fully rewritten on each row.
 *                The two pointers are swapped locally after every row.
 * rtable, gtable, btable
 *              - indexed with a component clamped to 0..255, giving the
 *                reduced level for that component.
 * dr, dg, db   - value one level step is worth, used to compute the
 *                error left after reduction (component - level*step).
 * pixels       - lookup indexed by r*cpc*cpc + g*cpc + b; the entry is
 *                truncated to one byte when stored.
 * cpc          - colours per channel.
 *
 * The error is split 3/8 to the right neighbour, 3/8 to the pixel below
 * and the remainder to the lower-right pixel.  Cells are 8 bit wide and
 * are stored without saturation.
 */
static void
convertPseudoColor_to_8(RXImage *ximg, RImage *image,
                        char *err, char *nerr,
                        const short *rtable,
                        const short *gtable,
                        const short *btable,
                        const int dr, const int dg, const int db,
                        unsigned long *pixels,
                        int cpc)
{
    /*
     * The cells must be able to hold negative errors.  Whether plain
     * char is signed is implementation-defined, so go through signed
     * char explicitly; otherwise the lower clamp can never trigger on
     * platforms where char is unsigned.
     */
    signed char *cur = (signed char *)err;
    signed char *next = (signed char *)nerr;
    signed char *swap;
    const unsigned char *src = image->data;
    unsigned char *dst = (unsigned char *)ximg->image->data;
    const int channels = image->format == RRGBAFormat ? 4 : 3;
    const int cpcpc = cpc*cpc;
    const int rowlen = image->width*3;
    /* bytes between the end of one output row and the start of the next */
    const int pad = ximg->image->bytes_per_line - image->width;
    int x, y;

    /* convert and dither the image to XImage */
    for (y = 0; y < image->height; y++) {
        /* the first pixel of the next row starts without carried error */
        next[0] = 0;
        next[1] = 0;
        next[2] = 0;

        for (x = 0; x < rowlen; x += 3, src += channels) {
            int pixel;
            int r, g, b;
            int rer, ger, ber;

            /* reduce each component and remember what was lost */
            pixel = clampComponent(src[0] + cur[x]);
            r = rtable[pixel];
            rer = pixel - r*dr;

            pixel = clampComponent(src[1] + cur[x+1]);
            g = gtable[pixel];
            ger = pixel - g*dg;

            pixel = clampComponent(src[2] + cur[x+2]);
            b = btable[pixel];
            ber = pixel - b*db;

            *dst++ = pixels[r*cpcpc + g*cpc + b];

            /* distribute error; r, g, b are reused as the 3/8 share */
            r = (rer*3)/8;
            g = (ger*3)/8;
            b = (ber*3)/8;

            /* x+1, y */
            cur[x+3] += r;
            cur[x+4] += g;
            cur[x+5] += b;

            /* x, y+1 */
            next[x] += r;
            next[x+1] += g;
            next[x+2] += b;

            /* x+1, y+1: assigned, not accumulated, this resets the cell */
            next[x+3] = rer - 2*r;
            next[x+4] = ger - 2*g;
            next[x+5] = ber - 2*b;
        }

        /* skip to next line */
        swap = cur;
        cur = next;
        next = swap;

        dst += pad;
    }
}
static RXImage* static RXImage*
image2PseudoColor(RContext *ctx, RImage *image) image2PseudoColor(RContext *ctx, RImage *image)
@@ -375,10 +451,7 @@ image2PseudoColor(RContext *ctx, RImage *image)
const unsigned short bmask = rmask; const unsigned short bmask = rmask;
unsigned short *rtable, *gtable, *btable; unsigned short *rtable, *gtable, *btable;
const int cpccpc = cpc*cpc; const int cpccpc = cpc*cpc;
unsigned char *data;
int ofs;
int channels = image->format == RRGBAFormat ? 4 : 3; int channels = image->format == RRGBAFormat ? 4 : 3;
/*register unsigned char maxrgb = 0xff;*/
ximg = RCreateXImage(ctx, ctx->depth, image->width, image->height); ximg = RCreateXImage(ctx, ctx->depth, image->width, image->height);
if (!ximg) { if (!ximg) {
@@ -387,8 +460,6 @@ image2PseudoColor(RContext *ctx, RImage *image)
ptr = image->data; ptr = image->data;
data = (unsigned char *)ximg->image->data;
/* Tables are same at the moment because rmask==gmask==bmask. */ /* Tables are same at the moment because rmask==gmask==bmask. */
rtable = computeTable(rmask); rtable = computeTable(rmask);
gtable = computeTable(gmask); gtable = computeTable(gmask);
@@ -418,20 +489,18 @@ image2PseudoColor(RContext *ctx, RImage *image)
} }
} else { } else {
/* dither */ /* dither */
short *err; char *err;
short *nerr; char *nerr;
short *terr;
int rer, ger, ber;
const int dr=0xff/rmask; const int dr=0xff/rmask;
const int dg=0xff/gmask; const int dg=0xff/gmask;
const int db=0xff/bmask; const int db=0xff/bmask;
int i;
#ifdef DEBUG #ifdef DEBUG
printf("pseudo color dithering with %d colors per channel\n", cpc); printf("pseudo color dithering with %d colors per channel\n", cpc);
#endif #endif
err = (short*)malloc(3*(image->width+2)*sizeof(short)); err = malloc(3*(image->width+2));
nerr = (short*)malloc(3*(image->width+2)*sizeof(short)); nerr = malloc(3*(image->width+2));
if (!err || !nerr) { if (!err || !nerr) {
if (nerr) if (nerr)
free(nerr); free(nerr);
@@ -439,77 +508,17 @@ image2PseudoColor(RContext *ctx, RImage *image)
RDestroyXImage(ctx, ximg); RDestroyXImage(ctx, ximg);
return NULL; return NULL;
} }
for (x=0, i=0; x<image->width*3; x+=channels-3) { memset(err, 0, 3*(image->width+3));
err[i++] = ptr[x++]; memset(nerr, 0, 3*(image->width+3));
err[i++] = ptr[x++];
err[i++] = ptr[x++]; convertPseudoColor_to_8(ximg, image, err+4, nerr+4,
} rtable, gtable, btable,
err[x++] = err[x++] = err[x++] = 0; dr, dg, db, ctx->pixels, cpc);
/* convert and dither the image to XImage */
for (ofs=0, y=0; y<image->height; y++) {
if (y<image->height-1) {
int x1;
for (x=0, x1=((y+1)*image->width)*channels;
x<image->width*3; x1+=channels-3) {
nerr[x++] = ptr[x1++];
nerr[x++] = ptr[x1++];
nerr[x++] = ptr[x1++];
}
/* last column */
x1-=channels;
nerr[x++] = ptr[x1++];
nerr[x++] = ptr[x1++];
nerr[x++] = ptr[x1++];
}
for (x=0; x<image->width*3; x+=3, ofs++) {
/* reduce pixel */
if (err[x]>0xff) err[x]=0xff; else if (err[x]<0) err[x]=0;
if (err[x+1]>0xff) err[x+1]=0xff; else if (err[x+1]<0) err[x+1]=0;
if (err[x+2]>0xff) err[x+2]=0xff; else if (err[x+2]<0) err[x+2]=0;
r = rtable[err[x]];
g = gtable[err[x+1]];
b = btable[err[x+2]];
pixel = r*cpccpc + g*cpc + b;
data[ofs] = ctx->colors[pixel].pixel;
/* calc error */
rer = err[x] - r*dr;
ger = err[x+1] - g*dg;
ber = err[x+2] - b*db;
/* distribute error */
err[x+3*1]+=(rer*7)/16;
err[x+1+3*1]+=(ger*7)/16;
err[x+2+3*1]+=(ber*7)/16;
nerr[x]+=(rer*5)/16;
nerr[x]+=(ger*5)/16;
nerr[x]+=(ber*5)/16;
if (x>0) {
nerr[x-3*1]+=(rer*3)/16;
nerr[x-(3*1)+1]+=(ger*3)/16;
nerr[x-(3*1)+2]+=(ber*3)/16;
}
nerr[x+3*1]+=rer/16;
nerr[x+1+3*1]+=ger/16;
nerr[x+2+3*1]+=ber/16;
}
/* skip to next line */
terr = err;
err = nerr;
nerr = terr;
ofs += ximg->image->bytes_per_line - image->width;
}
free(err); free(err);
free(nerr); free(nerr);
} }
ximg->image->data = (char*)data;
return ximg; return ximg;
} }
@@ -621,9 +630,6 @@ image2StandardPseudoColor(RContext *ctx, RImage *image)
pixel = r + g + b; pixel = r + g + b;
data[ofs] = base_pixel + pixel; data[ofs] = base_pixel + pixel;
/*
XPutPixel(ximg->image, x, y, pixel+base_pixel);
*/
/* calc error */ /* calc error */
rer = err[x] - (ctx->colors[pixel].red>>8); rer = err[x] - (ctx->colors[pixel].red>>8);
@@ -832,11 +838,16 @@ RConvertImage(RContext *context, RImage *image, Pixmap *pixmap)
} else if (context->vclass == PseudoColor } else if (context->vclass == PseudoColor
|| context->vclass == StaticColor) { || context->vclass == StaticColor) {
#ifdef BENCH
cycle_bench(1);
#endif
if (context->attribs->standard_colormap_mode != RIgnoreStdColormap) if (context->attribs->standard_colormap_mode != RIgnoreStdColormap)
ximg = image2StandardPseudoColor(context, image); ximg = image2StandardPseudoColor(context, image);
else else
ximg = image2PseudoColor(context, image); ximg = image2PseudoColor(context, image);
#ifdef BENCH
cycle_bench(0);
#endif
} else if (context->vclass == GrayScale || context->vclass == StaticGray) { } else if (context->vclass == GrayScale || context->vclass == StaticGray) {
ximg = image2GrayScale(context, image); ximg = image2GrayScale(context, image);

View File

@@ -250,13 +250,13 @@ renderDGradient(unsigned width, unsigned height, int r0, int g0, int b0,
return NULL; return NULL;
} }
a = (((width - 1)<<16) / (height - 1)) * 3; a = (((width - 1)<<16) / (height - 1))*3;
width *= 3; width *= 3;
/* copy the first line to the other lines with corresponding offset */ /* copy the first line to the other lines with corresponding offset */
for (i=0, j=0, offset = 0; i<height; i++, j+= width) { for (i=0, j=0, offset = 0; i<height; i++, j+= width) {
offset += a; offset += a;
memcpy(&(image->data[j]), &(tmp->data[(offset>>16)]), width); memcpy(&(image->data[j]), &(tmp->data[(offset>>16)]), width);
} }
RDestroyImage(tmp); RDestroyImage(tmp);
return image; return image;

View File

@@ -142,6 +142,7 @@ typedef struct RContext {
int ncolors; /* total number of colors we can use */ int ncolors; /* total number of colors we can use */
XColor *colors; /* internal colormap */ XColor *colors; /* internal colormap */
unsigned long *pixels; /* RContext->colors[].pixel */
struct { struct {
unsigned int use_shared_pixmap:1; unsigned int use_shared_pixmap:1;

View File

@@ -21,26 +21,30 @@
#include <config.h> #include <config.h>
#ifdef ASM_X86
#ifdef ASM_X86_MMX #ifdef ASM_X86_MMX
int int
x86_check_mmx() x86_check_mmx()
{ {
static int result = 1; static int result = -1;
return 1;
if (result >= 0) if (result >= 0)
return result; return result;
result = 0; result = 0;
#if 0
asm volatile asm volatile
("pushfl \n" // check whether cpuid supported ("pushfl \n" // check whether cpuid supported
"pop %%eax \n" "pop %%eax \n"
"movl %%eax, %%ebx \n" "movl %%eax, %%ebx \n"
"xorl 1<<21, %%eax \n" "xorl 1<<21, %%eax \n"
"pushfl %%eax \n" "pushl %%eax \n"
"popfd \n" "popfl \n"
"pushfl \n" "pushfl \n"
"popl %%eax \n" "popl %%eax \n"
"xorl %%eax, %%ebx \n" "xorl %%eax, %%ebx \n"
@@ -52,35 +56,46 @@ x86_check_mmx()
"cpuid \n" "cpuid \n"
"test 1<<23, %%edx \n" "test 1<<23, %%edx \n"
"jz .NotMMX \n" "jz .NotMMX \n"
"movl $1, %%0 \n" "movl $1, %0 \n"
".NotMMX: \n" ".NotMMX: \n"
".Bye: \n" ".Bye: \n"
".NotPentium: \n" ".NotPentium: \n"
: "=rm" (result)); : "=rm" (result));
#endif
return result; return result;
} }
/*
* TODO:
* 32/8 24/8 32/16 24/16 32/24 24/24
* PPlain YES YES
* MMX DONE
*
*/
void void
x86_TrueColor_32_to_16(unsigned char *image, // 8 x86_mmx_TrueColor_32_to_16(unsigned char *image, // 8
unsigned short *ximage, // 12 unsigned short *ximage, // 12
short *err, // 16 short *err, // 16
short *nerr, // 20 short *nerr, // 20
short *rtable, // 24 short *rtable, // 24
short *gtable, // 28 short *gtable, // 28
short *btable, // 32 short *btable, // 32
int dr, // 36 int dr, // 36
int dg, // 40 int dg, // 40
int db, // 44 int db, // 44
unsigned int roffs, // 48 unsigned int roffs, // 48
unsigned int goffs, // 52 unsigned int goffs, // 52
unsigned int boffs, // 56 unsigned int boffs, // 56
int width, // 60 int width, // 60
int height, // 64 int height, // 64
int line_offset) // 68 int line_offset) // 68
{ {
/* /*
int x; //-4 int x; //-4
@@ -92,7 +107,8 @@ x86_TrueColor_32_to_16(unsigned char *image, // 8
asm volatile asm volatile
( (
"subl $64, %esp \n" // alloc some more stack "andl $-8, %ebp \n" // make it align
"subl $128, %esp \n" // alloc some more stack
"pusha \n" "pusha \n"
@@ -129,27 +145,27 @@ x86_TrueColor_32_to_16(unsigned char *image, // 8
// process 1 pixel / cycle, each component treated as 16bit // process 1 pixel / cycle, each component treated as 16bit
"movl 8(%ebp), %esi \n" // esi = image->data "movl 8(%ebp), %esi \n" // esi = image->data
".LoopY: \n" ".LoopYa: \n"
"movl 60(%ebp), %eax \n" "movl 60(%ebp), %eax \n"
"movl %eax, -4(%ebp) \n" // x = width "movl %eax, -4(%ebp) \n" // x = width
"movl 64(%ebp), %eax \n" "movl 64(%ebp), %eax \n"
"decl %eax \n" // y-- "decl %eax \n" // y--
"movl %eax, 64(%ebp) \n" "movl %eax, 64(%ebp) \n"
"js .End \n" // if y < 0, goto end "js .Enda \n" // if y < 0, goto end
"andl $1, %eax \n" "andl $1, %eax \n"
"jz .LoopY_1 \n" // if (y&1) goto LoopY_1 "jz .LoopY_1a \n" // if (y&1) goto LoopY_1
".LoopY_0: \n" ".LoopY_0a: \n"
"movl 16(%ebp), %ebx \n" // ebx = err "movl 16(%ebp), %ebx \n" // ebx = err
"movl %ebx, -36(%ebp) \n" // [-36] = err "movl %ebx, -36(%ebp) \n" // [-36] = err
"movl 20(%ebp), %eax \n" // "movl 20(%ebp), %eax \n" //
"movl %eax, -32(%ebp) \n" // [-32] = nerr "movl %eax, -32(%ebp) \n" // [-32] = nerr
"jmp .LoopX \n" "jmp .LoopXa \n"
".LoopY_1: \n" ".LoopY_1a: \n"
"movl 20(%ebp), %ebx \n" // ebx = nerr "movl 20(%ebp), %ebx \n" // ebx = nerr
"movl %ebx, -36(%ebp) \n" // [-36] = nerr "movl %ebx, -36(%ebp) \n" // [-36] = nerr
@@ -157,7 +173,7 @@ x86_TrueColor_32_to_16(unsigned char *image, // 8
"movl %eax, -32(%ebp) \n" // [-32] = eerr "movl %eax, -32(%ebp) \n" // [-32] = eerr
".LoopX: \n" ".LoopXa: \n"
// calculate errors and pixel components // calculate errors and pixel components
@@ -261,7 +277,7 @@ x86_TrueColor_32_to_16(unsigned char *image, // 8
"decl -4(%ebp) \n" // x-- "decl -4(%ebp) \n" // x--
"jnz .LoopX \n" // if x>0, goto .LoopX "jnz .LoopXa \n" // if x>0, goto .LoopX
// depend on edx // depend on edx
@@ -269,9 +285,9 @@ x86_TrueColor_32_to_16(unsigned char *image, // 8
"movl %edx, 12(%ebp) \n" "movl %edx, 12(%ebp) \n"
"jmp .LoopY \n" "jmp .LoopYa \n"
".End: \n" // THE END ".Enda: \n" // THE END
"emms \n" "emms \n"
@@ -280,6 +296,139 @@ x86_TrueColor_32_to_16(unsigned char *image, // 8
} }
#endif /* ASM_X86_MMX */ #endif /* ASM_X86_MMX */
/*
 * Disabled fragment: the C version of the 8bpp dithering loop (it mirrors
 * the loop of convertPseudoColor_to_8()).  It is only a function body
 * fragment and is never compiled.
 * NOTE(review): presumably kept as a guide for the assembly port in
 * x86_PseudoColor_32_to_8() that follows -- confirm, and drop it once the
 * port is finished.
 */
#if 0
/* convert and dither the image to XImage */
for (y=0; y<image->height; y++) {
nerr[0] = 0;
nerr[1] = 0;
nerr[2] = 0;
for (x=0; x<image->width*3; x+=3, ptr+=channels) {
/* reduce pixel */
pixel = *ptr + err[x];
if (pixel<0) pixel=0; else if (pixel>0xff) pixel=0xff;
r = rtable[pixel];
/* calc error */
rer = pixel - r*dr;
/* reduce pixel */
pixel = *(ptr+1) + err[x+1];
if (pixel<0) pixel=0; else if (pixel>0xff) pixel=0xff;
g = gtable[pixel];
/* calc error */
ger = pixel - g*dg;
/* reduce pixel */
pixel = *(ptr+2) + err[x+2];
if (pixel<0) pixel=0; else if (pixel>0xff) pixel=0xff;
b = btable[pixel];
/* calc error */
ber = pixel - b*db;
*optr++ = pixels[r*cpcpc + g*cpc + b];
/* distribute error */
r = (rer*3)/8;
g = (ger*3)/8;
b = (ber*3)/8;
/* x+1, y */
err[x+3*1]+=r;
err[x+1+3*1]+=g;
err[x+2+3*1]+=b;
/* x, y+1 */
nerr[x]+=r;
nerr[x+1]+=g;
nerr[x+2]+=b;
/* x+1, y+1 */
nerr[x+3*1]=rer-2*r;
nerr[x+1+3*1]=ger-2*g;
nerr[x+2+3*1]=ber-2*b;
}
/* skip to next line */
terr = err;
err = nerr;
nerr = terr;
optr += ximg->image->bytes_per_line - image->width;
}
}
#endif
/*
 * Unfinished x86 (32 bit) assembly port of the 8bpp pseudo-colour
 * dithering loop.
 *
 * Only the per-row prologue is present: it reloads the column counter,
 * decrements the row counter and records which of err/nerr plays the
 * "current" and "next" role for this row.  The per-pixel part at .LoopXb
 * fetches a single pixel and then falls through to .Endb, so nothing is
 * written to ximage yet.
 *
 * The number after each parameter is the %ebp-relative offset the asm
 * uses to address it.
 *
 * NOTE(review): the row counter is read from 60(%ebp) (height) but the
 * decremented value is stored to 64(%ebp), the line_offset slot.  In
 * x86_mmx_TrueColor_32_to_16 height lives at 64, so this looks like a
 * leftover offset -- confirm before completing the loop.
 * NOTE(review): "movl $0, -32(%ebp)" overwrites the pointer that was just
 * saved in [-32]; it does not clear the cells that pointer refers to,
 * which is what its comment describes -- confirm the intent.
 * NOTE(review): %ebp is modified by the andl before the arguments are
 * addressed through it, and no instruction in this block gives back the
 * 128 bytes taken from %esp -- verify against the compiler-generated
 * prologue/epilogue.
 */
void
x86_PseudoColor_32_to_8(unsigned char *image, // 8(%ebp)
unsigned char *ximage, // 12(%ebp)
char *err, // 16(%ebp)
char *nerr, // 20(%ebp)
short *rtable, // 24(%ebp)
short *gtable, // 28(%ebp)
short *btable, // 32(%ebp)
int dr, // 36(%ebp)
int dg, // 40(%ebp)
int db, // 44(%ebp)
unsigned long *pixels, // 48(%ebp)
int cpc, // 52(%ebp)
int width, // 56(%ebp)
int height, // 60(%ebp)
int line_offset) // 64(%ebp)
{
asm volatile
(
"andl $-8, %ebp \n" // clear the low 3 bits of ebp
"subl $128, %esp \n" // alloc some stack space
"pusha \n" // save all general purpose registers
// process 1 pixel / cycle, each component treated as 16bit
"movl 8(%ebp), %esi \n" // esi = image->data
".LoopYb: \n"
"movl 56(%ebp), %eax \n"
"movl %eax, -4(%ebp) \n" // x = width
"movl 60(%ebp), %eax \n" // eax = height
"decl %eax \n" // y--
"movl %eax, 64(%ebp) \n" // NOTE(review): stores into the line_offset slot, not 60(%ebp)
"js .Endb \n" // if y < 0, goto end
"andl $1, %eax \n"
"jz .LoopY_1b \n" // if (y&1) == 0 goto LoopY_1b
".LoopY_0b: \n" // odd y: err is current, nerr is next
"movl 16(%ebp), %ebx \n" // ebx = err
"movl %ebx, -36(%ebp) \n" // [-36] = err
"movl 20(%ebp), %eax \n" //
"movl %eax, -32(%ebp) \n" // [-32] = nerr
"movl $0, -32(%ebp) \n" // meant to init error of nerr[0] to 0; as written it zeroes [-32] itself
"jmp .LoopXb \n"
".LoopY_1b: \n" // even y: the two rows swap roles
"movl 20(%ebp), %ebx \n" // ebx = nerr
"movl %ebx, -36(%ebp) \n" // [-36] = nerr
"movl 16(%ebp), %eax \n" //
"movl %eax, -32(%ebp) \n" // [-32] = err
"movl $0, -32(%ebp) \n" // meant to init error of nerr[0] to 0; as written it zeroes [-32] itself
".LoopXb: \n"
"movl (%esi), %edx \n" // fetch a pixel
// "movl \n"
".Endb: \n"
"popa \n" // restore the registers saved by pusha
);
}
#endif /* ASM_X86 */