1
0
mirror of https://github.com/gryf/wmaker.git synced 2025-12-19 12:28:22 +01:00

optimized code for 8bpp/PseudoColor

This commit is contained in:
kojima
2000-01-19 17:47:35 +00:00
parent ec5f3a294e
commit 0780ab82fe
6 changed files with 331 additions and 144 deletions

View File

@@ -1,4 +1,4 @@
# Makefile.in generated automatically by automake 1.4 from Makefile.am # Makefile.in generated automatically by automake 1.4a from Makefile.am
# Copyright (C) 1994, 1995-8, 1999 Free Software Foundation, Inc. # Copyright (C) 1994, 1995-8, 1999 Free Software Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation # This Makefile.in is free software; the Free Software Foundation
@@ -46,9 +46,10 @@ AUTOMAKE = @AUTOMAKE@
AUTOHEADER = @AUTOHEADER@ AUTOHEADER = @AUTOHEADER@
INSTALL = @INSTALL@ INSTALL = @INSTALL@
INSTALL_PROGRAM = @INSTALL_PROGRAM@ $(AM_INSTALL_PROGRAM_FLAGS) INSTALL_PROGRAM = @INSTALL_PROGRAM@
INSTALL_DATA = @INSTALL_DATA@ INSTALL_DATA = @INSTALL_DATA@
INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_SCRIPT = @INSTALL_SCRIPT@
INSTALL_STRIP_FLAG =
transform = @program_transform_name@ transform = @program_transform_name@
NORMAL_INSTALL = : NORMAL_INSTALL = :
@@ -375,7 +376,7 @@ uninstall: uninstall-am
all-am: Makefile $(LTLIBRARIES) $(PROGRAMS) $(SCRIPTS) $(HEADERS) all-am: Makefile $(LTLIBRARIES) $(PROGRAMS) $(SCRIPTS) $(HEADERS)
all-redirect: all-am all-redirect: all-am
install-strip: install-strip:
$(MAKE) $(AM_MAKEFLAGS) AM_INSTALL_PROGRAM_FLAGS=-s install $(MAKE) $(AM_MAKEFLAGS) INSTALL_STRIP_FLAG=-s install
installdirs: installdirs:
$(mkinstalldirs) $(DESTDIR)$(libdir) $(DESTDIR)$(bindir) \ $(mkinstalldirs) $(DESTDIR)$(libdir) $(DESTDIR)$(bindir) \
$(DESTDIR)$(includedir) $(DESTDIR)$(includedir)

View File

@@ -67,8 +67,7 @@ static RContextAttributes DEFAULT_CONTEXT_ATTRIBS = {
* allocate colors according to colors_per_channel * allocate colors according to colors_per_channel
* *
* best/default: * best/default:
* if there's a std colormap defined * if there's a std colormap defined then use it
then use it
* *
* else * else
* create a std colormap and set it * create a std colormap and set it
@@ -113,6 +112,17 @@ allocateStandardPseudoColor(RContext *ctx, XStandardColormap *stdcmap)
return False; return False;
} }
ctx->pixels = malloc(sizeof(unsigned long)*ctx->ncolors);
if (!ctx->pixels) {
free(ctx->colors);
ctx->colors = NULL;
RErrorCode = RERR_NOMEMORY;
return False;
}
#define calc(max,mult) (((i / stdcmap->mult) % \ #define calc(max,mult) (((i / stdcmap->mult) % \
(stdcmap->max + 1)) * 65535) / stdcmap->max (stdcmap->max + 1)) * 65535) / stdcmap->max
@@ -122,6 +132,8 @@ allocateStandardPseudoColor(RContext *ctx, XStandardColormap *stdcmap)
ctx->colors[i].red = calc(red_max, red_mult); ctx->colors[i].red = calc(red_max, red_mult);
ctx->colors[i].green = calc(green_max, green_mult); ctx->colors[i].green = calc(green_max, green_mult);
ctx->colors[i].blue = calc(blue_max, blue_mult); ctx->colors[i].blue = calc(blue_max, blue_mult);
ctx->pixels[i] = ctx->colors[i].pixel;
} }
#undef calc #undef calc
@@ -177,6 +189,14 @@ allocatePseudoColor(RContext *ctx)
RErrorCode = RERR_NOMEMORY; RErrorCode = RERR_NOMEMORY;
return False; return False;
} }
ctx->pixels = malloc(sizeof(unsigned long)*ncolors);
if (!ctx->pixels) {
free(colors);
RErrorCode = RERR_NOMEMORY;
return False;
}
i=0; i=0;
if ((ctx->attribs->flags & RC_GammaCorrection) && ctx->attribs->rgamma > 0 if ((ctx->attribs->flags & RC_GammaCorrection) && ctx->attribs->rgamma > 0
@@ -276,6 +296,11 @@ allocatePseudoColor(RContext *ctx)
ctx->colors = colors; ctx->colors = colors;
ctx->ncolors = ncolors; ctx->ncolors = ncolors;
/* fill the pixels shortcut array */
for (i = 0; i < ncolors; i++) {
ctx->pixels[i] = ctx->colors[i].pixel;
}
return True; return True;
} }

View File

@@ -45,7 +45,7 @@ extern Pixmap R_CreateXImageMappedPixmap(RContext *context, RXImage *ximage);
extern int x86_check_mmx(); extern int x86_check_mmx();
extern void x86_TrueColor_32_to_16(unsigned char *image, extern void x86_mmx_TrueColor_32_to_16(unsigned char *image,
unsigned short *ximage, unsigned short *ximage,
short *err, short *nerr, short *err, short *nerr,
short *rtable, short *gtable, short *rtable, short *gtable,
@@ -311,7 +311,7 @@ image2TrueColor(RContext *ctx, RImage *image)
memset(err, 0, 8*(image->width+3)); memset(err, 0, 8*(image->width+3));
memset(nerr, 0, 8*(image->width+3)); memset(nerr, 0, 8*(image->width+3));
x86_TrueColor_32_to_16(image->data, x86_mmx_TrueColor_32_to_16(image->data,
(unsigned short*)ximg->image->data, (unsigned short*)ximg->image->data,
err+8, nerr+8, err+8, nerr+8,
rtable, gtable, btable, rtable, gtable, btable,
@@ -361,6 +361,82 @@ image2TrueColor(RContext *ctx, RImage *image)
/***************************************************************************/ /***************************************************************************/
/*
 * Convert an RGB/RGBA image to an 8 bit-per-pixel XImage, reducing each
 * channel through a lookup table and dithering with a three-neighbour
 * error diffusion: 3/8 of the error goes to the pixel on the right, 3/8 to
 * the pixel below, and the remainder to the pixel below-right.
 *
 * ximg    destination; one byte is written per pixel and rows are
 *         ximg->image->bytes_per_line bytes apart
 * image   source pixels, 4 bytes each for RRGBAFormat, otherwise 3
 * err     error carried into the row being converted, 3 entries per pixel;
 *         it is read before being written, so the caller must initialise it
 * nerr    scratch row collecting the error for the following row
 * rtable, gtable, btable
 *         map a clamped 0..255 channel value to its reduced colour index
 * dr, dg, db
 *         channel value represented by one index step
 * pixels  colormap pixel values, indexed by r*cpc*cpc + g*cpc + b
 * cpc     colours per channel
 *
 * Each error row needs at least 3*image->width entries (and never fewer
 * than 3, because the first pixel's slots of the next row are cleared
 * unconditionally).  The last pixel of a row has no right-hand neighbour,
 * so nothing is diffused to x+1 there; those slots are never read back, so
 * the output is unaffected and no access goes past 3*width entries.
 */
static void
convertPseudoColor_to_8(RXImage *ximg, RImage *image,
                        char *err, char *nerr,
                        const short *rtable,
                        const short *gtable,
                        const short *btable,
                        const int dr, const int dg, const int db,
                        unsigned long *pixels,
                        int cpc)
{
    /*
     * The error terms are signed.  Plain char may be unsigned on some
     * platforms, so the rows are accessed as signed char to keep negative
     * errors intact everywhere.
     */
    signed char *cur = (signed char *)err;
    signed char *next = (signed char *)nerr;
    signed char *swap;
    int x, y, r, g, b;
    int pixel;
    int rer, ger, ber;
    unsigned char *ptr = image->data;
    unsigned char *optr = (unsigned char *)ximg->image->data;
    const int channels = image->format == RRGBAFormat ? 4 : 3;
    const int cpcpc = cpc*cpc;
    const int rowlen = image->width*3;

    /* convert and dither the image to XImage */
    for (y = 0; y < image->height; y++) {
        /* the first pixel of the next row starts with no error; the other
         * slots are assigned (not accumulated) by the x+1,y+1 step below */
        next[0] = 0;
        next[1] = 0;
        next[2] = 0;

        for (x = 0; x < rowlen; x += 3, ptr += channels) {
            /* red: add carried error, clamp, reduce, compute new error */
            pixel = ptr[0] + cur[x];
            if (pixel < 0)
                pixel = 0;
            else if (pixel > 0xff)
                pixel = 0xff;
            r = rtable[pixel];
            rer = pixel - r*dr;

            /* green */
            pixel = ptr[1] + cur[x+1];
            if (pixel < 0)
                pixel = 0;
            else if (pixel > 0xff)
                pixel = 0xff;
            g = gtable[pixel];
            ger = pixel - g*dg;

            /* blue */
            pixel = ptr[2] + cur[x+2];
            if (pixel < 0)
                pixel = 0;
            else if (pixel > 0xff)
                pixel = 0xff;
            b = btable[pixel];
            ber = pixel - b*db;

            *optr++ = pixels[r*cpcpc + g*cpc + b];

            /* distribute error: r, g, b now hold the 3/8 shares */
            r = (rer*3)/8;
            g = (ger*3)/8;
            b = (ber*3)/8;

            /* x, y+1 */
            next[x] += r;
            next[x+1] += g;
            next[x+2] += b;

            if (x + 3 < rowlen) {
                /* x+1, y */
                cur[x+3] += r;
                cur[x+4] += g;
                cur[x+5] += b;
                /* x+1, y+1 gets whatever is left of the error */
                next[x+3] = rer - 2*r;
                next[x+4] = ger - 2*g;
                next[x+5] = ber - 2*b;
            }
        }

        /* skip to next line: the collected row becomes the current one */
        swap = cur;
        cur = next;
        next = swap;

        /* step over the padding at the end of the destination row */
        optr += ximg->image->bytes_per_line - image->width;
    }
}
static RXImage* static RXImage*
image2PseudoColor(RContext *ctx, RImage *image) image2PseudoColor(RContext *ctx, RImage *image)
@@ -375,10 +451,7 @@ image2PseudoColor(RContext *ctx, RImage *image)
const unsigned short bmask = rmask; const unsigned short bmask = rmask;
unsigned short *rtable, *gtable, *btable; unsigned short *rtable, *gtable, *btable;
const int cpccpc = cpc*cpc; const int cpccpc = cpc*cpc;
unsigned char *data;
int ofs;
int channels = image->format == RRGBAFormat ? 4 : 3; int channels = image->format == RRGBAFormat ? 4 : 3;
/*register unsigned char maxrgb = 0xff;*/
ximg = RCreateXImage(ctx, ctx->depth, image->width, image->height); ximg = RCreateXImage(ctx, ctx->depth, image->width, image->height);
if (!ximg) { if (!ximg) {
@@ -387,8 +460,6 @@ image2PseudoColor(RContext *ctx, RImage *image)
ptr = image->data; ptr = image->data;
data = (unsigned char *)ximg->image->data;
/* Tables are same at the moment because rmask==gmask==bmask. */ /* Tables are same at the moment because rmask==gmask==bmask. */
rtable = computeTable(rmask); rtable = computeTable(rmask);
gtable = computeTable(gmask); gtable = computeTable(gmask);
@@ -418,20 +489,18 @@ image2PseudoColor(RContext *ctx, RImage *image)
} }
} else { } else {
/* dither */ /* dither */
short *err; char *err;
short *nerr; char *nerr;
short *terr;
int rer, ger, ber;
const int dr=0xff/rmask; const int dr=0xff/rmask;
const int dg=0xff/gmask; const int dg=0xff/gmask;
const int db=0xff/bmask; const int db=0xff/bmask;
int i;
#ifdef DEBUG #ifdef DEBUG
printf("pseudo color dithering with %d colors per channel\n", cpc); printf("pseudo color dithering with %d colors per channel\n", cpc);
#endif #endif
err = (short*)malloc(3*(image->width+2)*sizeof(short)); err = malloc(3*(image->width+2));
nerr = (short*)malloc(3*(image->width+2)*sizeof(short)); nerr = malloc(3*(image->width+2));
if (!err || !nerr) { if (!err || !nerr) {
if (nerr) if (nerr)
free(nerr); free(nerr);
@@ -439,76 +508,16 @@ image2PseudoColor(RContext *ctx, RImage *image)
RDestroyXImage(ctx, ximg); RDestroyXImage(ctx, ximg);
return NULL; return NULL;
} }
for (x=0, i=0; x<image->width*3; x+=channels-3) { memset(err, 0, 3*(image->width+3));
err[i++] = ptr[x++]; memset(nerr, 0, 3*(image->width+3));
err[i++] = ptr[x++];
err[i++] = ptr[x++];
}
err[x++] = err[x++] = err[x++] = 0;
/* convert and dither the image to XImage */
for (ofs=0, y=0; y<image->height; y++) {
if (y<image->height-1) {
int x1;
for (x=0, x1=((y+1)*image->width)*channels;
x<image->width*3; x1+=channels-3) {
nerr[x++] = ptr[x1++];
nerr[x++] = ptr[x1++];
nerr[x++] = ptr[x1++];
}
/* last column */
x1-=channels;
nerr[x++] = ptr[x1++];
nerr[x++] = ptr[x1++];
nerr[x++] = ptr[x1++];
}
for (x=0; x<image->width*3; x+=3, ofs++) {
/* reduce pixel */
if (err[x]>0xff) err[x]=0xff; else if (err[x]<0) err[x]=0;
if (err[x+1]>0xff) err[x+1]=0xff; else if (err[x+1]<0) err[x+1]=0;
if (err[x+2]>0xff) err[x+2]=0xff; else if (err[x+2]<0) err[x+2]=0;
r = rtable[err[x]]; convertPseudoColor_to_8(ximg, image, err+4, nerr+4,
g = gtable[err[x+1]]; rtable, gtable, btable,
b = btable[err[x+2]]; dr, dg, db, ctx->pixels, cpc);
pixel = r*cpccpc + g*cpc + b;
data[ofs] = ctx->colors[pixel].pixel;
/* calc error */
rer = err[x] - r*dr;
ger = err[x+1] - g*dg;
ber = err[x+2] - b*db;
/* distribute error */
err[x+3*1]+=(rer*7)/16;
err[x+1+3*1]+=(ger*7)/16;
err[x+2+3*1]+=(ber*7)/16;
nerr[x]+=(rer*5)/16;
nerr[x]+=(ger*5)/16;
nerr[x]+=(ber*5)/16;
if (x>0) {
nerr[x-3*1]+=(rer*3)/16;
nerr[x-(3*1)+1]+=(ger*3)/16;
nerr[x-(3*1)+2]+=(ber*3)/16;
}
nerr[x+3*1]+=rer/16;
nerr[x+1+3*1]+=ger/16;
nerr[x+2+3*1]+=ber/16;
}
/* skip to next line */
terr = err;
err = nerr;
nerr = terr;
ofs += ximg->image->bytes_per_line - image->width;
}
free(err); free(err);
free(nerr); free(nerr);
} }
ximg->image->data = (char*)data;
return ximg; return ximg;
} }
@@ -621,9 +630,6 @@ image2StandardPseudoColor(RContext *ctx, RImage *image)
pixel = r + g + b; pixel = r + g + b;
data[ofs] = base_pixel + pixel; data[ofs] = base_pixel + pixel;
/*
XPutPixel(ximg->image, x, y, pixel+base_pixel);
*/
/* calc error */ /* calc error */
rer = err[x] - (ctx->colors[pixel].red>>8); rer = err[x] - (ctx->colors[pixel].red>>8);
@@ -832,11 +838,16 @@ RConvertImage(RContext *context, RImage *image, Pixmap *pixmap)
} else if (context->vclass == PseudoColor } else if (context->vclass == PseudoColor
|| context->vclass == StaticColor) { || context->vclass == StaticColor) {
#ifdef BENCH
cycle_bench(1);
#endif
if (context->attribs->standard_colormap_mode != RIgnoreStdColormap) if (context->attribs->standard_colormap_mode != RIgnoreStdColormap)
ximg = image2StandardPseudoColor(context, image); ximg = image2StandardPseudoColor(context, image);
else else
ximg = image2PseudoColor(context, image); ximg = image2PseudoColor(context, image);
#ifdef BENCH
cycle_bench(0);
#endif
} else if (context->vclass == GrayScale || context->vclass == StaticGray) { } else if (context->vclass == GrayScale || context->vclass == StaticGray) {
ximg = image2GrayScale(context, image); ximg = image2GrayScale(context, image);

View File

@@ -250,7 +250,7 @@ renderDGradient(unsigned width, unsigned height, int r0, int g0, int b0,
return NULL; return NULL;
} }
a = (((width - 1)<<16) / (height - 1)) * 3; a = (((width - 1)<<16) / (height - 1))*3;
width *= 3; width *= 3;
/* copy the first line to the other lines with corresponding offset */ /* copy the first line to the other lines with corresponding offset */

View File

@@ -142,6 +142,7 @@ typedef struct RContext {
int ncolors; /* total number of colors we can use */ int ncolors; /* total number of colors we can use */
XColor *colors; /* internal colormap */ XColor *colors; /* internal colormap */
unsigned long *pixels; /* RContext->colors[].pixel */
struct { struct {
unsigned int use_shared_pixmap:1; unsigned int use_shared_pixmap:1;

View File

@@ -21,26 +21,30 @@
#include <config.h> #include <config.h>
#ifdef ASM_X86
#ifdef ASM_X86_MMX #ifdef ASM_X86_MMX
int int
x86_check_mmx() x86_check_mmx()
{ {
static int result = 1; static int result = -1;
return 1;
if (result >= 0) if (result >= 0)
return result; return result;
result = 0; result = 0;
#if 0
asm volatile asm volatile
("pushfl \n" // check whether cpuid supported ("pushfl \n" // check whether cpuid supported
"pop %%eax \n" "pop %%eax \n"
"movl %%eax, %%ebx \n" "movl %%eax, %%ebx \n"
"xorl 1<<21, %%eax \n" "xorl 1<<21, %%eax \n"
"pushfl %%eax \n" "pushl %%eax \n"
"popfd \n" "popfl \n"
"pushfl \n" "pushfl \n"
"popl %%eax \n" "popl %%eax \n"
"xorl %%eax, %%ebx \n" "xorl %%eax, %%ebx \n"
@@ -52,20 +56,31 @@ x86_check_mmx()
"cpuid \n" "cpuid \n"
"test 1<<23, %%edx \n" "test 1<<23, %%edx \n"
"jz .NotMMX \n" "jz .NotMMX \n"
"movl $1, %%0 \n" "movl $1, %0 \n"
".NotMMX: \n" ".NotMMX: \n"
".Bye: \n" ".Bye: \n"
".NotPentium: \n" ".NotPentium: \n"
: "=rm" (result)); : "=rm" (result));
#endif
return result; return result;
} }
/*
* TODO:
* 32/8 24/8 32/16 24/16 32/24 24/24
* PPlain YES YES
* MMX DONE
*
*/
void void
x86_TrueColor_32_to_16(unsigned char *image, // 8 x86_mmx_TrueColor_32_to_16(unsigned char *image, // 8
unsigned short *ximage, // 12 unsigned short *ximage, // 12
short *err, // 16 short *err, // 16
short *nerr, // 20 short *nerr, // 20
@@ -92,7 +107,8 @@ x86_TrueColor_32_to_16(unsigned char *image, // 8
asm volatile asm volatile
( (
"subl $64, %esp \n" // alloc some more stack "andl $-8, %ebp \n" // make it align
"subl $128, %esp \n" // alloc some more stack
"pusha \n" "pusha \n"
@@ -129,27 +145,27 @@ x86_TrueColor_32_to_16(unsigned char *image, // 8
// process 1 pixel / cycle, each component treated as 16bit // process 1 pixel / cycle, each component treated as 16bit
"movl 8(%ebp), %esi \n" // esi = image->data "movl 8(%ebp), %esi \n" // esi = image->data
".LoopY: \n" ".LoopYa: \n"
"movl 60(%ebp), %eax \n" "movl 60(%ebp), %eax \n"
"movl %eax, -4(%ebp) \n" // x = width "movl %eax, -4(%ebp) \n" // x = width
"movl 64(%ebp), %eax \n" "movl 64(%ebp), %eax \n"
"decl %eax \n" // y-- "decl %eax \n" // y--
"movl %eax, 64(%ebp) \n" "movl %eax, 64(%ebp) \n"
"js .End \n" // if y < 0, goto end "js .Enda \n" // if y < 0, goto end
"andl $1, %eax \n" "andl $1, %eax \n"
"jz .LoopY_1 \n" // if (y&1) goto LoopY_1 "jz .LoopY_1a \n" // if (y&1) goto LoopY_1
".LoopY_0: \n" ".LoopY_0a: \n"
"movl 16(%ebp), %ebx \n" // ebx = err "movl 16(%ebp), %ebx \n" // ebx = err
"movl %ebx, -36(%ebp) \n" // [-36] = err "movl %ebx, -36(%ebp) \n" // [-36] = err
"movl 20(%ebp), %eax \n" // "movl 20(%ebp), %eax \n" //
"movl %eax, -32(%ebp) \n" // [-32] = nerr "movl %eax, -32(%ebp) \n" // [-32] = nerr
"jmp .LoopX \n" "jmp .LoopXa \n"
".LoopY_1: \n" ".LoopY_1a: \n"
"movl 20(%ebp), %ebx \n" // ebx = nerr "movl 20(%ebp), %ebx \n" // ebx = nerr
"movl %ebx, -36(%ebp) \n" // [-36] = nerr "movl %ebx, -36(%ebp) \n" // [-36] = nerr
@@ -157,7 +173,7 @@ x86_TrueColor_32_to_16(unsigned char *image, // 8
"movl %eax, -32(%ebp) \n" // [-32] = eerr "movl %eax, -32(%ebp) \n" // [-32] = eerr
".LoopX: \n" ".LoopXa: \n"
// calculate errors and pixel components // calculate errors and pixel components
@@ -261,7 +277,7 @@ x86_TrueColor_32_to_16(unsigned char *image, // 8
"decl -4(%ebp) \n" // x-- "decl -4(%ebp) \n" // x--
"jnz .LoopX \n" // if x>0, goto .LoopX "jnz .LoopXa \n" // if x>0, goto .LoopX
// depend on edx // depend on edx
@@ -269,9 +285,9 @@ x86_TrueColor_32_to_16(unsigned char *image, // 8
"movl %edx, 12(%ebp) \n" "movl %edx, 12(%ebp) \n"
"jmp .LoopY \n" "jmp .LoopYa \n"
".End: \n" // THE END ".Enda: \n" // THE END
"emms \n" "emms \n"
@@ -280,6 +296,139 @@ x86_TrueColor_32_to_16(unsigned char *image, // 8
} }
#endif /* ASM_X86_MMX */ #endif /* ASM_X86_MMX */
/*
 * Disabled C fragment: the per-row / per-pixel dithering loop for 8bpp
 * output.  For each channel the carried error is added, the value is
 * clamped to 0..255 and reduced through a table; 3/8 of the resulting
 * error goes to the pixel on the right, 3/8 to the pixel below and the
 * remainder to the pixel below-right.
 *
 * The fragment uses variables (ptr, optr, err, nerr, pixels, ...) that it
 * does not declare and ends with a brace that it does not open, so it
 * cannot simply be enabled.
 * NOTE(review): presumably kept as the C reference for the assembly
 * routine that follows -- confirm, or remove it.
 */
#if 0
/* convert and dither the image to XImage */
for (y=0; y<image->height; y++) {
nerr[0] = 0;
nerr[1] = 0;
nerr[2] = 0;
for (x=0; x<image->width*3; x+=3, ptr+=channels) {
/* reduce pixel */
pixel = *ptr + err[x];
if (pixel<0) pixel=0; else if (pixel>0xff) pixel=0xff;
r = rtable[pixel];
/* calc error */
rer = pixel - r*dr;
/* reduce pixel */
pixel = *(ptr+1) + err[x+1];
if (pixel<0) pixel=0; else if (pixel>0xff) pixel=0xff;
g = gtable[pixel];
/* calc error */
ger = pixel - g*dg;
/* reduce pixel */
pixel = *(ptr+2) + err[x+2];
if (pixel<0) pixel=0; else if (pixel>0xff) pixel=0xff;
b = btable[pixel];
/* calc error */
ber = pixel - b*db;
*optr++ = pixels[r*cpcpc + g*cpc + b];
/* distribute error */
r = (rer*3)/8;
g = (ger*3)/8;
b = (ber*3)/8;
/* x+1, y */
err[x+3*1]+=r;
err[x+1+3*1]+=g;
err[x+2+3*1]+=b;
/* x, y+1 */
nerr[x]+=r;
nerr[x+1]+=g;
nerr[x+2]+=b;
/* x+1, y+1 */
nerr[x+3*1]=rer-2*r;
nerr[x+1+3*1]=ger-2*g;
nerr[x+2+3*1]=ber-2*b;
}
/* skip to next line */
terr = err;
err = nerr;
nerr = terr;
optr += ximg->image->bytes_per_line - image->width;
}
}
#endif
/*
 * x86 assembly routine for converting 32bpp image data to 8bpp
 * PseudoColor.  The visible body is unfinished: it contains only the
 * per-row setup and a single pixel fetch, after which control falls
 * through to .Endb; nothing is written to ximage.
 *
 * The number after each parameter is the %ebp-relative offset at which
 * the assembly expects to find that argument.
 */
void
x86_PseudoColor_32_to_8(unsigned char *image, // 8
unsigned char *ximage, // 12
char *err, // 16
char *nerr, // 20
short *rtable, // 24
short *gtable, // 28
short *btable, // 32
int dr, // 36
int dg, // 40
int db, // 44
unsigned long *pixels, // 48
int cpc, // 52
int width, // 56
int height, // 60
int line_offset) // 64
{
asm volatile
(
// NOTE(review): this clears the low 3 bits of %ebp itself; if %ebp was
// not already 8-byte aligned, every N(%ebp) argument offset used below
// shifts, and %ebp is not restored afterwards -- confirm intent
"andl $-8, %ebp \n"
// NOTE(review): no matching addl to %esp is visible in this routine
"subl $128, %esp \n" // alloc some stack space
"pusha \n" // save the general purpose registers
// process 1 pixel / cycle, each component treated as 16bit
"movl 8(%ebp), %esi \n" // esi = image->data
".LoopYb: \n"
"movl 56(%ebp), %eax \n"
"movl %eax, -4(%ebp) \n" // x = width
"movl 60(%ebp), %eax \n" // eax = height
"decl %eax \n" // y--
// NOTE(review): the decremented counter is stored at offset 64, which
// the parameter list labels line_offset, while it was loaded from 60
// (height); looks like a leftover from a different argument layout
"movl %eax, 64(%ebp) \n"
"js .Endb \n" // if y < 0, goto end (flags still from decl)
"andl $1, %eax \n"
"jz .LoopY_1b \n" // if (y&1) == 0 goto LoopY_1b
".LoopY_0b: \n"
"movl 16(%ebp), %ebx \n" // ebx = err
"movl %ebx, -36(%ebp) \n" // [-36] = err
"movl 20(%ebp), %eax \n" // eax = nerr
"movl %eax, -32(%ebp) \n" // [-32] = nerr
// NOTE(review): meant to zero nerr[0], but this stores 0 into the
// [-32] slot itself, overwriting the pointer saved on the line above
"movl $0, -32(%ebp) \n" // init error of nerr[0] to 0
"jmp .LoopXb \n"
".LoopY_1b: \n"
"movl 20(%ebp), %ebx \n" // ebx = nerr
"movl %ebx, -36(%ebp) \n" // [-36] = nerr
"movl 16(%ebp), %eax \n" // eax = err
"movl %eax, -32(%ebp) \n" // [-32] = err
// NOTE(review): same issue as in .LoopY_0b -- clears the pointer slot
"movl $0, -32(%ebp) \n" // init error of nerr[0] to 0
".LoopXb: \n"
"movl (%esi), %edx \n" // fetch a pixel
// "movl \n"
// the pixel loop is not written yet: execution continues into .Endb
".Endb: \n"
"popa \n" // restore the registers saved by pusha
);
}
#endif /* ASM_X86 */