From a0ebed9cb2b8ca76f9ae8c5432206e19b0193f2f Mon Sep 17 00:00:00 2001 From: kojima Date: Wed, 16 Feb 2000 18:22:46 +0000 Subject: [PATCH] *** empty log message *** --- wrlib/Makefile.in | 8 ++- wrlib/convert.c | 7 +-- wrlib/gradient.c | 51 +++++++++------- wrlib/scale.c | 24 +++++--- wrlib/testgrad.c | 8 ++- wrlib/x86_specific.c | 138 +++++++++++++++++++++++++++++++++++++++++-- 6 files changed, 195 insertions(+), 41 deletions(-) diff --git a/wrlib/Makefile.in b/wrlib/Makefile.in index f9d18924..2071b217 100644 --- a/wrlib/Makefile.in +++ b/wrlib/Makefile.in @@ -1,4 +1,4 @@ -# Makefile.in generated automatically by automake 1.4 from Makefile.am +# Makefile.in generated automatically by automake 1.4a from Makefile.am # Copyright (C) 1994, 1995-8, 1999 Free Software Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation @@ -46,9 +46,10 @@ AUTOMAKE = @AUTOMAKE@ AUTOHEADER = @AUTOHEADER@ INSTALL = @INSTALL@ -INSTALL_PROGRAM = @INSTALL_PROGRAM@ $(AM_INSTALL_PROGRAM_FLAGS) +INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_FLAG = transform = @program_transform_name@ NORMAL_INSTALL = : @@ -89,6 +90,7 @@ XLFLAGS = @XLFLAGS@ XLIBS = @XLIBS@ X_EXTRA_LIBS = @X_EXTRA_LIBS@ X_LIBRARY_PATH = @X_LIBRARY_PATH@ +supported_locales = @supported_locales@ wprefsdir = @wprefsdir@ AUTOMAKE_OPTIONS = no-dependencies @@ -373,7 +375,7 @@ uninstall: uninstall-am all-am: Makefile $(LTLIBRARIES) $(PROGRAMS) $(SCRIPTS) $(HEADERS) all-redirect: all-am install-strip: - $(MAKE) $(AM_MAKEFLAGS) AM_INSTALL_PROGRAM_FLAGS=-s install + $(MAKE) $(AM_MAKEFLAGS) INSTALL_STRIP_FLAG=-s install installdirs: $(mkinstalldirs) $(DESTDIR)$(libdir) $(DESTDIR)$(bindir) \ $(DESTDIR)$(includedir) diff --git a/wrlib/convert.c b/wrlib/convert.c index 0a6ace14..805f8090 100644 --- a/wrlib/convert.c +++ b/wrlib/convert.c @@ -42,7 +42,7 @@ extern Pixmap R_CreateXImageMappedPixmap(RContext *context, RXImage *ximage); #ifdef 
ASM_X86 -extern void x86_PseudoColor_to_8(unsigned char *image, +extern void x86_PseudoColor_32_to_8(unsigned char *image, unsigned char *ximage, char *err, char *nerr, short *ctable, @@ -307,7 +307,7 @@ image2TrueColor(RContext *ctx, RImage *image) #endif #ifdef ASM_X86_MMX - if (ctx->depth == 16 && image->format == RRGBAFormat + if (ctx->depth == 16 && image->format == RRGBAFormat && x86_check_mmx()) { short *err; short *nerr; @@ -324,7 +324,6 @@ image2TrueColor(RContext *ctx, RImage *image) memset(err, 0, 8*(image->width+3)); memset(nerr, 0, 8*(image->width+3)); - puts("USING MMX"); x86_mmx_TrueColor_32_to_16(image->data, (unsigned short*)ximg->image->data, err+8, nerr+8, @@ -527,7 +526,7 @@ image2PseudoColor(RContext *ctx, RImage *image) memset(nerr, 0, 4*(image->width+3)); #ifdef ASM_X86 - x86_PseudoColor_to_8(image->data, ximg->image->data, + x86_PseudoColor_32_to_8(image->data, ximg->image->data, err+4, nerr+4, rtable, dr, dg, db, ctx->pixels, cpc, diff --git a/wrlib/gradient.c b/wrlib/gradient.c index 838d7c0a..b4612eb4 100644 --- a/wrlib/gradient.c +++ b/wrlib/gradient.c @@ -149,6 +149,8 @@ renderHGradient(unsigned width, unsigned height, int r0, int g0, int b0, return image; } + + /* *---------------------------------------------------------------------- * renderVGradient-- @@ -178,7 +180,7 @@ renderVGradient(unsigned width, unsigned height, int r0, int g0, int b0, return NULL; } iptr = (unsigned int*)ptr = image->data; - + r = r0<<16; g = g0<<16; b = b0<<16; @@ -186,22 +188,29 @@ renderVGradient(unsigned width, unsigned height, int r0, int g0, int b0, dr = ((rf-r0)<<16)/(int)height; dg = ((gf-g0)<<16)/(int)height; db = ((bf-b0)<<16)/(int)height; - for (i=0; i>16; gg = g>>16; bb = b>>16; - for (j=0; jdata[j]), &ptr[offset], width); + for (j=0, offset=0.0; jdata[j]), &ptr[3*(int)offset], width); + offset += a; } RDestroyImage(tmp); @@ -431,8 +440,8 @@ static RImage* renderMDGradient(unsigned width, unsigned height, RColor **colors, int count) { RImage 
*image, *tmp; - float a; - int i, offset, j; + float a, offset; + int j; unsigned char *ptr; assert(count > 2); @@ -470,9 +479,9 @@ renderMDGradient(unsigned width, unsigned height, RColor **colors, int count) width = width * 3; /* copy the first line to the other lines with corresponding offset */ - for (i=0, j=0, offset=0; idata[j]), &ptr[offset], width); + for (j=0, offset=0; jdata[j]), &ptr[3*(int)offset], width); + offset += a; } RDestroyImage(tmp); return image; diff --git a/wrlib/scale.c b/wrlib/scale.c index 03bc2349..97544ace 100644 --- a/wrlib/scale.c +++ b/wrlib/scale.c @@ -493,14 +493,19 @@ RSmoothScaleImage(RImage *src, unsigned new_width, unsigned new_height) for(k = 0; k < tmp->height; ++k) { + CONTRIB *pp; + sp = src->data + src->width*k*sch; - + for(i = 0; i < tmp->width; ++i) { rweight = gweight = bweight = 0.0; + + pp = contrib[i].p; + for(j = 0; j < contrib[i].n; ++j) { - rweight += sp[contrib[i].p[j].pixel] * contrib[i].p[j].weight; - gweight += sp[contrib[i].p[j].pixel+1] * contrib[i].p[j].weight; - bweight += sp[contrib[i].p[j].pixel+2] * contrib[i].p[j].weight; + rweight += sp[pp[j].pixel] * pp[j].weight; + gweight += sp[pp[j].pixel+1] * pp[j].weight; + bweight += sp[pp[j].pixel+2] * pp[j].weight; } *p++ = CLAMP(rweight, 0, 255); *p++ = CLAMP(gweight, 0, 255); @@ -570,6 +575,8 @@ RSmoothScaleImage(RImage *src, unsigned new_width, unsigned new_height) sp = malloc(tmp->height*3); for(k = 0; k < new_width; ++k) { + CONTRIB *pp; + p = dst->data + k*3; /* copy a column into a row */ @@ -587,10 +594,13 @@ RSmoothScaleImage(RImage *src, unsigned new_width, unsigned new_height) } for(i = 0; i < new_height; ++i) { rweight = gweight = bweight = 0.0; + + pp = contrib[i].p; + for(j = 0; j < contrib[i].n; ++j) { - rweight += sp[contrib[i].p[j].pixel] * contrib[i].p[j].weight; - gweight += sp[contrib[i].p[j].pixel+1] * contrib[i].p[j].weight; - bweight += sp[contrib[i].p[j].pixel+2] * contrib[i].p[j].weight; + rweight += sp[pp[j].pixel] * pp[j].weight; + 
gweight += sp[pp[j].pixel+1] * pp[j].weight; + bweight += sp[pp[j].pixel+2] * pp[j].weight; } *p = CLAMP(rweight, 0, 255); *(p+1) = CLAMP(gweight, 0, 255); diff --git a/wrlib/testgrad.c b/wrlib/testgrad.c index b7b426ad..b30960b7 100644 --- a/wrlib/testgrad.c +++ b/wrlib/testgrad.c @@ -30,7 +30,7 @@ print_help() puts(" -v visual id to use"); } - +#include "bench.h" int main(int argc, char **argv) { RContextAttributes attr; @@ -190,9 +190,15 @@ int main(int argc, char **argv) printf("average time per convertion %f sec\n", rt/i); printf("------------------------------------------\n"); #else + cycle_bench(1); imgh = RRenderMultiGradient(250, 250, colors, RGRD_HORIZONTAL); + cycle_bench(0); + cycle_bench(1); imgv = RRenderMultiGradient(250, 250, colors, RGRD_VERTICAL); + cycle_bench(0); + cycle_bench(1); imgd = RRenderMultiGradient(250, 250, colors, RGRD_DIAGONAL); + cycle_bench(0); RConvertImage(ctx, imgh, &pix); XCopyArea(dpy, pix, win, ctx->copy_gc, 0, 0, 250, 250, 0, 0); diff --git a/wrlib/x86_specific.c b/wrlib/x86_specific.c index 5ff13db8..e4c08c43 100644 --- a/wrlib/x86_specific.c +++ b/wrlib/x86_specific.c @@ -37,7 +37,7 @@ x86_check_mmx() result = 0; asm volatile - ("pushal \n" // please don't forget this in any asm + ("pushal \n" // please dont forget this in any asm "pushfl \n" // check whether cpuid supported "pop %%eax \n" "movl %%eax, %%ebx \n" @@ -83,7 +83,6 @@ x86_check_mmx() - void x86_mmx_TrueColor_32_to_16(unsigned char *image, // 8 unsigned short *ximage, // 12 @@ -190,19 +189,25 @@ x86_mmx_TrueColor_32_to_16(unsigned char *image, // 8 "movzwl -24(%ebp), %ecx \n" // ecx = pixel.red "movl 24(%ebp), %edi \n" // edi = rtable + //agi "leal (%edi, %ecx, 2), %eax \n" // eax = &rtable[pixel.red] + // agi "movw (%eax), %dx \n" // dx = rtable[pixel.red] "movw %dx, -16(%ebp) \n" // save rr "movzwl -22(%ebp), %ecx \n" // ecx = pixel.green "movl 28(%ebp), %edi \n" // edi = gtable + //agi "leal (%edi, %ecx, 2), %eax \n" // eax = &gtable[pixel.green] + //agi "movw 
(%eax), %dx \n" // dx = gtable[pixel.green] "movw %dx, -14(%ebp) \n" // save gg "movzwl -20(%ebp), %ecx \n" // ecx = pixel.blue "movl 32(%ebp), %edi \n" // ebx = btable + //agi "leal (%edi, %ecx, 2), %eax \n" // eax = &btable[pixel.blue] + //agi "movw (%eax), %dx \n" // dx = btable[pixel.blue] "movw %dx, -12(%ebp) \n" // save bb @@ -300,12 +305,127 @@ x86_mmx_TrueColor_32_to_16(unsigned char *image, // 8 } + + + + +void +x86_mmx_TrueColor_24_to_16(unsigned char *image, // 8 + unsigned short *ximage, // 12 + short *err, // 16 + short *nerr, // 20 + short *rtable, // 24 + short *gtable, // 28 + short *btable, // 32 + int dr, // 36 + int dg, // 40 + int db, // 44 + unsigned int roffs, // 48 + unsigned int goffs, // 52 + unsigned int boffs, // 56 + int width, // 60 + int height, // 64 + int line_offset) // 68 +{ + /* + int x; //-4 + long long rrggbbaa;// -16 + long long pixel; //-24 + short *tmp_err; //-32 + short *tmp_nerr; //-36 + * + int w1; // -64 + int w2; // -68 + */ + + asm volatile + ( + "subl $128, %esp \n" // alloc some more stack + + "pushal \n" + + "movl 60(%ebp), %eax \n" // eax = width + "movl %eax, %ebx \n" + "shrl $2, %eax \n" + "movl %eax, -64(%ebp) \n" // w1 = width / 4 + "andl $3, %ebx \n" + "movl %ebx, -68(%ebp) \n" // w2 = width % 4 + + +".LoopYc: \n" + "movl 60(%ebp), %eax \n" + "movl %eax, -4(%ebp) \n" // x = width + + "decl 64(%ebp) \n" // height-- + "js .Endc \n" // if height < 0 then end + + "movl 64(%ebp), %eax \n" + "decl %eax \n" // y-- + "movl %eax, 64(%ebp) \n" + "js .Endc \n" // if y < 0, goto end + "andl $1, %eax \n" + "jz .LoopY_1c \n" // if (y&1) goto LoopY_1 + +".LoopY_0c: \n" + + "movl 16(%ebp), %ebx \n" // ebx = err + "movl %ebx, -36(%ebp) \n" // [-36] = err + "movl 20(%ebp), %eax \n" // + "movl %eax, -32(%ebp) \n" // [-32] = nerr + + "jmp .LoopX_1c \n" + +".LoopY_1c: \n" + + "movl 20(%ebp), %ebx \n" // ebx = nerr + "movl %ebx, -36(%ebp) \n" // [-36] = nerr + "movl 16(%ebp), %eax \n" // + "movl %eax, -32(%ebp) \n" // [-32] = eerr + 
+ ".align 16 \n" + + "movl %eax, -4(%ebp) \n" // x = w1 +".LoopX_1c: \n" + "decl -4(%ebp) \n" // x-- + "js .Xend1_c \n" // if x < 0 then end + + // do conversion of 4 pixels + "movq 16(%ebp), %mm0 \n" // mm0 = err + + + + + "jmp .LoopX_1c \n" +".Xend1_c: \n" + + "movl -68(%ebp), %eax \n" + "movl %eax, -4(%ebp) \n" // x = w2 +".LoopX_2c: \n" + "decl -4(%ebp) \n" // x-- + "js .Xend2_c \n" // + // do conversion + "jmp .LoopX_2c \n" +".Xend2_c: \n" + + "movl -64(%ebp), %eax \n" + "jmp .LoopYc \n" + +".Endc: \n" // THE END + + "emms \n" + + "popal \n" + ); +} + + + #endif /* ASM_X86_MMX */ void -x86_PseudoColor_to_8(unsigned char *image, // 8 +x86_PseudoColor_32_to_8(unsigned char *image, // 8 unsigned char *ximage, // 12 char *err, // 16 char *nerr, // 20 @@ -364,7 +484,7 @@ x86_PseudoColor_to_8(unsigned char *image, // 8 ".LoopY_0b: \n" "movl 16(%ebp), %ebx \n" // ebx = err -// "movl %ebx, -36(%ebp) \n" // [-36] = err +//useless "movl %ebx, -36(%ebp) \n" // [-36] = err "movl 20(%ebp), %ecx \n" // "movl %ecx, -32(%ebp) \n" // [-32] = nerr @@ -375,7 +495,7 @@ x86_PseudoColor_to_8(unsigned char *image, // 8 ".LoopY_1b: \n" "movl 20(%ebp), %ebx \n" // ebx = nerr -// "movl %ebx, -36(%ebp) \n" // [-36] = nerr +//useless "movl %ebx, -36(%ebp) \n" // [-36] = nerr "movl 16(%ebp), %ecx \n" // "movl %ecx, -32(%ebp) \n" // [-32] = err @@ -404,7 +524,9 @@ x86_PseudoColor_to_8(unsigned char *image, // 8 ".NEGRb: \n" "xorw %dx, %dx \n" ".OKRb: \n" + //partial reg "leal (%edi, %edx, 2), %ecx \n" // ecx = &ctable[pixel.red] + //agi "movl (%ecx), %eax \n" // ax = ctable[pixel.red] "movw %ax, -12(%ebp) \n" // save rr @@ -448,7 +570,9 @@ x86_PseudoColor_to_8(unsigned char *image, // 8 ".NEGGb: \n" "xorw %dx, %dx \n" ".OKGb: \n" + // partial reg "leal (%edi, %edx, 2), %ecx \n" // ecx = &ctable[pixel.grn] + //agi "movw (%ecx), %ax \n" // ax = ctable[pixel.grn] "movw %ax, -16(%ebp) \n" // save gg @@ -493,7 +617,9 @@ x86_PseudoColor_to_8(unsigned char *image, // 8 ".NEGBb: \n" "xorw %dx, %dx 
\n" ".OKBb: \n" + //partial reg "leal (%edi, %edx, 2), %ecx \n" // ecx = &ctable[pixel.blu] + //agi "movw (%ecx), %ax \n" // ax = ctable[pixel.blu] "movw %ax, -20(%ebp) \n" // save bb @@ -532,7 +658,9 @@ x86_PseudoColor_to_8(unsigned char *image, // 8 "addw -20(%ebp), %ax \n" // ax = cpcpc*rr + cpc*gg + bb "movl 40(%ebp), %ecx \n" + //agi "leal (%ecx, %eax, 4), %edx \n" + //agi "movb (%edx), %cl \n" // cl = pixels[ax] // store the pixel