From a0ebed9cb2b8ca76f9ae8c5432206e19b0193f2f Mon Sep 17 00:00:00 2001
From: kojima <kojima>
Date: Wed, 16 Feb 2000 18:22:46 +0000
Subject: [PATCH] *** empty log message ***

---
 wrlib/Makefile.in    |   8 ++-
 wrlib/convert.c      |   7 +--
 wrlib/gradient.c     |  51 +++++++++-------
 wrlib/scale.c        |  24 +++++---
 wrlib/testgrad.c     |   8 ++-
 wrlib/x86_specific.c | 138 +++++++++++++++++++++++++++++++++++++++++--
 6 files changed, 195 insertions(+), 41 deletions(-)
diff --git a/wrlib/Makefile.in b/wrlib/Makefile.in
index f9d18924..2071b217 100644
--- a/wrlib/Makefile.in
+++ b/wrlib/Makefile.in
@@ -1,4 +1,4 @@
-# Makefile.in generated automatically by automake 1.4 from Makefile.am
+# Makefile.in generated automatically by automake 1.4a from Makefile.am
 
 # Copyright (C) 1994, 1995-8, 1999 Free Software Foundation, Inc.
 # This Makefile.in is free software; the Free Software Foundation
@@ -46,9 +46,10 @@ AUTOMAKE = @AUTOMAKE@
 AUTOHEADER = @AUTOHEADER@
 
 INSTALL = @INSTALL@
-INSTALL_PROGRAM = @INSTALL_PROGRAM@ $(AM_INSTALL_PROGRAM_FLAGS)
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
 INSTALL_DATA = @INSTALL_DATA@
 INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_FLAG =
 transform = @program_transform_name@
 
 NORMAL_INSTALL = :
@@ -89,6 +90,7 @@ XLFLAGS = @XLFLAGS@
 XLIBS = @XLIBS@
 X_EXTRA_LIBS = @X_EXTRA_LIBS@
 X_LIBRARY_PATH = @X_LIBRARY_PATH@
+supported_locales = @supported_locales@
 wprefsdir = @wprefsdir@
 
 AUTOMAKE_OPTIONS = no-dependencies
@@ -373,7 +375,7 @@ uninstall: uninstall-am
 all-am: Makefile $(LTLIBRARIES) $(PROGRAMS) $(SCRIPTS) $(HEADERS)
 all-redirect: all-am
 install-strip:
-	$(MAKE) $(AM_MAKEFLAGS) AM_INSTALL_PROGRAM_FLAGS=-s install
+	$(MAKE) $(AM_MAKEFLAGS) INSTALL_STRIP_FLAG=-s install
 installdirs:
 	$(mkinstalldirs)  $(DESTDIR)$(libdir) $(DESTDIR)$(bindir) \
 		$(DESTDIR)$(includedir)
diff --git a/wrlib/convert.c b/wrlib/convert.c
index 0a6ace14..805f8090 100644
--- a/wrlib/convert.c
+++ b/wrlib/convert.c
@@ -42,7 +42,7 @@ extern Pixmap R_CreateXImageMappedPixmap(RContext *context, RXImage *ximage);
 
 
 #ifdef ASM_X86
-extern void x86_PseudoColor_to_8(unsigned char *image,
+extern void x86_PseudoColor_32_to_8(unsigned char *image,
 				 unsigned char *ximage, 
 				 char *err, char *nerr,
 				 short *ctable,
@@ -307,7 +307,7 @@ image2TrueColor(RContext *ctx, RImage *image)
 #endif
 
 #ifdef ASM_X86_MMX
-	if (ctx->depth == 16 && image->format == RRGBAFormat 
+	if (ctx->depth == 16 && image->format == RRGBAFormat
 	    && x86_check_mmx()) {
 	    short *err;
 	    short *nerr;
@@ -324,7 +324,6 @@ image2TrueColor(RContext *ctx, RImage *image)
 	    memset(err, 0, 8*(image->width+3));
 	    memset(nerr, 0, 8*(image->width+3));
 
-	    puts("USING MMX");
 	    x86_mmx_TrueColor_32_to_16(image->data, 
 				       (unsigned short*)ximg->image->data, 
 				       err+8, nerr+8,
@@ -527,7 +526,7 @@ image2PseudoColor(RContext *ctx, RImage *image)
 	memset(nerr, 0, 4*(image->width+3));
 
 #ifdef ASM_X86
-	x86_PseudoColor_to_8(image->data, ximg->image->data,
+	x86_PseudoColor_32_to_8(image->data, ximg->image->data,
 			     err+4, nerr+4,
 			     rtable,
 			     dr, dg, db, ctx->pixels, cpc,
diff --git a/wrlib/gradient.c b/wrlib/gradient.c
index 838d7c0a..b4612eb4 100644
--- a/wrlib/gradient.c
+++ b/wrlib/gradient.c
@@ -149,6 +149,8 @@ renderHGradient(unsigned width, unsigned height, int r0, int g0, int b0,
     return image;
 }
 
+
+
 /*
  *----------------------------------------------------------------------
  * renderVGradient--
@@ -178,7 +180,7 @@ renderVGradient(unsigned width, unsigned height, int r0, int g0, int b0,
 	return NULL;
     }    
     iptr = (unsigned int*)ptr = image->data;
-    
+
     r = r0<<16;
     g = g0<<16;
     b = b0<<16;
@@ -186,22 +188,29 @@ renderVGradient(unsigned width, unsigned height, int r0, int g0, int b0,
     dr = ((rf-r0)<<16)/(int)height;
     dg = ((gf-g0)<<16)/(int)height;
     db = ((bf-b0)<<16)/(int)height;
-    
 
     for (i=0; i<height; i++) {
 	rr = r>>16;
 	gg = g>>16;
 	bb = b>>16;
-	for (j=0; j<width/4; j++) {
-	    *ptr++ = rr; *ptr++ = gg; *ptr++ = bb;
-	    *ptr++ = rr; *ptr++ = gg; *ptr++ = bb;
-	    *ptr++ = rr; *ptr++ = gg; *ptr++ = bb;
-	    *ptr++ = rr; *ptr++ = gg; *ptr++ = bb;
+	for (j=0; j<width/8; j++) {
+	    *(ptr++) = rr; *(ptr++) = gg; *(ptr++) = bb;
+	    *(ptr++) = rr; *(ptr++) = gg; *(ptr++) = bb;
+	    *(ptr++) = rr; *(ptr++) = gg; *(ptr++) = bb;
+	    *(ptr++) = rr; *(ptr++) = gg; *(ptr++) = bb;
+	    *(ptr++) = rr; *(ptr++) = gg; *(ptr++) = bb;
+	    *(ptr++) = rr; *(ptr++) = gg; *(ptr++) = bb;
+	    *(ptr++) = rr; *(ptr++) = gg; *(ptr++) = bb;
+	    *(ptr++) = rr; *(ptr++) = gg; *(ptr++) = bb;
 	}
-	switch (width%4) {
-	 case 3: *ptr++ = rr; *ptr++ = gg; *ptr++ = bb;
-	 case 2: *ptr++ = rr; *ptr++ = gg; *ptr++ = bb;
-	 case 1: *ptr++ = rr; *ptr++ = gg; *ptr++ = bb;
+	switch (width%8) {
+	 case 7: *(ptr++) = rr; *(ptr++) = gg; *(ptr++) = bb;
+	 case 6: *(ptr++) = rr; *(ptr++) = gg; *(ptr++) = bb;
+	 case 5: *(ptr++) = rr; *(ptr++) = gg; *(ptr++) = bb;
+	 case 4: *(ptr++) = rr; *(ptr++) = gg; *(ptr++) = bb;
+	 case 3: *(ptr++) = rr; *(ptr++) = gg; *(ptr++) = bb;
+	 case 2: *(ptr++) = rr; *(ptr++) = gg; *(ptr++) = bb;
+	 case 1: *(ptr++) = rr; *(ptr++) = gg; *(ptr++) = bb;
 	}
         r+=dr;
         g+=dg;
@@ -231,8 +240,8 @@ renderDGradient(unsigned width, unsigned height, int r0, int g0, int b0,
 		int rf, int gf, int bf)
 {
     RImage *image, *tmp;
-    int i, j, offset;
-    float a;
+    int j;
+    float a, offset;
     char *ptr;
 
     if (width == 1)
@@ -257,9 +266,9 @@ renderDGradient(unsigned width, unsigned height, int r0, int g0, int b0,
     width = width * 3;
 
     /* copy the first line to the other lines with corresponding offset */
-    for (i=0, j=0, offset=0; i<height; i++, j += width) {
-        offset = (int)(a*i+0.5)*3;
-	memcpy(&(image->data[j]), &ptr[offset], width);
+    for (j=0, offset=0.0; j<width*height; j += width) {
+	memcpy(&(image->data[j]), &ptr[3*(int)offset], width);
+        offset += a;
     }
 
     RDestroyImage(tmp);
@@ -431,8 +440,8 @@ static RImage*
 renderMDGradient(unsigned width, unsigned height, RColor **colors, int count)
 {
     RImage *image, *tmp;
-    float a;
-    int i, offset, j;
+    float a, offset;
+    int j;
     unsigned char *ptr;
 
     assert(count > 2);
@@ -470,9 +479,9 @@ renderMDGradient(unsigned width, unsigned height, RColor **colors, int count)
     width = width * 3;
 
     /* copy the first line to the other lines with corresponding offset */
-    for (i=0, j=0, offset=0; i<height; i++, j += width) {
-        offset = (int)(a*i+0.5)*3;
-	memcpy(&(image->data[j]), &ptr[offset], width);
+    for (j=0, offset=0; j<width*height; j += width) {
+	memcpy(&(image->data[j]), &ptr[3*(int)offset], width);
+        offset += a;
     }
     RDestroyImage(tmp);
     return image;
diff --git a/wrlib/scale.c b/wrlib/scale.c
index 03bc2349..97544ace 100644
--- a/wrlib/scale.c
+++ b/wrlib/scale.c
@@ -493,14 +493,19 @@ RSmoothScaleImage(RImage *src, unsigned new_width, unsigned new_height)
 
 
     for(k = 0; k < tmp->height; ++k) {
+	CONTRIB *pp;
+
 	sp = src->data + src->width*k*sch;
-	
+
 	for(i = 0; i < tmp->width; ++i) {
 	    rweight = gweight = bweight = 0.0;
+	    
+	    pp = contrib[i].p;
+	    
 	    for(j = 0; j < contrib[i].n; ++j) {
-		rweight += sp[contrib[i].p[j].pixel] * contrib[i].p[j].weight;
-		gweight += sp[contrib[i].p[j].pixel+1] * contrib[i].p[j].weight;
-		bweight += sp[contrib[i].p[j].pixel+2] * contrib[i].p[j].weight;
+		rweight += sp[pp[j].pixel] * pp[j].weight;
+		gweight += sp[pp[j].pixel+1] * pp[j].weight;
+		bweight += sp[pp[j].pixel+2] * pp[j].weight;
 	    }
 	    *p++ = CLAMP(rweight, 0, 255);
 	    *p++ = CLAMP(gweight, 0, 255);
@@ -570,6 +575,8 @@ RSmoothScaleImage(RImage *src, unsigned new_width, unsigned new_height)
     sp = malloc(tmp->height*3);
 
     for(k = 0; k < new_width; ++k) {
+	CONTRIB *pp;
+	
 	p = dst->data + k*3;
 
 	/* copy a column into a row */
@@ -587,10 +594,13 @@ RSmoothScaleImage(RImage *src, unsigned new_width, unsigned new_height)
 	}
 	for(i = 0; i < new_height; ++i) {
 	    rweight = gweight = bweight = 0.0;
+	    
+ 	    pp = contrib[i].p;
+	    
 	    for(j = 0; j < contrib[i].n; ++j) {
-		rweight += sp[contrib[i].p[j].pixel] * contrib[i].p[j].weight;
-		gweight += sp[contrib[i].p[j].pixel+1] * contrib[i].p[j].weight;
-		bweight += sp[contrib[i].p[j].pixel+2] * contrib[i].p[j].weight;
+		rweight += sp[pp[j].pixel] * pp[j].weight;
+		gweight += sp[pp[j].pixel+1] * pp[j].weight;
+		bweight += sp[pp[j].pixel+2] * pp[j].weight;
 	    }
 	    *p = CLAMP(rweight, 0, 255);
 	    *(p+1) = CLAMP(gweight, 0, 255);
diff --git a/wrlib/testgrad.c b/wrlib/testgrad.c
index b7b426ad..b30960b7 100644
--- a/wrlib/testgrad.c
+++ b/wrlib/testgrad.c
@@ -30,7 +30,7 @@ print_help()
     puts(" -v <vis-id>	visual id to use");
 }
 
-
+#include "bench.h"
 int main(int argc, char **argv)
 {
     RContextAttributes attr;
@@ -190,9 +190,15 @@ int main(int argc, char **argv)
     printf("average time per convertion %f sec\n", rt/i);
     printf("------------------------------------------\n");
 #else
+    cycle_bench(1);
     imgh = RRenderMultiGradient(250, 250, colors, RGRD_HORIZONTAL);
+    cycle_bench(0);
+    cycle_bench(1);
     imgv = RRenderMultiGradient(250, 250, colors, RGRD_VERTICAL);
+    cycle_bench(0);
+    cycle_bench(1);
     imgd = RRenderMultiGradient(250, 250, colors, RGRD_DIAGONAL);
+    cycle_bench(0);
     RConvertImage(ctx, imgh, &pix);
     XCopyArea(dpy, pix, win, ctx->copy_gc, 0, 0, 250, 250, 0, 0);
 
diff --git a/wrlib/x86_specific.c b/wrlib/x86_specific.c
index 5ff13db8..e4c08c43 100644
--- a/wrlib/x86_specific.c
+++ b/wrlib/x86_specific.c
@@ -37,7 +37,7 @@ x86_check_mmx()
     result = 0;
 
     asm volatile
-        ("pushal		\n" // please don't forget this in any asm
+        ("pushal		\n" // please dont forget this in any asm
          "pushfl		\n" // check whether cpuid supported
 	 "pop %%eax		\n"
 	 "movl %%eax, %%ebx	\n"
@@ -83,7 +83,6 @@ x86_check_mmx()
 
 
 
-
 void
 x86_mmx_TrueColor_32_to_16(unsigned char *image, // 8
 			   unsigned short *ximage, // 12
@@ -190,19 +189,25 @@ x86_mmx_TrueColor_32_to_16(unsigned char *image, // 8
 
 	 "movzwl -24(%ebp), %ecx	\n" // ecx = pixel.red
 	 "movl 24(%ebp), %edi		\n" // edi = rtable
+	 //agi
 	 "leal (%edi, %ecx, 2), %eax	\n" // eax = &rtable[pixel.red]
+	 // agi
 	 "movw (%eax), %dx		\n" // dx = rtable[pixel.red]
 	 "movw %dx, -16(%ebp)		\n" // save rr
 
 	 "movzwl -22(%ebp), %ecx	\n" // ecx = pixel.green
 	 "movl 28(%ebp), %edi		\n" // edi = gtable
+	 //agi
 	 "leal (%edi, %ecx, 2), %eax	\n" // eax = &gtable[pixel.green]
+	 //agi
 	 "movw (%eax), %dx		\n" // dx = gtable[pixel.green]
 	 "movw %dx, -14(%ebp)		\n" // save gg
 
 	 "movzwl -20(%ebp), %ecx	\n" // ecx = pixel.blue
 	 "movl 32(%ebp), %edi		\n" // ebx = btable
+	 //agi
 	 "leal (%edi, %ecx, 2), %eax	\n" // eax = &btable[pixel.blue]
+	 //agi
 	 "movw (%eax), %dx		\n" // dx = btable[pixel.blue]
 	 "movw %dx, -12(%ebp)		\n" // save bb
 
@@ -300,12 +305,127 @@ x86_mmx_TrueColor_32_to_16(unsigned char *image, // 8
 }
 
 
+
+
+
+
+void
+x86_mmx_TrueColor_24_to_16(unsigned char *image, // 8
+			   unsigned short *ximage, // 12
+			   short *err, // 16
+			   short *nerr, // 20
+			   short *rtable, // 24
+			   short *gtable, // 28
+			   short *btable, // 32
+			   int dr, // 36
+			   int dg, // 40
+			   int db, // 44
+			   unsigned int roffs, // 48
+			   unsigned int goffs, // 52
+			   unsigned int boffs, // 56
+			   int width, // 60
+			   int height, // 64
+			   int line_offset) // 68
+{
+    /*
+     Skeleton only: the loops below count but convert nothing yet.
+     Numbers after the parameters are their offsets from %ebp.
+     Locals, as offsets from %ebp:
+     int x; //-4
+     long long rrggbbaa;// -16   long long pixel; //-24  (both unused so far)
+     short *tmp_err; //-32       short *tmp_nerr; //-36
+     int w1; // -64   width / 4
+     int w2; // -68   width % 4
+     */
+
+    asm volatile
+	(
+	 "subl $128, %esp		\n" // reserve 128 more bytes of stack (no matching addl in this block)
+
+	 "pushal       			\n" // save all general registers
+	 
+	 "movl 60(%ebp), %eax		\n" // eax = width
+	 "movl %eax, %ebx		\n" // ebx = width
+	 "shrl $2, %eax			\n" // eax = width / 4
+	 "movl %eax, -64(%ebp)		\n" // w1 = width / 4
+	 "andl $3, %ebx			\n" // ebx = width % 4
+	 "movl %ebx, -68(%ebp)		\n" // w2 = width % 4
+
+	 
+".LoopYc:				\n" // top of the outer loop, driven by height
+	 "movl 60(%ebp), %eax		\n" // eax = width
+	 "movl %eax, -4(%ebp)		\n" // x = width
+
+	 "decl 64(%ebp)			\n" // height--
+	 "js .Endc			\n" // if height < 0 then end
+
+	 "movl 64(%ebp), %eax		\n" // eax = height (same slot as the decl above)
+	 "decl %eax			\n" // y-- (y is the height slot: second decrement this pass)
+	 "movl %eax, 64(%ebp)		\n" // NOTE(review): height drops by 2 per pass -- confirm intent
+	 "js .Endc			\n" // if y < 0, goto end
+	 "andl $1, %eax			\n" // eax = y & 1
+	 "jz .LoopY_1c			\n" // if (y&1) == 0 goto LoopY_1c
+
+".LoopY_0c:				\n" // reached when y is odd
+
+	 "movl 16(%ebp), %ebx		\n" // ebx = err
+	 "movl %ebx, -36(%ebp)		\n" // [-36] = err
+	 "movl 20(%ebp), %eax		\n" //
+	 "movl %eax, -32(%ebp)		\n" // [-32] = nerr
+
+	 "jmp .LoopX_1c			\n" // skips the x store below, so x stays = width
+
+".LoopY_1c:				\n" // reached when y is even
+
+	 "movl 20(%ebp), %ebx		\n" // ebx = nerr
+	 "movl %ebx, -36(%ebp)		\n" // [-36] = nerr
+	 "movl 16(%ebp), %eax		\n" //
+	 "movl %eax, -32(%ebp)		\n" // [-32] = err
+
+	 ".align 16			\n"
+
+	 "movl %eax, -4(%ebp)		\n" // x = eax; NOTE(review): eax holds err here, not w1
+".LoopX_1c:				\n" // loop meant for groups of 4 pixels
+	 "decl -4(%ebp)			\n" // x--
+	 "js .Xend1_c			\n" // if x < 0 then end
+	 
+	 // do conversion of 4 pixels (not implemented yet; only mm0 is loaded)
+	 "movq 16(%ebp), %mm0		\n" // mm0 = 8 bytes at 16(%ebp), i.e. the err and nerr pointer slots
+
+
+
+	 
+	 "jmp .LoopX_1c			\n"
+".Xend1_c:				\n"
+	 
+	 "movl -68(%ebp), %eax		\n" // eax = w2
+	 "movl %eax, -4(%ebp)		\n" // x = w2	 
+".LoopX_2c:				\n" // loop for the width % 4 leftover pixels
+	 "decl -4(%ebp)			\n" // x--
+	 "js .Xend2_c			\n" // if x < 0 then done
+	 // do conversion (not implemented yet)
+	 "jmp .LoopX_2c			\n"
+".Xend2_c:				\n"
+
+	 "movl -64(%ebp), %eax		\n" // eax = w1; overwritten at .LoopYc before any use
+	 "jmp .LoopYc			\n" // next pass of the height loop
+
+".Endc:					\n" // THE END
+	 
+	 "emms				\n" // clear MMX state so x87 code can run again
+
+	 "popal				\n" // restore the registers saved by pushal
+	 );
+}
+
+
+
 #endif /* ASM_X86_MMX */
 
 
 
 void
-x86_PseudoColor_to_8(unsigned char *image, // 8
+x86_PseudoColor_32_to_8(unsigned char *image, // 8
 		     unsigned char *ximage, // 12
 		     char *err, // 16
 		     char *nerr, // 20
@@ -364,7 +484,7 @@ x86_PseudoColor_to_8(unsigned char *image, // 8
 ".LoopY_0b:				\n"
 
 	 "movl 16(%ebp), %ebx		\n" // ebx = err
-//	 "movl %ebx, -36(%ebp)		\n" // [-36] = err
+//useless "movl %ebx, -36(%ebp)		\n" // [-36] = err
 	 "movl 20(%ebp), %ecx		\n" //
 	 "movl %ecx, -32(%ebp)		\n" // [-32] = nerr
 
@@ -375,7 +495,7 @@ x86_PseudoColor_to_8(unsigned char *image, // 8
 ".LoopY_1b:				\n"
 
 	 "movl 20(%ebp), %ebx		\n" // ebx = nerr
-//	 "movl %ebx, -36(%ebp)		\n" // [-36] = nerr
+//useless "movl %ebx, -36(%ebp)		\n" // [-36] = nerr
 	 "movl 16(%ebp), %ecx		\n" //
 	 "movl %ecx, -32(%ebp)		\n" // [-32] = err
 
@@ -404,7 +524,9 @@ x86_PseudoColor_to_8(unsigned char *image, // 8
 ".NEGRb:				\n"
 	 "xorw %dx, %dx			\n"
 ".OKRb:					\n"
+	 //partial reg
 	 "leal (%edi, %edx, 2), %ecx	\n" // ecx = &ctable[pixel.red]
+	 //agi
 	 "movl (%ecx), %eax		\n" // ax = ctable[pixel.red]
 	 "movw %ax, -12(%ebp)		\n" // save rr
 
@@ -448,7 +570,9 @@ x86_PseudoColor_to_8(unsigned char *image, // 8
 ".NEGGb:				\n"
 	 "xorw %dx, %dx			\n"
 ".OKGb:					\n"
+	 // partial reg
 	 "leal (%edi, %edx, 2), %ecx	\n" // ecx = &ctable[pixel.grn]
+	 //agi
 	 "movw (%ecx), %ax		\n" // ax = ctable[pixel.grn]
 	 "movw %ax, -16(%ebp)		\n" // save gg
 
@@ -493,7 +617,9 @@ x86_PseudoColor_to_8(unsigned char *image, // 8
 ".NEGBb:				\n"
 	 "xorw %dx, %dx			\n"
 ".OKBb:					\n"
+	 //partial reg
 	 "leal (%edi, %edx, 2), %ecx	\n" // ecx = &ctable[pixel.blu]
+	 //agi
 	 "movw (%ecx), %ax		\n" // ax = ctable[pixel.blu]
 	 "movw %ax, -20(%ebp)		\n" // save bb
 
@@ -532,7 +658,9 @@ x86_PseudoColor_to_8(unsigned char *image, // 8
 	 "addw -20(%ebp), %ax		\n" // ax = cpcpc*rr + cpc*gg + bb
 
 	 "movl 40(%ebp), %ecx		\n"
+	 //agi
 	 "leal (%ecx, %eax, 4), %edx	\n"
+	 //agi
 	 "movb (%edx), %cl		\n" // cl = pixels[ax]
 
 	 // store the pixel