diff --git a/WINGs/Extras/Makefile.am b/WINGs/Extras/Makefile.am index ca984ea8..ef2b9efd 100644 --- a/WINGs/Extras/Makefile.am +++ b/WINGs/Extras/Makefile.am @@ -20,8 +20,6 @@ libExtraWINGs_la_SOURCES = \ wtableview.h \ wtabledelegates.h -AM_CFLAGS = @NOSTRICTALIASING@ - INCLUDES = -I$(top_srcdir)/wrlib -I$(top_srcdir)/WINGs \ -DRESOURCE_PATH=\"$(datadir)/WINGs\" @HEADER_SEARCH_PATH@ -DDEBUG diff --git a/WINGs/Makefile.am b/WINGs/Makefile.am index 90770d0a..be417dbf 100644 --- a/WINGs/Makefile.am +++ b/WINGs/Makefile.am @@ -83,8 +83,7 @@ libWUtil_la_SOURCES = \ AM_CPPFLAGS = -DLOCALEDIR=\"$(NLSDIR)\" -DRESOURCE_PATH=\"$(datadir)/WINGs\" -DDEBUG - -AM_CFLAGS = @NOSTRICTALIASING@ +AM_CFLAGS = INCLUDES = -I$(top_srcdir)/WINGs/WINGs -I$(top_srcdir)/wrlib -I$(top_srcdir)/src \ @XFTFLAGS@ @HEADER_SEARCH_PATH@ diff --git a/WPrefs.app/Makefile.am b/WPrefs.app/Makefile.am index 393756b2..62c9fb3a 100644 --- a/WPrefs.app/Makefile.am +++ b/WPrefs.app/Makefile.am @@ -43,8 +43,7 @@ WPrefs_SOURCES = \ xmodifier.c AM_CPPFLAGS = -DLOCALEDIR=\"$(NLSDIR)\" -DRESOURCE_PATH=\"$(wpdatadir)\" - -AM_CFLAGS = @NOSTRICTALIASING@ +AM_CFLAGS = INCLUDES = -I$(top_srcdir)/wrlib -I$(top_srcdir)/WINGs @HEADER_SEARCH_PATH@ diff --git a/configure.ac b/configure.ac index 7f3ff79e..2faa9a56 100644 --- a/configure.ac +++ b/configure.ac @@ -218,60 +218,6 @@ AC_C_CONST AC_TYPE_SIGNAL - -dnl Compiler/architecture specific optimizations -dnl ============================================ - - -dnl GCC/as with MMX support -dnl ----------------------- - -# until we fix it, leave it disabled -asm_support=no -mmx_support=no - -check_for_mmx_support=yes -AC_ARG_ENABLE(mmx, AS_HELP_STRING([--disable-mmx], [disable compilation of MMX inline assembly]), - [if test x$enableval != xyes; then - check_for_mmx_support=no - fi]) - -if test "$ac_cv_prog_gcc" = yes -a "$check_for_mmx_support" = yes; then -case $host_cpu in -*i?86*) - - # gcc-3.3 or newer complains about some of our stuff without this - NOSTRICTALIASING="-fno-strict-aliasing" - - AC_CACHE_CHECK(whether gcc supports x86 inline asm, - ac_cv_c_inline_asm, - [AC_TRY_LINK(,[{int x; asm volatile("movl %%eax, %%ebx\n\t pushal\n\t popal":: - "m" (x),"m" (x),"m" (x),"m" (x),"m" (x),"m" (x), - "m" (x),"m" (x),"m" (x),"m" (x),"m" (x),"m" (x));}], - ac_cv_c_inline_asm=yes, - ac_cv_c_inline_asm=no)]) - - if test "x$ac_cv_c_inline_asm" = xyes; then - AC_DEFINE(ASM_X86, 1, [define if processor is x86 (normally detected by configure)]) - asm_support=yes - - AC_CACHE_CHECK(whether gcc supports MMX(tm) inline asm, - ac_cv_c_inline_mmx, - [AC_TRY_LINK(,[asm ("movq %mm0, %mm1");], - ac_cv_c_inline_mmx=yes, - ac_cv_c_inline_mmx=no)]) - - if test "x$ac_cv_c_inline_mmx" = xyes; then - AC_DEFINE(ASM_X86_MMX, 1, [define if processor is x86 with MMX(tm) support (normally autodetected by configure)]) - mmx_support=yes - fi - fi - ;; -esac -fi -AC_SUBST(NOSTRICTALIASING) - - dnl pkg-config dnl ========== dnl AC_ARG_VAR(PKGCONFIG, [pkg-config command]) @@ -1045,8 +991,6 @@ echo "Installation path prefix : $prefix" echo "Installation path for binaries : $_bindir" echo "Installation path for WPrefs.app : $wprefs_base_dir" | sed -e 's|\${prefix}|'"$prefix|" echo "Supported graphic format libraries : $supported_gfx" -echo "Use assembly routines for wrlib : $asm_support" -echo "Use inline MMX(tm) x86 assembly : $mmx_support" echo "Antialiased text support in WINGs : $xft" echo "Xinerama extension support : $xinerama" echo "XRandR extension support : $xrandr" diff --git a/src/Makefile.am b/src/Makefile.am index f883f77e..1fb54d4e 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -104,9 +104,7 @@ wmaker_SOURCES = \ EXTRA_wmaker_SOURCES = osdep_bsd.c osdep_darwin.c osdep_linux.c osdep_stub.c AM_CPPFLAGS = $(DFLAGS) -DLOCALEDIR=\"$(NLSDIR)\" - -AM_CFLAGS = @NOSTRICTALIASING@ - +AM_CFLAGS = INCLUDES = \ -I$(top_srcdir)/wrlib \ diff --git a/wrlib/Makefile.am b/wrlib/Makefile.am index 32323f6d..3cd70017 100644 --- a/wrlib/Makefile.am +++ b/wrlib/Makefile.am @@ -28,7 +28,6 @@ libwraster_la_SOURCES = \ xpixmap.c \ bench.h \ convert.c \ - x86_specific.c \ context.c \ misc.c \ scale.c \ @@ -47,14 +46,6 @@ libwraster_la_SOURCES = \ LTCOMPILE2=`echo $(LTCOMPILE) | sed -e s/-fomit-frame-pointer//` COMPILE2=`echo $(COMPILE) | sed -e s/-fomit-frame-pointer//` -# cant compile asm stuff with optimizations -x86_specific.lo: x86_specific.c - $(LTCOMPILE2) -O0 -c $< - -x86_specific.o: x86_specific.c - $(COMPILE2) -O0 -c $< - - INCLUDES = $(DFLAGS) @HEADER_SEARCH_PATH@ libwraster_la_LIBADD = @LIBRARY_SEARCH_PATH@ @GFXLIBS@ @XLIBS@ -lm diff --git a/wrlib/convert.c b/wrlib/convert.c index 139f946f..a05e3633 100644 --- a/wrlib/convert.c +++ b/wrlib/convert.c @@ -25,49 +25,19 @@ */ #include - #include #include #include #include #include - #include #include "wraster.h" #ifdef XSHM extern Pixmap R_CreateXImageMappedPixmap(RContext * context, RXImage * ximage); - #endif -#ifdef ASM_X86 -extern void x86_PseudoColor_32_to_8(unsigned char *image, - unsigned char *ximage, - char *err, char *nerr, - short *ctable, - int dr, int dg, int db, - unsigned long *pixels, - int cpc, int width, int height, int bytesPerPixel, int line_offset); -#endif /* ASM_X86 */ - -#ifdef ASM_X86_MMX - -extern int x86_check_mmx(); - -extern void x86_mmx_TrueColor_32_to_16(unsigned char *image, - unsigned short *ximage, - short *err, short *nerr, - const unsigned short *rtable, - const unsigned short *gtable, - const unsigned short *btable, - int dr, int dg, int db, - unsigned int roffs, - unsigned int goffs, - unsigned int boffs, int width, int height, int line_offset); - -#endif /* ASM_X86_MMX */ - #define NFREE(n) if (n) free(n) #define HAS_ALPHA(I) ((I)->format == RRGBAFormat) @@ -360,36 +330,6 @@ static RXImage *image2TrueColor(RContext * ctx, RImage * image) fputs("true color dither\n", stderr); #endif -#ifdef ASM_X86_MMX - if (ctx->depth == 16 && HAS_ALPHA(image) && x86_check_mmx()) { - short *err; - short *nerr; - - err = malloc(8 * (image->width + 3)); - nerr = malloc(8 * (image->width + 3)); - if (!err || !nerr) { - NFREE(err); - NFREE(nerr); - RErrorCode = RERR_NOMEMORY; - RDestroyXImage(ctx, ximg); - return NULL; - } - memset(err, 0, 8 * (image->width + 3)); - memset(nerr, 0, 8 * (image->width + 3)); - - x86_mmx_TrueColor_32_to_16(image->data, - (unsigned short *)ximg->image->data, - err + 8, nerr + 8, - rtable, gtable, btable, - dr, dg, db, - roffs, goffs, boffs, - image->width, image->height, - ximg->image->bytes_per_line - 2 * image->width); - - free(err); - free(nerr); - } else -#endif /* ASM_X86_MMX */ { signed char *err; signed char *nerr; @@ -575,7 +515,6 @@ static RXImage *image2PseudoColor(RContext * ctx, RImage * image) memset(err, 0, 4 * (image->width + 3)); memset(nerr, 0, 4 * (image->width + 3)); - /*#ifdef ASM_X86 */ convertPseudoColor_to_8(ximg, image, err + 4, nerr + 4, rtable, gtable, btable, dr, dg, db, ctx->pixels, cpc); diff --git a/wrlib/libwraster.map b/wrlib/libwraster.map index 9d05f2df..6282e2c4 100644 --- a/wrlib/libwraster.map +++ b/wrlib/libwraster.map @@ -93,9 +93,5 @@ LIBWRASTER3 # RSaveXPM # _wraster_change_filter # WRasterLibVersion -# x86_check_mmx -# x86_mmx_TrueColor_24_to_16 -# x86_mmx_TrueColor_32_to_16 -# x86_PseudoColor_32_to_8 *; }; diff --git a/wrlib/x86_specific.c b/wrlib/x86_specific.c deleted file mode 100644 index 85b4b12b..00000000 --- a/wrlib/x86_specific.c +++ /dev/null @@ -1,629 +0,0 @@ -/* x86_convert.c - convert RImage to XImage with x86 optimizations - * - * Raster graphics library - * - * Copyright (c) 2000-2003 Alfredo K. Kojima - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Library General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Library General Public License for more details. - * - * You should have received a copy of the GNU Library General Public - * License along with this library; if not, write to the Free - * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#include - -#ifdef ASM_X86 - -#ifdef ASM_X86_MMX - -int x86_check_mmx() -{ - static int result = -1; - - if (result >= 0) - return result; - - result = 0; - - asm volatile ( - "pushal \n\t" /* please dont forget this in any asm */ - "pushfl \n\t" /* check whether cpuid supported */ - "pop %%eax \n\t" - "movl %%eax, %%ebx \n\t" - "xorl $(1<<21), %%eax \n\t" - "pushl %%eax \n\t" - "popfl \n\t" - "pushfl \n\t" - "popl %%eax \n\t" - "xorl %%ebx, %%eax \n\t" - "andl $(1<<21), %%eax \n\t" - "jz .NotPentium \n\t" - "xorl %%eax, %%eax \n\t" /* no eax effect because of the movl below */ - /* except reseting flags. is it needed? */ - "movl $1, %%eax \n\t" - "cpuid \n\t" - "test $(1<<23), %%edx \n\t" - "jz .NotMMX \n\t" - "popal \n\t" /* popal needed because the address of */ - "movl $1, %0 \n\t" /* variable %0 may be kept in a register */ - "jmp .noPop \n\t" - ".NotMMX: \n\t" - ".NotPentium: \n\t" - "popal \n\t" - ".noPop: \n\t" - : "=m" (result) - ); - - return result; -} - -/* - * TODO: - * 32/8 24/8 32/16 24/16 32/24 24/24 - * PPlain YES YES - * MMX DONE - * - * - * - try to align stack (local variable space) into quadword boundary - */ -void -x86_mmx_TrueColor_32_to_16(unsigned char *image, - unsigned short *ximage, - short *err, - short *nerr, - unsigned short *rtable, - unsigned short *gtable, - unsigned short *btable, - int dr, - int dg, - int db, - unsigned int roffs, - unsigned int goffs, unsigned int boffs, int width, int height, int line_offset) -{ - union { - long long rrggbbaa; - struct { - short int rr, gg, bb, aa; - } words; - } rrggbbaa; - - union { - long long pixel; - struct { - short int rr, gg, bb, aa; - } words; - } pixel; - - short *tmp_err; - short *tmp_nerr; - int x; - - asm volatile ( - "pushl %%ebx \n\t" - /* pack dr, dg and db into mm6 */ - "movl %7, %%eax \n\t" - "movl %8, %%ebx \n\t" - "movl %9, %%ecx \n\t" - "movw %%ax, %16 \n\t" - "movw %%bx, %17 \n\t" - "movw %%cx, %18 \n\t" - "movw $0, %19 \n\t" - "movq %16, %%mm6 \n\t" /* dr dg db 0 */ - /* pack 4|4|4|4 into mm7, for shifting (/16) */ - "movl $0x00040004, %16 \n\t" - "movl $0x00040004, %18 \n\t" - "movq %16, %%mm7 \n\t" - /* store constant values for using with mmx when dithering */ - "movl $0x00070007, %16 \n\t" - "movl $0x00070007, %18 \n\t" - "movq %16, %%mm5 \n\t" - "movl $0x00050005, %16 \n\t" - "movl $0x00050005, %18 \n\t" - "movq %16, %%mm4 \n\t" - "movl $0x00030003, %16 \n\t" - "movl $0x00030003, %18 \n\t" - "movq %16, %%mm3 \n\t" - /* process 1 pixel / cycle, each component treated as 16bit */ - "movl %0, %%esi \n\t" /* esi = image->data */ - ".LoopYa: \n\t" - "movl %13, %%eax \n\t" - "movl %%eax, %26 \n\t" /* x = width */ - "movl %14, %%eax \n\t" - "decl %%eax \n\t" /* y-- */ - "movl %%eax, %14 \n\t" - "js .Enda \n\t" /* if y < 0, goto end */ - "andl $1, %%eax \n\t" - "jz .LoopY_1a \n\t" /* if (y & 1) goto LoopY_1 */ - ".LoopY_0a: \n\t" - "movl %2, %%ebx \n\t" /* ebx = err */ - "movl %%ebx, %25 \n\t" /* [-36] = err */ - "movl %3, %%eax \n\t" - "movl %%eax, %24 \n\t" /* [-32] = nerr */ - "jmp .LoopXa \n\t" - ".LoopY_1a: \n\t" - "movl %3, %%ebx \n\t" /* ebx = nerr */ - "movl %%ebx, %25 \n\t" /* [-36] = nerr */ - "movl %2, %%eax \n\t" - "movl %%eax, %24 \n\t" /* [-32] = eerr */ - ".align 16 \n\t" - ".LoopXa: \n\t" - /* calculate errors and pixel components; depend on ebx, esi, mm6 */ - "movq (%%ebx), %%mm1 \n\t" /* mm1 = error[0..3] */ - "punpcklbw (%%esi), %%mm0 \n\t" /* mm0 = image->data[0..3] */ - "psrlw $8, %%mm0 \n\t" /* fixup mm0 */ - "paddusb %%mm1, %%mm0 \n\t" /* mm0 = mm0 + mm1 (sat. to 255) */ - "movq %%mm0, %20 \n\t" /* save the pixel */ - "movzwl %20, %%ecx \n\t" /* ecx = pixel.red */ - "movl %4, %%edi \n\t" /* edi = rtable */ - /* agi */ - "leal (%%edi, %%ecx, 2), %%eax \n\t" /* eax = &rtable[pixel.red] */ - /* agi */ - "movw (%%eax), %%dx \n\t" /* dx = rtable[pixel.red] */ - "movw %%dx, %16 \n\t" /* save rr */ - "movzwl %21, %%ecx \n\t" /* ecx = pixel.green */ - "movl %5, %%edi \n\t" /* edi = gtable */ - /* agi */ - "leal (%%edi, %%ecx, 2), %%eax \n\t" /* eax = >able[pixel.green] */ - /* agi */ - "movw (%%eax), %%dx \n\t" /* dx = gtable[pixel.green] */ - "movw %%dx, %17 \n\t" /* save gg */ - "movzwl %22, %%ecx \n\t" /* ecx = pixel.blue */ - "movl %6, %%edi \n\t" /* ebx = btable */ - /* agi */ - "leal (%%edi, %%ecx, 2), %%eax \n\t" /* eax = &btable[pixel.blue] */ - /* agi */ - "movw (%%eax), %%dx \n\t" /* dx = btable[pixel.blue] */ - "movw %%dx, %18 \n\t" /* save bb */ - "movw $0, %19 \n\t" /* save dummy aa */ - "movq %16, %%mm1 \n\t" /* load mm1 with rrggbbaa */ - "pmullw %%mm6, %%mm1 \n\t" /* mm1 = rr*dr|... */ - "psubsw %%mm1, %%mm0 \n\t" /* error = pixel - mm1 */ - /* distribute the error; depend on mm0, mm7, mm3, mm4, mm5 */ - "movl %25, %%ebx \n\t" - "movq %%mm0, %%mm1 \n\t" - "pmullw %%mm5, %%mm1 \n\t" /* mm1 = mm1*7 */ - "psrlw %%mm7, %%mm1 \n\t" /* mm1 = mm1/16 */ - "paddw 8(%%ebx), %%mm1 \n\t" - "movq %%mm1, 8(%%ebx) \n\t" /* err[x+1,y] = rer*7/16 */ - "movl %24, %%ebx \n\t" - "movq %%mm0, %%mm1 \n\t" - "pmullw %%mm4, %%mm1 \n\t" /* mm1 = mm1*5 */ - "psrlw %%mm7, %%mm1 \n\t" /* mm1 = mm1/16 */ - "paddw -8(%%ebx), %%mm1 \n\t" - "movq %%mm1, -8(%%ebx) \n\t" /* err[x-1,y+1] += rer*3/16 */ - "movq %%mm0, %%mm1 \n\t" - "pmullw %%mm3, %%mm1 \n\t" /* mm1 = mm1*3 */ - "psrlw %%mm7, %%mm1 \n\t" /* mm1 = mm1/16 */ - "paddw 8(%%ebx), %%mm1 \n\t" - "movq %%mm1, (%%ebx) \n\t" /* err[x,y+1] += rer*5/16 */ - "psrlw %%mm7, %%mm0 \n\t" /* mm0 = mm0/16 */ - "movq %%mm0, 8(%%ebx) \n\t" /* err[x+1,y+1] = rer/16 */ - /* calculate final pixel value and store */ - "movl %10, %%ecx \n\t" - "movw %16, %%ax \n\t" - "shlw %%cl, %%ax \n\t" /* NP* ax = r<data += 4 */ - "decl %26 \n\t" /* x-- */ - "jnz .LoopXa \n\t" /* if x>0, goto .LoopX */ - /* depend on edx */ - "addl %15, %%edx \n\t" /* add extra offset to ximage */ - "movl %%edx, %1 \n\t" - "jmp .LoopYa \n\t" - ".Enda: \n\t" /* THE END */ - "emms \n\t" - "popl %%ebx \n\t" - : - : "m" (image), /* %0 */ - "m" (ximage), /* %1 */ - "m" (err), /* %2 */ - "m" (nerr), /* %3 */ - "m" (rtable), /* %4 */ - "m" (gtable), /* %5 */ - "m" (btable), /* %6 */ - "m" (dr), /* %7 */ - "m" (dg), /* %8 */ - "m" (db), /* %9 */ - "m" (roffs), /* %10 */ - "m" (goffs), /* %11 */ - "m" (boffs), /* %12 */ - "m" (width), /* %13 */ - "m" (height), /* %14 */ - "m" (line_offset), /* %15 */ - "m" (rrggbbaa.words.rr), /* %16 (access to rr) */ - "m" (rrggbbaa.words.gg), /* %17 (access to gg) */ - "m" (rrggbbaa.words.bb), /* %18 (access to bb) */ - "m" (rrggbbaa.words.aa), /* %19 (access to aa) */ - "m" (pixel.words.rr), /* %20 (access to pixel.r) */ - "m" (pixel.words.gg), /* %21 (access to pixel.g) */ - "m" (pixel.words.bb), /* %22 (access to pixel.b) */ - "m" (pixel.words.aa), /* %23 (access to pixel.a) */ - "m" (tmp_err), /* %24 */ - "m" (tmp_nerr), /* %25 */ - "m" (x) /* %26 */ - : "eax", - "ecx", - "edx", - "esi", - "edi" - ); -} - -void -x86_mmx_TrueColor_24_to_16(unsigned char *image, - unsigned short *ximage, - short *err, - short *nerr, - short *rtable, - short *gtable, - short *btable, - int dr, - int dg, - int db, - unsigned int roffs, - unsigned int goffs, unsigned int boffs, int width, int height, int line_offset) -{ - union { - long long rrggbbaa; - struct { - short int rr, gg, bb, aa; - } words; - } rrggbbaa; - - union { - long long pixel; - struct { - short int rr, gg, bb, aa; - } words; - } pixel; - - short *tmp_err; - short *tmp_nerr; - - int x; - int w1; - int w2; - - asm volatile ( - "pushl %%ebx \n\t" - "movl %13, %%eax \n\t" /* eax = width */ - "movl %%eax, %%ebx \n\t" - "shrl $2, %%eax \n\t" - "movl %%eax, %27 \n\t" /* w1 = width / 4 */ - "andl $3, %%ebx \n\t" - "movl %%ebx, %28 \n\t" /* w2 = width %% 4 */ - ".LoopYc: \n\t" - "movl %13, %%eax \n\t" - "movl %%eax, %26 \n\t" /* x = width */ - "decl %14 \n\t" /* height-- */ - "js .Endc \n\t" /* if height < 0 then end */ - "movl %14, %%eax \n\t" - "decl %%eax \n\t" /* y-- */ - "movl %%eax, %14 \n\t" - "js .Endc \n\t" /* if y < 0, goto end */ - "andl $1, %%eax \n\t" - "jz .LoopY_1c \n\t" /* if (y&1) goto LoopY_1 */ - ".LoopY_0c: \n\t" - "movl %2, %%ebx \n\t" /* ebx = err */ - "movl %%ebx, %25 \n\t" /* [-36] = err */ - "movl %3, %%eax \n\t" - "movl %%eax, %24 \n\t" /* [-32] = nerr */ - "jmp .LoopX_1c \n\t" - ".LoopY_1c: \n\t" - "movl %3, %%ebx \n\t" /* ebx = nerr */ - "movl %%ebx, %25 \n\t" /* [-36] = nerr */ - "movl %2, %%eax \n\t" - "movl %%eax, %24 \n\t" /* [-32] = eerr */ - ".align 16 \n\t" - "movl %%eax, %26 \n\t" /* x = w1 */ - ".LoopX_1c: \n\t" - "decl %26 \n\t" /* x-- */ - "js .Xend1_c \n\t" /* if x < 0 then end */ - /* do conversion of 4 pixels */ - "movq %2, %%mm0 \n\t" /* mm0 = err */ - "jmp .LoopX_1c \n\t" - ".Xend1_c: \n\t" - "movl %28, %%eax \n\t" - "movl %%eax, %26 \n\t" /* x = w2 */ - ".LoopX_2c: \n\t" - "decl %26 \n\t" /* x-- */ - "js .Xend2_c \n\t" - /* do conversion */ - "jmp .LoopX_2c \n\t" - ".Xend2_c: \n\t" - "movl %27, %%eax \n\t" - "jmp .LoopYc \n\t" - ".Endc: \n\t" /* THE END */ - "emms \n\t" - "popl %%ebx \n\t" - : - : "m" (image), /* %0 */ - "m" (ximage), /* %1 */ - "m" (err), /* %2 */ - "m" (nerr), /* %3 */ - "m" (rtable), /* %4 */ - "m" (gtable), /* %5 */ - "m" (btable), /* %6 */ - "m" (dr), /* %7 */ - "m" (dg), /* %8 */ - "m" (db), /* %9 */ - "m" (roffs), /* %10 */ - "m" (goffs), /* %11 */ - "m" (boffs), /* %12 */ - "m" (width), /* %13 */ - "m" (height), /* %14 */ - "m" (line_offset), /* %15 */ - "m" (rrggbbaa.words.rr), /* %16 (access to rr) */ - "m" (rrggbbaa.words.gg), /* %17 (access to gg) */ - "m" (rrggbbaa.words.bb), /* %18 (access to bb) */ - "m" (rrggbbaa.words.aa), /* %19 (access to aa) */ - "m" (pixel.words.rr), /* %20 (access to pixel.r) */ - "m" (pixel.words.gg), /* %21 (access to pixel.g) */ - "m" (pixel.words.bb), /* %22 (access to pixel.b) */ - "m" (pixel.words.aa), /* %23 (access to pixel.a) */ - "m" (tmp_err), /* %24 */ - "m" (tmp_nerr), /* %25 */ - "m" (x), /* %26 */ - "m" (w1), /* %27 */ - "m" (w2) /* %28 */ - : - "eax", - "ecx", - "edx", - "esi", - "edi" - ); -} - -#endif /* ASM_X86_MMX */ - -void -x86_PseudoColor_32_to_8(unsigned char *image, - unsigned char *ximage, - char *err, - char *nerr, - short *ctable, - int dr, - int dg, - int db, - unsigned long *pixels, int cpc, int width, int height, int bytesPerPixel, int line_offset) -{ - int x; - int cpcpc; - - int rr; - int gg; - int bb; - - char *tmp_err; - char *tmp_nerr; - - char ndr; // aparently not used - char ndg; // aparently not used - char ndb; // aparently not used - - asm volatile ( - "pushal \n\t" - "movl %9, %%eax \n\t" - "mulb %9 \n\t" - "movl %%eax, %15 \n\t" /* cpcpc = cpc * cpc */ - /* eax will always be <= 0xffff */ - /* process 1 pixel / cycle, each component treated as 16bit */ - "movl %0, %%esi \n\t" /* esi = image->data */ - ".LoopYb: \n\t" - "movl %10, %%ecx \n\t" - "movl %%ecx, %14 \n\t" /* x = width */ - "movl %11, %%ecx \n\t" - "decl %%ecx \n\t" /* y-- */ - "movl %%ecx, %11 \n\t" - "js .Endb \n\t" /* if y < 0, goto end */ - "andl $1, %%ecx \n\t" - "jz .LoopY_1b \n\t" /* if (y & 1) goto LoopY_1 */ - ".LoopY_0b: \n\t" - "movl %2, %%ebx \n\t" /* ebx = err */ - /* "movl %%ebx, %20 \n\t" */ /* [-36] = err */ /* useless */ - "movl %3, %%ecx \n\t" - "movl %%ecx, %19 \n\t" /* [-32] = nerr */ - "movl $0, (%%ecx) \n\t" /* init error of nerr[0] to 0 */ - "jmp .LoopXb \n\t" - ".LoopY_1b: \n\t" - "movl %3, %%ebx \n\t" /* ebx = nerr */ - /* "movl %%ebx, %20 \n\t" */ /* [-36] = nerr */ /* useless */ - "movl %2, %%ecx \n\t" - "movl %%ecx, %19 \n\t" /* [-32] = err */ - "movl $0, (%%ecx) \n\t" /* init error of nerr[0] to 0 */ - ".align 16 \n\t" - ".LoopXb: \n\t" - "movl %4, %%edi \n\t" /* edi = ctable */ - "xorl %%edx, %%edx \n\t" /* zero the upper word on edx */ - /* RED; depends on ebx==err, esi==image->data, edi */ - "movzbw (%%esi), %%dx \n\t" /* dx = image->data[0] */ - "movsbw (%%ebx), %%ax \n\t" /* ax = error[0] */ - "addw %%ax, %%dx \n\t" /* pixel.red = data[0] + error[0] */ - "testb %%dh, %%dh \n\t" /* test if pixel.red < 0 or > 255 */ - "jz .OKRb \n\t" /* 0 <= pixel.red <= 255 */ - "js .NEGRb \n\t" /* pixel.red < 0 */ - "movw $0xff, %%dx \n\t" /* pixel.red > 255 */ - "jmp .OKRb \n\t" - ".NEGRb: \n\t" - "xorw %%dx, %%dx \n\t" - ".OKRb: \n\t" - /* partial reg */ - "leal (%%edi, %%edx, 2), %%ecx \n\t" /* ecx = &ctable[pixel.red] */ - /* agi */ - "movl (%%ecx), %%eax \n\t" /* ax = ctable[pixel.red] */ - "movw %%ax, %16 \n\t" /* save rr */ - "mulb %5 \n\t" /* ax = rr*dr */ - "subw %%ax, %%dx \n\t" /* rer = dx = dx - rr*dr */ - "movswl %%dx, %%eax \n\t" /* save rer */ - /* distribute error */ - "leal (, %%eax, 8), %%ecx \n\t" - "subw %%dx, %%cx \n\t" /* cx = rer * 7 */ - "sarw $4, %%cx \n\t" /* cx = rer * 7 / 16 */ - "addb %%cl, 4(%%ebx) \n\t" /* err[x+1] += rer * 7 / 16 */ - "movl %19, %%ecx \n\t" /* ecx = nerr */ - "leaw (%%eax, %%eax, 4), %%dx \n\t" /* dx = rer * 5 */ - "sarw $4, %%dx \n\t" /* dx = rer * 5 / 16 */ - "addb %%dl, (%%ecx) \n\t" /* nerr[x] += rer * 5 / 16 */ - "leaw (%%eax, %%eax, 2), %%dx \n\t" /* dx = rer * 3 */ - "sarw $4, %%dx \n\t" /* dx = rer * 3 / 16 */ - "addb %%dl, -4(%%ecx) \n\t" /* nerr[x-1] += rer * 3 / 16 */ - "sarw $4, %%ax \n\t" /* ax = rer / 16 */ - "movb %%al, 4(%%ecx) \n\t" /* nerr[x+1] = rer / 16 */ - /* GREEN; depends on ebx, esi, edi */ - "movzbw 1(%%esi), %%dx \n\t" /* dx = image->data[1] */ - "movsbw 1(%%ebx), %%ax \n\t" /* ax = error[1] */ - "addw %%ax, %%dx \n\t" /* pixel.grn = data[1] + error[1] */ - "testb %%dh, %%dh \n\t" /* test if pixel.grn < 0 or > 255 */ - "jz .OKGb \n\t" /* 0 <= pixel.grn <= 255 */ - "js .NEGGb \n\t" /* pixel.grn < 0 */ - "movw $0xff, %%dx \n\t" /* pixel.grn > 255 */ - "jmp .OKGb \n\t" - ".NEGGb: \n\t" - "xorw %%dx, %%dx \n\t" - ".OKGb: \n\t" - /* partial reg */ - "leal (%%edi, %%edx, 2), %%ecx \n\t" /* ecx = &ctable[pixel.grn] */ - /* agi */ - "movw (%%ecx), %%ax \n\t" /* ax = ctable[pixel.grn] */ - "movw %%ax, %17 \n\t" /* save gg */ - "mulb %6 \n\t" /* ax = gg*dg */ - "subw %%ax, %%dx \n\t" /* ger = dx = dx - gg*dg */ - "movswl %%dx, %%eax \n\t" /* save ger */ - /* distribute error */ - "leal (, %%eax, 8), %%ecx \n\t" - "subw %%dx, %%cx \n\t" /* cx = ger * 7 */ - "sarw $4, %%cx \n\t" /* cx = ger * 7 / 16 */ - "addb %%cl, 5(%%ebx) \n\t" /* err[x+1] += ger * 7 / 16 */ - "movl %19, %%ecx \n\t" /* ecx = nerr */ - "leaw (%%eax, %%eax, 4), %%dx \n\t" /* dx = ger * 5 */ - "sarw $4, %%dx \n\t" /* dx = ger * 5 / 16 */ - "addb %%dl, 1(%%ecx) \n\t" /* nerr[x] += ger * 5 / 16 */ - "leaw (%%eax, %%eax, 2), %%dx \n\t" /* dx = ger * 3 */ - "sarw $4, %%dx \n\t" /* dx = ger * 3 / 16 */ - "addb %%dl, -3(%%ecx) \n\t" /* nerr[x-1] += ger * 3 / 16 */ - "sarw $4, %%ax \n\t" /* ax = ger / 16 */ - "movb %%al, 5(%%ecx) \n\t" /* nerr[x+1] = ger / 16 */ - /* BLUE; depends on ebx, esi */ - "movzbw 2(%%esi), %%dx \n\t" /* dx = image->data[2] */ - "movsbw 2(%%ebx), %%ax \n\t" /* ax = error[2] */ - "addw %%ax, %%dx \n\t" /* pixel.grn = data[2] + error[2] */ - "testb %%dh, %%dh \n\t" /* test if pixel.blu < 0 or > 255 */ - "jz .OKBb \n\t" /* 0 <= pixel.blu <= 255 */ - "js .NEGBb \n\t" /* pixel.blu < 0 */ - "movw $0xff, %%dx \n\t" /* pixel.blu > 255 */ - "jmp .OKBb \n\t" - ".NEGBb: \n\t" - "xorw %%dx, %%dx \n\t" - ".OKBb: \n\t" - /* partial reg */ - "leal (%%edi, %%edx, 2), %%ecx \n\t" /* ecx = &ctable[pixel.blu] */ - /* agi */ - "movw (%%ecx), %%ax \n\t" /* ax = ctable[pixel.blu] */ - "movw %%ax, %18 \n\t" /* save bb */ - "mulb %7 \n\t" /* ax = bb*db */ - "subw %%ax, %%dx \n\t" /* ber = dx = dx - bb*db */ - "movswl %%dx, %%eax \n\t" /* save ber */ - /* distribute error */ - "leal (, %%eax, 8), %%ecx \n\t" - "subw %%dx, %%cx \n\t" /* cx = ber * 7 */ - "sarw $4, %%cx \n\t" /* cx = ber * 7 / 16 */ - "addb %%cl, 6(%%ebx) \n\t" /* err[x+1] += ber * 7 / 16 */ - "movl %19, %%ecx \n\t" /* ecx = nerr */ - "leaw (%%eax, %%eax, 4), %%dx \n\t" /* dx = ber * 5 */ - "sarw $4, %%dx \n\t" /* dx = ber * 5 / 16 */ - "addb %%dl, 2(%%ecx) \n\t" /* nerr[x] += ber * 5 / 16 */ - "leaw (%%eax, %%eax, 2), %%dx \n\t" /* dx = ber * 3 */ - "sarw $4, %%dx \n\t" /* dx = ber * 3 / 16 */ - "addb %%dl, -4(%%ecx) \n\t" /* nerr[x-1] += ber * 3 / 16 */ - "sarw $4, %%ax \n\t" /* ax = ber / 16 */ - "movb %%al, 6(%%ecx) \n\t" /* nerr[x+1] = ber / 16 */ - "andl $0xffff, %%eax \n\t" - /* depends on eax & 0xffff0000 == 0 - * calculate the index of the value of the pixel */ - "movw %16, %%ax \n\t" /* ax = rr */ - "mulb %15 \n\t" /* ax = cpcpc*rr */ - "movw %%ax, %%cx \n\t" - "movw %17, %%ax \n\t" /* ax = gg */ - "mulb %9 \n\t" /* ax = cpc*gg */ - "addw %%cx, %%ax \n\t" /* ax = cpc*gg + cpcpc*rr */ - "addw %18, %%ax \n\t" /* ax = cpcpc*rr + cpc*gg + bb */ - "movl %8, %%ecx \n\t" - /* agi */ - "leal (%%ecx, %%eax, 4), %%edx \n\t" - /* agi */ - "movb (%%edx), %%cl \n\t" /* cl = pixels[ax] */ - /* store the pixel */ - "movl %1, %%eax \n\t" - "movb %%cl, (%%eax) \n\t" /* *ximage = cl */ - "incl %1 \n\t" /* ximage++ */ - /* prepare for next iteration on X */ - "addl $4, %19 \n\t" /* nerr += 4 */ - "addl $4, %%ebx \n\t" /* err += 4 */ - "addl %12, %%esi \n\t" /* image->data += bpp */ - "decl %14 \n\t" /* x-- */ - "jnz .LoopXb \n\t" /* if x>0, goto .LoopX */ - "movl %13, %%eax \n\t" - "addl %%eax, %1 \n\t" /* add extra offset to ximage */ - "jmp .LoopYb \n\t" - ".Endb: \n\t" - "emms \n\t" - "popal \n\t" - : - : "m" (image), /* %0 */ - "m" (ximage), /* %1 */ - "m" (err), /* %2 */ - "m" (nerr), /* %3 */ - "m" (ctable), /* %4 */ - "m" (dr), /* %5 */ - "m" (dg), /* %6 */ - "m" (db), /* %7 */ - "m" (pixels), /* %8 */ - "m" (cpc), /* %9 */ - "m" (width), /* %10 */ - "m" (height), /* %11 */ - "m" (bytesPerPixel), /* %12 */ - "m" (line_offset), /* %13 */ - "m" (x), /* %14 */ - "m" (cpcpc), /* %15 */ - "m" (rr), /* %16 */ - "m" (gg), /* %17 */ - "m" (bb), /* %18 */ - "m" (tmp_err), /* %19 */ - "m" (tmp_nerr), /* %20 */ - "m" (ndr), /* %21 */ - "m" (ndg), /* %22 */ - "m" (ndb) /* %23 */ - ); -} - -#endif /* ASM_X86 */