Bug 573948 - Part 1: Use libjpeg-turbo instead of libjpeg. r=jmuizelaar

This commit is contained in:
Justin Lebar 2010-07-19 10:34:41 -07:00
Родитель 6a0f72ac71
Коммит 81b4973141
144 изменённых файлов: 30232 добавлений и 14657 удалений

Просмотреть файл

@ -168,6 +168,10 @@ VPX_AS_CONVERSION = @VPX_AS_CONVERSION@
VPX_ASM_SUFFIX = @VPX_ASM_SUFFIX@
VPX_X86_ASM = @VPX_X86_ASM@
VPX_ARM_ASM = @VPX_ARM_ASM@
LIBJPEG_TURBO_AS = @LIBJPEG_TURBO_AS@
LIBJPEG_TURBO_ASFLAGS = @LIBJPEG_TURBO_ASFLAGS@
LIBJPEG_TURBO_X86_ASM = @LIBJPEG_TURBO_X86_ASM@
LIBJPEG_TURBO_X64_ASM = @LIBJPEG_TURBO_X64_ASM@
NS_PRINTING = @NS_PRINTING@
MOZ_PDF_PRINTING = @MOZ_PDF_PRINTING@
MOZ_CRASHREPORTER = @MOZ_CRASHREPORTER@

Просмотреть файл

@ -4971,6 +4971,10 @@ VPX_AS_CONVERSION=
VPX_ASM_SUFFIX=
VPX_X86_ASM=
VPX_ARM_ASM=
LIBJPEG_TURBO_AS=
LIBJPEG_TURBO_ASFLAGS=
LIBJPEG_TURBO_X86_ASM=
LIBJPEG_TURBO_X64_ASM=
MOZ_PANGO=1
MOZ_PERMISSIONS=1
MOZ_PLACES=1
@ -6441,6 +6445,67 @@ if test -z "$MOZ_CRASHREPORTER_ENABLE_PERCENT"; then
fi
AC_DEFINE_UNQUOTED(MOZ_CRASHREPORTER_ENABLE_PERCENT, $MOZ_CRASHREPORTER_ENABLE_PERCENT)
dnl ========================================================
dnl = libjpeg-turbo configuration
dnl ========================================================
dnl Detect if we can use yasm to compile libjpeg-turbo's optimized assembly
dnl files.
AC_MSG_CHECKING([for YASM assembler])
AC_CHECK_PROGS(LIBJPEG_TURBO_AS, yasm, "")
dnl XXX jlebar -- need a yasm version check here.
if test -n "LIBJPEG_TURBO_AS"; then
LIBJPEG_TURBO_AS="yasm"
dnl We have YASM; see if we support it on this platform.
case "$OS_ARCH:$OS_TEST" in
Linux:x86|Linux:i?86)
LIBJPEG_TURBO_ASFLAGS="-f elf32 -rnasm -pnasm -DPIC -DELF"
LIBJPEG_TURBO_X86_ASM=1
;;
Linux:x86_64)
LIBJPEG_TURBO_ASFLAGS="-f elf64 -rnasm -pnasm -D__x86_64__ -DPIC -DELF"
LIBJPEG_TURBO_X64_ASM=1
;;
SunOS:i?86)
LIBJPEG_TURBO_ASFLAGS="-f elf32 -rnasm -pnasm -DPIC -DELF"
LIBJPEG_TURBO_X86_ASM=1
;;
SunOS:x86_64)
LIBJPEG_TURBO_ASFLAGS="-f elf64 -rnasm -pnasm -D__x86_64__ -DPIC -DELF"
LIBJPEG_TURBO_X64_ASM=1
;;
Darwin:i?86)
LIBJPEG_TURBO_ASFLAGS="-f macho32 -rnasm -pnasm -DPIC -DMACHO"
LIBJPEG_TURBO_X86_ASM=1
;;
Darwin:x86_64)
LIBJPEG_TURBO_ASFLAGS="-f macho64 -rnasm -pnasm -D__x86_64__ -DPIC -DMACHO"
LIBJPEG_TURBO_X64_ASM=1
;;
WINNT:x86|WINNT:i?86)
LIBJPEG_TURBO_ASFLAGS="-f win32 -rnasm -pnasm -DPIC -DWIN32"
LIBJPEG_TURBO_X86_ASM=1
;;
WINNT:x86_64)
LIBJPEG_TURBO_ASFLAGS="-f win64 -rnasm -pnasm -D__x86_64__ -DPIC -DWIN64"
LIBJPEG_TURBO_X64_ASM=1
;;
esac
fi # end have YASM
if test -n "$LIBJPEG_TURBO_X86_ASM"; then
AC_DEFINE(LIBJPEG_TURBO_X86_ASM)
elif test -n "$LIBJPEG_TURBO_X64_ASM"; then
AC_DEFINE(LIBJPEG_TURBO_X64_ASM)
else
AC_MSG_WARN([No assembler or assembly support for libjpeg-turbo. Using unoptimized C routines.])
fi
dnl ========================================================
dnl = Enable compilation of specific extension modules
dnl ========================================================
@ -9209,6 +9274,10 @@ AC_SUBST(VPX_AS_CONVERSION)
AC_SUBST(VPX_ASM_SUFFIX)
AC_SUBST(VPX_X86_ASM)
AC_SUBST(VPX_ARM_ASM)
AC_SUBST(LIBJPEG_TURBO_AS)
AC_SUBST(LIBJPEG_TURBO_ASFLAGS)
AC_SUBST(LIBJPEG_TURBO_X86_ASM)
AC_SUBST(LIBJPEG_TURBO_X64_ASM)
if test "$USING_HCC"; then
CC='${topsrcdir}/build/hcc'

Просмотреть файл

@ -1,10 +1,69 @@
To upgrade to a new revision of libjpeg-turbo, do the following:
Changes made to pristine jpeg source by mozilla.org developers.
* Check out libjpeg-turbo from SVN:
2003/08/18 -- change default mapping for METHODDEF, LOCAL, GLOBAL, EXTERN to better match NSPR
$ svn co https://libjpeg-turbo.svn.sourceforge.net/svnroot/libjpeg-turbo/trunk libjpeg-turbo
2003/03/14 -- mingw bustage fix. w32api uses different header guard define
for <basestd.h> than msvc.
* In a clean clone of mozilla-central, run the following commands
????/??/?? -- Lots of undocumented changes. :(
$ rm -rf jpeg
$ svn export --ignore-externals /path/to/libjpeg-turbo jpeg
$ cd jpeg
* Now look through the new files and rm any which are npotb. When I upgraded
to libjpeg-turbo 1.1.0, the only files I kept which didn't match
*.c *.h *.asm *.inc
were README and README-turbo.
You can easily look for all non *.c, *.h, *.asm, and *.inc files by running
$ hg status -nu | grep -v '\(c\|h\|asm\|inc\)$'
Once you're comfortable that you're only deleting files you want to delete
(and you've hg add'ed the files you want to keep), you can nuke the remaining
files with
$ hg status -nu | grep -v '\(c\|h\|asm\|inc\)$' | xargs rm
A helpful command for finding the *.c files which aren't *currently* part of
the build is
diff <(ls *.c | sort) <(grep -o '\w*\.c' Makefile.in | sort)
of course, libjpeg-turbo might have added some new source files, so you'll
have to look though and figure out which of these files to keep.
* Restore files modified in the Mozilla repository.
$ hg revert --no-backup Makefile.in jconfig.h jmorecfg.h simd/Makefile.in \
simd/jsimdcfg.inc jchuff.c jdhuff.c jdhuff.h MOZCHANGES
* Update Makefile.in to build any new files.
* Finally, tell hg that we've added or removed some files:
$ hg addremove
== March 28, 2011 (initial commit, libjpeg-turbo v1.1.0 r469 2011-02-27) ==
* Modified jmorecfg.h to define UINT8, UINT16, INT16, and INT32 in terms of
prtypes to fix a build error on Windows.
* Defined INLINE as NS_ALWAYS_INLINE in jconfig.h.
* Removed the following files which are licensed under the wxWindows license:
bmp.c, bmp.h, jpegut.c, jpgtest.cxx, rrtimer.h, rrutil.h, turbojpeg.h,
turbojpegl.c
* Reverted the following files to what was previously in Mozilla's tree
(nominally libjpeg 6.2):
jchuff.c, jdhuff.c, jdhuff.h
since the versions of these files in libjpeg-turbo are also under the
wxWindows license. (It would have been nicer to revert them to the new
libjpeg-8b code, but that doesn't easily integrate with libjpeg-turbo.)

Просмотреть файл

@ -15,11 +15,12 @@
# The Original Code is mozilla.org code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 1998
# Mozilla Corporation
# Portions created by the Initial Developer are Copyright (C) 2010
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Justin Lebar <justin.lebar@gmail.com>
#
# Alternatively, the contents of this file may be used under the terms of
# either the GNU General Public License Version 2 or later (the "GPL"), or
@ -42,6 +43,7 @@ VPATH = @srcdir@
include $(DEPTH)/config/autoconf.mk
DIRS = simd
MODULE = jpeg
LIBRARY_NAME = mozjpeg
@ -58,64 +60,136 @@ endif
GRE_MODULE = 1
CSRCS = \
jcomapi.c \
jdapimin.c \
jdapistd.c \
jdatasrc.c \
jdatadst.c \
jdmaster.c \
jdinput.c \
jdmarker.c \
jdhuff.c \
jdphuff.c \
jdmainct.c \
jdatasrc.c \
jdcoefct.c \
jdpostct.c \
jddctmgr.c \
jidctfst.c \
jidctflt.c \
jidctint.c \
jdsample.c \
jdcolor.c \
jquant1.c \
jquant2.c \
jddctmgr.c \
jdhuff.c \
jdinput.c \
jdmainct.c \
jdmarker.c \
jdmaster.c \
jdmerge.c \
jcomapi.c \
jutils.c \
jdphuff.c \
jdpostct.c \
jdsample.c \
jdtrans.c \
jerror.c \
jmemmgr.c \
jmemnobs.c \
jfdctflt.c \
jfdctfst.c \
jfdctint.c \
$(NULL)
EXPORTS = \
jconfig.h \
jerror.h \
jinclude.h \
jmorecfg.h \
jpeglib.h \
jpegint.h \
jwinfig.h \
jos2fig.h \
jidctflt.c \
jidctfst.c \
jidctint.c \
jidctred.c \
jmemmgr.c \
jmemnobs.c \
jquant1.c \
jquant2.c \
jutils.c \
$(NULL)
# These files enable support for writing JPEGs
CSRCS += \
jcapimin.c \
jcparam.c \
jcapistd.c \
jcmarker.c \
jcinit.c \
jcmainct.c \
jchuff.c \
jcsample.c \
jcmaster.c \
jccoefct.c \
jccolor.c \
jcphuff.c \
jcdctmgr.c \
jchuff.c \
jcinit.c \
jcmainct.c \
jcmarker.c \
jcmaster.c \
jcparam.c \
jcphuff.c \
jcprepct.c \
jcsample.c \
$(NULL)
AS=$(LIBJPEG_TURBO_AS)
ASM_SUFFIX=asm
ASFLAGS=$(LIBJPEG_TURBO_ASFLAGS) -I$(topsrcdir)/modules/libjpeg-turbo/simd/
ifeq ($(AS),yasm)
# yasm doesn't like -c
AS_DASH_C_FLAG=
endif
# No SIMD support?
ifeq (,$(LIBJPEG_TURBO_X86_ASM)$(LIBJPEG_TURBO_X64_ASM))
CSRCS += jsimd_none.c
endif
ifeq (1,$(LIBJPEG_TURBO_X64_ASM))
CSRCS += simd/jsimd_x86_64.c
ASFILES += \
simd/jccolss2-64.asm \
simd/jcqnts2f-64.asm \
simd/jcqnts2i-64.asm \
simd/jcsamss2-64.asm \
simd/jdcolss2-64.asm \
simd/jdmerss2-64.asm \
simd/jdsamss2-64.asm \
simd/jfss2fst-64.asm \
simd/jfss2int-64.asm \
simd/jfsseflt-64.asm \
simd/jiss2flt-64.asm \
simd/jiss2fst-64.asm \
simd/jiss2int-64.asm \
simd/jiss2red-64.asm \
$(NULL)
endif
ifeq (1,$(LIBJPEG_TURBO_X86_ASM))
CSRCS +=simd/jsimd_i386.c
ASFILES += \
simd/jccolmmx.asm \
simd/jccolss2.asm \
simd/jcqnt3dn.asm \
simd/jcqntmmx.asm \
simd/jcqnts2f.asm \
simd/jcqnts2i.asm \
simd/jcqntsse.asm \
simd/jcsammmx.asm \
simd/jcsamss2.asm \
simd/jdcolmmx.asm \
simd/jdcolss2.asm \
simd/jdmermmx.asm \
simd/jdmerss2.asm \
simd/jdsammmx.asm \
simd/jdsamss2.asm \
simd/jf3dnflt.asm \
simd/jfmmxfst.asm \
simd/jfmmxint.asm \
simd/jfss2fst.asm \
simd/jfss2int.asm \
simd/jfsseflt.asm \
simd/ji3dnflt.asm \
simd/jimmxfst.asm \
simd/jimmxint.asm \
simd/jimmxred.asm \
simd/jiss2flt.asm \
simd/jiss2fst.asm \
simd/jiss2int.asm \
simd/jiss2red.asm \
simd/jisseflt.asm \
simd/jsimdcpu.asm \
$(NULL)
endif
# jwinfig.h, jos2fig.h ? XXX
EXPORTS = \
jconfig.h \
jerror.h \
jinclude.h \
jmorecfg.h \
jpegint.h \
jpeglib.h \
$(NULL)
# need static lib for some of the libimg componentry to link properly

Просмотреть файл

@ -1,22 +1,20 @@
libjpeg-turbo note: This file is mostly taken from the libjpeg v8b README
file, and it is included only for reference. Some parts of it may not apply to
libjpeg-turbo. Please see README-turbo.txt for information specific to the
turbo version.
The Independent JPEG Group's JPEG software
==========================================
README for release 6b of 27-Mar-1998
====================================
This distribution contains a release of the Independent JPEG Group's free JPEG
software. You are welcome to redistribute this software and to use it for any
purpose, subject to the conditions under LEGAL ISSUES, below.
This distribution contains the sixth public release of the Independent JPEG
Group's free JPEG software. You are welcome to redistribute this software and
to use it for any purpose, subject to the conditions under LEGAL ISSUES, below.
Serious users of this software (particularly those incorporating it into
larger programs) should contact IJG at jpeg-info@uunet.uu.net to be added to
our electronic mailing list. Mailing list members are notified of updates
and have a chance to participate in technical discussions, etc.
This software is the work of Tom Lane, Philip Gladstone, Jim Boucher,
Lee Crocker, Julian Minguillon, Luis Ortiz, George Phillips, Davide Rossi,
Guido Vollbeding, Ge' Weijers, and other members of the Independent JPEG
Group.
This software is the work of Tom Lane, Guido Vollbeding, Philip Gladstone,
Bill Allombert, Jim Boucher, Lee Crocker, Bob Friesenhahn, Ben Jackson,
Julian Minguillon, Luis Ortiz, George Phillips, Davide Rossi, Ge' Weijers,
and other members of the Independent JPEG Group.
IJG is not affiliated with the official ISO JPEG standards committee.
@ -30,27 +28,26 @@ OVERVIEW General description of JPEG and the IJG software.
LEGAL ISSUES Copyright, lack of warranty, terms of distribution.
REFERENCES Where to learn more about JPEG.
ARCHIVE LOCATIONS Where to find newer versions of this software.
RELATED SOFTWARE Other stuff you should get.
FILE FORMAT WARS Software *not* to get.
TO DO Plans for future IJG releases.
Other documentation files in the distribution are:
User documentation:
install.doc How to configure and install the IJG software.
usage.doc Usage instructions for cjpeg, djpeg, jpegtran,
install.txt How to configure and install the IJG software.
usage.txt Usage instructions for cjpeg, djpeg, jpegtran,
rdjpgcom, and wrjpgcom.
*.1 Unix-style man pages for programs (same info as usage.doc).
wizard.doc Advanced usage instructions for JPEG wizards only.
*.1 Unix-style man pages for programs (same info as usage.txt).
wizard.txt Advanced usage instructions for JPEG wizards only.
change.log Version-to-version change highlights.
Programmer and internal documentation:
libjpeg.doc How to use the JPEG library in your own programs.
libjpeg.txt How to use the JPEG library in your own programs.
example.c Sample code for calling the JPEG library.
structure.doc Overview of the JPEG library's internal structure.
filelist.doc Road map of IJG files.
coderules.doc Coding style rules --- please read if you contribute code.
structure.txt Overview of the JPEG library's internal structure.
filelist.txt Road map of IJG files.
coderules.txt Coding style rules --- please read if you contribute code.
Please read at least the files install.doc and usage.doc. Useful information
Please read at least the files install.txt and usage.txt. Some information
can also be found in the JPEG FAQ (Frequently Asked Questions) article. See
ARCHIVE LOCATIONS below to find out where to obtain the FAQ article.
@ -62,24 +59,27 @@ the order listed) before diving into the code.
OVERVIEW
========
This package contains C software to implement JPEG image compression and
decompression. JPEG (pronounced "jay-peg") is a standardized compression
method for full-color and gray-scale images. JPEG is intended for compressing
"real-world" scenes; line drawings, cartoons and other non-realistic images
are not its strong suit. JPEG is lossy, meaning that the output image is not
exactly identical to the input image. Hence you must not use JPEG if you
have to have identical output bits. However, on typical photographic images,
very good compression levels can be obtained with no visible change, and
remarkably high compression levels are possible if you can tolerate a
low-quality image. For more details, see the references, or just experiment
with various compression settings.
This package contains C software to implement JPEG image encoding, decoding,
and transcoding. JPEG (pronounced "jay-peg") is a standardized compression
method for full-color and gray-scale images. JPEG's strong suit is compressing
photographic images or other types of images which have smooth color and
brightness transitions between neighboring pixels. Images with sharp lines or
other abrupt features may not compress well with JPEG, and a higher JPEG
quality may have to be used to avoid visible compression artifacts with such
images.
JPEG is lossy, meaning that the output pixels are not necessarily identical to
the input pixels. However, on photographic content and other "smooth" images,
very good compression ratios can be obtained with no visible compression
artifacts, and extremely high compression ratios are possible if you are
willing to sacrifice image quality (by reducing the "quality" setting in the
compressor.)
This software implements JPEG baseline, extended-sequential, and progressive
compression processes. Provision is made for supporting all variants of these
processes, although some uncommon parameter settings aren't implemented yet.
For legal reasons, we are not distributing code for the arithmetic-coding
variants of JPEG; see LEGAL ISSUES. We have made no provision for supporting
the hierarchical or lossless processes defined in the standard.
We have made no provision for supporting the hierarchical or lossless
processes defined in the standard.
We provide a set of library routines for reading and writing JPEG image files,
plus two sample applications "cjpeg" and "djpeg", which use the library to
@ -91,10 +91,11 @@ considerable functionality beyond the bare JPEG coding/decoding capability;
for example, the color quantization modules are not strictly part of JPEG
decoding, but they are essential for output to colormapped file formats or
colormapped displays. These extra functions can be compiled out of the
library if not required for a particular application. We have also included
"jpegtran", a utility for lossless transcoding between different JPEG
processes, and "rdjpgcom" and "wrjpgcom", two simple applications for
inserting and extracting textual comments in JFIF files.
library if not required for a particular application.
We have also included "jpegtran", a utility for lossless transcoding between
different JPEG processes, and "rdjpgcom" and "wrjpgcom", two simple
applications for inserting and extracting textual comments in JFIF files.
The emphasis in designing this software has been on achieving portability and
flexibility, while also making it fast enough to be useful. In particular,
@ -127,7 +128,7 @@ with respect to this software, its quality, accuracy, merchantability, or
fitness for a particular purpose. This software is provided "AS IS", and you,
its user, assume the entire risk as to its quality and accuracy.
This software is copyright (C) 1991-1998, Thomas G. Lane.
This software is copyright (C) 1991-2010, Thomas G. Lane, Guido Vollbeding.
All Rights Reserved except as specified below.
Permission is hereby granted to use, copy, modify, and distribute this
@ -170,17 +171,8 @@ the foregoing paragraphs do.
The Unix configuration script "configure" was produced with GNU Autoconf.
It is copyright by the Free Software Foundation but is freely distributable.
The same holds for its supporting scripts (config.guess, config.sub,
ltconfig, ltmain.sh). Another support script, install-sh, is copyright
by M.I.T. but is also freely distributable.
It appears that the arithmetic coding option of the JPEG spec is covered by
patents owned by IBM, AT&T, and Mitsubishi. Hence arithmetic coding cannot
legally be used without obtaining one or more licenses. For this reason,
support for arithmetic coding has been removed from the free JPEG software.
(Since arithmetic coding provides only a marginal gain over the unpatented
Huffman mode, it is unlikely that very many implementations will support it.)
So far as we are aware, there are no patent restrictions on the remaining
code.
ltmain.sh). Another support script, install-sh, is copyright by X Consortium
but is also freely distributable.
The IJG distribution formerly included code to read and write GIF files.
To avoid entanglement with the Unisys LZW patent, GIF reading support has
@ -198,7 +190,7 @@ We are required to state that
REFERENCES
==========
We highly recommend reading one or more of these references before trying to
We recommend reading one or more of these references before trying to
understand the innards of the JPEG software.
The best short technical introduction to the JPEG compression algorithm is
@ -207,7 +199,7 @@ The best short technical introduction to the JPEG compression algorithm is
(Adjacent articles in that issue discuss MPEG motion picture compression,
applications of JPEG, and related topics.) If you don't have the CACM issue
handy, a PostScript file containing a revised version of Wallace's article is
available at ftp://ftp.uu.net/graphics/jpeg/wallace.ps.gz. The file (actually
available at http://www.ijg.org/files/wallace.ps.gz. The file (actually
a preprint for an article that appeared in IEEE Trans. Consumer Electronics)
omits the sample images that appeared in CACM, but it includes corrections
and some added material. Note: the Wallace article is copyright ACM and IEEE,
@ -222,82 +214,53 @@ code but don't know much about data compression in general. The book's JPEG
sample code is far from industrial-strength, but when you are ready to look
at a full implementation, you've got one here...
The best full description of JPEG is the textbook "JPEG Still Image Data
Compression Standard" by William B. Pennebaker and Joan L. Mitchell, published
by Van Nostrand Reinhold, 1993, ISBN 0-442-01272-1. Price US$59.95, 638 pp.
The book includes the complete text of the ISO JPEG standards (DIS 10918-1
and draft DIS 10918-2). This is by far the most complete exposition of JPEG
in existence, and we highly recommend it.
The best currently available description of JPEG is the textbook "JPEG Still
Image Data Compression Standard" by William B. Pennebaker and Joan L.
Mitchell, published by Van Nostrand Reinhold, 1993, ISBN 0-442-01272-1.
Price US$59.95, 638 pp. The book includes the complete text of the ISO JPEG
standards (DIS 10918-1 and draft DIS 10918-2).
The JPEG standard itself is not available electronically; you must order a
paper copy through ISO or ITU. (Unless you feel a need to own a certified
official copy, we recommend buying the Pennebaker and Mitchell book instead;
it's much cheaper and includes a great deal of useful explanatory material.)
In the USA, copies of the standard may be ordered from ANSI Sales at (212)
642-4900, or from Global Engineering Documents at (800) 854-7179. (ANSI
doesn't take credit card orders, but Global does.) It's not cheap: as of
1992, ANSI was charging $95 for Part 1 and $47 for Part 2, plus 7%
shipping/handling. The standard is divided into two parts, Part 1 being the
actual specification, while Part 2 covers compliance testing methods. Part 1
is titled "Digital Compression and Coding of Continuous-tone Still Images,
The original JPEG standard is divided into two parts, Part 1 being the actual
specification, while Part 2 covers compliance testing methods. Part 1 is
titled "Digital Compression and Coding of Continuous-tone Still Images,
Part 1: Requirements and guidelines" and has document numbers ISO/IEC IS
10918-1, ITU-T T.81. Part 2 is titled "Digital Compression and Coding of
Continuous-tone Still Images, Part 2: Compliance testing" and has document
numbers ISO/IEC IS 10918-2, ITU-T T.83.
Some extensions to the original JPEG standard are defined in JPEG Part 3,
a newer ISO standard numbered ISO/IEC IS 10918-3 and ITU-T T.84. IJG
currently does not support any Part 3 extensions.
The JPEG standard does not specify all details of an interchangeable file
format. For the omitted details we follow the "JFIF" conventions, revision
1.02. A copy of the JFIF spec is available from:
Literature Department
C-Cube Microsystems, Inc.
1778 McCarthy Blvd.
Milpitas, CA 95035
phone (408) 944-6300, fax (408) 944-6314
A PostScript version of this document is available by FTP at
ftp://ftp.uu.net/graphics/jpeg/jfif.ps.gz. There is also a plain text
version at ftp://ftp.uu.net/graphics/jpeg/jfif.txt.gz, but it is missing
the figures.
1.02. JFIF 1.02 has been adopted as an Ecma International Technical Report
and thus received a formal publication status. It is available as a free
download in PDF format from
http://www.ecma-international.org/publications/techreports/E-TR-098.htm.
A PostScript version of the JFIF document is available at
http://www.ijg.org/files/jfif.ps.gz. There is also a plain text version at
http://www.ijg.org/files/jfif.txt.gz, but it is missing the figures.
The TIFF 6.0 file format specification can be obtained by FTP from
ftp://ftp.sgi.com/graphics/tiff/TIFF6.ps.gz. The JPEG incorporation scheme
found in the TIFF 6.0 spec of 3-June-92 has a number of serious problems.
IJG does not recommend use of the TIFF 6.0 design (TIFF Compression tag 6).
Instead, we recommend the JPEG design proposed by TIFF Technical Note #2
(Compression tag 7). Copies of this Note can be obtained from ftp.sgi.com or
from ftp://ftp.uu.net/graphics/jpeg/. It is expected that the next revision
(Compression tag 7). Copies of this Note can be obtained from
http://www.ijg.org/files/. It is expected that the next revision
of the TIFF spec will replace the 6.0 JPEG design with the Note's design.
Although IJG's own code does not support TIFF/JPEG, the free libtiff library
uses our library to implement TIFF/JPEG per the Note. libtiff is available
from ftp://ftp.sgi.com/graphics/tiff/.
uses our library to implement TIFF/JPEG per the Note.
ARCHIVE LOCATIONS
=================
The "official" archive site for this software is ftp.uu.net (Internet
address 192.48.96.9). The most recent released version can always be found
there in directory graphics/jpeg. This particular version will be archived
as ftp://ftp.uu.net/graphics/jpeg/jpegsrc.v6b.tar.gz. If you don't have
direct Internet access, UUNET's archives are also available via UUCP; contact
help@uunet.uu.net for information on retrieving files that way.
The "official" archive site for this software is www.ijg.org.
The most recent released version can always be found there in
directory "files". This particular version will be archived as
http://www.ijg.org/files/jpegsrc.v8b.tar.gz, and in Windows-compatible
"zip" archive format as http://www.ijg.org/files/jpegsr8b.zip.
Numerous Internet sites maintain copies of the UUNET files. However, only
ftp.uu.net is guaranteed to have the latest official version.
You can also obtain this software in DOS-compatible "zip" archive format from
the SimTel archives (ftp://ftp.simtel.net/pub/simtelnet/msdos/graphics/), or
on CompuServe in the Graphics Support forum (GO CIS:GRAPHSUP), library 12
"JPEG Tools". Again, these versions may sometimes lag behind the ftp.uu.net
release.
The JPEG FAQ (Frequently Asked Questions) article is a useful source of
general information about JPEG. It is updated constantly and therefore is
not included in this distribution. The FAQ is posted every two weeks to
Usenet newsgroups comp.graphics.misc, news.answers, and other groups.
The JPEG FAQ (Frequently Asked Questions) article is a source of some
general information about JPEG.
It is available on the World Wide Web at http://www.faqs.org/faqs/jpeg-faq/
and other news.answers archive sites, including the official news.answers
archive at rtfm.mit.edu: ftp://rtfm.mit.edu/pub/usenet/news.answers/jpeg-faq/.
@ -307,79 +270,20 @@ with body
send usenet/news.answers/jpeg-faq/part2
RELATED SOFTWARE
================
Numerous viewing and image manipulation programs now support JPEG. (Quite a
few of them use this library to do so.) The JPEG FAQ described above lists
some of the more popular free and shareware viewers, and tells where to
obtain them on Internet.
If you are on a Unix machine, we highly recommend Jef Poskanzer's free
PBMPLUS software, which provides many useful operations on PPM-format image
files. In particular, it can convert PPM images to and from a wide range of
other formats, thus making cjpeg/djpeg considerably more useful. The latest
version is distributed by the NetPBM group, and is available from numerous
sites, notably ftp://wuarchive.wustl.edu/graphics/graphics/packages/NetPBM/.
Unfortunately PBMPLUS/NETPBM is not nearly as portable as the IJG software is;
you are likely to have difficulty making it work on any non-Unix machine.
A different free JPEG implementation, written by the PVRG group at Stanford,
is available from ftp://havefun.stanford.edu/pub/jpeg/. This program
is designed for research and experimentation rather than production use;
it is slower, harder to use, and less portable than the IJG code, but it
is easier to read and modify. Also, the PVRG code supports lossless JPEG,
which we do not. (On the other hand, it doesn't do progressive JPEG.)
FILE FORMAT WARS
================
Some JPEG programs produce files that are not compatible with our library.
The root of the problem is that the ISO JPEG committee failed to specify a
concrete file format. Some vendors "filled in the blanks" on their own,
creating proprietary formats that no one else could read. (For example, none
of the early commercial JPEG implementations for the Macintosh were able to
exchange compressed files.)
The file format we have adopted is called JFIF (see REFERENCES). This format
has been agreed to by a number of major commercial JPEG vendors, and it has
become the de facto standard. JFIF is a minimal or "low end" representation.
We recommend the use of TIFF/JPEG (TIFF revision 6.0 as modified by TIFF
Technical Note #2) for "high end" applications that need to record a lot of
additional data about an image. TIFF/JPEG is fairly new and not yet widely
supported, unfortunately.
The upcoming JPEG Part 3 standard defines a file format called SPIFF.
SPIFF is interoperable with JFIF, in the sense that most JFIF decoders should
be able to read the most common variant of SPIFF. SPIFF has some technical
advantages over JFIF, but its major claim to fame is simply that it is an
official standard rather than an informal one. At this point it is unclear
whether SPIFF will supersede JFIF or whether JFIF will remain the de-facto
standard. IJG intends to support SPIFF once the standard is frozen, but we
have not decided whether it should become our default output format or not.
(In any case, our decoder will remain capable of reading JFIF indefinitely.)
Various proprietary file formats incorporating JPEG compression also exist.
We have little or no sympathy for the existence of these formats. Indeed,
The ISO JPEG standards committee actually promotes different formats like
"JPEG 2000" or "JPEG XR" which are incompatible with original DCT-based
JPEG. IJG therefore does not support these formats (see REFERENCES). Indeed,
one of the original reasons for developing this free software was to help
force convergence on common, open format standards for JPEG files. Don't
use a proprietary file format!
force convergence on common, interoperable format standards for JPEG files.
Don't use an incompatible file format!
(In any case, our decoder will remain capable of reading existing JPEG
image files indefinitely.)
TO DO
=====
The major thrust for v7 will probably be improvement of visual quality.
The current method for scaling the quantization tables is known not to be
very good at low Q values. We also intend to investigate block boundary
smoothing, "poor man's variable quantization", and other means of improving
quality-vs-file-size performance without sacrificing compatibility.
In future versions, we are considering supporting some of the upcoming JPEG
Part 3 extensions --- principally, variable quantization and the SPIFF file
format.
As always, speeding things up is of great interest.
Please send bug reports, offers of help, etc. to jpeg-info@uunet.uu.net.
Please send bug reports, offers of help, etc. to jpeg-info@uc.ag.

304
jpeg/README-turbo.txt Normal file
Просмотреть файл

@ -0,0 +1,304 @@
*******************************************************************************
** Background
*******************************************************************************
libjpeg-turbo is a derivative of libjpeg which uses SIMD instructions (MMX,
SSE2, etc.) to accelerate baseline JPEG compression and decompression on x86
and x86-64 systems. On such systems, libjpeg-turbo is generally 2-4x as fast
as the unmodified version of libjpeg, all else being equal.
libjpeg-turbo was originally based on libjpeg/SIMD by Miyasaka Masaru, but
the TigerVNC and VirtualGL projects made numerous enhancements to the codec in
2009, including improved support for Mac OS X, 64-bit support, support for
32-bit and big endian pixel formats (RGBX, XBGR, etc.), accelerated Huffman
encoding/decoding, and various bug fixes. The goal was to produce a fully open
source codec that could replace the partially closed source TurboJPEG/IPP codec
used by VirtualGL and TurboVNC. libjpeg-turbo generally performs in the range
of 80-120% of TurboJPEG/IPP. It is faster in some areas but slower in others.
In early 2010, libjpeg-turbo spun off into its own independent project, with
the goal of making high-speed JPEG compression/decompression technology
available to a broader range of users and developers. The libjpeg-turbo shared
libraries can be used as drop-in replacements for libjpeg on most systems.
*******************************************************************************
** License
*******************************************************************************
The TurboJPEG/OSS wrapper, as well as some of the optimizations to the Huffman
encoder (jchuff.c) and decoder (jdhuff.c), were borrowed from VirtualGL, and
thus any distribution of libjpeg-turbo which includes those files must, as a
whole, be subject to the terms of the wxWindows Library Licence, Version 3.1.
A copy of this license can be found in this directory under LICENSE.txt. The
wxWindows Library License is based on the LGPL but includes provisions which
allow the Library to be statically linked into proprietary libraries and
applications without requiring the resulting binaries to be distributed under
the terms of the LGPL.
The rest of the source code, apart from TurboJPEG/OSS and the Huffman codec
optimizations, falls under a less restrictive, BSD-style license (see README.)
You can choose to distribute libjpeg-turbo, as a whole, under this BSD-style
license by simply removing TurboJPEG/OSS and replacing the optimized jchuff.c
and jdhuff.c with their unoptimized counterparts from the libjpeg v6b source.
*******************************************************************************
** Using libjpeg-turbo
*******************************************************************************
=============================
Replacing libjpeg at Run Time
=============================
If a Unix application is dynamically linked with libjpeg, then you can replace
libjpeg with libjpeg-turbo at run time by manipulating LD_LIBRARY_PATH.
For instance:
[Using libjpeg]
> time cjpeg <vgl_5674_0098.ppm >vgl_5674_0098.jpg
real 0m0.392s
user 0m0.074s
sys 0m0.020s
[Using libjpeg-turbo]
> export LD_LIBRARY_PATH=/opt/libjpeg-turbo/{lib}:$LD_LIBRARY_PATH
> time cjpeg <vgl_5674_0098.ppm >vgl_5674_0098.jpg
real 0m0.109s
user 0m0.029s
sys 0m0.010s
NOTE: {lib} can be lib, lib32, lib64, or lib/64, depending on the O/S and
architecture.
System administrators can also replace the libjpeg sym links in /usr/{lib} with
links to the libjpeg dynamic library located in /opt/libjpeg-turbo/{lib}. This
will effectively accelerate every dynamically linked libjpeg application on the
system.
The libjpeg-turbo SDK for Visual C++ installs the libjpeg-turbo DLL
(jpeg62.dll, jpeg7.dll, or jpeg8.dll, depending on whether libjpeg v6b, v7, or
v8 emulation is enabled) into c:\libjpeg-turbo[64]\bin, and the PATH
environment variable can be modified such that this directory is searched
before any others that might contain a libjpeg DLL. However, if a libjpeg
DLL exists in an application's install directory, then Windows will load this
DLL first whenever the application is launched. Thus, if an application ships
with jpeg62.dll, jpeg7.dll, or jpeg8.dll, then back up the application's
version of this DLL and copy c:\libjpeg-turbo[64]\bin\jpeg*.dll into the
application's install directory to accelerate it.
The version of the libjpeg-turbo DLL distributed in the libjpeg-turbo SDK for
Visual C++ requires the Visual C++ 2008 C run time DLL (msvcr90.dll).
msvcr90.dll ships with more recent versions of Windows, but users of older
Windows releases can obtain it from the Visual C++ 2008 Redistributable
Package, which is available as a free download from Microsoft's web site.
NOTE: Features of libjpeg which require passing a C run time structure, such
as a file handle, from an application to libjpeg will probably not work with
the version of the libjpeg-turbo DLL distributed in the libjpeg-turbo SDK for
Visual C++, unless the application is also built to use the Visual C++ 2008 C
run time DLL. In particular, this affects jpeg_stdio_dest() and
jpeg_stdio_src().
Mac applications typically embed their own copies of the libjpeg dylib inside
the (hidden) application bundle, so it is not possible to globally replace
libjpeg on OS X systems. If an application uses a shared library version of
libjpeg, then it may be possible to replace the application's version of it.
This would generally involve copying libjpeg.*.dylib from libjpeg-turbo into
the appropriate place in the application bundle and using install_name_tool to
repoint the dylib to the new directory. This requires an advanced knowledge of
OS X and would not survive an upgrade or a re-install of the application.
Thus, it is not recommended for most users.
=======================
Replacing TurboJPEG/IPP
=======================
libjpeg-turbo is a drop-in replacement for the TurboJPEG/IPP SDK used by
VirtualGL 2.1.x and TurboVNC 0.6 (and prior.) libjpeg-turbo contains a wrapper
library (TurboJPEG/OSS) that emulates the TurboJPEG API using libjpeg-turbo
instead of the closed source Intel Performance Primitives. You can replace the
TurboJPEG/IPP package on Linux systems with the libjpeg-turbo package in order
to make existing releases of VirtualGL 2.1.x and TurboVNC 0.x use the new codec
at run time. Note that the 64-bit libjpeg-turbo packages contain only 64-bit
binaries, whereas the TurboJPEG/IPP 64-bit packages contained both 64-bit and
32-bit binaries. Thus, to replace a TurboJPEG/IPP 64-bit package, install
both the 64-bit and 32-bit versions of libjpeg-turbo.
You can also build the VirtualGL 2.1.x and TurboVNC 0.6 source code with
the libjpeg-turbo SDK instead of TurboJPEG/IPP. It should work identically.
libjpeg-turbo also includes static library versions of TurboJPEG/OSS, which
are used to build TurboVNC 1.0 and later.
========================================
Using libjpeg-turbo in Your Own Programs
========================================
For the most part, libjpeg-turbo should work identically to libjpeg, so in
most cases, an application can be built against libjpeg and then run against
libjpeg-turbo. On Unix systems (including Cygwin), you can build against
libjpeg-turbo instead of libjpeg by setting
CPATH=/opt/libjpeg-turbo/include
and
LIBRARY_PATH=/opt/libjpeg-turbo/{lib}
({lib} = lib32 or lib64, depending on whether you are building a 32-bit or a
64-bit application.)
If using MinGW, then set
CPATH=/c/libjpeg-turbo-gcc[64]/include
and
LIBRARY_PATH=/c/libjpeg-turbo-gcc[64]/lib
Building against libjpeg-turbo is useful, for instance, if you want to build an
application that leverages the libjpeg-turbo colorspace extensions (see below.)
On Linux and Solaris systems, you would still need to manipulate
LD_LIBRARY_PATH or create appropriate sym links to use libjpeg-turbo at run
time. On such systems, you can pass -R /opt/libjpeg-turbo/{lib} to the linker
to force the use of libjpeg-turbo at run time rather than libjpeg (also useful
if you want to leverage the colorspace extensions), or you can link against the
libjpeg-turbo static library.
To force a Linux, Solaris, or MinGW application to link against the static
version of libjpeg-turbo, you can use the following linker options:
-Wl,-Bstatic -ljpeg -Wl,-Bdynamic
On OS X, simply add /opt/libjpeg-turbo/lib/libjpeg.a to the linker command
line (this also works on Linux and Solaris.)
To build Visual C++ applications using libjpeg-turbo, add
c:\libjpeg-turbo[64]\include to the system or user INCLUDE environment
variable and c:\libjpeg-turbo[64]\lib to the system or user LIB environment
variable, and then link against either jpeg.lib (to use the DLL version of
libjpeg-turbo) or jpeg-static.lib (to use the static version of libjpeg-turbo.)
=====================
Colorspace Extensions
=====================
libjpeg-turbo includes extensions which allow JPEG images to be compressed
directly from (and decompressed directly to) buffers which use BGR, BGRX,
RGBX, XBGR, and XRGB pixel ordering. This is implemented with six new
colorspace constants:
JCS_EXT_RGB /* red/green/blue */
JCS_EXT_RGBX /* red/green/blue/x */
JCS_EXT_BGR /* blue/green/red */
JCS_EXT_BGRX /* blue/green/red/x */
JCS_EXT_XBGR /* x/blue/green/red */
JCS_EXT_XRGB /* x/red/green/blue */
Setting cinfo.in_color_space (compression) or cinfo.out_color_space
(decompression) to one of these values will cause libjpeg-turbo to read the
red, green, and blue values from (or write them to) the appropriate position in
the pixel when YUV conversion is performed.
Your application can check for the existence of these extensions at compile
time with:
#ifdef JCS_EXTENSIONS
At run time, attempting to use these extensions with a version of libjpeg
that doesn't support them will result in a "Bogus input colorspace" error.
=================================
libjpeg v7 and v8 API/ABI support
=================================
libjpeg v7 and v8 added new features to the API/ABI, and, unfortunately, the
compression and decompression structures were extended in a backward-
incompatible manner to accommodate these features. Thus, programs which are
built to use libjpeg v7 or v8 did not work with libjpeg-turbo, since it is
based on the libjpeg v6b code base. Although libjpeg v7 and v8 are still not
as widely used as v6b, enough programs (including a few Linux distros) have
made the switch that it was desirable to provide support for the libjpeg v7/v8
API/ABI in libjpeg-turbo.
Some of the libjpeg v7 and v8 features -- DCT scaling, to name one -- involve
deep modifications to the code which cannot be accommodated by libjpeg-turbo
without either breaking compatibility with libjpeg v6b or producing an
unsupportable mess. In order to fully support libjpeg v8 with all of its
features, we would have to essentially port the SIMD extensions to the libjpeg
v8 code base and maintain two separate code trees. We are hesitant to do this
until/unless the newer libjpeg code bases garner more community support and
involvement and until/unless we have some notion of whether future libjpeg
releases will also be backward-incompatible.
By passing an argument of --with-jpeg7 or --with-jpeg8 to configure, or an
argument of -DWITH_JPEG7=1 or -DWITH_JPEG8=1 to cmake, you can build a version
of libjpeg-turbo which emulates the libjpeg v7 or v8 API/ABI, so that programs
which are built against libjpeg v7 or v8 can be run with libjpeg-turbo. The
following section describes which libjpeg v7+ features are supported and which
aren't.
libjpeg v7 and v8 Features:
---------------------------
Fully supported:
-- cjpeg: Separate quality settings for luminance and chrominance
Note that the libpjeg v7+ API was extended to accommodate this feature only
for convenience purposes. It has always been possible to implement this
feature with libjpeg v6b (see rdswitch.c for an example.)
-- cjpeg: 32-bit BMP support
-- jpegtran: lossless cropping
-- jpegtran: -perfect option
-- rdjpgcom: -raw option
-- rdjpgcom: locale awareness
Fully supported when using libjpeg v7/v8 emulation:
-- libjpeg: In-memory source and destination managers
Not supported:
-- libjpeg: DCT scaling in compressor
cinfo.scale_num and cinfo.scale_denom are silently ignored.
-- libjpeg: IDCT scaling extensions in decompressor
libjpeg-turbo still supports IDCT scaling with scaling factors of 1/2, 1/4,
and 1/8 (same as libjpeg v6b.)
-- libjpeg: Fancy downsampling in compressor
cinfo.do_fancy_downsampling is silently ignored.
-- jpegtran: Scaling
Seems to depend on the DCT scaling feature, which isn't supported.
*******************************************************************************
** Performance pitfalls
*******************************************************************************
===============
Restart Markers
===============
The optimized Huffman decoder in libjpeg-turbo does not handle restart markers
in a way that makes libjpeg happy, so it is necessary to use the slow Huffman
decoder when decompressing a JPEG image that has restart markers. This can
cause the decompression performance to drop by as much as 20%, but the
performance will still be much much greater than that of libjpeg v6b. Many
consumer packages, such as PhotoShop, use restart markers when generating JPEG
images, so images generated by those programs will experience this issue.
===============================================
Fast Integer Forward DCT at High Quality Levels
===============================================
The algorithm used by the SIMD-accelerated quantization function cannot produce
correct results whenever the fast integer forward DCT is used along with a JPEG
quality of 98-100. Thus, libjpeg-turbo must use the non-SIMD quantization
function in those cases. This causes performance to drop by as much as 40%.
It is therefore strongly advised that you use the slow integer forward DCT
whenever encoding images with a JPEG quality of 98 or higher.

Просмотреть файл

@ -2,6 +2,7 @@
* cderror.h
*
* Copyright (C) 1994-1997, Thomas G. Lane.
* Modified 2009 by Guido Vollbeding.
* This file is part of the Independent JPEG Group's software.
* For conditions of distribution and use, see the accompanying README file.
*
@ -45,6 +46,7 @@ JMESSAGE(JERR_BMP_BADHEADER, "Invalid BMP file: bad header length")
JMESSAGE(JERR_BMP_BADPLANES, "Invalid BMP file: biPlanes not equal to 1")
JMESSAGE(JERR_BMP_COLORSPACE, "BMP output must be grayscale or RGB")
JMESSAGE(JERR_BMP_COMPRESSED, "Sorry, compressed BMPs not yet supported")
JMESSAGE(JERR_BMP_EMPTY, "Empty BMP image")
JMESSAGE(JERR_BMP_NOT, "Not a BMP file - does not start with BM")
JMESSAGE(JTRC_BMP, "%ux%u 24-bit BMP image")
JMESSAGE(JTRC_BMP_MAPPED, "%ux%u 8-bit colormapped BMP image")

Просмотреть файл

@ -1,181 +0,0 @@
/*
* cdjpeg.c
*
* Copyright (C) 1991-1997, Thomas G. Lane.
* This file is part of the Independent JPEG Group's software.
* For conditions of distribution and use, see the accompanying README file.
*
* This file contains common support routines used by the IJG application
* programs (cjpeg, djpeg, jpegtran).
*/
#include "cdjpeg.h" /* Common decls for cjpeg/djpeg applications */
#include <ctype.h> /* to declare isupper(), tolower() */
#ifdef NEED_SIGNAL_CATCHER
#include <signal.h> /* to declare signal() */
#endif
#ifdef USE_SETMODE
#include <fcntl.h> /* to declare setmode()'s parameter macros */
/* If you have setmode() but not <io.h>, just delete this line: */
#include <io.h> /* to declare setmode() */
#endif
/*
* Signal catcher to ensure that temporary files are removed before aborting.
* NB: for Amiga Manx C this is actually a global routine named _abort();
* we put "#define signal_catcher _abort" in jconfig.h. Talk about bogus...
*/
#ifdef NEED_SIGNAL_CATCHER
static j_common_ptr sig_cinfo;
void /* must be global for Manx C */
signal_catcher (int signum)
{
if (sig_cinfo != NULL) {
if (sig_cinfo->err != NULL) /* turn off trace output */
sig_cinfo->err->trace_level = 0;
jpeg_destroy(sig_cinfo); /* clean up memory allocation & temp files */
}
exit(EXIT_FAILURE);
}
GLOBAL(void)
enable_signal_catcher (j_common_ptr cinfo)
{
sig_cinfo = cinfo;
#ifdef SIGINT /* not all systems have SIGINT */
signal(SIGINT, signal_catcher);
#endif
#ifdef SIGTERM /* not all systems have SIGTERM */
signal(SIGTERM, signal_catcher);
#endif
}
#endif
/*
* Optional progress monitor: display a percent-done figure on stderr.
*/
#ifdef PROGRESS_REPORT
METHODDEF(void)
progress_monitor (j_common_ptr cinfo)
{
cd_progress_ptr prog = (cd_progress_ptr) cinfo->progress;
int total_passes = prog->pub.total_passes + prog->total_extra_passes;
int percent_done = (int) (prog->pub.pass_counter*100L/prog->pub.pass_limit);
if (percent_done != prog->percent_done) {
prog->percent_done = percent_done;
if (total_passes > 1) {
fprintf(stderr, "\rPass %d/%d: %3d%% ",
prog->pub.completed_passes + prog->completed_extra_passes + 1,
total_passes, percent_done);
} else {
fprintf(stderr, "\r %3d%% ", percent_done);
}
fflush(stderr);
}
}
GLOBAL(void)
start_progress_monitor (j_common_ptr cinfo, cd_progress_ptr progress)
{
/* Enable progress display, unless trace output is on */
if (cinfo->err->trace_level == 0) {
progress->pub.progress_monitor = progress_monitor;
progress->completed_extra_passes = 0;
progress->total_extra_passes = 0;
progress->percent_done = -1;
cinfo->progress = &progress->pub;
}
}
GLOBAL(void)
end_progress_monitor (j_common_ptr cinfo)
{
/* Clear away progress display */
if (cinfo->err->trace_level == 0) {
fprintf(stderr, "\r \r");
fflush(stderr);
}
}
#endif
/*
* Case-insensitive matching of possibly-abbreviated keyword switches.
* keyword is the constant keyword (must be lower case already),
* minchars is length of minimum legal abbreviation.
*/
GLOBAL(boolean)
keymatch (char * arg, const char * keyword, int minchars)
{
register int ca, ck;
register int nmatched = 0;
while ((ca = *arg++) != '\0') {
if ((ck = *keyword++) == '\0')
return FALSE; /* arg longer than keyword, no good */
if (isupper(ca)) /* force arg to lcase (assume ck is already) */
ca = tolower(ca);
if (ca != ck)
return FALSE; /* no good */
nmatched++; /* count matched characters */
}
/* reached end of argument; fail if it's too short for unique abbrev */
if (nmatched < minchars)
return FALSE;
return TRUE; /* A-OK */
}
/*
* Routines to establish binary I/O mode for stdin and stdout.
* Non-Unix systems often require some hacking to get out of text mode.
*/
GLOBAL(FILE *)
read_stdin (void)
{
FILE * input_file = stdin;
#ifdef USE_SETMODE /* need to hack file mode? */
setmode(fileno(stdin), O_BINARY);
#endif
#ifdef USE_FDOPEN /* need to re-open in binary mode? */
if ((input_file = fdopen(fileno(stdin), READ_BINARY)) == NULL) {
fprintf(stderr, "Cannot reopen stdin\n");
exit(EXIT_FAILURE);
}
#endif
return input_file;
}
GLOBAL(FILE *)
write_stdout (void)
{
FILE * output_file = stdout;
#ifdef USE_SETMODE /* need to hack file mode? */
setmode(fileno(stdout), O_BINARY);
#endif
#ifdef USE_FDOPEN /* need to re-open in binary mode? */
if ((output_file = fdopen(fileno(stdout), WRITE_BINARY)) == NULL) {
fprintf(stderr, "Cannot reopen stdout\n");
exit(EXIT_FAILURE);
}
#endif
return output_file;
}

Просмотреть файл

@ -104,6 +104,7 @@ typedef struct cdjpeg_progress_mgr * cd_progress_ptr;
#define jinit_write_targa jIWrTarga
#define read_quant_tables RdQTables
#define read_scan_script RdScnScript
#define set_quality_ratings SetQRates
#define set_quant_slots SetQSlots
#define set_sample_factors SetSFacts
#define read_color_map RdCMap
@ -131,8 +132,10 @@ EXTERN(djpeg_dest_ptr) jinit_write_targa JPP((j_decompress_ptr cinfo));
/* cjpeg support routines (in rdswitch.c) */
EXTERN(boolean) read_quant_tables JPP((j_compress_ptr cinfo, char * filename,
int scale_factor, boolean force_baseline));
boolean force_baseline));
EXTERN(boolean) read_scan_script JPP((j_compress_ptr cinfo, char * filename));
EXTERN(boolean) set_quality_ratings JPP((j_compress_ptr cinfo, char *arg,
boolean force_baseline));
EXTERN(boolean) set_quant_slots JPP((j_compress_ptr cinfo, char *arg));
EXTERN(boolean) set_sample_factors JPP((j_compress_ptr cinfo, char *arg));

Просмотреть файл

@ -1,217 +0,0 @@
CHANGE LOG for Independent JPEG Group's JPEG software
Version 6b 27-Mar-1998
-----------------------
jpegtran has new features for lossless image transformations (rotation
and flipping) as well as "lossless" reduction to grayscale.
jpegtran now copies comments by default; it has a -copy switch to enable
copying all APPn blocks as well, or to suppress comments. (Formerly it
always suppressed comments and APPn blocks.) jpegtran now also preserves
JFIF version and resolution information.
New decompressor library feature: COM and APPn markers found in the input
file can be saved in memory for later use by the application. (Before,
you had to code this up yourself with a custom marker processor.)
There is an unused field "void * client_data" now in compress and decompress
parameter structs; this may be useful in some applications.
JFIF version number information is now saved by the decoder and accepted by
the encoder. jpegtran uses this to copy the source file's version number,
to ensure "jpegtran -copy all" won't create bogus files that contain JFXX
extensions but claim to be version 1.01. Applications that generate their
own JFXX extension markers also (finally) have a supported way to cause the
encoder to emit JFIF version number 1.02.
djpeg's trace mode reports JFIF 1.02 thumbnail images as such, rather
than as unknown APP0 markers.
In -verbose mode, djpeg and rdjpgcom will try to print the contents of
APP12 markers as text. Some digital cameras store useful text information
in APP12 markers.
Handling of truncated data streams is more robust: blocks beyond the one in
which the error occurs will be output as uniform gray, or left unchanged
if decoding a progressive JPEG. The appearance no longer depends on the
Huffman tables being used.
Huffman tables are checked for validity much more carefully than before.
To avoid the Unisys LZW patent, djpeg's GIF output capability has been
changed to produce "uncompressed GIFs", and cjpeg's GIF input capability
has been removed altogether. We're not happy about it either, but there
seems to be no good alternative.
The configure script now supports building libjpeg as a shared library
on many flavors of Unix (all the ones that GNU libtool knows how to
build shared libraries for). Use "./configure --enable-shared" to
try this out.
New jconfig file and makefiles for Microsoft Visual C++ and Developer Studio.
Also, a jconfig file and a build script for Metrowerks CodeWarrior
on Apple Macintosh. makefile.dj has been updated for DJGPP v2, and there
are miscellaneous other minor improvements in the makefiles.
jmemmac.c now knows how to create temporary files following Mac System 7
conventions.
djpeg's -map switch is now able to read raw-format PPM files reliably.
cjpeg -progressive -restart no longer generates any unnecessary DRI markers.
Multiple calls to jpeg_simple_progression for a single JPEG object
no longer leak memory.
Version 6a 7-Feb-96
--------------------
Library initialization sequence modified to detect version mismatches
and struct field packing mismatches between library and calling application.
This change requires applications to be recompiled, but does not require
any application source code change.
All routine declarations changed to the style "GLOBAL(type) name ...",
that is, GLOBAL, LOCAL, METHODDEF, EXTERN are now macros taking the
routine's return type as an argument. This makes it possible to add
Microsoft-style linkage keywords to all the routines by changing just
these macros. Note that any application code that was using these macros
will have to be changed.
DCT coefficient quantization tables are now stored in normal array order
rather than zigzag order. Application code that calls jpeg_add_quant_table,
or otherwise manipulates quantization tables directly, will need to be
changed. If you need to make such code work with either older or newer
versions of the library, a test like "#if JPEG_LIB_VERSION >= 61" is
recommended.
djpeg's trace capability now dumps DQT tables in natural order, not zigzag
order. This allows the trace output to be made into a "-qtables" file
more easily.
New system-dependent memory manager module for use on Apple Macintosh.
Fix bug in cjpeg's -smooth option: last one or two scanlines would be
duplicates of the prior line unless the image height mod 16 was 1 or 2.
Repair minor problems in VMS, BCC, MC6 makefiles.
New configure script based on latest GNU Autoconf.
Correct the list of include files needed by MetroWerks C for ccommand().
Numerous small documentation updates.
Version 6 2-Aug-95
-------------------
Progressive JPEG support: library can read and write full progressive JPEG
files. A "buffered image" mode supports incremental decoding for on-the-fly
display of progressive images. Simply recompiling an existing IJG-v5-based
decoder with v6 should allow it to read progressive files, though of course
without any special progressive display.
New "jpegtran" application performs lossless transcoding between different
JPEG formats; primarily, it can be used to convert baseline to progressive
JPEG and vice versa. In support of jpegtran, the library now allows lossless
reading and writing of JPEG files as DCT coefficient arrays. This ability
may be of use in other applications.
Notes for programmers:
* We changed jpeg_start_decompress() to be able to suspend; this makes all
decoding modes available to suspending-input applications. However,
existing applications that use suspending input will need to be changed
to check the return value from jpeg_start_decompress(). You don't need to
do anything if you don't use a suspending data source.
* We changed the interface to the virtual array routines: access_virt_array
routines now take a count of the number of rows to access this time. The
last parameter to request_virt_array routines is now interpreted as the
maximum number of rows that may be accessed at once, but not necessarily
the height of every access.
Version 5b 15-Mar-95
---------------------
Correct bugs with grayscale images having v_samp_factor > 1.
jpeg_write_raw_data() now supports output suspension.
Correct bugs in "configure" script for case of compiling in
a directory other than the one containing the source files.
Repair bug in jquant1.c: sometimes didn't use as many colors as it could.
Borland C makefile and jconfig file work under either MS-DOS or OS/2.
Miscellaneous improvements to documentation.
Version 5a 7-Dec-94
--------------------
Changed color conversion roundoff behavior so that grayscale values are
represented exactly. (This causes test image files to change.)
Make ordered dither use 16x16 instead of 4x4 pattern for a small quality
improvement.
New configure script based on latest GNU Autoconf.
Fix configure script to handle CFLAGS correctly.
Rename *.auto files to *.cfg, so that configure script still works if
file names have been truncated for DOS.
Fix bug in rdbmp.c: didn't allow for extra data between header and image.
Modify rdppm.c/wrppm.c to handle 2-byte raw PPM/PGM formats for 12-bit data.
Fix several bugs in rdrle.c.
NEED_SHORT_EXTERNAL_NAMES option was broken.
Revise jerror.h/jerror.c for more flexibility in message table.
Repair oversight in jmemname.c NO_MKTEMP case: file could be there
but unreadable.
Version 5 24-Sep-94
--------------------
Version 5 represents a nearly complete redesign and rewrite of the IJG
software. Major user-visible changes include:
* Automatic configuration simplifies installation for most Unix systems.
* A range of speed vs. image quality tradeoffs are supported.
This includes resizing of an image during decompression: scaling down
by a factor of 1/2, 1/4, or 1/8 is handled very efficiently.
* New programs rdjpgcom and wrjpgcom allow insertion and extraction
of text comments in a JPEG file.
The application programmer's interface to the library has changed completely.
Notable improvements include:
* We have eliminated the use of callback routines for handling the
uncompressed image data. The application now sees the library as a
set of routines that it calls to read or write image data on a
scanline-by-scanline basis.
* The application image data is represented in a conventional interleaved-
pixel format, rather than as a separate array for each color channel.
This can save a copying step in many programs.
* The handling of compressed data has been cleaned up: the application can
supply routines to source or sink the compressed data. It is possible to
suspend processing on source/sink buffer overrun, although this is not
supported in all operating modes.
* All static state has been eliminated from the library, so that multiple
instances of compression or decompression can be active concurrently.
* JPEG abbreviated datastream formats are supported, ie, quantization and
Huffman tables can be stored separately from the image data.
* And not only that, but the documentation of the library has improved
considerably!
The last widely used release before the version 5 rewrite was version 4A of
18-Feb-93. Change logs before that point have been discarded, since they
are not of much interest after the rewrite.

Просмотреть файл

@ -1,606 +0,0 @@
/*
* cjpeg.c
*
* Copyright (C) 1991-1998, Thomas G. Lane.
* This file is part of the Independent JPEG Group's software.
* For conditions of distribution and use, see the accompanying README file.
*
* This file contains a command-line user interface for the JPEG compressor.
* It should work on any system with Unix- or MS-DOS-style command lines.
*
* Two different command line styles are permitted, depending on the
* compile-time switch TWO_FILE_COMMANDLINE:
* cjpeg [options] inputfile outputfile
* cjpeg [options] [inputfile]
* In the second style, output is always to standard output, which you'd
* normally redirect to a file or pipe to some other program. Input is
* either from a named file or from standard input (typically redirected).
* The second style is convenient on Unix but is unhelpful on systems that
* don't support pipes. Also, you MUST use the first style if your system
* doesn't do binary I/O to stdin/stdout.
* To simplify script writing, the "-outfile" switch is provided. The syntax
* cjpeg [options] -outfile outputfile inputfile
* works regardless of which command line style is used.
*/
#include "cdjpeg.h" /* Common decls for cjpeg/djpeg applications */
#include "jversion.h" /* for version message */
#ifdef USE_CCOMMAND /* command-line reader for Macintosh */
#ifdef __MWERKS__
#include <SIOUX.h> /* Metrowerks needs this */
#include <console.h> /* ... and this */
#endif
#ifdef THINK_C
#include <console.h> /* Think declares it here */
#endif
#endif
/* Create the add-on message string table. */
#define JMESSAGE(code,string) string ,
static const char * const cdjpeg_message_table[] = {
#include "cderror.h"
NULL
};
/*
* This routine determines what format the input file is,
* and selects the appropriate input-reading module.
*
* To determine which family of input formats the file belongs to,
* we may look only at the first byte of the file, since C does not
* guarantee that more than one character can be pushed back with ungetc.
* Looking at additional bytes would require one of these approaches:
* 1) assume we can fseek() the input file (fails for piped input);
* 2) assume we can push back more than one character (works in
* some C implementations, but unportable);
* 3) provide our own buffering (breaks input readers that want to use
* stdio directly, such as the RLE library);
* or 4) don't put back the data, and modify the input_init methods to assume
* they start reading after the start of file (also breaks RLE library).
* #1 is attractive for MS-DOS but is untenable on Unix.
*
* The most portable solution for file types that can't be identified by their
* first byte is to make the user tell us what they are. This is also the
* only approach for "raw" file types that contain only arbitrary values.
* We presently apply this method for Targa files. Most of the time Targa
* files start with 0x00, so we recognize that case. Potentially, however,
* a Targa file could start with any byte value (byte 0 is the length of the
* seldom-used ID field), so we provide a switch to force Targa input mode.
*/
static boolean is_targa; /* records user -targa switch */
LOCAL(cjpeg_source_ptr)
select_file_type (j_compress_ptr cinfo, FILE * infile)
{
int c;
if (is_targa) {
#ifdef TARGA_SUPPORTED
return jinit_read_targa(cinfo);
#else
ERREXIT(cinfo, JERR_TGA_NOTCOMP);
#endif
}
if ((c = getc(infile)) == EOF)
ERREXIT(cinfo, JERR_INPUT_EMPTY);
if (ungetc(c, infile) == EOF)
ERREXIT(cinfo, JERR_UNGETC_FAILED);
switch (c) {
#ifdef BMP_SUPPORTED
case 'B':
return jinit_read_bmp(cinfo);
#endif
#ifdef GIF_SUPPORTED
case 'G':
return jinit_read_gif(cinfo);
#endif
#ifdef PPM_SUPPORTED
case 'P':
return jinit_read_ppm(cinfo);
#endif
#ifdef RLE_SUPPORTED
case 'R':
return jinit_read_rle(cinfo);
#endif
#ifdef TARGA_SUPPORTED
case 0x00:
return jinit_read_targa(cinfo);
#endif
default:
ERREXIT(cinfo, JERR_UNKNOWN_FORMAT);
break;
}
return NULL; /* suppress compiler warnings */
}
/*
* Argument-parsing code.
* The switch parser is designed to be useful with DOS-style command line
* syntax, ie, intermixed switches and file names, where only the switches
* to the left of a given file name affect processing of that file.
* The main program in this file doesn't actually use this capability...
*/
static const char * progname; /* program name for error messages */
static char * outfilename; /* for -outfile switch */
LOCAL(void)
usage (void)
/* complain about bad command line */
{
fprintf(stderr, "usage: %s [switches] ", progname);
#ifdef TWO_FILE_COMMANDLINE
fprintf(stderr, "inputfile outputfile\n");
#else
fprintf(stderr, "[inputfile]\n");
#endif
fprintf(stderr, "Switches (names may be abbreviated):\n");
fprintf(stderr, " -quality N Compression quality (0..100; 5-95 is useful range)\n");
fprintf(stderr, " -grayscale Create monochrome JPEG file\n");
#ifdef ENTROPY_OPT_SUPPORTED
fprintf(stderr, " -optimize Optimize Huffman table (smaller file, but slow compression)\n");
#endif
#ifdef C_PROGRESSIVE_SUPPORTED
fprintf(stderr, " -progressive Create progressive JPEG file\n");
#endif
#ifdef TARGA_SUPPORTED
fprintf(stderr, " -targa Input file is Targa format (usually not needed)\n");
#endif
fprintf(stderr, "Switches for advanced users:\n");
#ifdef DCT_ISLOW_SUPPORTED
fprintf(stderr, " -dct int Use integer DCT method%s\n",
(JDCT_DEFAULT == JDCT_ISLOW ? " (default)" : ""));
#endif
#ifdef DCT_IFAST_SUPPORTED
fprintf(stderr, " -dct fast Use fast integer DCT (less accurate)%s\n",
(JDCT_DEFAULT == JDCT_IFAST ? " (default)" : ""));
#endif
#ifdef DCT_FLOAT_SUPPORTED
fprintf(stderr, " -dct float Use floating-point DCT method%s\n",
(JDCT_DEFAULT == JDCT_FLOAT ? " (default)" : ""));
#endif
fprintf(stderr, " -restart N Set restart interval in rows, or in blocks with B\n");
#ifdef INPUT_SMOOTHING_SUPPORTED
fprintf(stderr, " -smooth N Smooth dithered input (N=1..100 is strength)\n");
#endif
fprintf(stderr, " -maxmemory N Maximum memory to use (in kbytes)\n");
fprintf(stderr, " -outfile name Specify name for output file\n");
fprintf(stderr, " -verbose or -debug Emit debug output\n");
fprintf(stderr, "Switches for wizards:\n");
#ifdef C_ARITH_CODING_SUPPORTED
fprintf(stderr, " -arithmetic Use arithmetic coding\n");
#endif
fprintf(stderr, " -baseline Force baseline quantization tables\n");
fprintf(stderr, " -qtables file Use quantization tables given in file\n");
fprintf(stderr, " -qslots N[,...] Set component quantization tables\n");
fprintf(stderr, " -sample HxV[,...] Set component sampling factors\n");
#ifdef C_MULTISCAN_FILES_SUPPORTED
fprintf(stderr, " -scans file Create multi-scan JPEG per script file\n");
#endif
exit(EXIT_FAILURE);
}
LOCAL(int)
parse_switches (j_compress_ptr cinfo, int argc, char **argv,
int last_file_arg_seen, boolean for_real)
/* Parse optional switches.
* Returns argv[] index of first file-name argument (== argc if none).
* Any file names with indexes <= last_file_arg_seen are ignored;
* they have presumably been processed in a previous iteration.
* (Pass 0 for last_file_arg_seen on the first or only iteration.)
* for_real is FALSE on the first (dummy) pass; we may skip any expensive
* processing.
*/
{
int argn;
char * arg;
int quality; /* -quality parameter */
int q_scale_factor; /* scaling percentage for -qtables */
boolean force_baseline;
boolean simple_progressive;
char * qtablefile = NULL; /* saves -qtables filename if any */
char * qslotsarg = NULL; /* saves -qslots parm if any */
char * samplearg = NULL; /* saves -sample parm if any */
char * scansarg = NULL; /* saves -scans parm if any */
/* Set up default JPEG parameters. */
/* Note that default -quality level need not, and does not,
* match the default scaling for an explicit -qtables argument.
*/
quality = 75; /* default -quality value */
q_scale_factor = 100; /* default to no scaling for -qtables */
force_baseline = FALSE; /* by default, allow 16-bit quantizers */
simple_progressive = FALSE;
is_targa = FALSE;
outfilename = NULL;
cinfo->err->trace_level = 0;
/* Scan command line options, adjust parameters */
for (argn = 1; argn < argc; argn++) {
arg = argv[argn];
if (*arg != '-') {
/* Not a switch, must be a file name argument */
if (argn <= last_file_arg_seen) {
outfilename = NULL; /* -outfile applies to just one input file */
continue; /* ignore this name if previously processed */
}
break; /* else done parsing switches */
}
arg++; /* advance past switch marker character */
if (keymatch(arg, "arithmetic", 1)) {
/* Use arithmetic coding. */
#ifdef C_ARITH_CODING_SUPPORTED
cinfo->arith_code = TRUE;
#else
fprintf(stderr, "%s: sorry, arithmetic coding not supported\n",
progname);
exit(EXIT_FAILURE);
#endif
} else if (keymatch(arg, "baseline", 1)) {
/* Force baseline-compatible output (8-bit quantizer values). */
force_baseline = TRUE;
} else if (keymatch(arg, "dct", 2)) {
/* Select DCT algorithm. */
if (++argn >= argc) /* advance to next argument */
usage();
if (keymatch(argv[argn], "int", 1)) {
cinfo->dct_method = JDCT_ISLOW;
} else if (keymatch(argv[argn], "fast", 2)) {
cinfo->dct_method = JDCT_IFAST;
} else if (keymatch(argv[argn], "float", 2)) {
cinfo->dct_method = JDCT_FLOAT;
} else
usage();
} else if (keymatch(arg, "debug", 1) || keymatch(arg, "verbose", 1)) {
/* Enable debug printouts. */
/* On first -d, print version identification */
static boolean printed_version = FALSE;
if (! printed_version) {
fprintf(stderr, "Independent JPEG Group's CJPEG, version %s\n%s\n",
JVERSION, JCOPYRIGHT);
printed_version = TRUE;
}
cinfo->err->trace_level++;
} else if (keymatch(arg, "grayscale", 2) || keymatch(arg, "greyscale",2)) {
/* Force a monochrome JPEG file to be generated. */
jpeg_set_colorspace(cinfo, JCS_GRAYSCALE);
} else if (keymatch(arg, "maxmemory", 3)) {
/* Maximum memory in Kb (or Mb with 'm'). */
long lval;
char ch = 'x';
if (++argn >= argc) /* advance to next argument */
usage();
if (sscanf(argv[argn], "%ld%c", &lval, &ch) < 1)
usage();
if (ch == 'm' || ch == 'M')
lval *= 1000L;
cinfo->mem->max_memory_to_use = lval * 1000L;
} else if (keymatch(arg, "optimize", 1) || keymatch(arg, "optimise", 1)) {
/* Enable entropy parm optimization. */
#ifdef ENTROPY_OPT_SUPPORTED
cinfo->optimize_coding = TRUE;
#else
fprintf(stderr, "%s: sorry, entropy optimization was not compiled\n",
progname);
exit(EXIT_FAILURE);
#endif
} else if (keymatch(arg, "outfile", 4)) {
/* Set output file name. */
if (++argn >= argc) /* advance to next argument */
usage();
outfilename = argv[argn]; /* save it away for later use */
} else if (keymatch(arg, "progressive", 1)) {
/* Select simple progressive mode. */
#ifdef C_PROGRESSIVE_SUPPORTED
simple_progressive = TRUE;
/* We must postpone execution until num_components is known. */
#else
fprintf(stderr, "%s: sorry, progressive output was not compiled\n",
progname);
exit(EXIT_FAILURE);
#endif
} else if (keymatch(arg, "quality", 1)) {
/* Quality factor (quantization table scaling factor). */
if (++argn >= argc) /* advance to next argument */
usage();
if (sscanf(argv[argn], "%d", &quality) != 1)
usage();
/* Change scale factor in case -qtables is present. */
q_scale_factor = jpeg_quality_scaling(quality);
} else if (keymatch(arg, "qslots", 2)) {
/* Quantization table slot numbers. */
if (++argn >= argc) /* advance to next argument */
usage();
qslotsarg = argv[argn];
/* Must delay setting qslots until after we have processed any
* colorspace-determining switches, since jpeg_set_colorspace sets
* default quant table numbers.
*/
} else if (keymatch(arg, "qtables", 2)) {
/* Quantization tables fetched from file. */
if (++argn >= argc) /* advance to next argument */
usage();
qtablefile = argv[argn];
/* We postpone actually reading the file in case -quality comes later. */
} else if (keymatch(arg, "restart", 1)) {
/* Restart interval in MCU rows (or in MCUs with 'b'). */
long lval;
char ch = 'x';
if (++argn >= argc) /* advance to next argument */
usage();
if (sscanf(argv[argn], "%ld%c", &lval, &ch) < 1)
usage();
if (lval < 0 || lval > 65535L)
usage();
if (ch == 'b' || ch == 'B') {
cinfo->restart_interval = (unsigned int) lval;
cinfo->restart_in_rows = 0; /* else prior '-restart n' overrides me */
} else {
cinfo->restart_in_rows = (int) lval;
/* restart_interval will be computed during startup */
}
} else if (keymatch(arg, "sample", 2)) {
/* Set sampling factors. */
if (++argn >= argc) /* advance to next argument */
usage();
samplearg = argv[argn];
/* Must delay setting sample factors until after we have processed any
* colorspace-determining switches, since jpeg_set_colorspace sets
* default sampling factors.
*/
} else if (keymatch(arg, "scans", 2)) {
/* Set scan script. */
#ifdef C_MULTISCAN_FILES_SUPPORTED
if (++argn >= argc) /* advance to next argument */
usage();
scansarg = argv[argn];
/* We must postpone reading the file in case -progressive appears. */
#else
fprintf(stderr, "%s: sorry, multi-scan output was not compiled\n",
progname);
exit(EXIT_FAILURE);
#endif
} else if (keymatch(arg, "smooth", 2)) {
/* Set input smoothing factor. */
int val;
if (++argn >= argc) /* advance to next argument */
usage();
if (sscanf(argv[argn], "%d", &val) != 1)
usage();
if (val < 0 || val > 100)
usage();
cinfo->smoothing_factor = val;
} else if (keymatch(arg, "targa", 1)) {
/* Input file is Targa format. */
is_targa = TRUE;
} else {
usage(); /* bogus switch */
}
}
/* Post-switch-scanning cleanup */
if (for_real) {
/* Set quantization tables for selected quality. */
/* Some or all may be overridden if -qtables is present. */
jpeg_set_quality(cinfo, quality, force_baseline);
if (qtablefile != NULL) /* process -qtables if it was present */
if (! read_quant_tables(cinfo, qtablefile,
q_scale_factor, force_baseline))
usage();
if (qslotsarg != NULL) /* process -qslots if it was present */
if (! set_quant_slots(cinfo, qslotsarg))
usage();
if (samplearg != NULL) /* process -sample if it was present */
if (! set_sample_factors(cinfo, samplearg))
usage();
#ifdef C_PROGRESSIVE_SUPPORTED
if (simple_progressive) /* process -progressive; -scans can override */
jpeg_simple_progression(cinfo);
#endif
#ifdef C_MULTISCAN_FILES_SUPPORTED
if (scansarg != NULL) /* process -scans if it was present */
if (! read_scan_script(cinfo, scansarg))
usage();
#endif
}
return argn; /* return index of next arg (file name) */
}
/*
* The main program.
*/
int
main (int argc, char **argv)
{
struct jpeg_compress_struct cinfo;
struct jpeg_error_mgr jerr;
#ifdef PROGRESS_REPORT
struct cdjpeg_progress_mgr progress;
#endif
int file_index;
cjpeg_source_ptr src_mgr;
FILE * input_file;
FILE * output_file;
JDIMENSION num_scanlines;
/* On Mac, fetch a command line. */
#ifdef USE_CCOMMAND
argc = ccommand(&argv);
#endif
progname = argv[0];
if (progname == NULL || progname[0] == 0)
progname = "cjpeg"; /* in case C library doesn't provide it */
/* Initialize the JPEG compression object with default error handling. */
cinfo.err = jpeg_std_error(&jerr);
jpeg_create_compress(&cinfo);
/* Add some application-specific error messages (from cderror.h) */
jerr.addon_message_table = cdjpeg_message_table;
jerr.first_addon_message = JMSG_FIRSTADDONCODE;
jerr.last_addon_message = JMSG_LASTADDONCODE;
/* Now safe to enable signal catcher. */
#ifdef NEED_SIGNAL_CATCHER
enable_signal_catcher((j_common_ptr) &cinfo);
#endif
/* Initialize JPEG parameters.
* Much of this may be overridden later.
* In particular, we don't yet know the input file's color space,
* but we need to provide some value for jpeg_set_defaults() to work.
*/
cinfo.in_color_space = JCS_RGB; /* arbitrary guess */
jpeg_set_defaults(&cinfo);
/* Scan command line to find file names.
* It is convenient to use just one switch-parsing routine, but the switch
* values read here are ignored; we will rescan the switches after opening
* the input file.
*/
file_index = parse_switches(&cinfo, argc, argv, 0, FALSE);
#ifdef TWO_FILE_COMMANDLINE
/* Must have either -outfile switch or explicit output file name */
if (outfilename == NULL) {
if (file_index != argc-2) {
fprintf(stderr, "%s: must name one input and one output file\n",
progname);
usage();
}
outfilename = argv[file_index+1];
} else {
if (file_index != argc-1) {
fprintf(stderr, "%s: must name one input and one output file\n",
progname);
usage();
}
}
#else
/* Unix style: expect zero or one file name */
if (file_index < argc-1) {
fprintf(stderr, "%s: only one input file\n", progname);
usage();
}
#endif /* TWO_FILE_COMMANDLINE */
/* Open the input file. */
if (file_index < argc) {
if ((input_file = fopen(argv[file_index], READ_BINARY)) == NULL) {
fprintf(stderr, "%s: can't open %s\n", progname, argv[file_index]);
exit(EXIT_FAILURE);
}
} else {
/* default input file is stdin */
input_file = read_stdin();
}
/* Open the output file. */
if (outfilename != NULL) {
if ((output_file = fopen(outfilename, WRITE_BINARY)) == NULL) {
fprintf(stderr, "%s: can't open %s\n", progname, outfilename);
exit(EXIT_FAILURE);
}
} else {
/* default output file is stdout */
output_file = write_stdout();
}
#ifdef PROGRESS_REPORT
start_progress_monitor((j_common_ptr) &cinfo, &progress);
#endif
/* Figure out the input file format, and set up to read it. */
src_mgr = select_file_type(&cinfo, input_file);
src_mgr->input_file = input_file;
/* Read the input file header to obtain file size & colorspace. */
(*src_mgr->start_input) (&cinfo, src_mgr);
/* Now that we know input colorspace, fix colorspace-dependent defaults */
jpeg_default_colorspace(&cinfo);
/* Adjust default compression parameters by re-parsing the options */
file_index = parse_switches(&cinfo, argc, argv, 0, TRUE);
/* Specify data destination for compression */
jpeg_stdio_dest(&cinfo, output_file);
/* Start compressor */
jpeg_start_compress(&cinfo, TRUE);
/* Process data */
while (cinfo.next_scanline < cinfo.image_height) {
num_scanlines = (*src_mgr->get_pixel_rows) (&cinfo, src_mgr);
(void) jpeg_write_scanlines(&cinfo, src_mgr->buffer, num_scanlines);
}
/* Finish compression and release memory */
(*src_mgr->finish_input) (&cinfo, src_mgr);
jpeg_finish_compress(&cinfo);
jpeg_destroy_compress(&cinfo);
/* Close files, if we opened them */
if (input_file != stdin)
fclose(input_file);
if (output_file != stdout)
fclose(output_file);
#ifdef PROGRESS_REPORT
end_progress_monitor((j_common_ptr) &cinfo);
#endif
/* All done. */
exit(jerr.num_warnings ? EXIT_WARNING : EXIT_SUCCESS);
return 0; /* suppress no-return-value warnings */
}

Просмотреть файл

@ -1,402 +0,0 @@
/*
* ckconfig.c
*
* Copyright (C) 1991-1994, Thomas G. Lane.
* This file is part of the Independent JPEG Group's software.
* For conditions of distribution and use, see the accompanying README file.
*/
/*
* This program is intended to help you determine how to configure the JPEG
* software for installation on a particular system. The idea is to try to
* compile and execute this program. If your compiler fails to compile the
* program, make changes as indicated in the comments below. Once you can
* compile the program, run it, and it will produce a "jconfig.h" file for
* your system.
*
* As a general rule, each time you try to compile this program,
* pay attention only to the *first* error message you get from the compiler.
* Many C compilers will issue lots of spurious error messages once they
* have gotten confused. Go to the line indicated in the first error message,
* and read the comments preceding that line to see what to change.
*
* Almost all of the edits you may need to make to this program consist of
* changing a line that reads "#define SOME_SYMBOL" to "#undef SOME_SYMBOL",
* or vice versa. This is called defining or undefining that symbol.
*/
/* First we must see if your system has the include files we need.
* We start out with the assumption that your system has all the ANSI-standard
* include files. If you get any error trying to include one of these files,
* undefine the corresponding HAVE_xxx symbol.
*/
#define HAVE_STDDEF_H /* replace 'define' by 'undef' if error here */
#ifdef HAVE_STDDEF_H /* next line will be skipped if you undef... */
#include <stddef.h>
#endif
#define HAVE_STDLIB_H /* same thing for stdlib.h */
#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif
#include <stdio.h> /* If you ain't got this, you ain't got C. */
/* We have to see if your string functions are defined by
* strings.h (old BSD convention) or string.h (everybody else).
* We try the non-BSD convention first; define NEED_BSD_STRINGS
* if the compiler says it can't find string.h.
*/
#undef NEED_BSD_STRINGS
#ifdef NEED_BSD_STRINGS
#include <strings.h>
#else
#include <string.h>
#endif
/* On some systems (especially older Unix machines), type size_t is
* defined only in the include file <sys/types.h>. If you get a failure
* on the size_t test below, try defining NEED_SYS_TYPES_H.
*/
#undef NEED_SYS_TYPES_H /* start by assuming we don't need it */
#ifdef NEED_SYS_TYPES_H
#include <sys/types.h>
#endif
/* Usually type size_t is defined in one of the include files we've included
* above. If not, you'll get an error on the "typedef size_t my_size_t;" line.
* In that case, first try defining NEED_SYS_TYPES_H just above.
* If that doesn't work, you'll have to search through your system library
* to figure out which include file defines "size_t". Look for a line that
* says "typedef something-or-other size_t;". Then, change the line below
* that says "#include <someincludefile.h>" to instead include the file
* you found size_t in, and define NEED_SPECIAL_INCLUDE. If you can't find
* type size_t anywhere, try replacing "#include <someincludefile.h>" with
* "typedef unsigned int size_t;".
*/
#undef NEED_SPECIAL_INCLUDE /* assume we DON'T need it, for starters */
#ifdef NEED_SPECIAL_INCLUDE
#include <someincludefile.h>
#endif
typedef size_t my_size_t; /* The payoff: do we have size_t now? */
/* The next question is whether your compiler supports ANSI-style function
* prototypes. You need to know this in order to choose between using
* makefile.ansi and using makefile.unix.
* The #define line below is set to assume you have ANSI function prototypes.
* If you get an error in this group of lines, undefine HAVE_PROTOTYPES.
*/
#define HAVE_PROTOTYPES
#ifdef HAVE_PROTOTYPES
int testfunction (int arg1, int * arg2); /* check prototypes */
struct methods_struct { /* check method-pointer declarations */
int (*error_exit) (char *msgtext);
int (*trace_message) (char *msgtext);
int (*another_method) (void);
};
int testfunction (int arg1, int * arg2) /* check definitions */
{
return arg2[arg1];
}
int test2function (void) /* check void arg list */
{
return 0;
}
#endif
/* Now we want to find out if your compiler knows what "unsigned char" means.
* If you get an error on the "unsigned char un_char;" line,
* then undefine HAVE_UNSIGNED_CHAR.
*/
#define HAVE_UNSIGNED_CHAR
#ifdef HAVE_UNSIGNED_CHAR
unsigned char un_char;
#endif
/* Now we want to find out if your compiler knows what "unsigned short" means.
* If you get an error on the "unsigned short un_short;" line,
* then undefine HAVE_UNSIGNED_SHORT.
*/
#define HAVE_UNSIGNED_SHORT
#ifdef HAVE_UNSIGNED_SHORT
unsigned short un_short;
#endif
/* Now we want to find out if your compiler understands type "void".
* If you get an error anywhere in here, undefine HAVE_VOID.
*/
#define HAVE_VOID
#ifdef HAVE_VOID
/* Caution: a C++ compiler will insist on complete prototypes */
typedef void * void_ptr; /* check void * */
#ifdef HAVE_PROTOTYPES /* check ptr to function returning void */
typedef void (*void_func) (int a, int b);
#else
typedef void (*void_func) ();
#endif
#ifdef HAVE_PROTOTYPES /* check void function result */
void test3function (void_ptr arg1, void_func arg2)
#else
void test3function (arg1, arg2)
void_ptr arg1;
void_func arg2;
#endif
{
char * locptr = (char *) arg1; /* check casting to and from void * */
arg1 = (void *) locptr;
(*arg2) (1, 2); /* check call of fcn returning void */
}
#endif
/* Now we want to find out if your compiler knows what "const" means.
* If you get an error here, undefine HAVE_CONST.
*/
#define HAVE_CONST
#ifdef HAVE_CONST
static const int carray[3] = {1, 2, 3};
#ifdef HAVE_PROTOTYPES
int test4function (const int arg1)
#else
int test4function (arg1)
const int arg1;
#endif
{
return carray[arg1];
}
#endif
/* If you get an error or warning about this structure definition,
* define INCOMPLETE_TYPES_BROKEN.
*/
#undef INCOMPLETE_TYPES_BROKEN
#ifndef INCOMPLETE_TYPES_BROKEN
typedef struct undefined_structure * undef_struct_ptr;
#endif
/* If you get an error about duplicate names,
* define NEED_SHORT_EXTERNAL_NAMES.
*/
#undef NEED_SHORT_EXTERNAL_NAMES
#ifndef NEED_SHORT_EXTERNAL_NAMES
int possibly_duplicate_function ()
{
return 0;
}
int possibly_dupli_function ()
{
return 1;
}
#endif
/************************************************************************
* OK, that's it. You should not have to change anything beyond this
* point in order to compile and execute this program. (You might get
* some warnings, but you can ignore them.)
* When you run the program, it will make a couple more tests that it
* can do automatically, and then it will create jconfig.h and print out
* any additional suggestions it has.
************************************************************************
*/
#ifdef HAVE_PROTOTYPES
int is_char_signed (int arg)
#else
int is_char_signed (arg)
int arg;
#endif
{
if (arg == 189) { /* expected result for unsigned char */
return 0; /* type char is unsigned */
}
else if (arg != -67) { /* expected result for signed char */
printf("Hmm, it seems 'char' is not eight bits wide on your machine.\n");
printf("I fear the JPEG software will not work at all.\n\n");
}
return 1; /* assume char is signed otherwise */
}
#ifdef HAVE_PROTOTYPES
int is_shifting_signed (long arg)
#else
int is_shifting_signed (arg)
long arg;
#endif
/* See whether right-shift on a long is signed or not. */
{
long res = arg >> 4;
if (res == -0x7F7E80CL) { /* expected result for signed shift */
return 1; /* right shift is signed */
}
/* see if unsigned-shift hack will fix it. */
/* we can't just test exact value since it depends on width of long... */
res |= (~0L) << (32-4);
if (res == -0x7F7E80CL) { /* expected result now? */
return 0; /* right shift is unsigned */
}
printf("Right shift isn't acting as I expect it to.\n");
printf("I fear the JPEG software will not work at all.\n\n");
return 0; /* try it with unsigned anyway */
}
#ifdef HAVE_PROTOTYPES
int main (int argc, char ** argv)
#else
int main (argc, argv)
int argc;
char ** argv;
#endif
{
char signed_char_check = (char) (-67);
FILE *outfile;
/* Attempt to write jconfig.h */
if ((outfile = fopen("jconfig.h", "w")) == NULL) {
printf("Failed to write jconfig.h\n");
return 1;
}
/* Write out all the info */
fprintf(outfile, "/* jconfig.h --- generated by ckconfig.c */\n");
fprintf(outfile, "/* see jconfig.doc for explanations */\n\n");
#ifdef HAVE_PROTOTYPES
fprintf(outfile, "#define HAVE_PROTOTYPES\n");
#else
fprintf(outfile, "#undef HAVE_PROTOTYPES\n");
#endif
#ifdef HAVE_UNSIGNED_CHAR
fprintf(outfile, "#define HAVE_UNSIGNED_CHAR\n");
#else
fprintf(outfile, "#undef HAVE_UNSIGNED_CHAR\n");
#endif
#ifdef HAVE_UNSIGNED_SHORT
fprintf(outfile, "#define HAVE_UNSIGNED_SHORT\n");
#else
fprintf(outfile, "#undef HAVE_UNSIGNED_SHORT\n");
#endif
#ifdef HAVE_VOID
fprintf(outfile, "/* #define void char */\n");
#else
fprintf(outfile, "#define void char\n");
#endif
#ifdef HAVE_CONST
fprintf(outfile, "/* #define const */\n");
#else
fprintf(outfile, "#define const\n");
#endif
if (is_char_signed((int) signed_char_check))
fprintf(outfile, "#undef CHAR_IS_UNSIGNED\n");
else
fprintf(outfile, "#define CHAR_IS_UNSIGNED\n");
#ifdef HAVE_STDDEF_H
fprintf(outfile, "#define HAVE_STDDEF_H\n");
#else
fprintf(outfile, "#undef HAVE_STDDEF_H\n");
#endif
#ifdef HAVE_STDLIB_H
fprintf(outfile, "#define HAVE_STDLIB_H\n");
#else
fprintf(outfile, "#undef HAVE_STDLIB_H\n");
#endif
#ifdef NEED_BSD_STRINGS
fprintf(outfile, "#define NEED_BSD_STRINGS\n");
#else
fprintf(outfile, "#undef NEED_BSD_STRINGS\n");
#endif
#ifdef NEED_SYS_TYPES_H
fprintf(outfile, "#define NEED_SYS_TYPES_H\n");
#else
fprintf(outfile, "#undef NEED_SYS_TYPES_H\n");
#endif
fprintf(outfile, "#undef NEED_FAR_POINTERS\n");
#ifdef NEED_SHORT_EXTERNAL_NAMES
fprintf(outfile, "#define NEED_SHORT_EXTERNAL_NAMES\n");
#else
fprintf(outfile, "#undef NEED_SHORT_EXTERNAL_NAMES\n");
#endif
#ifdef INCOMPLETE_TYPES_BROKEN
fprintf(outfile, "#define INCOMPLETE_TYPES_BROKEN\n");
#else
fprintf(outfile, "#undef INCOMPLETE_TYPES_BROKEN\n");
#endif
fprintf(outfile, "\n#ifdef JPEG_INTERNALS\n\n");
if (is_shifting_signed(-0x7F7E80B1L))
fprintf(outfile, "#undef RIGHT_SHIFT_IS_UNSIGNED\n");
else
fprintf(outfile, "#define RIGHT_SHIFT_IS_UNSIGNED\n");
fprintf(outfile, "\n#endif /* JPEG_INTERNALS */\n");
fprintf(outfile, "\n#ifdef JPEG_CJPEG_DJPEG\n\n");
fprintf(outfile, "#define BMP_SUPPORTED /* BMP image file format */\n");
fprintf(outfile, "#define GIF_SUPPORTED /* GIF image file format */\n");
fprintf(outfile, "#define PPM_SUPPORTED /* PBMPLUS PPM/PGM image file format */\n");
fprintf(outfile, "#undef RLE_SUPPORTED /* Utah RLE image file format */\n");
fprintf(outfile, "#define TARGA_SUPPORTED /* Targa image file format */\n\n");
fprintf(outfile, "#undef TWO_FILE_COMMANDLINE /* You may need this on non-Unix systems */\n");
fprintf(outfile, "#undef NEED_SIGNAL_CATCHER /* Define this if you use jmemname.c */\n");
fprintf(outfile, "#undef DONT_USE_B_MODE\n");
fprintf(outfile, "/* #define PROGRESS_REPORT */ /* optional */\n");
fprintf(outfile, "\n#endif /* JPEG_CJPEG_DJPEG */\n");
/* Close the jconfig.h file */
fclose(outfile);
/* User report */
printf("Configuration check for Independent JPEG Group's software done.\n");
printf("\nI have written the jconfig.h file for you.\n\n");
#ifdef HAVE_PROTOTYPES
printf("You should use makefile.ansi as the starting point for your Makefile.\n");
#else
printf("You should use makefile.unix as the starting point for your Makefile.\n");
#endif
#ifdef NEED_SPECIAL_INCLUDE
printf("\nYou'll need to change jconfig.h to include the system include file\n");
printf("that you found type size_t in, or add a direct definition of type\n");
printf("size_t if that's what you used. Just add it to the end.\n");
#endif
return 0;
}

Просмотреть файл

@ -1,118 +0,0 @@
IJG JPEG LIBRARY: CODING RULES
Copyright (C) 1991-1996, Thomas G. Lane.
This file is part of the Independent JPEG Group's software.
For conditions of distribution and use, see the accompanying README file.
Since numerous people will be contributing code and bug fixes, it's important
to establish a common coding style. The goal of using similar coding styles
is much more important than the details of just what that style is.
In general we follow the recommendations of "Recommended C Style and Coding
Standards" revision 6.1 (Cannon et al. as modified by Spencer, Keppel and
Brader). This document is available in the IJG FTP archive (see
jpeg/doc/cstyle.ms.tbl.Z, or cstyle.txt.Z for those without nroff/tbl).
Block comments should be laid out thusly:
/*
* Block comments in this style.
*/
We indent statements in K&R style, e.g.,
if (test) {
then-part;
} else {
else-part;
}
with two spaces per indentation level. (This indentation convention is
handled automatically by GNU Emacs and many other text editors.)
Multi-word names should be written in lower case with underscores, e.g.,
multi_word_name (not multiWordName). Preprocessor symbols and enum constants
are similar but upper case (MULTI_WORD_NAME). Names should be unique within
the first fifteen characters. (On some older systems, global names must be
unique within six characters. We accommodate this without cluttering the
source code by using macros to substitute shorter names.)
We use function prototypes everywhere; we rely on automatic source code
transformation to feed prototype-less C compilers. Transformation is done
by the simple and portable tool 'ansi2knr.c' (courtesy of Ghostscript).
ansi2knr is not very bright, so it imposes a format requirement on function
declarations: the function name MUST BEGIN IN COLUMN 1. Thus all functions
should be written in the following style:
LOCAL(int *)
function_name (int a, char *b)
{
code...
}
Note that each function definition must begin with GLOBAL(type), LOCAL(type),
or METHODDEF(type). These macros expand to "static type" or just "type" as
appropriate. They provide a readable indication of the routine's usage and
can readily be changed for special needs. (For instance, special linkage
keywords can be inserted for use in Windows DLLs.)
ansi2knr does not transform method declarations (function pointers in
structs). We handle these with a macro JMETHOD, defined as
#ifdef HAVE_PROTOTYPES
#define JMETHOD(type,methodname,arglist) type (*methodname) arglist
#else
#define JMETHOD(type,methodname,arglist) type (*methodname) ()
#endif
which is used like this:
struct function_pointers {
JMETHOD(void, init_entropy_encoder, (int somearg, jparms *jp));
JMETHOD(void, term_entropy_encoder, (void));
};
Note the set of parentheses surrounding the parameter list.
A similar solution is used for forward and external function declarations
(see the EXTERN and JPP macros).
If the code is to work on non-ANSI compilers, we cannot rely on a prototype
declaration to coerce actual parameters into the right types. Therefore, use
explicit casts on actual parameters whenever the actual parameter type is not
identical to the formal parameter. Beware of implicit conversions to "int".
It seems there are some non-ANSI compilers in which the sizeof() operator
is defined to return int, yet size_t is defined as long. Needless to say,
this is brain-damaged. Always use the SIZEOF() macro in place of sizeof(),
so that the result is guaranteed to be of type size_t.
The JPEG library is intended to be used within larger programs. Furthermore,
we want it to be reentrant so that it can be used by applications that process
multiple images concurrently. The following rules support these requirements:
1. Avoid direct use of file I/O, "malloc", error report printouts, etc;
pass these through the common routines provided.
2. Minimize global namespace pollution. Functions should be declared static
wherever possible. (Note that our method-based calling conventions help this
a lot: in many modules only the initialization function will ever need to be
called directly, so only that function need be externally visible.) All
global function names should begin with "jpeg_", and should have an
abbreviated name (unique in the first six characters) substituted by macro
when NEED_SHORT_EXTERNAL_NAMES is set.
3. Don't use global variables; anything that must be used in another module
should be in the common data structures.
4. Don't use static variables except for read-only constant tables. Variables
that should be private to a module can be placed into private structures (see
the system architecture document, structure.doc).
5. Source file names should begin with "j" for files that are part of the
library proper; source files that are not part of the library, such as cjpeg.c
and djpeg.c, do not begin with "j". Keep source file names to eight
characters (plus ".c" or ".h", etc) to make life easy for MS-DOSers. Keep
compression and decompression code in separate source files --- some
applications may want only one half of the library.
Note: these rules (particularly #4) are not followed religiously in the
modules that are used in cjpeg/djpeg but are not part of the JPEG library
proper. Those modules are not really intended to be used in other
applications.

Просмотреть файл

@ -1,616 +0,0 @@
/*
* djpeg.c
*
* Copyright (C) 1991-1997, Thomas G. Lane.
* This file is part of the Independent JPEG Group's software.
* For conditions of distribution and use, see the accompanying README file.
*
* This file contains a command-line user interface for the JPEG decompressor.
* It should work on any system with Unix- or MS-DOS-style command lines.
*
* Two different command line styles are permitted, depending on the
* compile-time switch TWO_FILE_COMMANDLINE:
* djpeg [options] inputfile outputfile
* djpeg [options] [inputfile]
* In the second style, output is always to standard output, which you'd
* normally redirect to a file or pipe to some other program. Input is
* either from a named file or from standard input (typically redirected).
* The second style is convenient on Unix but is unhelpful on systems that
* don't support pipes. Also, you MUST use the first style if your system
* doesn't do binary I/O to stdin/stdout.
* To simplify script writing, the "-outfile" switch is provided. The syntax
* djpeg [options] -outfile outputfile inputfile
* works regardless of which command line style is used.
*/
#include "cdjpeg.h" /* Common decls for cjpeg/djpeg applications */
#include "jversion.h" /* for version message */
#include <ctype.h> /* to declare isprint() */
#ifdef USE_CCOMMAND /* command-line reader for Macintosh */
#ifdef __MWERKS__
#include <SIOUX.h> /* Metrowerks needs this */
#include <console.h> /* ... and this */
#endif
#ifdef THINK_C
#include <console.h> /* Think declares it here */
#endif
#endif
/* Create the add-on message string table. */
#define JMESSAGE(code,string) string ,
static const char * const cdjpeg_message_table[] = {
#include "cderror.h"
NULL
};
/*
* This list defines the known output image formats
* (not all of which need be supported by a given version).
* You can change the default output format by defining DEFAULT_FMT;
* indeed, you had better do so if you undefine PPM_SUPPORTED.
*/
typedef enum {
FMT_BMP, /* BMP format (Windows flavor) */
FMT_GIF, /* GIF format */
FMT_OS2, /* BMP format (OS/2 flavor) */
FMT_PPM, /* PPM/PGM (PBMPLUS formats) */
FMT_RLE, /* RLE format */
FMT_TARGA, /* Targa format */
FMT_TIFF /* TIFF format */
} IMAGE_FORMATS;
#ifndef DEFAULT_FMT /* so can override from CFLAGS in Makefile */
#define DEFAULT_FMT FMT_PPM
#endif
static IMAGE_FORMATS requested_fmt;
/*
* Argument-parsing code.
* The switch parser is designed to be useful with DOS-style command line
* syntax, ie, intermixed switches and file names, where only the switches
* to the left of a given file name affect processing of that file.
* The main program in this file doesn't actually use this capability...
*/
static const char * progname; /* program name for error messages */
static char * outfilename; /* for -outfile switch */
LOCAL(void)
usage (void)
/* complain about bad command line */
{
fprintf(stderr, "usage: %s [switches] ", progname);
#ifdef TWO_FILE_COMMANDLINE
fprintf(stderr, "inputfile outputfile\n");
#else
fprintf(stderr, "[inputfile]\n");
#endif
fprintf(stderr, "Switches (names may be abbreviated):\n");
fprintf(stderr, " -colors N Reduce image to no more than N colors\n");
fprintf(stderr, " -fast Fast, low-quality processing\n");
fprintf(stderr, " -grayscale Force grayscale output\n");
#ifdef IDCT_SCALING_SUPPORTED
fprintf(stderr, " -scale M/N Scale output image by fraction M/N, eg, 1/8\n");
#endif
#ifdef BMP_SUPPORTED
fprintf(stderr, " -bmp Select BMP output format (Windows style)%s\n",
(DEFAULT_FMT == FMT_BMP ? " (default)" : ""));
#endif
#ifdef GIF_SUPPORTED
fprintf(stderr, " -gif Select GIF output format%s\n",
(DEFAULT_FMT == FMT_GIF ? " (default)" : ""));
#endif
#ifdef BMP_SUPPORTED
fprintf(stderr, " -os2 Select BMP output format (OS/2 style)%s\n",
(DEFAULT_FMT == FMT_OS2 ? " (default)" : ""));
#endif
#ifdef PPM_SUPPORTED
fprintf(stderr, " -pnm Select PBMPLUS (PPM/PGM) output format%s\n",
(DEFAULT_FMT == FMT_PPM ? " (default)" : ""));
#endif
#ifdef RLE_SUPPORTED
fprintf(stderr, " -rle Select Utah RLE output format%s\n",
(DEFAULT_FMT == FMT_RLE ? " (default)" : ""));
#endif
#ifdef TARGA_SUPPORTED
fprintf(stderr, " -targa Select Targa output format%s\n",
(DEFAULT_FMT == FMT_TARGA ? " (default)" : ""));
#endif
fprintf(stderr, "Switches for advanced users:\n");
#ifdef DCT_ISLOW_SUPPORTED
fprintf(stderr, " -dct int Use integer DCT method%s\n",
(JDCT_DEFAULT == JDCT_ISLOW ? " (default)" : ""));
#endif
#ifdef DCT_IFAST_SUPPORTED
fprintf(stderr, " -dct fast Use fast integer DCT (less accurate)%s\n",
(JDCT_DEFAULT == JDCT_IFAST ? " (default)" : ""));
#endif
#ifdef DCT_FLOAT_SUPPORTED
fprintf(stderr, " -dct float Use floating-point DCT method%s\n",
(JDCT_DEFAULT == JDCT_FLOAT ? " (default)" : ""));
#endif
fprintf(stderr, " -dither fs Use F-S dithering (default)\n");
fprintf(stderr, " -dither none Don't use dithering in quantization\n");
fprintf(stderr, " -dither ordered Use ordered dither (medium speed, quality)\n");
#ifdef QUANT_2PASS_SUPPORTED
fprintf(stderr, " -map FILE Map to colors used in named image file\n");
#endif
fprintf(stderr, " -nosmooth Don't use high-quality upsampling\n");
#ifdef QUANT_1PASS_SUPPORTED
fprintf(stderr, " -onepass Use 1-pass quantization (fast, low quality)\n");
#endif
fprintf(stderr, " -maxmemory N Maximum memory to use (in kbytes)\n");
fprintf(stderr, " -outfile name Specify name for output file\n");
fprintf(stderr, " -verbose or -debug Emit debug output\n");
exit(EXIT_FAILURE);
}
LOCAL(int)
parse_switches (j_decompress_ptr cinfo, int argc, char **argv,
int last_file_arg_seen, boolean for_real)
/* Parse optional switches.
* Returns argv[] index of first file-name argument (== argc if none).
* Any file names with indexes <= last_file_arg_seen are ignored;
* they have presumably been processed in a previous iteration.
* (Pass 0 for last_file_arg_seen on the first or only iteration.)
* for_real is FALSE on the first (dummy) pass; we may skip any expensive
* processing.
*/
{
int argn;
char * arg;
/* Set up default JPEG parameters. */
requested_fmt = DEFAULT_FMT; /* set default output file format */
outfilename = NULL;
cinfo->err->trace_level = 0;
/* Scan command line options, adjust parameters */
for (argn = 1; argn < argc; argn++) {
arg = argv[argn];
if (*arg != '-') {
/* Not a switch, must be a file name argument */
if (argn <= last_file_arg_seen) {
outfilename = NULL; /* -outfile applies to just one input file */
continue; /* ignore this name if previously processed */
}
break; /* else done parsing switches */
}
arg++; /* advance past switch marker character */
if (keymatch(arg, "bmp", 1)) {
/* BMP output format. */
requested_fmt = FMT_BMP;
} else if (keymatch(arg, "colors", 1) || keymatch(arg, "colours", 1) ||
keymatch(arg, "quantize", 1) || keymatch(arg, "quantise", 1)) {
/* Do color quantization. */
int val;
if (++argn >= argc) /* advance to next argument */
usage();
if (sscanf(argv[argn], "%d", &val) != 1)
usage();
cinfo->desired_number_of_colors = val;
cinfo->quantize_colors = TRUE;
} else if (keymatch(arg, "dct", 2)) {
/* Select IDCT algorithm. */
if (++argn >= argc) /* advance to next argument */
usage();
if (keymatch(argv[argn], "int", 1)) {
cinfo->dct_method = JDCT_ISLOW;
} else if (keymatch(argv[argn], "fast", 2)) {
cinfo->dct_method = JDCT_IFAST;
} else if (keymatch(argv[argn], "float", 2)) {
cinfo->dct_method = JDCT_FLOAT;
} else
usage();
} else if (keymatch(arg, "dither", 2)) {
/* Select dithering algorithm. */
if (++argn >= argc) /* advance to next argument */
usage();
if (keymatch(argv[argn], "fs", 2)) {
cinfo->dither_mode = JDITHER_FS;
} else if (keymatch(argv[argn], "none", 2)) {
cinfo->dither_mode = JDITHER_NONE;
} else if (keymatch(argv[argn], "ordered", 2)) {
cinfo->dither_mode = JDITHER_ORDERED;
} else
usage();
} else if (keymatch(arg, "debug", 1) || keymatch(arg, "verbose", 1)) {
/* Enable debug printouts. */
/* On first -d, print version identification */
static boolean printed_version = FALSE;
if (! printed_version) {
fprintf(stderr, "Independent JPEG Group's DJPEG, version %s\n%s\n",
JVERSION, JCOPYRIGHT);
printed_version = TRUE;
}
cinfo->err->trace_level++;
} else if (keymatch(arg, "fast", 1)) {
/* Select recommended processing options for quick-and-dirty output. */
cinfo->two_pass_quantize = FALSE;
cinfo->dither_mode = JDITHER_ORDERED;
if (! cinfo->quantize_colors) /* don't override an earlier -colors */
cinfo->desired_number_of_colors = 216;
cinfo->dct_method = JDCT_FASTEST;
cinfo->do_fancy_upsampling = FALSE;
} else if (keymatch(arg, "gif", 1)) {
/* GIF output format. */
requested_fmt = FMT_GIF;
} else if (keymatch(arg, "grayscale", 2) || keymatch(arg, "greyscale",2)) {
/* Force monochrome output. */
cinfo->out_color_space = JCS_GRAYSCALE;
} else if (keymatch(arg, "map", 3)) {
/* Quantize to a color map taken from an input file. */
if (++argn >= argc) /* advance to next argument */
usage();
if (for_real) { /* too expensive to do twice! */
#ifdef QUANT_2PASS_SUPPORTED /* otherwise can't quantize to supplied map */
FILE * mapfile;
if ((mapfile = fopen(argv[argn], READ_BINARY)) == NULL) {
fprintf(stderr, "%s: can't open %s\n", progname, argv[argn]);
exit(EXIT_FAILURE);
}
read_color_map(cinfo, mapfile);
fclose(mapfile);
cinfo->quantize_colors = TRUE;
#else
ERREXIT(cinfo, JERR_NOT_COMPILED);
#endif
}
} else if (keymatch(arg, "maxmemory", 3)) {
/* Maximum memory in Kb (or Mb with 'm'). */
long lval;
char ch = 'x';
if (++argn >= argc) /* advance to next argument */
usage();
if (sscanf(argv[argn], "%ld%c", &lval, &ch) < 1)
usage();
if (ch == 'm' || ch == 'M')
lval *= 1000L;
cinfo->mem->max_memory_to_use = lval * 1000L;
} else if (keymatch(arg, "nosmooth", 3)) {
/* Suppress fancy upsampling */
cinfo->do_fancy_upsampling = FALSE;
} else if (keymatch(arg, "onepass", 3)) {
/* Use fast one-pass quantization. */
cinfo->two_pass_quantize = FALSE;
} else if (keymatch(arg, "os2", 3)) {
/* BMP output format (OS/2 flavor). */
requested_fmt = FMT_OS2;
} else if (keymatch(arg, "outfile", 4)) {
/* Set output file name. */
if (++argn >= argc) /* advance to next argument */
usage();
outfilename = argv[argn]; /* save it away for later use */
} else if (keymatch(arg, "pnm", 1) || keymatch(arg, "ppm", 1)) {
/* PPM/PGM output format. */
requested_fmt = FMT_PPM;
} else if (keymatch(arg, "rle", 1)) {
/* RLE output format. */
requested_fmt = FMT_RLE;
} else if (keymatch(arg, "scale", 1)) {
/* Scale the output image by a fraction M/N. */
if (++argn >= argc) /* advance to next argument */
usage();
if (sscanf(argv[argn], "%d/%d",
&cinfo->scale_num, &cinfo->scale_denom) != 2)
usage();
} else if (keymatch(arg, "targa", 1)) {
/* Targa output format. */
requested_fmt = FMT_TARGA;
} else {
usage(); /* bogus switch */
}
}
return argn; /* return index of next arg (file name) */
}
/*
* Marker processor for COM and interesting APPn markers.
* This replaces the library's built-in processor, which just skips the marker.
* We want to print out the marker as text, to the extent possible.
* Note this code relies on a non-suspending data source.
*/
LOCAL(unsigned int)
jpeg_getc (j_decompress_ptr cinfo)
/* Read next byte */
{
struct jpeg_source_mgr * datasrc = cinfo->src;
if (datasrc->bytes_in_buffer == 0) {
if (! (*datasrc->fill_input_buffer) (cinfo))
ERREXIT(cinfo, JERR_CANT_SUSPEND);
}
datasrc->bytes_in_buffer--;
return GETJOCTET(*datasrc->next_input_byte++);
}
METHODDEF(boolean)
print_text_marker (j_decompress_ptr cinfo)
{
boolean traceit = (cinfo->err->trace_level >= 1);
INT32 length;
unsigned int ch;
unsigned int lastch = 0;
length = jpeg_getc(cinfo) << 8;
length += jpeg_getc(cinfo);
length -= 2; /* discount the length word itself */
if (traceit) {
if (cinfo->unread_marker == JPEG_COM)
fprintf(stderr, "Comment, length %ld:\n", (long) length);
else /* assume it is an APPn otherwise */
fprintf(stderr, "APP%d, length %ld:\n",
cinfo->unread_marker - JPEG_APP0, (long) length);
}
while (--length >= 0) {
ch = jpeg_getc(cinfo);
if (traceit) {
/* Emit the character in a readable form.
* Nonprintables are converted to \nnn form,
* while \ is converted to \\.
* Newlines in CR, CR/LF, or LF form will be printed as one newline.
*/
if (ch == '\r') {
fprintf(stderr, "\n");
} else if (ch == '\n') {
if (lastch != '\r')
fprintf(stderr, "\n");
} else if (ch == '\\') {
fprintf(stderr, "\\\\");
} else if (isprint(ch)) {
putc(ch, stderr);
} else {
fprintf(stderr, "\\%03o", ch);
}
lastch = ch;
}
}
if (traceit)
fprintf(stderr, "\n");
return TRUE;
}
/*
* The main program.
*/
int
main (int argc, char **argv)
{
struct jpeg_decompress_struct cinfo;
struct jpeg_error_mgr jerr;
#ifdef PROGRESS_REPORT
struct cdjpeg_progress_mgr progress;
#endif
int file_index;
djpeg_dest_ptr dest_mgr = NULL;
FILE * input_file;
FILE * output_file;
JDIMENSION num_scanlines;
/* On Mac, fetch a command line. */
#ifdef USE_CCOMMAND
argc = ccommand(&argv);
#endif
progname = argv[0];
if (progname == NULL || progname[0] == 0)
progname = "djpeg"; /* in case C library doesn't provide it */
/* Initialize the JPEG decompression object with default error handling. */
cinfo.err = jpeg_std_error(&jerr);
jpeg_create_decompress(&cinfo);
/* Add some application-specific error messages (from cderror.h) */
jerr.addon_message_table = cdjpeg_message_table;
jerr.first_addon_message = JMSG_FIRSTADDONCODE;
jerr.last_addon_message = JMSG_LASTADDONCODE;
/* Insert custom marker processor for COM and APP12.
* APP12 is used by some digital camera makers for textual info,
* so we provide the ability to display it as text.
* If you like, additional APPn marker types can be selected for display,
* but don't try to override APP0 or APP14 this way (see libjpeg.doc).
*/
jpeg_set_marker_processor(&cinfo, JPEG_COM, print_text_marker);
jpeg_set_marker_processor(&cinfo, JPEG_APP0+12, print_text_marker);
/* Now safe to enable signal catcher. */
#ifdef NEED_SIGNAL_CATCHER
enable_signal_catcher((j_common_ptr) &cinfo);
#endif
/* Scan command line to find file names. */
/* It is convenient to use just one switch-parsing routine, but the switch
* values read here are ignored; we will rescan the switches after opening
* the input file.
* (Exception: tracing level set here controls verbosity for COM markers
* found during jpeg_read_header...)
*/
file_index = parse_switches(&cinfo, argc, argv, 0, FALSE);
#ifdef TWO_FILE_COMMANDLINE
/* Must have either -outfile switch or explicit output file name */
if (outfilename == NULL) {
if (file_index != argc-2) {
fprintf(stderr, "%s: must name one input and one output file\n",
progname);
usage();
}
outfilename = argv[file_index+1];
} else {
if (file_index != argc-1) {
fprintf(stderr, "%s: must name one input and one output file\n",
progname);
usage();
}
}
#else
/* Unix style: expect zero or one file name */
if (file_index < argc-1) {
fprintf(stderr, "%s: only one input file\n", progname);
usage();
}
#endif /* TWO_FILE_COMMANDLINE */
/* Open the input file. */
if (file_index < argc) {
if ((input_file = fopen(argv[file_index], READ_BINARY)) == NULL) {
fprintf(stderr, "%s: can't open %s\n", progname, argv[file_index]);
exit(EXIT_FAILURE);
}
} else {
/* default input file is stdin */
input_file = read_stdin();
}
/* Open the output file. */
if (outfilename != NULL) {
if ((output_file = fopen(outfilename, WRITE_BINARY)) == NULL) {
fprintf(stderr, "%s: can't open %s\n", progname, outfilename);
exit(EXIT_FAILURE);
}
} else {
/* default output file is stdout */
output_file = write_stdout();
}
#ifdef PROGRESS_REPORT
start_progress_monitor((j_common_ptr) &cinfo, &progress);
#endif
/* Specify data source for decompression */
jpeg_stdio_src(&cinfo, input_file);
/* Read file header, set default decompression parameters */
(void) jpeg_read_header(&cinfo, TRUE);
/* Adjust default decompression parameters by re-parsing the options */
file_index = parse_switches(&cinfo, argc, argv, 0, TRUE);
/* Initialize the output module now to let it override any crucial
* option settings (for instance, GIF wants to force color quantization).
*/
switch (requested_fmt) {
#ifdef BMP_SUPPORTED
case FMT_BMP:
dest_mgr = jinit_write_bmp(&cinfo, FALSE);
break;
case FMT_OS2:
dest_mgr = jinit_write_bmp(&cinfo, TRUE);
break;
#endif
#ifdef GIF_SUPPORTED
case FMT_GIF:
dest_mgr = jinit_write_gif(&cinfo);
break;
#endif
#ifdef PPM_SUPPORTED
case FMT_PPM:
dest_mgr = jinit_write_ppm(&cinfo);
break;
#endif
#ifdef RLE_SUPPORTED
case FMT_RLE:
dest_mgr = jinit_write_rle(&cinfo);
break;
#endif
#ifdef TARGA_SUPPORTED
case FMT_TARGA:
dest_mgr = jinit_write_targa(&cinfo);
break;
#endif
default:
ERREXIT(&cinfo, JERR_UNSUPPORTED_FORMAT);
break;
}
dest_mgr->output_file = output_file;
/* Start decompressor */
(void) jpeg_start_decompress(&cinfo);
/* Write output file header */
(*dest_mgr->start_output) (&cinfo, dest_mgr);
/* Process data */
while (cinfo.output_scanline < cinfo.output_height) {
num_scanlines = jpeg_read_scanlines(&cinfo, dest_mgr->buffer,
dest_mgr->buffer_height);
(*dest_mgr->put_pixel_rows) (&cinfo, dest_mgr, num_scanlines);
}
#ifdef PROGRESS_REPORT
/* Hack: count final pass as done in case finish_output does an extra pass.
* The library won't have updated completed_passes.
*/
progress.pub.completed_passes = progress.pub.total_passes;
#endif
/* Finish decompression and release memory.
* I must do it in this order because output module has allocated memory
* of lifespan JPOOL_IMAGE; it needs to finish before releasing memory.
*/
(*dest_mgr->finish_output) (&cinfo, dest_mgr);
(void) jpeg_finish_decompress(&cinfo);
jpeg_destroy_decompress(&cinfo);
/* Close files, if we opened them */
if (input_file != stdin)
fclose(input_file);
if (output_file != stdout)
fclose(output_file);
#ifdef PROGRESS_REPORT
end_progress_monitor((j_common_ptr) &cinfo);
#endif
/* All done. */
exit(jerr.num_warnings ? EXIT_WARNING : EXIT_SUCCESS);
return 0; /* suppress no-return-value warnings */
}

Просмотреть файл

@ -1,433 +0,0 @@
/*
* example.c
*
* This file illustrates how to use the IJG code as a subroutine library
* to read or write JPEG image files. You should look at this code in
* conjunction with the documentation file libjpeg.doc.
*
* This code will not do anything useful as-is, but it may be helpful as a
* skeleton for constructing routines that call the JPEG library.
*
* We present these routines in the same coding style used in the JPEG code
* (ANSI function definitions, etc); but you are of course free to code your
* routines in a different style if you prefer.
*/
#include <stdio.h>
/*
* Include file for users of JPEG library.
* You will need to have included system headers that define at least
* the typedefs FILE and size_t before you can include jpeglib.h.
* (stdio.h is sufficient on ANSI-conforming systems.)
* You may also wish to include "jerror.h".
*/
#include "jpeglib.h"
/*
* <setjmp.h> is used for the optional error recovery mechanism shown in
* the second part of the example.
*/
#include <setjmp.h>
/******************** JPEG COMPRESSION SAMPLE INTERFACE *******************/
/* This half of the example shows how to feed data into the JPEG compressor.
* We present a minimal version that does not worry about refinements such
* as error recovery (the JPEG code will just exit() if it gets an error).
*/
/*
* IMAGE DATA FORMATS:
*
* The standard input image format is a rectangular array of pixels, with
* each pixel having the same number of "component" values (color channels).
* Each pixel row is an array of JSAMPLEs (which typically are unsigned chars).
* If you are working with color data, then the color values for each pixel
* must be adjacent in the row; for example, R,G,B,R,G,B,R,G,B,... for 24-bit
* RGB color.
*
* For this example, we'll assume that this data structure matches the way
* our application has stored the image in memory, so we can just pass a
* pointer to our image buffer. In particular, let's say that the image is
* RGB color and is described by:
*/
extern JSAMPLE * image_buffer; /* Points to large array of R,G,B-order data */
extern int image_height; /* Number of rows in image */
extern int image_width; /* Number of columns in image */
/*
* Sample routine for JPEG compression. We assume that the target file name
* and a compression quality factor are passed in.
*/
GLOBAL(void)
write_JPEG_file (char * filename, int quality)
{
/* This struct contains the JPEG compression parameters and pointers to
* working space (which is allocated as needed by the JPEG library).
* It is possible to have several such structures, representing multiple
* compression/decompression processes, in existence at once. We refer
* to any one struct (and its associated working data) as a "JPEG object".
*/
struct jpeg_compress_struct cinfo;
/* This struct represents a JPEG error handler. It is declared separately
* because applications often want to supply a specialized error handler
* (see the second half of this file for an example). But here we just
* take the easy way out and use the standard error handler, which will
* print a message on stderr and call exit() if compression fails.
* Note that this struct must live as long as the main JPEG parameter
* struct, to avoid dangling-pointer problems.
*/
struct jpeg_error_mgr jerr;
/* More stuff */
FILE * outfile; /* target file */
JSAMPROW row_pointer[1]; /* pointer to JSAMPLE row[s] */
int row_stride; /* physical row width in image buffer */
/* Step 1: allocate and initialize JPEG compression object */
/* We have to set up the error handler first, in case the initialization
* step fails. (Unlikely, but it could happen if you are out of memory.)
* This routine fills in the contents of struct jerr, and returns jerr's
* address which we place into the link field in cinfo.
*/
cinfo.err = jpeg_std_error(&jerr);
/* Now we can initialize the JPEG compression object. */
jpeg_create_compress(&cinfo);
/* Step 2: specify data destination (eg, a file) */
/* Note: steps 2 and 3 can be done in either order. */
/* Here we use the library-supplied code to send compressed data to a
* stdio stream. You can also write your own code to do something else.
* VERY IMPORTANT: use "b" option to fopen() if you are on a machine that
* requires it in order to write binary files.
*/
if ((outfile = fopen(filename, "wb")) == NULL) {
fprintf(stderr, "can't open %s\n", filename);
exit(1);
}
jpeg_stdio_dest(&cinfo, outfile);
/* Step 3: set parameters for compression */
/* First we supply a description of the input image.
* Four fields of the cinfo struct must be filled in:
*/
cinfo.image_width = image_width; /* image width and height, in pixels */
cinfo.image_height = image_height;
cinfo.input_components = 3; /* # of color components per pixel */
cinfo.in_color_space = JCS_RGB; /* colorspace of input image */
/* Now use the library's routine to set default compression parameters.
* (You must set at least cinfo.in_color_space before calling this,
* since the defaults depend on the source color space.)
*/
jpeg_set_defaults(&cinfo);
/* Now you can set any non-default parameters you wish to.
* Here we just illustrate the use of quality (quantization table) scaling:
*/
jpeg_set_quality(&cinfo, quality, TRUE /* limit to baseline-JPEG values */);
/* Step 4: Start compressor */
/* TRUE ensures that we will write a complete interchange-JPEG file.
* Pass TRUE unless you are very sure of what you're doing.
*/
jpeg_start_compress(&cinfo, TRUE);
/* Step 5: while (scan lines remain to be written) */
/* jpeg_write_scanlines(...); */
/* Here we use the library's state variable cinfo.next_scanline as the
* loop counter, so that we don't have to keep track ourselves.
* To keep things simple, we pass one scanline per call; you can pass
* more if you wish, though.
*/
row_stride = image_width * 3; /* JSAMPLEs per row in image_buffer */
while (cinfo.next_scanline < cinfo.image_height) {
/* jpeg_write_scanlines expects an array of pointers to scanlines.
* Here the array is only one element long, but you could pass
* more than one scanline at a time if that's more convenient.
*/
row_pointer[0] = & image_buffer[cinfo.next_scanline * row_stride];
(void) jpeg_write_scanlines(&cinfo, row_pointer, 1);
}
/* Step 6: Finish compression */
jpeg_finish_compress(&cinfo);
/* After finish_compress, we can close the output file. */
fclose(outfile);
/* Step 7: release JPEG compression object */
/* This is an important step since it will release a good deal of memory. */
jpeg_destroy_compress(&cinfo);
/* And we're done! */
}
/*
* SOME FINE POINTS:
*
* In the above loop, we ignored the return value of jpeg_write_scanlines,
* which is the number of scanlines actually written. We could get away
* with this because we were only relying on the value of cinfo.next_scanline,
* which will be incremented correctly. If you maintain additional loop
* variables then you should be careful to increment them properly.
* Actually, for output to a stdio stream you needn't worry, because
* then jpeg_write_scanlines will write all the lines passed (or else exit
* with a fatal error). Partial writes can only occur if you use a data
* destination module that can demand suspension of the compressor.
* (If you don't know what that's for, you don't need it.)
*
* If the compressor requires full-image buffers (for entropy-coding
* optimization or a multi-scan JPEG file), it will create temporary
* files for anything that doesn't fit within the maximum-memory setting.
* (Note that temp files are NOT needed if you use the default parameters.)
* On some systems you may need to set up a signal handler to ensure that
* temporary files are deleted if the program is interrupted. See libjpeg.doc.
*
* Scanlines MUST be supplied in top-to-bottom order if you want your JPEG
* files to be compatible with everyone else's. If you cannot readily read
* your data in that order, you'll need an intermediate array to hold the
* image. See rdtarga.c or rdbmp.c for examples of handling bottom-to-top
* source data using the JPEG code's internal virtual-array mechanisms.
*/
/******************** JPEG DECOMPRESSION SAMPLE INTERFACE *******************/
/* This half of the example shows how to read data from the JPEG decompressor.
* It's a bit more refined than the above, in that we show:
* (a) how to modify the JPEG library's standard error-reporting behavior;
* (b) how to allocate workspace using the library's memory manager.
*
* Just to make this example a little different from the first one, we'll
* assume that we do not intend to put the whole image into an in-memory
* buffer, but to send it line-by-line someplace else. We need a one-
* scanline-high JSAMPLE array as a work buffer, and we will let the JPEG
* memory manager allocate it for us. This approach is actually quite useful
* because we don't need to remember to deallocate the buffer separately: it
* will go away automatically when the JPEG object is cleaned up.
*/
/*
* ERROR HANDLING:
*
* The JPEG library's standard error handler (jerror.c) is divided into
* several "methods" which you can override individually. This lets you
* adjust the behavior without duplicating a lot of code, which you might
* have to update with each future release.
*
* Our example here shows how to override the "error_exit" method so that
* control is returned to the library's caller when a fatal error occurs,
* rather than calling exit() as the standard error_exit method does.
*
* We use C's setjmp/longjmp facility to return control. This means that the
* routine which calls the JPEG library must first execute a setjmp() call to
* establish the return point. We want the replacement error_exit to do a
* longjmp(). But we need to make the setjmp buffer accessible to the
* error_exit routine. To do this, we make a private extension of the
* standard JPEG error handler object. (If we were using C++, we'd say we
* were making a subclass of the regular error handler.)
*
* Here's the extended error handler struct:
*/
struct my_error_mgr {
struct jpeg_error_mgr pub; /* "public" fields */
jmp_buf setjmp_buffer; /* for return to caller */
};
typedef struct my_error_mgr * my_error_ptr;
/*
* Here's the routine that will replace the standard error_exit method:
*/
METHODDEF(void)
my_error_exit (j_common_ptr cinfo)
{
/* cinfo->err really points to a my_error_mgr struct, so coerce pointer */
my_error_ptr myerr = (my_error_ptr) cinfo->err;
/* Always display the message. */
/* We could postpone this until after returning, if we chose. */
(*cinfo->err->output_message) (cinfo);
/* Return control to the setjmp point */
longjmp(myerr->setjmp_buffer, 1);
}
/*
* Sample routine for JPEG decompression. We assume that the source file name
* is passed in. We want to return 1 on success, 0 on error.
*/
GLOBAL(int)
read_JPEG_file (char * filename)
{
/* This struct contains the JPEG decompression parameters and pointers to
* working space (which is allocated as needed by the JPEG library).
*/
struct jpeg_decompress_struct cinfo;
/* We use our private extension JPEG error handler.
* Note that this struct must live as long as the main JPEG parameter
* struct, to avoid dangling-pointer problems.
*/
struct my_error_mgr jerr;
/* More stuff */
FILE * infile; /* source file */
JSAMPARRAY buffer; /* Output row buffer */
int row_stride; /* physical row width in output buffer */
/* In this example we want to open the input file before doing anything else,
* so that the setjmp() error recovery below can assume the file is open.
* VERY IMPORTANT: use "b" option to fopen() if you are on a machine that
* requires it in order to read binary files.
*/
if ((infile = fopen(filename, "rb")) == NULL) {
fprintf(stderr, "can't open %s\n", filename);
return 0;
}
/* Step 1: allocate and initialize JPEG decompression object */
/* We set up the normal JPEG error routines, then override error_exit. */
cinfo.err = jpeg_std_error(&jerr.pub);
jerr.pub.error_exit = my_error_exit;
/* Establish the setjmp return context for my_error_exit to use. */
if (setjmp(jerr.setjmp_buffer)) {
/* If we get here, the JPEG code has signaled an error.
* We need to clean up the JPEG object, close the input file, and return.
*/
jpeg_destroy_decompress(&cinfo);
fclose(infile);
return 0;
}
/* Now we can initialize the JPEG decompression object. */
jpeg_create_decompress(&cinfo);
/* Step 2: specify data source (eg, a file) */
jpeg_stdio_src(&cinfo, infile);
/* Step 3: read file parameters with jpeg_read_header() */
(void) jpeg_read_header(&cinfo, TRUE);
/* We can ignore the return value from jpeg_read_header since
* (a) suspension is not possible with the stdio data source, and
* (b) we passed TRUE to reject a tables-only JPEG file as an error.
* See libjpeg.doc for more info.
*/
/* Step 4: set parameters for decompression */
/* In this example, we don't need to change any of the defaults set by
* jpeg_read_header(), so we do nothing here.
*/
/* Step 5: Start decompressor */
(void) jpeg_start_decompress(&cinfo);
/* We can ignore the return value since suspension is not possible
* with the stdio data source.
*/
/* We may need to do some setup of our own at this point before reading
* the data. After jpeg_start_decompress() we have the correct scaled
* output image dimensions available, as well as the output colormap
* if we asked for color quantization.
* In this example, we need to make an output work buffer of the right size.
*/
/* JSAMPLEs per row in output buffer */
row_stride = cinfo.output_width * cinfo.output_components;
/* Make a one-row-high sample array that will go away when done with image */
buffer = (*cinfo.mem->alloc_sarray)
((j_common_ptr) &cinfo, JPOOL_IMAGE, row_stride, 1);
/* Step 6: while (scan lines remain to be read) */
/* jpeg_read_scanlines(...); */
/* Here we use the library's state variable cinfo.output_scanline as the
* loop counter, so that we don't have to keep track ourselves.
*/
while (cinfo.output_scanline < cinfo.output_height) {
/* jpeg_read_scanlines expects an array of pointers to scanlines.
* Here the array is only one element long, but you could ask for
* more than one scanline at a time if that's more convenient.
*/
(void) jpeg_read_scanlines(&cinfo, buffer, 1);
/* Assume put_scanline_someplace wants a pointer and sample count. */
put_scanline_someplace(buffer[0], row_stride);
}
/* Step 7: Finish decompression */
(void) jpeg_finish_decompress(&cinfo);
/* We can ignore the return value since suspension is not possible
* with the stdio data source.
*/
/* Step 8: Release JPEG decompression object */
/* This is an important step since it will release a good deal of memory. */
jpeg_destroy_decompress(&cinfo);
/* After finish_decompress, we can close the input file.
* Here we postpone it until after no more JPEG errors are possible,
* so as to simplify the setjmp error logic above. (Actually, I don't
* think that jpeg_destroy can do an error exit, but why assume anything...)
*/
fclose(infile);
/* At this point you may want to check to see whether any corrupt-data
* warnings occurred (test whether jerr.pub.num_warnings is nonzero).
*/
/* And we're done! */
return 1;
}
/*
* SOME FINE POINTS:
*
* In the above code, we ignored the return value of jpeg_read_scanlines,
* which is the number of scanlines actually read. We could get away with
* this because we asked for only one line at a time and we weren't using
* a suspending data source. See libjpeg.doc for more info.
*
* We cheated a bit by calling alloc_sarray() after jpeg_start_decompress();
* we should have done it beforehand to ensure that the space would be
* counted against the JPEG max_memory setting. In some systems the above
* code would risk an out-of-memory error. However, in general we don't
* know the output image dimensions before jpeg_start_decompress(), unless we
* call jpeg_calc_output_dimensions(). See libjpeg.doc for more about this.
*
* Scanlines are returned in the same order as they appear in the JPEG file,
* which is standardly top-to-bottom. If you must emit data bottom-to-top,
* you can use one of the virtual arrays provided by the JPEG memory manager
* to invert the data. See wrbmp.c for an example.
*
* As with compression, some operating modes may require temporary files.
* On some systems you may need to set up a signal handler to ensure that
* temporary files are deleted if the program is interrupted. See libjpeg.doc.
*/

Просмотреть файл

@ -1,210 +0,0 @@
IJG JPEG LIBRARY: FILE LIST
Copyright (C) 1994-1998, Thomas G. Lane.
This file is part of the Independent JPEG Group's software.
For conditions of distribution and use, see the accompanying README file.
Here is a road map to the files in the IJG JPEG distribution. The
distribution includes the JPEG library proper, plus two application
programs ("cjpeg" and "djpeg") which use the library to convert JPEG
files to and from some other popular image formats. A third application
"jpegtran" uses the library to do lossless conversion between different
variants of JPEG. There are also two stand-alone applications,
"rdjpgcom" and "wrjpgcom".
THE JPEG LIBRARY
================
Include files:
jpeglib.h JPEG library's exported data and function declarations.
jconfig.h Configuration declarations. Note: this file is not present
in the distribution; it is generated during installation.
jmorecfg.h Additional configuration declarations; need not be changed
for a standard installation.
jerror.h Declares JPEG library's error and trace message codes.
jinclude.h Central include file used by all IJG .c files to reference
system include files.
jpegint.h JPEG library's internal data structures.
jchuff.h Private declarations for Huffman encoder modules.
jdhuff.h Private declarations for Huffman decoder modules.
jdct.h Private declarations for forward & reverse DCT subsystems.
jmemsys.h Private declarations for memory management subsystem.
jversion.h Version information.
Applications using the library should include jpeglib.h (which in turn
includes jconfig.h and jmorecfg.h). Optionally, jerror.h may be included
if the application needs to reference individual JPEG error codes. The
other include files are intended for internal use and would not normally
be included by an application program. (cjpeg/djpeg/etc do use jinclude.h,
since its function is to improve portability of the whole IJG distribution.
Most other applications will directly include the system include files they
want, and hence won't need jinclude.h.)
C source code files:
These files contain most of the functions intended to be called directly by
an application program:
jcapimin.c Application program interface: core routines for compression.
jcapistd.c Application program interface: standard compression.
jdapimin.c Application program interface: core routines for decompression.
jdapistd.c Application program interface: standard decompression.
jcomapi.c Application program interface routines common to compression
and decompression.
jcparam.c Compression parameter setting helper routines.
jctrans.c API and library routines for transcoding compression.
jdtrans.c API and library routines for transcoding decompression.
Compression side of the library:
jcinit.c Initialization: determines which other modules to use.
jcmaster.c Master control: setup and inter-pass sequencing logic.
jcmainct.c Main buffer controller (preprocessor => JPEG compressor).
jcprepct.c Preprocessor buffer controller.
jccoefct.c Buffer controller for DCT coefficient buffer.
jccolor.c Color space conversion.
jcsample.c Downsampling.
jcdctmgr.c DCT manager (DCT implementation selection & control).
jfdctint.c Forward DCT using slow-but-accurate integer method.
jfdctfst.c Forward DCT using faster, less accurate integer method.
jfdctflt.c Forward DCT using floating-point arithmetic.
jchuff.c Huffman entropy coding for sequential JPEG.
jcphuff.c Huffman entropy coding for progressive JPEG.
jcmarker.c JPEG marker writing.
jdatadst.c Data destination manager for stdio output.
Decompression side of the library:
jdmaster.c Master control: determines which other modules to use.
jdinput.c Input controller: controls input processing modules.
jdmainct.c Main buffer controller (JPEG decompressor => postprocessor).
jdcoefct.c Buffer controller for DCT coefficient buffer.
jdpostct.c Postprocessor buffer controller.
jdmarker.c JPEG marker reading.
jdhuff.c Huffman entropy decoding for sequential JPEG.
jdphuff.c Huffman entropy decoding for progressive JPEG.
jddctmgr.c IDCT manager (IDCT implementation selection & control).
jidctint.c Inverse DCT using slow-but-accurate integer method.
jidctfst.c Inverse DCT using faster, less accurate integer method.
jidctflt.c Inverse DCT using floating-point arithmetic.
jidctred.c Inverse DCTs with reduced-size outputs.
jdsample.c Upsampling.
jdcolor.c Color space conversion.
jdmerge.c Merged upsampling/color conversion (faster, lower quality).
jquant1.c One-pass color quantization using a fixed-spacing colormap.
jquant2.c Two-pass color quantization using a custom-generated colormap.
Also handles one-pass quantization to an externally given map.
jdatasrc.c Data source manager for stdio input.
Support files for both compression and decompression:
jerror.c Standard error handling routines (application replaceable).
jmemmgr.c System-independent (more or less) memory management code.
jutils.c Miscellaneous utility routines.
jmemmgr.c relies on a system-dependent memory management module. The IJG
distribution includes the following implementations of the system-dependent
module:
jmemnobs.c "No backing store": assumes adequate virtual memory exists.
jmemansi.c Makes temporary files with ANSI-standard routine tmpfile().
jmemname.c Makes temporary files with program-generated file names.
jmemdos.c Custom implementation for MS-DOS (16-bit environment only):
can use extended and expanded memory as well as temp files.
jmemmac.c Custom implementation for Apple Macintosh.
Exactly one of the system-dependent modules should be configured into an
installed JPEG library (see install.doc for hints about which one to use).
On unusual systems you may find it worthwhile to make a special
system-dependent memory manager.
Non-C source code files:
jmemdosa.asm 80x86 assembly code support for jmemdos.c; used only in
MS-DOS-specific configurations of the JPEG library.
CJPEG/DJPEG/JPEGTRAN
====================
Include files:
cdjpeg.h Declarations shared by cjpeg/djpeg/jpegtran modules.
cderror.h Additional error and trace message codes for cjpeg et al.
transupp.h Declarations for jpegtran support routines in transupp.c.
C source code files:
cjpeg.c Main program for cjpeg.
djpeg.c Main program for djpeg.
jpegtran.c Main program for jpegtran.
cdjpeg.c Utility routines used by all three programs.
rdcolmap.c Code to read a colormap file for djpeg's "-map" switch.
rdswitch.c Code to process some of cjpeg's more complex switches.
Also used by jpegtran.
transupp.c Support code for jpegtran: lossless image manipulations.
Image file reader modules for cjpeg:
rdbmp.c BMP file input.
rdgif.c GIF file input (now just a stub).
rdppm.c PPM/PGM file input.
rdrle.c Utah RLE file input.
rdtarga.c Targa file input.
Image file writer modules for djpeg:
wrbmp.c BMP file output.
wrgif.c GIF file output (a mere shadow of its former self).
wrppm.c PPM/PGM file output.
wrrle.c Utah RLE file output.
wrtarga.c Targa file output.
RDJPGCOM/WRJPGCOM
=================
C source code files:
rdjpgcom.c Stand-alone rdjpgcom application.
wrjpgcom.c Stand-alone wrjpgcom application.
These programs do not depend on the IJG library. They do use
jconfig.h and jinclude.h, only to improve portability.
ADDITIONAL FILES
================
Documentation (see README for a guide to the documentation files):
README Master documentation file.
*.doc Other documentation files.
*.1 Documentation in Unix man page format.
change.log Version-to-version change highlights.
example.c Sample code for calling JPEG library.
Configuration/installation files and programs (see install.doc for more info):
configure Unix shell script to perform automatic configuration.
ltconfig Support scripts for configure (from GNU libtool).
ltmain.sh
config.guess
config.sub
install-sh Install shell script for those Unix systems lacking one.
ckconfig.c Program to generate jconfig.h on non-Unix systems.
jconfig.doc Template for making jconfig.h by hand.
makefile.* Sample makefiles for particular systems.
jconfig.* Sample jconfig.h for particular systems.
ansi2knr.c De-ANSIfier for pre-ANSI C compilers (courtesy of
L. Peter Deutsch and Aladdin Enterprises).
Test files (see install.doc for test procedure):
test*.* Source and comparison files for confidence test.
These are binary image files, NOT text files.

Разница между файлами не показана из-за своего большого размера Загрузить разницу

152
jpeg/jaricom.c Normal file
Просмотреть файл

@ -0,0 +1,152 @@
/*
* jaricom.c
*
* Developed 1997-2009 by Guido Vollbeding.
* This file is part of the Independent JPEG Group's software.
* For conditions of distribution and use, see the accompanying README file.
*
* This file contains probability estimation tables for common use in
* arithmetic entropy encoding and decoding routines.
*
* This data represents Table D.2 in the JPEG spec (ISO/IEC IS 10918-1
* and CCITT Recommendation ITU-T T.81) and Table 24 in the JBIG spec
* (ISO/IEC IS 11544 and CCITT Recommendation ITU-T T.82).
*/
#define JPEG_INTERNALS
#include "jinclude.h"
#include "jpeglib.h"
/* The following #define specifies the packing of the four components
* into the compact INT32 representation.
* Note that this formula must match the actual arithmetic encoder
* and decoder implementation. The implementation has to be changed
* if this formula is changed.
* The current organization is leaned on Markus Kuhn's JBIG
* implementation (jbig_tab.c).
*/
#define V(i,a,b,c,d) (((INT32)a << 16) | ((INT32)c << 8) | ((INT32)d << 7) | b)
const INT32 jpeg_aritab[113+1] = {
/*
* Index, Qe_Value, Next_Index_LPS, Next_Index_MPS, Switch_MPS
*/
V( 0, 0x5a1d, 1, 1, 1 ),
V( 1, 0x2586, 14, 2, 0 ),
V( 2, 0x1114, 16, 3, 0 ),
V( 3, 0x080b, 18, 4, 0 ),
V( 4, 0x03d8, 20, 5, 0 ),
V( 5, 0x01da, 23, 6, 0 ),
V( 6, 0x00e5, 25, 7, 0 ),
V( 7, 0x006f, 28, 8, 0 ),
V( 8, 0x0036, 30, 9, 0 ),
V( 9, 0x001a, 33, 10, 0 ),
V( 10, 0x000d, 35, 11, 0 ),
V( 11, 0x0006, 9, 12, 0 ),
V( 12, 0x0003, 10, 13, 0 ),
V( 13, 0x0001, 12, 13, 0 ),
V( 14, 0x5a7f, 15, 15, 1 ),
V( 15, 0x3f25, 36, 16, 0 ),
V( 16, 0x2cf2, 38, 17, 0 ),
V( 17, 0x207c, 39, 18, 0 ),
V( 18, 0x17b9, 40, 19, 0 ),
V( 19, 0x1182, 42, 20, 0 ),
V( 20, 0x0cef, 43, 21, 0 ),
V( 21, 0x09a1, 45, 22, 0 ),
V( 22, 0x072f, 46, 23, 0 ),
V( 23, 0x055c, 48, 24, 0 ),
V( 24, 0x0406, 49, 25, 0 ),
V( 25, 0x0303, 51, 26, 0 ),
V( 26, 0x0240, 52, 27, 0 ),
V( 27, 0x01b1, 54, 28, 0 ),
V( 28, 0x0144, 56, 29, 0 ),
V( 29, 0x00f5, 57, 30, 0 ),
V( 30, 0x00b7, 59, 31, 0 ),
V( 31, 0x008a, 60, 32, 0 ),
V( 32, 0x0068, 62, 33, 0 ),
V( 33, 0x004e, 63, 34, 0 ),
V( 34, 0x003b, 32, 35, 0 ),
V( 35, 0x002c, 33, 9, 0 ),
V( 36, 0x5ae1, 37, 37, 1 ),
V( 37, 0x484c, 64, 38, 0 ),
V( 38, 0x3a0d, 65, 39, 0 ),
V( 39, 0x2ef1, 67, 40, 0 ),
V( 40, 0x261f, 68, 41, 0 ),
V( 41, 0x1f33, 69, 42, 0 ),
V( 42, 0x19a8, 70, 43, 0 ),
V( 43, 0x1518, 72, 44, 0 ),
V( 44, 0x1177, 73, 45, 0 ),
V( 45, 0x0e74, 74, 46, 0 ),
V( 46, 0x0bfb, 75, 47, 0 ),
V( 47, 0x09f8, 77, 48, 0 ),
V( 48, 0x0861, 78, 49, 0 ),
V( 49, 0x0706, 79, 50, 0 ),
V( 50, 0x05cd, 48, 51, 0 ),
V( 51, 0x04de, 50, 52, 0 ),
V( 52, 0x040f, 50, 53, 0 ),
V( 53, 0x0363, 51, 54, 0 ),
V( 54, 0x02d4, 52, 55, 0 ),
V( 55, 0x025c, 53, 56, 0 ),
V( 56, 0x01f8, 54, 57, 0 ),
V( 57, 0x01a4, 55, 58, 0 ),
V( 58, 0x0160, 56, 59, 0 ),
V( 59, 0x0125, 57, 60, 0 ),
V( 60, 0x00f6, 58, 61, 0 ),
V( 61, 0x00cb, 59, 62, 0 ),
V( 62, 0x00ab, 61, 63, 0 ),
V( 63, 0x008f, 61, 32, 0 ),
V( 64, 0x5b12, 65, 65, 1 ),
V( 65, 0x4d04, 80, 66, 0 ),
V( 66, 0x412c, 81, 67, 0 ),
V( 67, 0x37d8, 82, 68, 0 ),
V( 68, 0x2fe8, 83, 69, 0 ),
V( 69, 0x293c, 84, 70, 0 ),
V( 70, 0x2379, 86, 71, 0 ),
V( 71, 0x1edf, 87, 72, 0 ),
V( 72, 0x1aa9, 87, 73, 0 ),
V( 73, 0x174e, 72, 74, 0 ),
V( 74, 0x1424, 72, 75, 0 ),
V( 75, 0x119c, 74, 76, 0 ),
V( 76, 0x0f6b, 74, 77, 0 ),
V( 77, 0x0d51, 75, 78, 0 ),
V( 78, 0x0bb6, 77, 79, 0 ),
V( 79, 0x0a40, 77, 48, 0 ),
V( 80, 0x5832, 80, 81, 1 ),
V( 81, 0x4d1c, 88, 82, 0 ),
V( 82, 0x438e, 89, 83, 0 ),
V( 83, 0x3bdd, 90, 84, 0 ),
V( 84, 0x34ee, 91, 85, 0 ),
V( 85, 0x2eae, 92, 86, 0 ),
V( 86, 0x299a, 93, 87, 0 ),
V( 87, 0x2516, 86, 71, 0 ),
V( 88, 0x5570, 88, 89, 1 ),
V( 89, 0x4ca9, 95, 90, 0 ),
V( 90, 0x44d9, 96, 91, 0 ),
V( 91, 0x3e22, 97, 92, 0 ),
V( 92, 0x3824, 99, 93, 0 ),
V( 93, 0x32b4, 99, 94, 0 ),
V( 94, 0x2e17, 93, 86, 0 ),
V( 95, 0x56a8, 95, 96, 1 ),
V( 96, 0x4f46, 101, 97, 0 ),
V( 97, 0x47e5, 102, 98, 0 ),
V( 98, 0x41cf, 103, 99, 0 ),
V( 99, 0x3c3d, 104, 100, 0 ),
V( 100, 0x375e, 99, 93, 0 ),
V( 101, 0x5231, 105, 102, 0 ),
V( 102, 0x4c0f, 106, 103, 0 ),
V( 103, 0x4639, 107, 104, 0 ),
V( 104, 0x415e, 103, 99, 0 ),
V( 105, 0x5627, 105, 106, 1 ),
V( 106, 0x50e7, 108, 107, 0 ),
V( 107, 0x4b85, 109, 103, 0 ),
V( 108, 0x5597, 110, 109, 0 ),
V( 109, 0x504f, 111, 107, 0 ),
V( 110, 0x5a10, 110, 111, 1 ),
V( 111, 0x5522, 112, 109, 0 ),
V( 112, 0x59eb, 112, 111, 1 ),
/*
* This last entry is used for fixed probability estimate of 0.5
* as recommended in Section 10.3 Table 5 of ITU-T Rec. T.851.
*/
V( 113, 0x5a1d, 113, 113, 0 )

Просмотреть файл

@ -2,6 +2,7 @@
* jcapimin.c
*
* Copyright (C) 1994-1998, Thomas G. Lane.
* Modified 2003-2010 by Guido Vollbeding.
* This file is part of the Independent JPEG Group's software.
* For conditions of distribution and use, see the accompanying README file.
*
@ -63,14 +64,25 @@ jpeg_CreateCompress (j_compress_ptr cinfo, int version, size_t structsize)
cinfo->comp_info = NULL;
for (i = 0; i < NUM_QUANT_TBLS; i++)
for (i = 0; i < NUM_QUANT_TBLS; i++) {
cinfo->quant_tbl_ptrs[i] = NULL;
#if JPEG_LIB_VERSION >= 70
cinfo->q_scale_factor[i] = 100;
#endif
}
for (i = 0; i < NUM_HUFF_TBLS; i++) {
cinfo->dc_huff_tbl_ptrs[i] = NULL;
cinfo->ac_huff_tbl_ptrs[i] = NULL;
}
#if JPEG_LIB_VERSION >= 80
/* Must do it here for emit_dqt in case jpeg_write_tables is used */
cinfo->block_size = DCTSIZE;
cinfo->natural_order = jpeg_natural_order;
cinfo->lim_Se = DCTSIZE2-1;
#endif
cinfo->script_space = NULL;
cinfo->input_gamma = 1.0; /* in case application forgets */

925
jpeg/jcarith.c Normal file
Просмотреть файл

@ -0,0 +1,925 @@
/*
* jcarith.c
*
* Developed 1997-2009 by Guido Vollbeding.
* This file is part of the Independent JPEG Group's software.
* For conditions of distribution and use, see the accompanying README file.
*
* This file contains portable arithmetic entropy encoding routines for JPEG
* (implementing the ISO/IEC IS 10918-1 and CCITT Recommendation ITU-T T.81).
*
* Both sequential and progressive modes are supported in this single module.
*
* Suspension is not currently supported in this module.
*/
#define JPEG_INTERNALS
#include "jinclude.h"
#include "jpeglib.h"
/* Expanded entropy encoder object for arithmetic encoding. */
typedef struct {
struct jpeg_entropy_encoder pub; /* public fields */
INT32 c; /* C register, base of coding interval, layout as in sec. D.1.3 */
INT32 a; /* A register, normalized size of coding interval */
INT32 sc; /* counter for stacked 0xFF values which might overflow */
INT32 zc; /* counter for pending 0x00 output values which might *
* be discarded at the end ("Pacman" termination) */
int ct; /* bit shift counter, determines when next byte will be written */
int buffer; /* buffer for most recent output byte != 0xFF */
int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */
int dc_context[MAX_COMPS_IN_SCAN]; /* context index for DC conditioning */
unsigned int restarts_to_go; /* MCUs left in this restart interval */
int next_restart_num; /* next restart number to write (0-7) */
/* Pointers to statistics areas (these workspaces have image lifespan) */
unsigned char * dc_stats[NUM_ARITH_TBLS];
unsigned char * ac_stats[NUM_ARITH_TBLS];
/* Statistics bin for coding with fixed probability 0.5 */
unsigned char fixed_bin[4];
} arith_entropy_encoder;
typedef arith_entropy_encoder * arith_entropy_ptr;
/* The following two definitions specify the allocation chunk size
* for the statistics area.
* According to sections F.1.4.4.1.3 and F.1.4.4.2, we need at least
* 49 statistics bins for DC, and 245 statistics bins for AC coding.
*
* We use a compact representation with 1 byte per statistics bin,
* thus the numbers directly represent byte sizes.
* This 1 byte per statistics bin contains the meaning of the MPS
* (more probable symbol) in the highest bit (mask 0x80), and the
* index into the probability estimation state machine table
* in the lower bits (mask 0x7F).
*/
#define DC_STAT_BINS 64
#define AC_STAT_BINS 256
/* NOTE: Uncomment the following #define if you want to use the
* given formula for calculating the AC conditioning parameter Kx
* for spectral selection progressive coding in section G.1.3.2
* of the spec (Kx = Kmin + SRL (8 + Se - Kmin) 4).
* Although the spec and P&M authors claim that this "has proven
* to give good results for 8 bit precision samples", I'm not
* convinced yet that this is really beneficial.
* Early tests gave only very marginal compression enhancements
* (a few - around 5 or so - bytes even for very large files),
* which would turn out rather negative if we'd suppress the
* DAC (Define Arithmetic Conditioning) marker segments for
* the default parameters in the future.
* Note that currently the marker writing module emits 12-byte
* DAC segments for a full-component scan in a color image.
* This is not worth worrying about IMHO. However, since the
* spec defines the default values to be used if the tables
* are omitted (unlike Huffman tables, which are required
* anyway), one might optimize this behaviour in the future,
* and then it would be disadvantageous to use custom tables if
* they don't provide sufficient gain to exceed the DAC size.
*
* On the other hand, I'd consider it as a reasonable result
* that the conditioning has no significant influence on the
* compression performance. This means that the basic
* statistical model is already rather stable.
*
* Thus, at the moment, we use the default conditioning values
* anyway, and do not use the custom formula.
*
#define CALCULATE_SPECTRAL_CONDITIONING
*/
/* IRIGHT_SHIFT is like RIGHT_SHIFT, but works on int rather than INT32.
* We assume that int right shift is unsigned if INT32 right shift is,
* which should be safe.
*/
#ifdef RIGHT_SHIFT_IS_UNSIGNED
#define ISHIFT_TEMPS int ishift_temp;
#define IRIGHT_SHIFT(x,shft) \
((ishift_temp = (x)) < 0 ? \
(ishift_temp >> (shft)) | ((~0) << (16-(shft))) : \
(ishift_temp >> (shft)))
#else
#define ISHIFT_TEMPS
#define IRIGHT_SHIFT(x,shft) ((x) >> (shft))
#endif
LOCAL(void)
emit_byte (int val, j_compress_ptr cinfo)
/* Write next output byte; we do not support suspension in this module. */
{
struct jpeg_destination_mgr * dest = cinfo->dest;
*dest->next_output_byte++ = (JOCTET) val;
if (--dest->free_in_buffer == 0)
if (! (*dest->empty_output_buffer) (cinfo))
ERREXIT(cinfo, JERR_CANT_SUSPEND);
}
/*
* Finish up at the end of an arithmetic-compressed scan.
*/
METHODDEF(void)
finish_pass (j_compress_ptr cinfo)
{
arith_entropy_ptr e = (arith_entropy_ptr) cinfo->entropy;
INT32 temp;
/* Section D.1.8: Termination of encoding */
/* Find the e->c in the coding interval with the largest
* number of trailing zero bits */
if ((temp = (e->a - 1 + e->c) & 0xFFFF0000L) < e->c)
e->c = temp + 0x8000L;
else
e->c = temp;
/* Send remaining bytes to output */
e->c <<= e->ct;
if (e->c & 0xF8000000L) {
/* One final overflow has to be handled */
if (e->buffer >= 0) {
if (e->zc)
do emit_byte(0x00, cinfo);
while (--e->zc);
emit_byte(e->buffer + 1, cinfo);
if (e->buffer + 1 == 0xFF)
emit_byte(0x00, cinfo);
}
e->zc += e->sc; /* carry-over converts stacked 0xFF bytes to 0x00 */
e->sc = 0;
} else {
if (e->buffer == 0)
++e->zc;
else if (e->buffer >= 0) {
if (e->zc)
do emit_byte(0x00, cinfo);
while (--e->zc);
emit_byte(e->buffer, cinfo);
}
if (e->sc) {
if (e->zc)
do emit_byte(0x00, cinfo);
while (--e->zc);
do {
emit_byte(0xFF, cinfo);
emit_byte(0x00, cinfo);
} while (--e->sc);
}
}
/* Output final bytes only if they are not 0x00 */
if (e->c & 0x7FFF800L) {
if (e->zc) /* output final pending zero bytes */
do emit_byte(0x00, cinfo);
while (--e->zc);
emit_byte((e->c >> 19) & 0xFF, cinfo);
if (((e->c >> 19) & 0xFF) == 0xFF)
emit_byte(0x00, cinfo);
if (e->c & 0x7F800L) {
emit_byte((e->c >> 11) & 0xFF, cinfo);
if (((e->c >> 11) & 0xFF) == 0xFF)
emit_byte(0x00, cinfo);
}
}
}
/*
* The core arithmetic encoding routine (common in JPEG and JBIG).
* This needs to go as fast as possible.
* Machine-dependent optimization facilities
* are not utilized in this portable implementation.
* However, this code should be fairly efficient and
* may be a good base for further optimizations anyway.
*
* Parameter 'val' to be encoded may be 0 or 1 (binary decision).
*
* Note: I've added full "Pacman" termination support to the
* byte output routines, which is equivalent to the optional
* Discard_final_zeros procedure (Figure D.15) in the spec.
* Thus, we always produce the shortest possible output
* stream compliant to the spec (no trailing zero bytes,
* except for FF stuffing).
*
* I've also introduced a new scheme for accessing
* the probability estimation state machine table,
* derived from Markus Kuhn's JBIG implementation.
*/
LOCAL(void)
arith_encode (j_compress_ptr cinfo, unsigned char *st, int val)
{
register arith_entropy_ptr e = (arith_entropy_ptr) cinfo->entropy;
register unsigned char nl, nm;
register INT32 qe, temp;
register int sv;
/* Fetch values from our compact representation of Table D.2:
* Qe values and probability estimation state machine
*/
sv = *st;
qe = jpeg_aritab[sv & 0x7F]; /* => Qe_Value */
nl = qe & 0xFF; qe >>= 8; /* Next_Index_LPS + Switch_MPS */
nm = qe & 0xFF; qe >>= 8; /* Next_Index_MPS */
/* Encode & estimation procedures per sections D.1.4 & D.1.5 */
e->a -= qe;
if (val != (sv >> 7)) {
/* Encode the less probable symbol */
if (e->a >= qe) {
/* If the interval size (qe) for the less probable symbol (LPS)
* is larger than the interval size for the MPS, then exchange
* the two symbols for coding efficiency, otherwise code the LPS
* as usual: */
e->c += e->a;
e->a = qe;
}
*st = (sv & 0x80) ^ nl; /* Estimate_after_LPS */
} else {
/* Encode the more probable symbol */
if (e->a >= 0x8000L)
return; /* A >= 0x8000 -> ready, no renormalization required */
if (e->a < qe) {
/* If the interval size (qe) for the less probable symbol (LPS)
* is larger than the interval size for the MPS, then exchange
* the two symbols for coding efficiency: */
e->c += e->a;
e->a = qe;
}
*st = (sv & 0x80) ^ nm; /* Estimate_after_MPS */
}
/* Renormalization & data output per section D.1.6 */
do {
e->a <<= 1;
e->c <<= 1;
if (--e->ct == 0) {
/* Another byte is ready for output */
temp = e->c >> 19;
if (temp > 0xFF) {
/* Handle overflow over all stacked 0xFF bytes */
if (e->buffer >= 0) {
if (e->zc)
do emit_byte(0x00, cinfo);
while (--e->zc);
emit_byte(e->buffer + 1, cinfo);
if (e->buffer + 1 == 0xFF)
emit_byte(0x00, cinfo);
}
e->zc += e->sc; /* carry-over converts stacked 0xFF bytes to 0x00 */
e->sc = 0;
/* Note: The 3 spacer bits in the C register guarantee
* that the new buffer byte can't be 0xFF here
* (see page 160 in the P&M JPEG book). */
e->buffer = temp & 0xFF; /* new output byte, might overflow later */
} else if (temp == 0xFF) {
++e->sc; /* stack 0xFF byte (which might overflow later) */
} else {
/* Output all stacked 0xFF bytes, they will not overflow any more */
if (e->buffer == 0)
++e->zc;
else if (e->buffer >= 0) {
if (e->zc)
do emit_byte(0x00, cinfo);
while (--e->zc);
emit_byte(e->buffer, cinfo);
}
if (e->sc) {
if (e->zc)
do emit_byte(0x00, cinfo);
while (--e->zc);
do {
emit_byte(0xFF, cinfo);
emit_byte(0x00, cinfo);
} while (--e->sc);
}
e->buffer = temp & 0xFF; /* new output byte (can still overflow) */
}
e->c &= 0x7FFFFL;
e->ct += 8;
}
} while (e->a < 0x8000L);
}
/*
* Emit a restart marker & resynchronize predictions.
*/
LOCAL(void)
emit_restart (j_compress_ptr cinfo, int restart_num)
{
arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
int ci;
jpeg_component_info * compptr;
finish_pass(cinfo);
emit_byte(0xFF, cinfo);
emit_byte(JPEG_RST0 + restart_num, cinfo);
/* Re-initialize statistics areas */
for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
compptr = cinfo->cur_comp_info[ci];
/* DC needs no table for refinement scan */
if (cinfo->progressive_mode == 0 || (cinfo->Ss == 0 && cinfo->Ah == 0)) {
MEMZERO(entropy->dc_stats[compptr->dc_tbl_no], DC_STAT_BINS);
/* Reset DC predictions to 0 */
entropy->last_dc_val[ci] = 0;
entropy->dc_context[ci] = 0;
}
/* AC needs no table when not present */
if (cinfo->progressive_mode == 0 || cinfo->Se) {
MEMZERO(entropy->ac_stats[compptr->ac_tbl_no], AC_STAT_BINS);
}
}
/* Reset arithmetic encoding variables */
entropy->c = 0;
entropy->a = 0x10000L;
entropy->sc = 0;
entropy->zc = 0;
entropy->ct = 11;
entropy->buffer = -1; /* empty */
}
/*
* MCU encoding for DC initial scan (either spectral selection,
* or first pass of successive approximation).
*/
METHODDEF(boolean)
encode_mcu_DC_first (j_compress_ptr cinfo, JBLOCKROW *MCU_data)
{
arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
JBLOCKROW block;
unsigned char *st;
int blkn, ci, tbl;
int v, v2, m;
ISHIFT_TEMPS
/* Emit restart marker if needed */
if (cinfo->restart_interval) {
if (entropy->restarts_to_go == 0) {
emit_restart(cinfo, entropy->next_restart_num);
entropy->restarts_to_go = cinfo->restart_interval;
entropy->next_restart_num++;
entropy->next_restart_num &= 7;
}
entropy->restarts_to_go--;
}
/* Encode the MCU data blocks */
for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
block = MCU_data[blkn];
ci = cinfo->MCU_membership[blkn];
tbl = cinfo->cur_comp_info[ci]->dc_tbl_no;
/* Compute the DC value after the required point transform by Al.
* This is simply an arithmetic right shift.
*/
m = IRIGHT_SHIFT((int) ((*block)[0]), cinfo->Al);
/* Sections F.1.4.1 & F.1.4.4.1: Encoding of DC coefficients */
/* Table F.4: Point to statistics bin S0 for DC coefficient coding */
st = entropy->dc_stats[tbl] + entropy->dc_context[ci];
/* Figure F.4: Encode_DC_DIFF */
if ((v = m - entropy->last_dc_val[ci]) == 0) {
arith_encode(cinfo, st, 0);
entropy->dc_context[ci] = 0; /* zero diff category */
} else {
entropy->last_dc_val[ci] = m;
arith_encode(cinfo, st, 1);
/* Figure F.6: Encoding nonzero value v */
/* Figure F.7: Encoding the sign of v */
if (v > 0) {
arith_encode(cinfo, st + 1, 0); /* Table F.4: SS = S0 + 1 */
st += 2; /* Table F.4: SP = S0 + 2 */
entropy->dc_context[ci] = 4; /* small positive diff category */
} else {
v = -v;
arith_encode(cinfo, st + 1, 1); /* Table F.4: SS = S0 + 1 */
st += 3; /* Table F.4: SN = S0 + 3 */
entropy->dc_context[ci] = 8; /* small negative diff category */
}
/* Figure F.8: Encoding the magnitude category of v */
m = 0;
if (v -= 1) {
arith_encode(cinfo, st, 1);
m = 1;
v2 = v;
st = entropy->dc_stats[tbl] + 20; /* Table F.4: X1 = 20 */
while (v2 >>= 1) {
arith_encode(cinfo, st, 1);
m <<= 1;
st += 1;
}
}
arith_encode(cinfo, st, 0);
/* Section F.1.4.4.1.2: Establish dc_context conditioning category */
if (m < (int) ((1L << cinfo->arith_dc_L[tbl]) >> 1))
entropy->dc_context[ci] = 0; /* zero diff category */
else if (m > (int) ((1L << cinfo->arith_dc_U[tbl]) >> 1))
entropy->dc_context[ci] += 8; /* large diff category */
/* Figure F.9: Encoding the magnitude bit pattern of v */
st += 14;
while (m >>= 1)
arith_encode(cinfo, st, (m & v) ? 1 : 0);
}
}
return TRUE;
}
/*
* MCU encoding for AC initial scan (either spectral selection,
* or first pass of successive approximation).
*/
METHODDEF(boolean)
encode_mcu_AC_first (j_compress_ptr cinfo, JBLOCKROW *MCU_data)
{
arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
JBLOCKROW block;
unsigned char *st;
int tbl, k, ke;
int v, v2, m;
/* Emit restart marker if needed */
if (cinfo->restart_interval) {
if (entropy->restarts_to_go == 0) {
emit_restart(cinfo, entropy->next_restart_num);
entropy->restarts_to_go = cinfo->restart_interval;
entropy->next_restart_num++;
entropy->next_restart_num &= 7;
}
entropy->restarts_to_go--;
}
/* Encode the MCU data block */
block = MCU_data[0];
tbl = cinfo->cur_comp_info[0]->ac_tbl_no;
/* Sections F.1.4.2 & F.1.4.4.2: Encoding of AC coefficients */
/* Establish EOB (end-of-block) index */
for (ke = cinfo->Se; ke > 0; ke--)
/* We must apply the point transform by Al. For AC coefficients this
* is an integer division with rounding towards 0. To do this portably
* in C, we shift after obtaining the absolute value.
*/
if ((v = (*block)[jpeg_natural_order[ke]]) >= 0) {
if (v >>= cinfo->Al) break;
} else {
v = -v;
if (v >>= cinfo->Al) break;
}
/* Figure F.5: Encode_AC_Coefficients */
for (k = cinfo->Ss; k <= ke; k++) {
st = entropy->ac_stats[tbl] + 3 * (k - 1);
arith_encode(cinfo, st, 0); /* EOB decision */
for (;;) {
if ((v = (*block)[jpeg_natural_order[k]]) >= 0) {
if (v >>= cinfo->Al) {
arith_encode(cinfo, st + 1, 1);
arith_encode(cinfo, entropy->fixed_bin, 0);
break;
}
} else {
v = -v;
if (v >>= cinfo->Al) {
arith_encode(cinfo, st + 1, 1);
arith_encode(cinfo, entropy->fixed_bin, 1);
break;
}
}
arith_encode(cinfo, st + 1, 0); st += 3; k++;
}
st += 2;
/* Figure F.8: Encoding the magnitude category of v */
m = 0;
if (v -= 1) {
arith_encode(cinfo, st, 1);
m = 1;
v2 = v;
if (v2 >>= 1) {
arith_encode(cinfo, st, 1);
m <<= 1;
st = entropy->ac_stats[tbl] +
(k <= cinfo->arith_ac_K[tbl] ? 189 : 217);
while (v2 >>= 1) {
arith_encode(cinfo, st, 1);
m <<= 1;
st += 1;
}
}
}
arith_encode(cinfo, st, 0);
/* Figure F.9: Encoding the magnitude bit pattern of v */
st += 14;
while (m >>= 1)
arith_encode(cinfo, st, (m & v) ? 1 : 0);
}
/* Encode EOB decision only if k <= cinfo->Se */
if (k <= cinfo->Se) {
st = entropy->ac_stats[tbl] + 3 * (k - 1);
arith_encode(cinfo, st, 1);
}
return TRUE;
}
/*
* MCU encoding for DC successive approximation refinement scan.
*/
METHODDEF(boolean)
encode_mcu_DC_refine (j_compress_ptr cinfo, JBLOCKROW *MCU_data)
{
arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
unsigned char *st;
int Al, blkn;
/* Emit restart marker if needed */
if (cinfo->restart_interval) {
if (entropy->restarts_to_go == 0) {
emit_restart(cinfo, entropy->next_restart_num);
entropy->restarts_to_go = cinfo->restart_interval;
entropy->next_restart_num++;
entropy->next_restart_num &= 7;
}
entropy->restarts_to_go--;
}
st = entropy->fixed_bin; /* use fixed probability estimation */
Al = cinfo->Al;
/* Encode the MCU data blocks */
for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
/* We simply emit the Al'th bit of the DC coefficient value. */
arith_encode(cinfo, st, (MCU_data[blkn][0][0] >> Al) & 1);
}
return TRUE;
}
/*
* MCU encoding for AC successive approximation refinement scan.
*/
METHODDEF(boolean)
encode_mcu_AC_refine (j_compress_ptr cinfo, JBLOCKROW *MCU_data)
{
arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
JBLOCKROW block;
unsigned char *st;
int tbl, k, ke, kex;
int v;
/* Emit restart marker if needed */
if (cinfo->restart_interval) {
if (entropy->restarts_to_go == 0) {
emit_restart(cinfo, entropy->next_restart_num);
entropy->restarts_to_go = cinfo->restart_interval;
entropy->next_restart_num++;
entropy->next_restart_num &= 7;
}
entropy->restarts_to_go--;
}
/* Encode the MCU data block */
block = MCU_data[0];
tbl = cinfo->cur_comp_info[0]->ac_tbl_no;
/* Section G.1.3.3: Encoding of AC coefficients */
/* Establish EOB (end-of-block) index */
for (ke = cinfo->Se; ke > 0; ke--)
/* We must apply the point transform by Al. For AC coefficients this
* is an integer division with rounding towards 0. To do this portably
* in C, we shift after obtaining the absolute value.
*/
if ((v = (*block)[jpeg_natural_order[ke]]) >= 0) {
if (v >>= cinfo->Al) break;
} else {
v = -v;
if (v >>= cinfo->Al) break;
}
/* Establish EOBx (previous stage end-of-block) index */
for (kex = ke; kex > 0; kex--)
if ((v = (*block)[jpeg_natural_order[kex]]) >= 0) {
if (v >>= cinfo->Ah) break;
} else {
v = -v;
if (v >>= cinfo->Ah) break;
}
/* Figure G.10: Encode_AC_Coefficients_SA */
for (k = cinfo->Ss; k <= ke; k++) {
st = entropy->ac_stats[tbl] + 3 * (k - 1);
if (k > kex)
arith_encode(cinfo, st, 0); /* EOB decision */
for (;;) {
if ((v = (*block)[jpeg_natural_order[k]]) >= 0) {
if (v >>= cinfo->Al) {
if (v >> 1) /* previously nonzero coef */
arith_encode(cinfo, st + 2, (v & 1));
else { /* newly nonzero coef */
arith_encode(cinfo, st + 1, 1);
arith_encode(cinfo, entropy->fixed_bin, 0);
}
break;
}
} else {
v = -v;
if (v >>= cinfo->Al) {
if (v >> 1) /* previously nonzero coef */
arith_encode(cinfo, st + 2, (v & 1));
else { /* newly nonzero coef */
arith_encode(cinfo, st + 1, 1);
arith_encode(cinfo, entropy->fixed_bin, 1);
}
break;
}
}
arith_encode(cinfo, st + 1, 0); st += 3; k++;
}
}
/* Encode EOB decision only if k <= cinfo->Se */
if (k <= cinfo->Se) {
st = entropy->ac_stats[tbl] + 3 * (k - 1);
arith_encode(cinfo, st, 1);
}
return TRUE;
}
/*
* Encode and output one MCU's worth of arithmetic-compressed coefficients.
*/
METHODDEF(boolean)
encode_mcu (j_compress_ptr cinfo, JBLOCKROW *MCU_data)
{
arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
jpeg_component_info * compptr;
JBLOCKROW block;
unsigned char *st;
int blkn, ci, tbl, k, ke;
int v, v2, m;
/* Emit restart marker if needed */
if (cinfo->restart_interval) {
if (entropy->restarts_to_go == 0) {
emit_restart(cinfo, entropy->next_restart_num);
entropy->restarts_to_go = cinfo->restart_interval;
entropy->next_restart_num++;
entropy->next_restart_num &= 7;
}
entropy->restarts_to_go--;
}
/* Encode the MCU data blocks */
for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
block = MCU_data[blkn];
ci = cinfo->MCU_membership[blkn];
compptr = cinfo->cur_comp_info[ci];
/* Sections F.1.4.1 & F.1.4.4.1: Encoding of DC coefficients */
tbl = compptr->dc_tbl_no;
/* Table F.4: Point to statistics bin S0 for DC coefficient coding */
st = entropy->dc_stats[tbl] + entropy->dc_context[ci];
/* Figure F.4: Encode_DC_DIFF */
if ((v = (*block)[0] - entropy->last_dc_val[ci]) == 0) {
arith_encode(cinfo, st, 0);
entropy->dc_context[ci] = 0; /* zero diff category */
} else {
entropy->last_dc_val[ci] = (*block)[0];
arith_encode(cinfo, st, 1);
/* Figure F.6: Encoding nonzero value v */
/* Figure F.7: Encoding the sign of v */
if (v > 0) {
arith_encode(cinfo, st + 1, 0); /* Table F.4: SS = S0 + 1 */
st += 2; /* Table F.4: SP = S0 + 2 */
entropy->dc_context[ci] = 4; /* small positive diff category */
} else {
v = -v;
arith_encode(cinfo, st + 1, 1); /* Table F.4: SS = S0 + 1 */
st += 3; /* Table F.4: SN = S0 + 3 */
entropy->dc_context[ci] = 8; /* small negative diff category */
}
/* Figure F.8: Encoding the magnitude category of v */
m = 0;
if (v -= 1) {
arith_encode(cinfo, st, 1);
m = 1;
v2 = v;
st = entropy->dc_stats[tbl] + 20; /* Table F.4: X1 = 20 */
while (v2 >>= 1) {
arith_encode(cinfo, st, 1);
m <<= 1;
st += 1;
}
}
arith_encode(cinfo, st, 0);
/* Section F.1.4.4.1.2: Establish dc_context conditioning category */
if (m < (int) ((1L << cinfo->arith_dc_L[tbl]) >> 1))
entropy->dc_context[ci] = 0; /* zero diff category */
else if (m > (int) ((1L << cinfo->arith_dc_U[tbl]) >> 1))
entropy->dc_context[ci] += 8; /* large diff category */
/* Figure F.9: Encoding the magnitude bit pattern of v */
st += 14;
while (m >>= 1)
arith_encode(cinfo, st, (m & v) ? 1 : 0);
}
/* Sections F.1.4.2 & F.1.4.4.2: Encoding of AC coefficients */
tbl = compptr->ac_tbl_no;
/* Establish EOB (end-of-block) index */
for (ke = DCTSIZE2 - 1; ke > 0; ke--)
if ((*block)[jpeg_natural_order[ke]]) break;
/* Figure F.5: Encode_AC_Coefficients */
for (k = 1; k <= ke; k++) {
st = entropy->ac_stats[tbl] + 3 * (k - 1);
arith_encode(cinfo, st, 0); /* EOB decision */
while ((v = (*block)[jpeg_natural_order[k]]) == 0) {
arith_encode(cinfo, st + 1, 0); st += 3; k++;
}
arith_encode(cinfo, st + 1, 1);
/* Figure F.6: Encoding nonzero value v */
/* Figure F.7: Encoding the sign of v */
if (v > 0) {
arith_encode(cinfo, entropy->fixed_bin, 0);
} else {
v = -v;
arith_encode(cinfo, entropy->fixed_bin, 1);
}
st += 2;
/* Figure F.8: Encoding the magnitude category of v */
m = 0;
if (v -= 1) {
arith_encode(cinfo, st, 1);
m = 1;
v2 = v;
if (v2 >>= 1) {
arith_encode(cinfo, st, 1);
m <<= 1;
st = entropy->ac_stats[tbl] +
(k <= cinfo->arith_ac_K[tbl] ? 189 : 217);
while (v2 >>= 1) {
arith_encode(cinfo, st, 1);
m <<= 1;
st += 1;
}
}
}
arith_encode(cinfo, st, 0);
/* Figure F.9: Encoding the magnitude bit pattern of v */
st += 14;
while (m >>= 1)
arith_encode(cinfo, st, (m & v) ? 1 : 0);
}
/* Encode EOB decision only if k <= DCTSIZE2 - 1 */
if (k <= DCTSIZE2 - 1) {
st = entropy->ac_stats[tbl] + 3 * (k - 1);
arith_encode(cinfo, st, 1);
}
}
return TRUE;
}
/*
* Initialize for an arithmetic-compressed scan.
*/
METHODDEF(void)
start_pass (j_compress_ptr cinfo, boolean gather_statistics)
{
arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
int ci, tbl;
jpeg_component_info * compptr;
if (gather_statistics)
/* Make sure to avoid that in the master control logic!
* We are fully adaptive here and need no extra
* statistics gathering pass!
*/
ERREXIT(cinfo, JERR_NOT_COMPILED);
/* We assume jcmaster.c already validated the progressive scan parameters. */
/* Select execution routines */
if (cinfo->progressive_mode) {
if (cinfo->Ah == 0) {
if (cinfo->Ss == 0)
entropy->pub.encode_mcu = encode_mcu_DC_first;
else
entropy->pub.encode_mcu = encode_mcu_AC_first;
} else {
if (cinfo->Ss == 0)
entropy->pub.encode_mcu = encode_mcu_DC_refine;
else
entropy->pub.encode_mcu = encode_mcu_AC_refine;
}
} else
entropy->pub.encode_mcu = encode_mcu;
/* Allocate & initialize requested statistics areas */
for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
compptr = cinfo->cur_comp_info[ci];
/* DC needs no table for refinement scan */
if (cinfo->progressive_mode == 0 || (cinfo->Ss == 0 && cinfo->Ah == 0)) {
tbl = compptr->dc_tbl_no;
if (tbl < 0 || tbl >= NUM_ARITH_TBLS)
ERREXIT1(cinfo, JERR_NO_ARITH_TABLE, tbl);
if (entropy->dc_stats[tbl] == NULL)
entropy->dc_stats[tbl] = (unsigned char *) (*cinfo->mem->alloc_small)
((j_common_ptr) cinfo, JPOOL_IMAGE, DC_STAT_BINS);
MEMZERO(entropy->dc_stats[tbl], DC_STAT_BINS);
/* Initialize DC predictions to 0 */
entropy->last_dc_val[ci] = 0;
entropy->dc_context[ci] = 0;
}
/* AC needs no table when not present */
if (cinfo->progressive_mode == 0 || cinfo->Se) {
tbl = compptr->ac_tbl_no;
if (tbl < 0 || tbl >= NUM_ARITH_TBLS)
ERREXIT1(cinfo, JERR_NO_ARITH_TABLE, tbl);
if (entropy->ac_stats[tbl] == NULL)
entropy->ac_stats[tbl] = (unsigned char *) (*cinfo->mem->alloc_small)
((j_common_ptr) cinfo, JPOOL_IMAGE, AC_STAT_BINS);
MEMZERO(entropy->ac_stats[tbl], AC_STAT_BINS);
#ifdef CALCULATE_SPECTRAL_CONDITIONING
if (cinfo->progressive_mode)
/* Section G.1.3.2: Set appropriate arithmetic conditioning value Kx */
cinfo->arith_ac_K[tbl] = cinfo->Ss + ((8 + cinfo->Se - cinfo->Ss) >> 4);
#endif
}
}
/* Initialize arithmetic encoding variables */
entropy->c = 0;
entropy->a = 0x10000L;
entropy->sc = 0;
entropy->zc = 0;
entropy->ct = 11;
entropy->buffer = -1; /* empty */
/* Initialize restart stuff */
entropy->restarts_to_go = cinfo->restart_interval;
entropy->next_restart_num = 0;
}
/*
* Module initialization routine for arithmetic entropy encoding.
*/
GLOBAL(void)
jinit_arith_encoder (j_compress_ptr cinfo)
{
arith_entropy_ptr entropy;
int i;
entropy = (arith_entropy_ptr)
(*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
SIZEOF(arith_entropy_encoder));
cinfo->entropy = (struct jpeg_entropy_encoder *) entropy;
entropy->pub.start_pass = start_pass;
entropy->pub.finish_pass = finish_pass;
/* Mark tables unallocated */
for (i = 0; i < NUM_ARITH_TBLS; i++) {
entropy->dc_stats[i] = NULL;
entropy->ac_stats[i] = NULL;
}
/* Initialize index for fixed probability estimation */
entropy->fixed_bin[0] = 113;
}

Просмотреть файл

@ -2,6 +2,8 @@
* jccolor.c
*
* Copyright (C) 1991-1996, Thomas G. Lane.
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
* Copyright 2009 D. R. Commander
* This file is part of the Independent JPEG Group's software.
* For conditions of distribution and use, see the accompanying README file.
*
@ -11,6 +13,7 @@
#define JPEG_INTERNALS
#include "jinclude.h"
#include "jpeglib.h"
#include "jsimd.h"
/* Private subobject */
@ -78,6 +81,74 @@ typedef my_color_converter * my_cconvert_ptr;
#define TABLE_SIZE (8*(MAXJSAMPLE+1))
#if BITS_IN_JSAMPLE == 8
static const unsigned char red_lut[256] = {
0 , 0 , 1 , 1 , 1 , 1 , 2 , 2 , 2 , 3 , 3 , 3 , 4 , 4 , 4 , 4 ,
5 , 5 , 5 , 6 , 6 , 6 , 7 , 7 , 7 , 7 , 8 , 8 , 8 , 9 , 9 , 9 ,
10, 10, 10, 10, 11, 11, 11, 12, 12, 12, 13, 13, 13, 13, 14, 14,
14, 15, 15, 15, 16, 16, 16, 16, 17, 17, 17, 18, 18, 18, 19, 19,
19, 19, 20, 20, 20, 21, 21, 21, 22, 22, 22, 22, 23, 23, 23, 24,
24, 24, 25, 25, 25, 25, 26, 26, 26, 27, 27, 27, 28, 28, 28, 28,
29, 29, 29, 30, 30, 30, 30, 31, 31, 31, 32, 32, 32, 33, 33, 33,
33, 34, 34, 34, 35, 35, 35, 36, 36, 36, 36, 37, 37, 37, 38, 38,
38, 39, 39, 39, 39, 40, 40, 40, 41, 41, 41, 42, 42, 42, 42, 43,
43, 43, 44, 44, 44, 45, 45, 45, 45, 46, 46, 46, 47, 47, 47, 48,
48, 48, 48, 49, 49, 49, 50, 50, 50, 51, 51, 51, 51, 52, 52, 52,
53, 53, 53, 54, 54, 54, 54, 55, 55, 55, 56, 56, 56, 57, 57, 57,
57, 58, 58, 58, 59, 59, 59, 60, 60, 60, 60, 61, 61, 61, 62, 62,
62, 62, 63, 63, 63, 64, 64, 64, 65, 65, 65, 65, 66, 66, 66, 67,
67, 67, 68, 68, 68, 68, 69, 69, 69, 70, 70, 70, 71, 71, 71, 71,
72, 72, 72, 73, 73, 73, 74, 74, 74, 74, 75, 75, 75, 76, 76, 76
};
static const unsigned char green_lut[256] = {
0 , 1 , 1 , 2 , 2 , 3 , 4 , 4 , 5 , 5 , 6 , 6 ,
7 , 8 , 8 , 9 , 9 , 10 , 11 , 11 , 12 , 12 , 13 , 14 ,
14 , 15 , 15 , 16 , 16 , 17 , 18 , 18 , 19 , 19 , 20 , 21 ,
21 , 22 , 22 , 23 , 23 , 24 , 25 , 25 , 26 , 26 , 27 , 28 ,
28 , 29 , 29 , 30 , 31 , 31 , 32 , 32 , 33 , 33 , 34 , 35 ,
35 , 36 , 36 , 37 , 38 , 38 , 39 , 39 , 40 , 41 , 41 , 42 ,
42 , 43 , 43 , 44 , 45 , 45 , 46 , 46 , 47 , 48 , 48 , 49 ,
49 , 50 , 50 , 51 , 52 , 52 , 53 , 53 , 54 , 55 , 55 , 56 ,
56 , 57 , 58 , 58 , 59 , 59 , 60 , 60 , 61 , 62 , 62 , 63 ,
63 , 64 , 65 , 65 , 66 , 66 , 67 , 68 , 68 , 69 , 69 , 70 ,
70 , 71 , 72 , 72 , 73 , 73 , 74 , 75 , 75 , 76 , 76 , 77 ,
77 , 78 , 79 , 79 , 80 , 80 , 81 , 82 , 82 , 83 , 83 , 84 ,
85 , 85 , 86 , 86 , 87 , 87 , 88 , 89 , 89 , 90 , 90 , 91 ,
92 , 92 , 93 , 93 , 94 , 95 , 95 , 96 , 96 , 97 , 97 , 98 ,
99 , 99 , 100, 100, 101, 102, 102, 103, 103, 104, 104, 105,
106, 106, 107, 107, 108, 109, 109, 110, 110, 111, 112, 112,
113, 113, 114, 114, 115, 116, 116, 117, 117, 118, 119, 119,
120, 120, 121, 122, 122, 123, 123, 124, 124, 125, 126, 126,
127, 127, 128, 129, 129, 130, 130, 131, 131, 132, 133, 133,
134, 134, 135, 136, 136, 137, 137, 138, 139, 139, 140, 140,
141, 141, 142, 143, 143, 144, 144, 145, 146, 146, 147, 147,
148, 149, 149, 150
};
static const unsigned char blue_lut[256] = {
0 , 0 , 0 , 0 , 0 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 2 , 2 ,
2 , 2 , 2 , 2 , 2 , 2 , 3 , 3 , 3 , 3 , 3 , 3 , 3 , 3 , 3 , 4 ,
4 , 4 , 4 , 4 , 4 , 4 , 4 , 4 , 5 , 5 , 5 , 5 , 5 , 5 , 5 , 5 ,
5 , 6 , 6 , 6 , 6 , 6 , 6 , 6 , 6 , 6 , 7 , 7 , 7 , 7 , 7 , 7 ,
7 , 7 , 8 , 8 , 8 , 8 , 8 , 8 , 8 , 8 , 8 , 9 , 9 , 9 , 9 , 9 ,
9 , 9 , 9 , 9 , 10, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11,
11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13,
13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14,
15, 15, 15, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16,
16, 17, 17, 17, 17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18,
18, 18, 18, 19, 19, 19, 19, 19, 19, 19, 19, 19, 20, 20, 20, 20,
20, 20, 20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 22, 22, 22,
22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, 23, 24,
24, 24, 24, 24, 24, 24, 24, 25, 25, 25, 25, 25, 25, 25, 25, 25,
26, 26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27,
27, 27, 28, 28, 28, 28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29
};
#endif
/*
* Initialize for RGB->YCC colorspace conversion.
*/
@ -146,10 +217,10 @@ rgb_ycc_convert (j_compress_ptr cinfo,
outptr2 = output_buf[2][output_row];
output_row++;
for (col = 0; col < num_cols; col++) {
r = GETJSAMPLE(inptr[RGB_RED]);
g = GETJSAMPLE(inptr[RGB_GREEN]);
b = GETJSAMPLE(inptr[RGB_BLUE]);
inptr += RGB_PIXELSIZE;
r = GETJSAMPLE(inptr[rgb_red[cinfo->in_color_space]]);
g = GETJSAMPLE(inptr[rgb_green[cinfo->in_color_space]]);
b = GETJSAMPLE(inptr[rgb_blue[cinfo->in_color_space]]);
inptr += rgb_pixelsize[cinfo->in_color_space];
/* If the inputs are 0..MAXJSAMPLE, the outputs of these equations
* must be too; we do not need an explicit range-limiting operation.
* Hence the value being shifted is never negative, and we don't
@ -187,27 +258,35 @@ rgb_gray_convert (j_compress_ptr cinfo,
JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
JDIMENSION output_row, int num_rows)
{
my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
register int r, g, b;
#if BITS_IN_JSAMPLE != 8
register INT32 * ctab = cconvert->rgb_ycc_tab;
#endif
register JSAMPROW inptr;
register JSAMPROW outptr;
register JDIMENSION col;
JSAMPLE *maxoutptr;
JDIMENSION num_cols = cinfo->image_width;
int rindex = rgb_red[cinfo->in_color_space];
int gindex = rgb_green[cinfo->in_color_space];
int bindex = rgb_blue[cinfo->in_color_space];
int rgbstride = rgb_pixelsize[cinfo->in_color_space];
while (--num_rows >= 0) {
inptr = *input_buf++;
outptr = output_buf[0][output_row];
maxoutptr = &outptr[num_cols];
output_row++;
for (col = 0; col < num_cols; col++) {
r = GETJSAMPLE(inptr[RGB_RED]);
g = GETJSAMPLE(inptr[RGB_GREEN]);
b = GETJSAMPLE(inptr[RGB_BLUE]);
inptr += RGB_PIXELSIZE;
for (; outptr < maxoutptr; outptr++, inptr += rgbstride) {
/* Y */
outptr[col] = (JSAMPLE)
((ctab[r+R_Y_OFF] + ctab[g+G_Y_OFF] + ctab[b+B_Y_OFF])
>> SCALEBITS);
#if BITS_IN_JSAMPLE == 8
*outptr = red_lut[inptr[rindex]] + green_lut[inptr[gindex]]
+ blue_lut[inptr[bindex]];
#else
*outptr = (JSAMPLE)
((ctab[GETJSAMPLE(inptr[rindex])+R_Y_OFF]
+ ctab[GETJSAMPLE(inptr[gindex])+G_Y_OFF]
+ ctab[GETJSAMPLE(inptr[bindex])+B_Y_OFF])
>> SCALEBITS);
#endif
}
}
}
@ -368,11 +447,15 @@ jinit_color_converter (j_compress_ptr cinfo)
break;
case JCS_RGB:
#if RGB_PIXELSIZE != 3
if (cinfo->input_components != RGB_PIXELSIZE)
case JCS_EXT_RGB:
case JCS_EXT_RGBX:
case JCS_EXT_BGR:
case JCS_EXT_BGRX:
case JCS_EXT_XBGR:
case JCS_EXT_XRGB:
if (cinfo->input_components != rgb_pixelsize[cinfo->in_color_space])
ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE);
break;
#endif /* else share code with YCbCr */
case JCS_YCbCr:
if (cinfo->input_components != 3)
@ -398,7 +481,13 @@ jinit_color_converter (j_compress_ptr cinfo)
ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
if (cinfo->in_color_space == JCS_GRAYSCALE)
cconvert->pub.color_convert = grayscale_convert;
else if (cinfo->in_color_space == JCS_RGB) {
else if (cinfo->in_color_space == JCS_RGB ||
cinfo->in_color_space == JCS_EXT_RGB ||
cinfo->in_color_space == JCS_EXT_RGBX ||
cinfo->in_color_space == JCS_EXT_BGR ||
cinfo->in_color_space == JCS_EXT_BGRX ||
cinfo->in_color_space == JCS_EXT_XBGR ||
cinfo->in_color_space == JCS_EXT_XRGB) {
cconvert->pub.start_pass = rgb_ycc_start;
cconvert->pub.color_convert = rgb_gray_convert;
} else if (cinfo->in_color_space == JCS_YCbCr)
@ -408,9 +497,16 @@ jinit_color_converter (j_compress_ptr cinfo)
break;
case JCS_RGB:
case JCS_EXT_RGB:
case JCS_EXT_RGBX:
case JCS_EXT_BGR:
case JCS_EXT_BGRX:
case JCS_EXT_XBGR:
case JCS_EXT_XRGB:
if (cinfo->num_components != 3)
ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
if (cinfo->in_color_space == JCS_RGB && RGB_PIXELSIZE == 3)
if (cinfo->in_color_space == cinfo->jpeg_color_space &&
rgb_pixelsize[cinfo->in_color_space] == 3)
cconvert->pub.color_convert = null_convert;
else
ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
@ -419,9 +515,19 @@ jinit_color_converter (j_compress_ptr cinfo)
case JCS_YCbCr:
if (cinfo->num_components != 3)
ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
if (cinfo->in_color_space == JCS_RGB) {
cconvert->pub.start_pass = rgb_ycc_start;
cconvert->pub.color_convert = rgb_ycc_convert;
if (cinfo->in_color_space == JCS_RGB ||
cinfo->in_color_space == JCS_EXT_RGB ||
cinfo->in_color_space == JCS_EXT_RGBX ||
cinfo->in_color_space == JCS_EXT_BGR ||
cinfo->in_color_space == JCS_EXT_BGRX ||
cinfo->in_color_space == JCS_EXT_XBGR ||
cinfo->in_color_space == JCS_EXT_XRGB) {
if (jsimd_can_rgb_ycc())
cconvert->pub.color_convert = jsimd_rgb_ycc_convert;
else {
cconvert->pub.start_pass = rgb_ycc_start;
cconvert->pub.color_convert = rgb_ycc_convert;
}
} else if (cinfo->in_color_space == JCS_YCbCr)
cconvert->pub.color_convert = null_convert;
else

Просмотреть файл

@ -2,6 +2,9 @@
* jcdctmgr.c
*
* Copyright (C) 1994-1996, Thomas G. Lane.
* Copyright (C) 1999-2006, MIYASAKA Masaru.
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
* Copyright (C) 2011 D. R. Commander
* This file is part of the Independent JPEG Group's software.
* For conditions of distribution and use, see the accompanying README file.
*
@ -15,15 +18,37 @@
#include "jinclude.h"
#include "jpeglib.h"
#include "jdct.h" /* Private declarations for DCT subsystem */
#include "jsimddct.h"
/* Private subobject for this module */
typedef JMETHOD(void, forward_DCT_method_ptr, (DCTELEM * data));
typedef JMETHOD(void, float_DCT_method_ptr, (FAST_FLOAT * data));
typedef JMETHOD(void, convsamp_method_ptr,
(JSAMPARRAY sample_data, JDIMENSION start_col,
DCTELEM * workspace));
typedef JMETHOD(void, float_convsamp_method_ptr,
(JSAMPARRAY sample_data, JDIMENSION start_col,
FAST_FLOAT *workspace));
typedef JMETHOD(void, quantize_method_ptr,
(JCOEFPTR coef_block, DCTELEM * divisors,
DCTELEM * workspace));
typedef JMETHOD(void, float_quantize_method_ptr,
(JCOEFPTR coef_block, FAST_FLOAT * divisors,
FAST_FLOAT * workspace));
METHODDEF(void) quantize (JCOEFPTR, DCTELEM *, DCTELEM *);
typedef struct {
struct jpeg_forward_dct pub; /* public fields */
/* Pointer to the DCT routine actually in use */
forward_DCT_method_ptr do_dct;
forward_DCT_method_ptr dct;
convsamp_method_ptr convsamp;
quantize_method_ptr quantize;
/* The actual post-DCT divisors --- not identical to the quant table
* entries, because of scaling (especially for an unnormalized DCT).
@ -31,16 +56,147 @@ typedef struct {
*/
DCTELEM * divisors[NUM_QUANT_TBLS];
/* work area for FDCT subroutine */
DCTELEM * workspace;
#ifdef DCT_FLOAT_SUPPORTED
/* Same as above for the floating-point case. */
float_DCT_method_ptr do_float_dct;
float_DCT_method_ptr float_dct;
float_convsamp_method_ptr float_convsamp;
float_quantize_method_ptr float_quantize;
FAST_FLOAT * float_divisors[NUM_QUANT_TBLS];
FAST_FLOAT * float_workspace;
#endif
} my_fdct_controller;
typedef my_fdct_controller * my_fdct_ptr;
/*
* Find the highest bit in an integer through binary search.
*/
LOCAL(int)
flss (UINT16 val)
{
int bit;
bit = 16;
if (!val)
return 0;
if (!(val & 0xff00)) {
bit -= 8;
val <<= 8;
}
if (!(val & 0xf000)) {
bit -= 4;
val <<= 4;
}
if (!(val & 0xc000)) {
bit -= 2;
val <<= 2;
}
if (!(val & 0x8000)) {
bit -= 1;
val <<= 1;
}
return bit;
}
/*
* Compute values to do a division using reciprocal.
*
* This implementation is based on an algorithm described in
* "How to optimize for the Pentium family of microprocessors"
* (http://www.agner.org/assem/).
* More information about the basic algorithm can be found in
* the paper "Integer Division Using Reciprocals" by Robert Alverson.
*
* The basic idea is to replace x/d by x * d^-1. In order to store
* d^-1 with enough precision we shift it left a few places. It turns
* out that this algoright gives just enough precision, and also fits
* into DCTELEM:
*
* b = (the number of significant bits in divisor) - 1
* r = (word size) + b
* f = 2^r / divisor
*
* f will not be an integer for most cases, so we need to compensate
* for the rounding error introduced:
*
* no fractional part:
*
* result = input >> r
*
* fractional part of f < 0.5:
*
* round f down to nearest integer
* result = ((input + 1) * f) >> r
*
* fractional part of f > 0.5:
*
* round f up to nearest integer
* result = (input * f) >> r
*
* This is the original algorithm that gives truncated results. But we
* want properly rounded results, so we replace "input" with
* "input + divisor/2".
*
* In order to allow SIMD implementations we also tweak the values to
* allow the same calculation to be made at all times:
*
* dctbl[0] = f rounded to nearest integer
* dctbl[1] = divisor / 2 (+ 1 if fractional part of f < 0.5)
* dctbl[2] = 1 << ((word size) * 2 - r)
* dctbl[3] = r - (word size)
*
* dctbl[2] is for stupid instruction sets where the shift operation
* isn't member wise (e.g. MMX).
*
* The reason dctbl[2] and dctbl[3] reduce the shift with (word size)
* is that most SIMD implementations have a "multiply and store top
* half" operation.
*
* Lastly, we store each of the values in their own table instead
* of in a consecutive manner, yet again in order to allow SIMD
* routines.
*/
LOCAL(int)
compute_reciprocal (UINT16 divisor, DCTELEM * dtbl)
{
UDCTELEM2 fq, fr;
UDCTELEM c;
int b, r;
b = flss(divisor) - 1;
r = sizeof(DCTELEM) * 8 + b;
fq = ((UDCTELEM2)1 << r) / divisor;
fr = ((UDCTELEM2)1 << r) % divisor;
c = divisor / 2; /* for rounding */
if (fr == 0) { /* divisor is power of two */
/* fq will be one bit too large to fit in DCTELEM, so adjust */
fq >>= 1;
r--;
} else if (fr <= (divisor / 2)) { /* fractional part is < 0.5 */
c++;
} else { /* fractional part is > 0.5 */
fq++;
}
dtbl[DCTSIZE2 * 0] = (DCTELEM) fq; /* reciprocal */
dtbl[DCTSIZE2 * 1] = (DCTELEM) c; /* correction + roundfactor */
dtbl[DCTSIZE2 * 2] = (DCTELEM) (1 << (sizeof(DCTELEM)*8*2 - r)); /* scale */
dtbl[DCTSIZE2 * 3] = (DCTELEM) r - sizeof(DCTELEM)*8; /* shift */
if(r <= 16) return 0;
else return 1;
}
/*
* Initialize for a processing pass.
* Verify that all referenced Q-tables are present, and set up
@ -78,11 +234,13 @@ start_pass_fdctmgr (j_compress_ptr cinfo)
if (fdct->divisors[qtblno] == NULL) {
fdct->divisors[qtblno] = (DCTELEM *)
(*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
DCTSIZE2 * SIZEOF(DCTELEM));
(DCTSIZE2 * 4) * SIZEOF(DCTELEM));
}
dtbl = fdct->divisors[qtblno];
for (i = 0; i < DCTSIZE2; i++) {
dtbl[i] = ((DCTELEM) qtbl->quantval[i]) << 3;
if(!compute_reciprocal(qtbl->quantval[i] << 3, &dtbl[i])
&& fdct->quantize == jsimd_quantize)
fdct->quantize = quantize;
}
break;
#endif
@ -112,14 +270,16 @@ start_pass_fdctmgr (j_compress_ptr cinfo)
if (fdct->divisors[qtblno] == NULL) {
fdct->divisors[qtblno] = (DCTELEM *)
(*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
DCTSIZE2 * SIZEOF(DCTELEM));
(DCTSIZE2 * 4) * SIZEOF(DCTELEM));
}
dtbl = fdct->divisors[qtblno];
for (i = 0; i < DCTSIZE2; i++) {
dtbl[i] = (DCTELEM)
if(!compute_reciprocal(
DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i],
(INT32) aanscales[i]),
CONST_BITS-3);
CONST_BITS-3), &dtbl[i])
&& fdct->quantize == jsimd_quantize)
fdct->quantize = quantize;
}
}
break;
@ -168,6 +328,77 @@ start_pass_fdctmgr (j_compress_ptr cinfo)
}
/*
* Load data into workspace, applying unsigned->signed conversion.
*/
METHODDEF(void)
convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM * workspace)
{
register DCTELEM *workspaceptr;
register JSAMPROW elemptr;
register int elemr;
workspaceptr = workspace;
for (elemr = 0; elemr < DCTSIZE; elemr++) {
elemptr = sample_data[elemr] + start_col;
#if DCTSIZE == 8 /* unroll the inner loop */
*workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
*workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
*workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
*workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
*workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
*workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
*workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
*workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
#else
{
register int elemc;
for (elemc = DCTSIZE; elemc > 0; elemc--)
*workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
}
#endif
}
}
/*
* Quantize/descale the coefficients, and store into coef_blocks[].
*/
METHODDEF(void)
quantize (JCOEFPTR coef_block, DCTELEM * divisors, DCTELEM * workspace)
{
int i;
DCTELEM temp;
UDCTELEM recip, corr, shift;
UDCTELEM2 product;
JCOEFPTR output_ptr = coef_block;
for (i = 0; i < DCTSIZE2; i++) {
temp = workspace[i];
recip = divisors[i + DCTSIZE2 * 0];
corr = divisors[i + DCTSIZE2 * 1];
shift = divisors[i + DCTSIZE2 * 3];
if (temp < 0) {
temp = -temp;
product = (UDCTELEM2)(temp + corr) * recip;
product >>= shift + sizeof(DCTELEM)*8;
temp = product;
temp = -temp;
} else {
product = (UDCTELEM2)(temp + corr) * recip;
product >>= shift + sizeof(DCTELEM)*8;
temp = product;
}
output_ptr[i] = (JCOEF) temp;
}
}
/*
* Perform forward DCT on one or more blocks of a component.
*
@ -185,87 +416,87 @@ forward_DCT (j_compress_ptr cinfo, jpeg_component_info * compptr,
{
/* This routine is heavily used, so it's worth coding it tightly. */
my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
forward_DCT_method_ptr do_dct = fdct->do_dct;
DCTELEM * divisors = fdct->divisors[compptr->quant_tbl_no];
DCTELEM workspace[DCTSIZE2]; /* work area for FDCT subroutine */
DCTELEM * workspace;
JDIMENSION bi;
/* Make sure the compiler doesn't look up these every pass */
forward_DCT_method_ptr do_dct = fdct->dct;
convsamp_method_ptr do_convsamp = fdct->convsamp;
quantize_method_ptr do_quantize = fdct->quantize;
workspace = fdct->workspace;
sample_data += start_row; /* fold in the vertical offset once */
for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
/* Load data into workspace, applying unsigned->signed conversion */
{ register DCTELEM *workspaceptr;
register JSAMPROW elemptr;
register int elemr;
workspaceptr = workspace;
for (elemr = 0; elemr < DCTSIZE; elemr++) {
elemptr = sample_data[elemr] + start_col;
#if DCTSIZE == 8 /* unroll the inner loop */
*workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
*workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
*workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
*workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
*workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
*workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
*workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
*workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
#else
{ register int elemc;
for (elemc = DCTSIZE; elemc > 0; elemc--) {
*workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
}
}
#endif
}
}
(*do_convsamp) (sample_data, start_col, workspace);
/* Perform the DCT */
(*do_dct) (workspace);
/* Quantize/descale the coefficients, and store into coef_blocks[] */
{ register DCTELEM temp, qval;
register int i;
register JCOEFPTR output_ptr = coef_blocks[bi];
for (i = 0; i < DCTSIZE2; i++) {
qval = divisors[i];
temp = workspace[i];
/* Divide the coefficient value by qval, ensuring proper rounding.
* Since C does not specify the direction of rounding for negative
* quotients, we have to force the dividend positive for portability.
*
* In most files, at least half of the output values will be zero
* (at default quantization settings, more like three-quarters...)
* so we should ensure that this case is fast. On many machines,
* a comparison is enough cheaper than a divide to make a special test
* a win. Since both inputs will be nonnegative, we need only test
* for a < b to discover whether a/b is 0.
* If your machine's division is fast enough, define FAST_DIVIDE.
*/
#ifdef FAST_DIVIDE
#define DIVIDE_BY(a,b) a /= b
#else
#define DIVIDE_BY(a,b) if (a >= b) a /= b; else a = 0
#endif
if (temp < 0) {
temp = -temp;
temp += qval>>1; /* for rounding */
DIVIDE_BY(temp, qval);
temp = -temp;
} else {
temp += qval>>1; /* for rounding */
DIVIDE_BY(temp, qval);
}
output_ptr[i] = (JCOEF) temp;
}
}
(*do_quantize) (coef_blocks[bi], divisors, workspace);
}
}
#ifdef DCT_FLOAT_SUPPORTED
METHODDEF(void)
convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, FAST_FLOAT * workspace)
{
register FAST_FLOAT *workspaceptr;
register JSAMPROW elemptr;
register int elemr;
workspaceptr = workspace;
for (elemr = 0; elemr < DCTSIZE; elemr++) {
elemptr = sample_data[elemr] + start_col;
#if DCTSIZE == 8 /* unroll the inner loop */
*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
#else
{
register int elemc;
for (elemc = DCTSIZE; elemc > 0; elemc--)
*workspaceptr++ = (FAST_FLOAT)
(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
}
#endif
}
}
METHODDEF(void)
quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors, FAST_FLOAT * workspace)
{
register FAST_FLOAT temp;
register int i;
register JCOEFPTR output_ptr = coef_block;
for (i = 0; i < DCTSIZE2; i++) {
/* Apply the quantization and scaling factor */
temp = workspace[i] * divisors[i];
/* Round to nearest integer.
* Since C does not specify the direction of rounding for negative
* quotients, we have to force the dividend positive for portability.
* The maximum coefficient size is +-16K (for 12-bit data), so this
* code should work for either 16-bit or 32-bit ints.
*/
output_ptr[i] = (JCOEF) ((int) (temp + (FAST_FLOAT) 16384.5) - 16384);
}
}
METHODDEF(void)
forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr,
JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
@ -275,62 +506,28 @@ forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr,
{
/* This routine is heavily used, so it's worth coding it tightly. */
my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
float_DCT_method_ptr do_dct = fdct->do_float_dct;
FAST_FLOAT * divisors = fdct->float_divisors[compptr->quant_tbl_no];
FAST_FLOAT workspace[DCTSIZE2]; /* work area for FDCT subroutine */
FAST_FLOAT * workspace;
JDIMENSION bi;
/* Make sure the compiler doesn't look up these every pass */
float_DCT_method_ptr do_dct = fdct->float_dct;
float_convsamp_method_ptr do_convsamp = fdct->float_convsamp;
float_quantize_method_ptr do_quantize = fdct->float_quantize;
workspace = fdct->float_workspace;
sample_data += start_row; /* fold in the vertical offset once */
for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
/* Load data into workspace, applying unsigned->signed conversion */
{ register FAST_FLOAT *workspaceptr;
register JSAMPROW elemptr;
register int elemr;
workspaceptr = workspace;
for (elemr = 0; elemr < DCTSIZE; elemr++) {
elemptr = sample_data[elemr] + start_col;
#if DCTSIZE == 8 /* unroll the inner loop */
*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
#else
{ register int elemc;
for (elemc = DCTSIZE; elemc > 0; elemc--) {
*workspaceptr++ = (FAST_FLOAT)
(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
}
}
#endif
}
}
(*do_convsamp) (sample_data, start_col, workspace);
/* Perform the DCT */
(*do_dct) (workspace);
/* Quantize/descale the coefficients, and store into coef_blocks[] */
{ register FAST_FLOAT temp;
register int i;
register JCOEFPTR output_ptr = coef_blocks[bi];
for (i = 0; i < DCTSIZE2; i++) {
/* Apply the quantization and scaling factor */
temp = workspace[i] * divisors[i];
/* Round to nearest integer.
* Since C does not specify the direction of rounding for negative
* quotients, we have to force the dividend positive for portability.
* The maximum coefficient size is +-16K (for 12-bit data), so this
* code should work for either 16-bit or 32-bit ints.
*/
output_ptr[i] = (JCOEF) ((int) (temp + (FAST_FLOAT) 16384.5) - 16384);
}
}
(*do_quantize) (coef_blocks[bi], divisors, workspace);
}
}
@ -353,23 +550,33 @@ jinit_forward_dct (j_compress_ptr cinfo)
cinfo->fdct = (struct jpeg_forward_dct *) fdct;
fdct->pub.start_pass = start_pass_fdctmgr;
/* First determine the DCT... */
switch (cinfo->dct_method) {
#ifdef DCT_ISLOW_SUPPORTED
case JDCT_ISLOW:
fdct->pub.forward_DCT = forward_DCT;
fdct->do_dct = jpeg_fdct_islow;
if (jsimd_can_fdct_islow())
fdct->dct = jsimd_fdct_islow;
else
fdct->dct = jpeg_fdct_islow;
break;
#endif
#ifdef DCT_IFAST_SUPPORTED
case JDCT_IFAST:
fdct->pub.forward_DCT = forward_DCT;
fdct->do_dct = jpeg_fdct_ifast;
if (jsimd_can_fdct_ifast())
fdct->dct = jsimd_fdct_ifast;
else
fdct->dct = jpeg_fdct_ifast;
break;
#endif
#ifdef DCT_FLOAT_SUPPORTED
case JDCT_FLOAT:
fdct->pub.forward_DCT = forward_DCT_float;
fdct->do_float_dct = jpeg_fdct_float;
if (jsimd_can_fdct_float())
fdct->float_dct = jsimd_fdct_float;
else
fdct->float_dct = jpeg_fdct_float;
break;
#endif
default:
@ -377,6 +584,54 @@ jinit_forward_dct (j_compress_ptr cinfo)
break;
}
/* ...then the supporting stages. */
switch (cinfo->dct_method) {
#ifdef DCT_ISLOW_SUPPORTED
case JDCT_ISLOW:
#endif
#ifdef DCT_IFAST_SUPPORTED
case JDCT_IFAST:
#endif
#if defined(DCT_ISLOW_SUPPORTED) || defined(DCT_IFAST_SUPPORTED)
if (jsimd_can_convsamp())
fdct->convsamp = jsimd_convsamp;
else
fdct->convsamp = convsamp;
if (jsimd_can_quantize())
fdct->quantize = jsimd_quantize;
else
fdct->quantize = quantize;
break;
#endif
#ifdef DCT_FLOAT_SUPPORTED
case JDCT_FLOAT:
if (jsimd_can_convsamp_float())
fdct->float_convsamp = jsimd_convsamp_float;
else
fdct->float_convsamp = convsamp_float;
if (jsimd_can_quantize_float())
fdct->float_quantize = jsimd_quantize_float;
else
fdct->float_quantize = quantize_float;
break;
#endif
default:
ERREXIT(cinfo, JERR_NOT_COMPILED);
break;
}
/* Allocate workspace memory */
#ifdef DCT_FLOAT_SUPPORTED
if (cinfo->dct_method == JDCT_FLOAT)
fdct->float_workspace = (FAST_FLOAT *)
(*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
SIZEOF(FAST_FLOAT) * DCTSIZE2);
else
#endif
fdct->workspace = (DCTELEM *)
(*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
SIZEOF(DCTELEM) * DCTSIZE2);
/* Mark divisor tables unallocated */
for (i = 0; i < NUM_QUANT_TBLS; i++) {
fdct->divisors[i] = NULL;

Просмотреть файл

@ -19,7 +19,6 @@
#include "jpeglib.h"
#include "jchuff.h" /* Declarations shared with jcphuff.c */
/* Expanded entropy encoder object for Huffman encoding.
*
* The savable_state subrecord contains fields that change within an MCU,

Просмотреть файл

@ -42,7 +42,11 @@ jinit_compress_master (j_compress_ptr cinfo)
jinit_forward_dct(cinfo);
/* Entropy encoding: either Huffman or arithmetic coding. */
if (cinfo->arith_code) {
#ifdef C_ARITH_CODING_SUPPORTED
jinit_arith_encoder(cinfo);
#else
ERREXIT(cinfo, JERR_ARITH_NOTIMPL);
#endif
} else {
if (cinfo->progressive_mode) {
#ifdef C_PROGRESSIVE_SUPPORTED

Просмотреть файл

@ -2,6 +2,7 @@
* jcmarker.c
*
* Copyright (C) 1991-1998, Thomas G. Lane.
* Copyright (C) 2010, D. R. Commander.
* This file is part of the Independent JPEG Group's software.
* For conditions of distribution and use, see the accompanying README file.
*
@ -11,6 +12,7 @@
#define JPEG_INTERNALS
#include "jinclude.h"
#include "jpeglib.h"
#include "jpegcomp.h"
typedef enum { /* JPEG marker codes */
@ -75,7 +77,9 @@ typedef enum { /* JPEG marker codes */
M_JPG13 = 0xfd,
M_COM = 0xfe,
M_TEM = 0x01
M_TEM = 0x01,
M_ERROR = 0x100
} JPEG_MARKER;
@ -103,7 +107,7 @@ typedef my_marker_writer * my_marker_ptr;
*/
LOCAL(void)
emit_byte (j_compress_ptr cinfo, int16 val)
emit_byte (j_compress_ptr cinfo, int val)
/* Emit a byte */
{
struct jpeg_destination_mgr * dest = cinfo->dest;
@ -121,12 +125,12 @@ emit_marker (j_compress_ptr cinfo, JPEG_MARKER mark)
/* Emit a marker code */
{
emit_byte(cinfo, 0xFF);
emit_byte(cinfo, (int16) mark);
emit_byte(cinfo, (int) mark);
}
LOCAL(void)
emit_2bytes (j_compress_ptr cinfo, int16 value)
emit_2bytes (j_compress_ptr cinfo, int value)
/* Emit a 2-byte integer; these are always MSB first in JPEG files */
{
emit_byte(cinfo, (value >> 8) & 0xFF);
@ -138,14 +142,14 @@ emit_2bytes (j_compress_ptr cinfo, int16 value)
* Routines to write specific marker types.
*/
LOCAL(int16)
emit_dqt (j_compress_ptr cinfo, int16 index)
LOCAL(int)
emit_dqt (j_compress_ptr cinfo, int index)
/* Emit a DQT marker */
/* Returns the precision used (0 = 8bits, 1 = 16bits) for baseline checking */
{
JQUANT_TBL * qtbl = cinfo->quant_tbl_ptrs[index];
int16 prec;
int16 i;
int prec;
int i;
if (qtbl == NULL)
ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, index);
@ -167,8 +171,8 @@ emit_dqt (j_compress_ptr cinfo, int16 index)
/* The table entries must be emitted in zigzag order. */
unsigned int qval = qtbl->quantval[jpeg_natural_order[i]];
if (prec)
emit_byte(cinfo, (int16) (qval >> 8));
emit_byte(cinfo, (int16) (qval & 0xFF));
emit_byte(cinfo, (int) (qval >> 8));
emit_byte(cinfo, (int) (qval & 0xFF));
}
qtbl->sent_table = TRUE;
@ -179,11 +183,11 @@ emit_dqt (j_compress_ptr cinfo, int16 index)
LOCAL(void)
emit_dht (j_compress_ptr cinfo, int16 index, boolean is_ac)
emit_dht (j_compress_ptr cinfo, int index, boolean is_ac)
/* Emit a DHT marker */
{
JHUFF_TBL * htbl;
int16 length, i;
int length, i;
if (is_ac) {
htbl = cinfo->ac_huff_tbl_ptrs[index];
@ -225,7 +229,7 @@ emit_dac (j_compress_ptr cinfo)
#ifdef C_ARITH_CODING_SUPPORTED
char dc_in_use[NUM_ARITH_TBLS];
char ac_in_use[NUM_ARITH_TBLS];
int16 length, i;
int length, i;
jpeg_component_info *compptr;
for (i = 0; i < NUM_ARITH_TBLS; i++)
@ -267,7 +271,7 @@ emit_dri (j_compress_ptr cinfo)
emit_2bytes(cinfo, 4); /* fixed length */
emit_2bytes(cinfo, (int16) cinfo->restart_interval);
emit_2bytes(cinfo, (int) cinfo->restart_interval);
}
@ -275,7 +279,7 @@ LOCAL(void)
emit_sof (j_compress_ptr cinfo, JPEG_MARKER code)
/* Emit a SOF marker */
{
int16 ci;
int ci;
jpeg_component_info *compptr;
emit_marker(cinfo, code);
@ -283,13 +287,13 @@ emit_sof (j_compress_ptr cinfo, JPEG_MARKER code)
emit_2bytes(cinfo, 3 * cinfo->num_components + 2 + 5 + 1); /* length */
/* Make sure image isn't bigger than SOF field can handle */
if ((long) cinfo->image_height > 65535L ||
(long) cinfo->image_width > 65535L)
if ((long) cinfo->_jpeg_height > 65535L ||
(long) cinfo->_jpeg_width > 65535L)
ERREXIT1(cinfo, JERR_IMAGE_TOO_BIG, (unsigned int) 65535);
emit_byte(cinfo, cinfo->data_precision);
emit_2bytes(cinfo, (int16) cinfo->image_height);
emit_2bytes(cinfo, (int16) cinfo->image_width);
emit_2bytes(cinfo, (int) cinfo->_jpeg_height);
emit_2bytes(cinfo, (int) cinfo->_jpeg_width);
emit_byte(cinfo, cinfo->num_components);
@ -306,7 +310,7 @@ LOCAL(void)
emit_sos (j_compress_ptr cinfo)
/* Emit a SOS marker */
{
int16 i, td, ta;
int i, td, ta;
jpeg_component_info *compptr;
emit_marker(cinfo, M_SOS);
@ -371,8 +375,8 @@ emit_jfif_app0 (j_compress_ptr cinfo)
emit_byte(cinfo, cinfo->JFIF_major_version); /* Version fields */
emit_byte(cinfo, cinfo->JFIF_minor_version);
emit_byte(cinfo, cinfo->density_unit); /* Pixel size information */
emit_2bytes(cinfo, (int16) cinfo->X_density);
emit_2bytes(cinfo, (int16) cinfo->Y_density);
emit_2bytes(cinfo, (int) cinfo->X_density);
emit_2bytes(cinfo, (int) cinfo->Y_density);
emit_byte(cinfo, 0); /* No thumbnail image */
emit_byte(cinfo, 0);
}
@ -441,14 +445,14 @@ write_marker_header (j_compress_ptr cinfo, int marker, unsigned int datalen)
emit_marker(cinfo, (JPEG_MARKER) marker);
emit_2bytes(cinfo, (int16) (datalen + 2)); /* total length */
emit_2bytes(cinfo, (int) (datalen + 2)); /* total length */
}
METHODDEF(void)
write_marker_byte (j_compress_ptr cinfo, int val)
/* Emit one byte of marker parameters following write_marker_header */
{
emit_byte(cinfo, (int16) val);
emit_byte(cinfo, val);
}
@ -491,7 +495,7 @@ write_file_header (j_compress_ptr cinfo)
METHODDEF(void)
write_frame_header (j_compress_ptr cinfo)
{
int16 ci, prec;
int ci, prec;
boolean is_baseline;
jpeg_component_info *compptr;
@ -549,7 +553,7 @@ METHODDEF(void)
write_scan_header (j_compress_ptr cinfo)
{
my_marker_ptr marker = (my_marker_ptr) cinfo->marker;
int16 i;
int i;
jpeg_component_info *compptr;
if (cinfo->arith_code) {
@ -613,7 +617,7 @@ write_file_trailer (j_compress_ptr cinfo)
METHODDEF(void)
write_tables_only (j_compress_ptr cinfo)
{
int16 i;
int i;
emit_marker(cinfo, M_SOI);

Просмотреть файл

@ -2,6 +2,8 @@
* jcmaster.c
*
* Copyright (C) 1991-1997, Thomas G. Lane.
* Modified 2003-2010 by Guido Vollbeding.
* Copyright (C) 2010, D. R. Commander.
* This file is part of the Independent JPEG Group's software.
* For conditions of distribution and use, see the accompanying README file.
*
@ -14,6 +16,7 @@
#define JPEG_INTERNALS
#include "jinclude.h"
#include "jpeglib.h"
#include "jpegcomp.h"
/* Private state */
@ -42,8 +45,28 @@ typedef my_comp_master * my_master_ptr;
* Support routines that do various essential calculations.
*/
#if JPEG_LIB_VERSION >= 70
/*
* Compute JPEG image dimensions and related values.
* NOTE: this is exported for possible use by application.
* Hence it mustn't do anything that can't be done twice.
*/
GLOBAL(void)
jpeg_calc_jpeg_dimensions (j_compress_ptr cinfo)
/* Do computations that are needed before master selection phase */
{
/* Hardwire it to "no scaling" */
cinfo->jpeg_width = cinfo->image_width;
cinfo->jpeg_height = cinfo->image_height;
cinfo->min_DCT_h_scaled_size = DCTSIZE;
cinfo->min_DCT_v_scaled_size = DCTSIZE;
}
#endif
LOCAL(void)
initial_setup (j_compress_ptr cinfo)
initial_setup (j_compress_ptr cinfo, boolean transcode_only)
/* Do computations that are needed before master selection phase */
{
int ci;
@ -51,14 +74,19 @@ initial_setup (j_compress_ptr cinfo)
long samplesperrow;
JDIMENSION jd_samplesperrow;
#if JPEG_LIB_VERSION >= 70
if (!transcode_only)
jpeg_calc_jpeg_dimensions(cinfo);
#endif
/* Sanity check on image dimensions */
if (cinfo->image_height <= 0 || cinfo->image_width <= 0
if (cinfo->_jpeg_height <= 0 || cinfo->_jpeg_width <= 0
|| cinfo->num_components <= 0 || cinfo->input_components <= 0)
ERREXIT(cinfo, JERR_EMPTY_IMAGE);
/* Make sure image isn't bigger than I can handle */
if ((long) cinfo->image_height > (long) JPEG_MAX_DIMENSION ||
(long) cinfo->image_width > (long) JPEG_MAX_DIMENSION)
if ((long) cinfo->_jpeg_height > (long) JPEG_MAX_DIMENSION ||
(long) cinfo->_jpeg_width > (long) JPEG_MAX_DIMENSION)
ERREXIT1(cinfo, JERR_IMAGE_TOO_BIG, (unsigned int) JPEG_MAX_DIMENSION);
/* Width of an input scanline must be representable as JDIMENSION. */
@ -96,20 +124,24 @@ initial_setup (j_compress_ptr cinfo)
/* Fill in the correct component_index value; don't rely on application */
compptr->component_index = ci;
/* For compression, we never do DCT scaling. */
#if JPEG_LIB_VERSION >= 70
compptr->DCT_h_scaled_size = compptr->DCT_v_scaled_size = DCTSIZE;
#else
compptr->DCT_scaled_size = DCTSIZE;
#endif
/* Size in DCT blocks */
compptr->width_in_blocks = (JDIMENSION)
jdiv_round_up((long) cinfo->image_width * (long) compptr->h_samp_factor,
jdiv_round_up((long) cinfo->_jpeg_width * (long) compptr->h_samp_factor,
(long) (cinfo->max_h_samp_factor * DCTSIZE));
compptr->height_in_blocks = (JDIMENSION)
jdiv_round_up((long) cinfo->image_height * (long) compptr->v_samp_factor,
jdiv_round_up((long) cinfo->_jpeg_height * (long) compptr->v_samp_factor,
(long) (cinfo->max_v_samp_factor * DCTSIZE));
/* Size in samples */
compptr->downsampled_width = (JDIMENSION)
jdiv_round_up((long) cinfo->image_width * (long) compptr->h_samp_factor,
jdiv_round_up((long) cinfo->_jpeg_width * (long) compptr->h_samp_factor,
(long) cinfo->max_h_samp_factor);
compptr->downsampled_height = (JDIMENSION)
jdiv_round_up((long) cinfo->image_height * (long) compptr->v_samp_factor,
jdiv_round_up((long) cinfo->_jpeg_height * (long) compptr->v_samp_factor,
(long) cinfo->max_v_samp_factor);
/* Mark component needed (this flag isn't actually used for compression) */
compptr->component_needed = TRUE;
@ -119,7 +151,7 @@ initial_setup (j_compress_ptr cinfo)
* main controller will call coefficient controller).
*/
cinfo->total_iMCU_rows = (JDIMENSION)
jdiv_round_up((long) cinfo->image_height,
jdiv_round_up((long) cinfo->_jpeg_height,
(long) (cinfo->max_v_samp_factor*DCTSIZE));
}
@ -347,10 +379,10 @@ per_scan_setup (j_compress_ptr cinfo)
/* Overall image size in MCUs */
cinfo->MCUs_per_row = (JDIMENSION)
jdiv_round_up((long) cinfo->image_width,
jdiv_round_up((long) cinfo->_jpeg_width,
(long) (cinfo->max_h_samp_factor*DCTSIZE));
cinfo->MCU_rows_in_scan = (JDIMENSION)
jdiv_round_up((long) cinfo->image_height,
jdiv_round_up((long) cinfo->_jpeg_height,
(long) (cinfo->max_v_samp_factor*DCTSIZE));
cinfo->blocks_in_MCU = 0;
@ -554,7 +586,7 @@ jinit_c_master_control (j_compress_ptr cinfo, boolean transcode_only)
master->pub.is_last_pass = FALSE;
/* Validate parameters, determine derived values */
initial_setup(cinfo);
initial_setup(cinfo, transcode_only);
if (cinfo->scan_info != NULL) {
#ifdef C_MULTISCAN_FILES_SUPPORTED

Просмотреть файл

@ -1,43 +0,0 @@
/* jconfig.h --- generated by ckconfig.c */
/* see jconfig.doc for explanations */
#define ALIGN_TYPE long /* memory alignment */
#define NO_GETENV /* we do have the function, but it's dead */
#ifdef __cplusplus
#define INLINE inline /* we have them in C++ */
#endif
#define HAVE_PROTOTYPES
#define HAVE_UNSIGNED_CHAR
#define HAVE_UNSIGNED_SHORT
/* #define void char */
/* #define const */
#undef CHAR_IS_UNSIGNED
#define HAVE_STDDEF_H
#define HAVE_STDLIB_H
#undef NEED_BSD_STRINGS
#undef NEED_SYS_TYPES_H
#undef NEED_FAR_POINTERS
#undef NEED_SHORT_EXTERNAL_NAMES
#undef INCOMPLETE_TYPES_BROKEN
#ifdef JPEG_INTERNALS
#undef RIGHT_SHIFT_IS_UNSIGNED
#endif /* JPEG_INTERNALS */
#ifdef JPEG_CJPEG_DJPEG
#define BMP_SUPPORTED /* BMP image file format */
#define GIF_SUPPORTED /* GIF image file format */
#define PPM_SUPPORTED /* PBMPLUS PPM/PGM image file format */
#undef RLE_SUPPORTED /* Utah RLE image file format */
#define TARGA_SUPPORTED /* Targa image file format */
#undef TWO_FILE_COMMANDLINE /* You may need this on non-Unix systems */
#undef NEED_SIGNAL_CATCHER /* Define this if you use jmemname.c */
#undef DONT_USE_B_MODE
/* #define PROGRESS_REPORT */ /* optional */
#endif /* JPEG_CJPEG_DJPEG */

Просмотреть файл

@ -1,155 +0,0 @@
/*
* jconfig.doc
*
* Copyright (C) 1991-1994, Thomas G. Lane.
* This file is part of the Independent JPEG Group's software.
* For conditions of distribution and use, see the accompanying README file.
*
* This file documents the configuration options that are required to
* customize the JPEG software for a particular system.
*
* The actual configuration options for a particular installation are stored
* in jconfig.h. On many machines, jconfig.h can be generated automatically
* or copied from one of the "canned" jconfig files that we supply. But if
* you need to generate a jconfig.h file by hand, this file tells you how.
*
* DO NOT EDIT THIS FILE --- IT WON'T ACCOMPLISH ANYTHING.
* EDIT A COPY NAMED JCONFIG.H.
*/
/*
* These symbols indicate the properties of your machine or compiler.
* #define the symbol if yes, #undef it if no.
*/
/* Does your compiler support function prototypes?
* (If not, you also need to use ansi2knr, see install.doc)
*/
#define HAVE_PROTOTYPES
/* Does your compiler support the declaration "unsigned char" ?
* How about "unsigned short" ?
*/
#define HAVE_UNSIGNED_CHAR
#define HAVE_UNSIGNED_SHORT
/* Define "void" as "char" if your compiler doesn't know about type void.
* NOTE: be sure to define void such that "void *" represents the most general
* pointer type, e.g., that returned by malloc().
*/
/* #define void char */
/* Define "const" as empty if your compiler doesn't know the "const" keyword.
*/
/* #define const */
/* Define this if an ordinary "char" type is unsigned.
* If you're not sure, leaving it undefined will work at some cost in speed.
* If you defined HAVE_UNSIGNED_CHAR then the speed difference is minimal.
*/
#undef CHAR_IS_UNSIGNED
/* Define this if your system has an ANSI-conforming <stddef.h> file.
*/
#define HAVE_STDDEF_H
/* Define this if your system has an ANSI-conforming <stdlib.h> file.
*/
#define HAVE_STDLIB_H
/* Define this if your system does not have an ANSI/SysV <string.h>,
* but does have a BSD-style <strings.h>.
*/
#undef NEED_BSD_STRINGS
/* Define this if your system does not provide typedef size_t in any of the
* ANSI-standard places (stddef.h, stdlib.h, or stdio.h), but places it in
* <sys/types.h> instead.
*/
#undef NEED_SYS_TYPES_H
/* For 80x86 machines, you need to define NEED_FAR_POINTERS,
* unless you are using a large-data memory model or 80386 flat-memory mode.
* On less brain-damaged CPUs this symbol must not be defined.
* (Defining this symbol causes large data structures to be referenced through
* "far" pointers and to be allocated with a special version of malloc.)
*/
#undef NEED_FAR_POINTERS
/* Define this if your linker needs global names to be unique in less
* than the first 15 characters.
*/
#undef NEED_SHORT_EXTERNAL_NAMES
/* Although a real ANSI C compiler can deal perfectly well with pointers to
* unspecified structures (see "incomplete types" in the spec), a few pre-ANSI
* and pseudo-ANSI compilers get confused. To keep one of these bozos happy,
* define INCOMPLETE_TYPES_BROKEN. This is not recommended unless you
* actually get "missing structure definition" warnings or errors while
* compiling the JPEG code.
*/
#undef INCOMPLETE_TYPES_BROKEN
/*
* The following options affect code selection within the JPEG library,
* but they don't need to be visible to applications using the library.
* To minimize application namespace pollution, the symbols won't be
* defined unless JPEG_INTERNALS has been defined.
*/
#ifdef JPEG_INTERNALS
/* Define this if your compiler implements ">>" on signed values as a logical
* (unsigned) shift; leave it undefined if ">>" is a signed (arithmetic) shift,
* which is the normal and rational definition.
*/
#undef RIGHT_SHIFT_IS_UNSIGNED
#endif /* JPEG_INTERNALS */
/*
* The remaining options do not affect the JPEG library proper,
* but only the sample applications cjpeg/djpeg (see cjpeg.c, djpeg.c).
* Other applications can ignore these.
*/
#ifdef JPEG_CJPEG_DJPEG
/* These defines indicate which image (non-JPEG) file formats are allowed. */
#define BMP_SUPPORTED /* BMP image file format */
#define GIF_SUPPORTED /* GIF image file format */
#define PPM_SUPPORTED /* PBMPLUS PPM/PGM image file format */
#undef RLE_SUPPORTED /* Utah RLE image file format */
#define TARGA_SUPPORTED /* Targa image file format */
/* Define this if you want to name both input and output files on the command
* line, rather than using stdout and optionally stdin. You MUST do this if
* your system can't cope with binary I/O to stdin/stdout. See comments at
* head of cjpeg.c or djpeg.c.
*/
#undef TWO_FILE_COMMANDLINE
/* Define this if your system needs explicit cleanup of temporary files.
* This is crucial under MS-DOS, where the temporary "files" may be areas
* of extended memory; on most other systems it's not as important.
*/
#undef NEED_SIGNAL_CATCHER
/* By default, we open image files with fopen(...,"rb") or fopen(...,"wb").
* This is necessary on systems that distinguish text files from binary files,
* and is harmless on most systems that don't. If you have one of the rare
* systems that complains about the "b" spec, define this symbol.
*/
#undef DONT_USE_B_MODE
/* Define this if you want percent-done progress reports from cjpeg/djpeg.
*/
#undef PROGRESS_REPORT
#endif /* JPEG_CJPEG_DJPEG */

Просмотреть файл

@ -1,107 +1,59 @@
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is mozilla.org code.
*
* The Initial Developer of the Original Code is
* Netscape Communications Corporation.
* Portions created by the Initial Developer are Copyright (C) 1998
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
/* jconfig.h. Generated from jconfig.h.in by configure, then manually edited
for Mozilla. */
/*
* jconfig.h to configure the IJG JPEG library for the Mozilla/Netscape
* environment. Note that there are also Mozilla mods in jmorecfg.h.
*/
/* Export libjpeg v6.2's ABI. */
#define JPEG_LIB_VERSION 62
/* We assume an ANSI C or C++ compilation environment */
#define HAVE_PROTOTYPES
#define HAVE_UNSIGNED_CHAR
#define HAVE_UNSIGNED_SHORT
/* #define void char */
/* #define const */
#ifndef HAVE_STDDEF_H
#define HAVE_STDDEF_H
#endif /* HAVE_STDDEF_H */
#ifndef HAVE_STDLIB_H
#define HAVE_STDLIB_H
#endif /* HAVE_STDLIB_H */
#undef NEED_BSD_STRINGS
#undef NEED_SYS_TYPES_H
#undef NEED_FAR_POINTERS
#undef NEED_SHORT_EXTERNAL_NAMES
/* Define this if you get warnings about undefined structures. */
#undef INCOMPLETE_TYPES_BROKEN
/* Define if your compiler supports prototypes */
#define HAVE_PROTOTYPES 1
/* With this setting, the IJG code will work regardless of whether
* type "char" is signed or unsigned.
*/
#undef CHAR_IS_UNSIGNED
/* Define to 1 if you have the <stddef.h> header file. */
#define HAVE_STDDEF_H 1
/* Define to 1 if you have the <stdlib.h> header file. */
#define HAVE_STDLIB_H 1
/* defines that need not be visible to callers of the IJG library */
/* Define to 1 if the system has the type `unsigned char'. */
#define HAVE_UNSIGNED_CHAR 1
#ifdef JPEG_INTERNALS
/* Define to 1 if the system has the type `unsigned short'. */
#define HAVE_UNSIGNED_SHORT 1
/* If right shift of "long" quantities is unsigned on your machine,
* you'll have to define this. Fortunately few people should need it.
*/
#undef RIGHT_SHIFT_IS_UNSIGNED
/* Define if you want use complete types */
/* #define INCOMPLETE_TYPES_BROKEN 1 */
#endif /* JPEG_INTERNALS */
/* Define if you have BSD-like bzero and bcopy */
/* #undef NEED_BSD_STRINGS */
/* Define if you need short function names */
/* #undef NEED_SHORT_EXTERNAL_NAMES */
/* these defines are not interesting for building just the IJG library,
* but we leave 'em here anyway.
*/
#ifdef JPEG_CJPEG_DJPEG
/* Define if you have sys/types.h */
#define NEED_SYS_TYPES_H 1
#define BMP_SUPPORTED /* BMP image file format */
#define GIF_SUPPORTED /* GIF image file format */
#define PPM_SUPPORTED /* PBMPLUS PPM/PGM image file format */
#undef RLE_SUPPORTED /* Utah RLE image file format */
#define TARGA_SUPPORTED /* Targa image file format */
/* Define if shift is unsigned */
/* #undef RIGHT_SHIFT_IS_UNSIGNED */
#undef TWO_FILE_COMMANDLINE
#undef NEED_SIGNAL_CATCHER
#undef DONT_USE_B_MODE
#undef PROGRESS_REPORT
/* Use accelerated SIMD routines. */
#define WITH_SIMD 1
#endif /* JPEG_CJPEG_DJPEG */
/* Define to 1 if type `char' is unsigned and you are not using gcc. */
#ifndef __CHAR_UNSIGNED__
/* # undef __CHAR_UNSIGNED__ */
#endif
/* SSE* alignment support - only use on platforms that support declspec and __attribute__ */
/* Define to empty if `const' does not conform to ANSI C. */
/* #undef const */
#if defined(XP_WIN32) && defined(_M_IX86) && !defined(__GNUC__)
#define ALIGN16_const_vector_short(name) __declspec(align(16)) const short name[8]
#define ALIGN16_const_vector_uchar(name) __declspec(align(16)) const unsigned char name[16]
#else
#define ALIGN16_const_vector_short(name) const short name[8] __attribute__ ((aligned (16)))
#define ALIGN16_const_vector_uchar(name) const unsigned char name[16] __attribute__ ((aligned (16)))
#endif /* ! XP_WIN32 && _M_IX86 && !__GNUC */
/* Define to `__inline__' or `__inline' if that's what the C compiler
calls it, or to nothing if 'inline' is not supported under any name. */
#ifndef __cplusplus
/* #undef inline */
#endif
/* Define to `unsigned int' if <sys/types.h> does not define. */
/* #undef size_t */
/* MOZILLA CHANGE: libjpeg-turbo doesn't define INLINE in its config file, so
* we define it here. */
#define INLINE NS_ALWAYS_INLINE

60
jpeg/jconfig.h.in Normal file
Просмотреть файл

@ -0,0 +1,60 @@
/* Version ID for the JPEG library.
* Might be useful for tests like "#if JPEG_LIB_VERSION >= 60".
*/
#define JPEG_LIB_VERSION 62 /* Version 6b */
/* Support arithmetic encoding */
#undef C_ARITH_CODING_SUPPORTED
/* Support arithmetic decoding */
#undef D_ARITH_CODING_SUPPORTED
/* Define if your compiler supports prototypes */
#undef HAVE_PROTOTYPES
/* Define to 1 if you have the <stddef.h> header file. */
#undef HAVE_STDDEF_H
/* Define to 1 if you have the <stdlib.h> header file. */
#undef HAVE_STDLIB_H
/* Define to 1 if the system has the type `unsigned char'. */
#undef HAVE_UNSIGNED_CHAR
/* Define to 1 if the system has the type `unsigned short'. */
#undef HAVE_UNSIGNED_SHORT
/* Define if you want use complete types */
#undef INCOMPLETE_TYPES_BROKEN
/* Define if you have BSD-like bzero and bcopy */
#undef NEED_BSD_STRINGS
/* Define if you need short function names */
#undef NEED_SHORT_EXTERNAL_NAMES
/* Define if you have sys/types.h */
#undef NEED_SYS_TYPES_H
/* Define if shift is unsigned */
#undef RIGHT_SHIFT_IS_UNSIGNED
/* Use accelerated SIMD routines. */
#undef WITH_SIMD
/* Define to 1 if type `char' is unsigned and you are not using gcc. */
#ifndef __CHAR_UNSIGNED__
# undef __CHAR_UNSIGNED__
#endif
/* Define to empty if `const' does not conform to ANSI C. */
#undef const
/* Define to `__inline__' or `__inline' if that's what the C compiler
calls it, or to nothing if 'inline' is not supported under any name. */
#ifndef __cplusplus
#undef inline
#endif
/* Define to `unsigned int' if <sys/types.h> does not define. */
#undef size_t

Просмотреть файл

@ -1,38 +0,0 @@
/* jconfig.wat --- jconfig.h for Watcom C/C++ on MS-DOS or OS/2. */
/* see jconfig.doc for explanations */
#define HAVE_PROTOTYPES
#define HAVE_UNSIGNED_CHAR
#define HAVE_UNSIGNED_SHORT
/* #define void char */
/* #define const */
#define CHAR_IS_UNSIGNED
#define HAVE_STDDEF_H
#define HAVE_STDLIB_H
#undef NEED_BSD_STRINGS
#undef NEED_SYS_TYPES_H
#undef NEED_FAR_POINTERS /* Watcom uses flat 32-bit addressing */
#undef NEED_SHORT_EXTERNAL_NAMES
#undef INCOMPLETE_TYPES_BROKEN
#ifdef JPEG_INTERNALS
#undef RIGHT_SHIFT_IS_UNSIGNED
#endif /* JPEG_INTERNALS */
#ifdef JPEG_CJPEG_DJPEG
#define BMP_SUPPORTED /* BMP image file format */
#define GIF_SUPPORTED /* GIF image file format */
#define PPM_SUPPORTED /* PBMPLUS PPM/PGM image file format */
#undef RLE_SUPPORTED /* Utah RLE image file format */
#define TARGA_SUPPORTED /* Targa image file format */
#undef TWO_FILE_COMMANDLINE /* optional */
#define USE_SETMODE /* Needed to make one-file style work in Watcom */
#undef NEED_SIGNAL_CATCHER /* Define this if you use jmemname.c */
#undef DONT_USE_B_MODE
#undef PROGRESS_REPORT /* optional */
#endif /* JPEG_CJPEG_DJPEG */

Просмотреть файл

@ -2,6 +2,8 @@
* jcparam.c
*
* Copyright (C) 1991-1998, Thomas G. Lane.
* Modified 2003-2008 by Guido Vollbeding.
* Copyright (C) 2009-2010, D. R. Commander.
* This file is part of the Independent JPEG Group's software.
* For conditions of distribution and use, see the accompanying README file.
*
@ -60,6 +62,49 @@ jpeg_add_quant_table (j_compress_ptr cinfo, int which_tbl,
}
/* These are the sample quantization tables given in JPEG spec section K.1.
* The spec says that the values given produce "good" quality, and
* when divided by 2, "very good" quality.
*/
static const unsigned int std_luminance_quant_tbl[DCTSIZE2] = {
16, 11, 10, 16, 24, 40, 51, 61,
12, 12, 14, 19, 26, 58, 60, 55,
14, 13, 16, 24, 40, 57, 69, 56,
14, 17, 22, 29, 51, 87, 80, 62,
18, 22, 37, 56, 68, 109, 103, 77,
24, 35, 55, 64, 81, 104, 113, 92,
49, 64, 78, 87, 103, 121, 120, 101,
72, 92, 95, 98, 112, 100, 103, 99
};
static const unsigned int std_chrominance_quant_tbl[DCTSIZE2] = {
17, 18, 24, 47, 99, 99, 99, 99,
18, 21, 26, 66, 99, 99, 99, 99,
24, 26, 56, 99, 99, 99, 99, 99,
47, 66, 99, 99, 99, 99, 99, 99,
99, 99, 99, 99, 99, 99, 99, 99,
99, 99, 99, 99, 99, 99, 99, 99,
99, 99, 99, 99, 99, 99, 99, 99,
99, 99, 99, 99, 99, 99, 99, 99
};
#if JPEG_LIB_VERSION >= 70
GLOBAL(void)
jpeg_default_qtables (j_compress_ptr cinfo, boolean force_baseline)
/* Set or change the 'quality' (quantization) setting, using default tables
* and straight percentage-scaling quality scales.
* This entry point allows different scalings for luminance and chrominance.
*/
{
/* Set up two quantization tables using the specified scaling */
jpeg_add_quant_table(cinfo, 0, std_luminance_quant_tbl,
cinfo->q_scale_factor[0], force_baseline);
jpeg_add_quant_table(cinfo, 1, std_chrominance_quant_tbl,
cinfo->q_scale_factor[1], force_baseline);
}
#endif
GLOBAL(void)
jpeg_set_linear_quality (j_compress_ptr cinfo, int scale_factor,
boolean force_baseline)
@ -69,35 +114,10 @@ jpeg_set_linear_quality (j_compress_ptr cinfo, int scale_factor,
* applications that insist on a linear percentage scaling.
*/
{
/* These are the sample quantization tables given in JPEG spec section K.1.
* The spec says that the values given produce "good" quality, and
* when divided by 2, "very good" quality.
*/
static const unsigned int std_luminance_quant_tbl[DCTSIZE2] = {
16, 11, 10, 16, 24, 40, 51, 61,
12, 12, 14, 19, 26, 58, 60, 55,
14, 13, 16, 24, 40, 57, 69, 56,
14, 17, 22, 29, 51, 87, 80, 62,
18, 22, 37, 56, 68, 109, 103, 77,
24, 35, 55, 64, 81, 104, 113, 92,
49, 64, 78, 87, 103, 121, 120, 101,
72, 92, 95, 98, 112, 100, 103, 99
};
static const unsigned int std_chrominance_quant_tbl[DCTSIZE2] = {
17, 18, 24, 47, 99, 99, 99, 99,
18, 21, 26, 66, 99, 99, 99, 99,
24, 26, 56, 99, 99, 99, 99, 99,
47, 66, 99, 99, 99, 99, 99, 99,
99, 99, 99, 99, 99, 99, 99, 99,
99, 99, 99, 99, 99, 99, 99, 99,
99, 99, 99, 99, 99, 99, 99, 99,
99, 99, 99, 99, 99, 99, 99, 99
};
/* Set up two quantization tables using the specified scaling */
jpeg_add_quant_table(cinfo, 0, (const unsigned int *)std_luminance_quant_tbl,
jpeg_add_quant_table(cinfo, 0, std_luminance_quant_tbl,
scale_factor, force_baseline);
jpeg_add_quant_table(cinfo, 1, (const unsigned int *)std_chrominance_quant_tbl,
jpeg_add_quant_table(cinfo, 1, std_chrominance_quant_tbl,
scale_factor, force_baseline);
}
@ -284,6 +304,10 @@ jpeg_set_defaults (j_compress_ptr cinfo)
/* Initialize everything not dependent on the color space */
#if JPEG_LIB_VERSION >= 70
cinfo->scale_num = 1; /* 1:1 scaling */
cinfo->scale_denom = 1;
#endif
cinfo->data_precision = BITS_IN_JSAMPLE;
/* Set up two quantization tables using default quality of 75 */
jpeg_set_quality(cinfo, 75, TRUE);
@ -320,6 +344,11 @@ jpeg_set_defaults (j_compress_ptr cinfo)
/* By default, use the simpler non-cosited sampling alignment */
cinfo->CCIR601_sampling = FALSE;
#if JPEG_LIB_VERSION >= 70
/* By default, apply fancy downsampling */
cinfo->do_fancy_downsampling = TRUE;
#endif
/* No input smoothing */
cinfo->smoothing_factor = 0;
@ -363,6 +392,12 @@ jpeg_default_colorspace (j_compress_ptr cinfo)
jpeg_set_colorspace(cinfo, JCS_GRAYSCALE);
break;
case JCS_RGB:
case JCS_EXT_RGB:
case JCS_EXT_RGBX:
case JCS_EXT_BGR:
case JCS_EXT_BGRX:
case JCS_EXT_XBGR:
case JCS_EXT_XRGB:
jpeg_set_colorspace(cinfo, JCS_YCbCr);
break;
case JCS_YCbCr:

Просмотреть файл

@ -223,7 +223,6 @@ dump_buffer (phuff_entropy_ptr entropy)
* between calls, so 24 bits are sufficient.
*/
INLINE
LOCAL(void)
emit_bits (phuff_entropy_ptr entropy, unsigned int code, int size)
/* Emit some bits, unless we are in gather mode */
@ -276,7 +275,6 @@ flush_bits (phuff_entropy_ptr entropy)
* Emit (or just count) a Huffman symbol.
*/
INLINE
LOCAL(void)
emit_symbol (phuff_entropy_ptr entropy, int tbl_no, int symbol)
{

Просмотреть файл

@ -2,6 +2,7 @@
* jcsample.c
*
* Copyright (C) 1991-1996, Thomas G. Lane.
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
* This file is part of the Independent JPEG Group's software.
* For conditions of distribution and use, see the accompanying README file.
*
@ -48,6 +49,7 @@
#define JPEG_INTERNALS
#include "jinclude.h"
#include "jpeglib.h"
#include "jsimd.h"
/* Pointer to routine to downsample a single component */
@ -494,7 +496,10 @@ jinit_downsampler (j_compress_ptr cinfo)
} else if (compptr->h_samp_factor * 2 == cinfo->max_h_samp_factor &&
compptr->v_samp_factor == cinfo->max_v_samp_factor) {
smoothok = FALSE;
downsample->methods[ci] = h2v1_downsample;
if (jsimd_can_h2v1_downsample())
downsample->methods[ci] = jsimd_h2v1_downsample;
else
downsample->methods[ci] = h2v1_downsample;
} else if (compptr->h_samp_factor * 2 == cinfo->max_h_samp_factor &&
compptr->v_samp_factor * 2 == cinfo->max_v_samp_factor) {
#ifdef INPUT_SMOOTHING_SUPPORTED
@ -503,7 +508,10 @@ jinit_downsampler (j_compress_ptr cinfo)
downsample->pub.need_context_rows = TRUE;
} else
#endif
downsample->methods[ci] = h2v2_downsample;
if (jsimd_can_h2v2_downsample())
downsample->methods[ci] = jsimd_h2v2_downsample;
else
downsample->methods[ci] = h2v2_downsample;
} else if ((cinfo->max_h_samp_factor % compptr->h_samp_factor) == 0 &&
(cinfo->max_v_samp_factor % compptr->v_samp_factor) == 0) {
smoothok = FALSE;

Просмотреть файл

@ -20,62 +20,6 @@
#include "jinclude.h"
#include "jpeglib.h"
#ifdef HAVE_MMX_INTEL_MNEMONICS
#if _MSC_VER >= 1400
#include "intrin.h"
#else
/* no __cpuid intrinsic, use a manually rewritten replacement */
void __stdcall __cpuid( int CPUInfo[4], int InfoType )
{
int my_eax = 0, my_ebx = 0, my_ecx = 0, my_edx = 0;
__asm {
/* check eflags bit 21 to see if cpuid is supported */
pushfd /* save eflags to stack */
pop eax /* and put it in eax */
mov ecx, eax /* save a copy in ecx to compare against */
xor eax, 0x200000 /* toggle ID bit (bit 21) in eflags */
push eax /* save modified eflags to stack */
popfd /* set eflags register with modified value */
pushfd /* read eflags back out */
pop eax
xor eax, ecx /* check for modified eflags */
jz NOT_SUPPORTED /* cpuid not supported */
/* check to see if the requested cpuid type is supported */
xor eax, eax /* set eax to zero */
cpuid
cmp eax, InfoType
jl NOT_SUPPORTED /* the requested cpuid type is not supported */
/* actually make the cpuid call */
mov eax, InfoType
cpuid
mov my_eax, eax
mov my_ebx, ebx
mov my_ecx, ecx
mov my_edx, edx
NOT_SUPPORTED:
}
CPUInfo[0] = my_eax;
CPUInfo[1] = my_ebx;
CPUInfo[2] = my_ecx;
CPUInfo[3] = my_edx;
}
#endif /* _MSC_VER >= 1400 */
int MMXAvailable;
static int mmxsupport();
#endif
#ifdef HAVE_SSE2_INTRINSICS
int SSE2Available = 0;
#ifdef HAVE_SSE2_INTEL_MNEMONICS
static int sse2support();
#else
static int sse2supportGCC();
#endif /* HAVE_SSE2_INTEL_MNEMONICS */
#endif /* HAVE_SSE2_INTRINSICS */
/*
* Initialization of a JPEG decompression object.
@ -87,38 +31,6 @@ jpeg_CreateDecompress (j_decompress_ptr cinfo, int version, size_t structsize)
{
int i;
#ifdef HAVE_MMX_INTEL_MNEMONICS
static int cpuidDetected = 0;
if(!cpuidDetected)
{
MMXAvailable = mmxsupport();
#ifdef HAVE_SSE2_INTEL_MNEMONICS
/* only do the sse2 support check if mmx is supported (so
we know the processor supports cpuid) */
if (MMXAvailable)
SSE2Available = sse2support();
#endif
cpuidDetected = 1;
}
#else
#ifdef HAVE_SSE2_INTRINSICS
static int cpuidDetected = 0;
if(!cpuidDetected) {
SSE2Available = sse2supportGCC();
cpuidDetected = 1;
}
#endif /* HAVE_SSE2_INTRINSICS */
#endif /* HAVE_MMX_INTEL_MNEMONICS */
/* For debugging purposes, zero the whole master structure.
* But error manager pointer is already there, so save and restore it.
*/
/* Guard against version mismatches between library and caller. */
cinfo->mem = NULL; /* so jpeg_destroy knows mem mgr not called */
if (version != JPEG_LIB_VERSION)
@ -193,6 +105,7 @@ jpeg_abort_decompress (j_decompress_ptr cinfo)
jpeg_abort((j_common_ptr) cinfo); /* use common routine */
}
/*
* Set default decompression parameters.
*/
@ -480,51 +393,3 @@ jpeg_finish_decompress (j_decompress_ptr cinfo)
jpeg_abort((j_common_ptr) cinfo);
return TRUE;
}
#ifdef HAVE_MMX_INTEL_MNEMONICS
static int mmxsupport()
{
int CPUInfo[4];
__cpuid(CPUInfo, 1);
if (CPUInfo[3] & (0x1 << 23))
return 1;
else
return 0;
}
#endif
#ifdef HAVE_SSE2_INTEL_MNEMONICS
static int sse2support()
{
int CPUInfo[4];
__cpuid(CPUInfo, 1);
if (CPUInfo[3] & (0x1 << 26))
return 1;
else
return 2;
}
#else
#ifdef HAVE_SSE2_INTRINSICS
static int sse2supportGCC()
{
/* Mac Intel started with Core Duo chips which have SSE2 Support */
#if defined(__GNUC__) && defined(__i386__)
#if defined(XP_MACOSX)
return 1;
#endif /* XP_MACOSX */
#endif /* GNUC && i386 */
/* Add checking for SSE2 support for other platforms here */
/* We don't have SSE2 intrinsics support */
return 2;
}
#endif /* HAVE_SSE2_INTRINSICS */
#endif /* HAVE_SSE2_INTEL_MNEMONICS */

Просмотреть файл

@ -2,6 +2,7 @@
* jdapistd.c
*
* Copyright (C) 1994-1996, Thomas G. Lane.
* Copyright (C) 2010, D. R. Commander.
* This file is part of the Independent JPEG Group's software.
* For conditions of distribution and use, see the accompanying README file.
*
@ -17,6 +18,7 @@
#define JPEG_INTERNALS
#include "jinclude.h"
#include "jpeglib.h"
#include "jpegcomp.h"
/* Forward declarations */
@ -202,7 +204,7 @@ jpeg_read_raw_data (j_decompress_ptr cinfo, JSAMPIMAGE data,
}
/* Verify that at least one iMCU row can be returned. */
lines_per_iMCU_row = cinfo->max_v_samp_factor * cinfo->min_DCT_scaled_size;
lines_per_iMCU_row = cinfo->max_v_samp_factor * cinfo->_min_DCT_scaled_size;
if (max_lines < lines_per_iMCU_row)
ERREXIT(cinfo, JERR_BUFFER_SIZE);

761
jpeg/jdarith.c Normal file
Просмотреть файл

@ -0,0 +1,761 @@
/*
* jdarith.c
*
* Developed 1997-2009 by Guido Vollbeding.
* This file is part of the Independent JPEG Group's software.
* For conditions of distribution and use, see the accompanying README file.
*
* This file contains portable arithmetic entropy decoding routines for JPEG
* (implementing the ISO/IEC IS 10918-1 and CCITT Recommendation ITU-T T.81).
*
* Both sequential and progressive modes are supported in this single module.
*
* Suspension is not currently supported in this module.
*/
#define JPEG_INTERNALS
#include "jinclude.h"
#include "jpeglib.h"
/* Expanded entropy decoder object for arithmetic decoding. */
typedef struct {
struct jpeg_entropy_decoder pub; /* public fields */
INT32 c; /* C register, base of coding interval + input bit buffer */
INT32 a; /* A register, normalized size of coding interval */
int ct; /* bit shift counter, # of bits left in bit buffer part of C */
/* init: ct = -16 */
/* run: ct = 0..7 */
/* error: ct = -1 */
int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */
int dc_context[MAX_COMPS_IN_SCAN]; /* context index for DC conditioning */
unsigned int restarts_to_go; /* MCUs left in this restart interval */
/* Pointers to statistics areas (these workspaces have image lifespan) */
unsigned char * dc_stats[NUM_ARITH_TBLS];
unsigned char * ac_stats[NUM_ARITH_TBLS];
/* Statistics bin for coding with fixed probability 0.5 */
unsigned char fixed_bin[4];
} arith_entropy_decoder;
typedef arith_entropy_decoder * arith_entropy_ptr;
/* The following two definitions specify the allocation chunk size
* for the statistics area.
* According to sections F.1.4.4.1.3 and F.1.4.4.2, we need at least
* 49 statistics bins for DC, and 245 statistics bins for AC coding.
*
* We use a compact representation with 1 byte per statistics bin,
* thus the numbers directly represent byte sizes.
* This 1 byte per statistics bin contains the meaning of the MPS
* (more probable symbol) in the highest bit (mask 0x80), and the
* index into the probability estimation state machine table
* in the lower bits (mask 0x7F).
*/
#define DC_STAT_BINS 64
#define AC_STAT_BINS 256
LOCAL(int)
get_byte (j_decompress_ptr cinfo)
/* Read next input byte; we do not support suspension in this module. */
{
struct jpeg_source_mgr * src = cinfo->src;
if (src->bytes_in_buffer == 0)
if (! (*src->fill_input_buffer) (cinfo))
ERREXIT(cinfo, JERR_CANT_SUSPEND);
src->bytes_in_buffer--;
return GETJOCTET(*src->next_input_byte++);
}
/*
* The core arithmetic decoding routine (common in JPEG and JBIG).
* This needs to go as fast as possible.
* Machine-dependent optimization facilities
* are not utilized in this portable implementation.
* However, this code should be fairly efficient and
* may be a good base for further optimizations anyway.
*
* Return value is 0 or 1 (binary decision).
*
* Note: I've changed the handling of the code base & bit
* buffer register C compared to other implementations
* based on the standards layout & procedures.
* While it also contains both the actual base of the
* coding interval (16 bits) and the next-bits buffer,
* the cut-point between these two parts is floating
* (instead of fixed) with the bit shift counter CT.
* Thus, we also need only one (variable instead of
* fixed size) shift for the LPS/MPS decision, and
* we can get away with any renormalization update
* of C (except for new data insertion, of course).
*
* I've also introduced a new scheme for accessing
* the probability estimation state machine table,
* derived from Markus Kuhn's JBIG implementation.
*/
LOCAL(int)
arith_decode (j_decompress_ptr cinfo, unsigned char *st)
{
register arith_entropy_ptr e = (arith_entropy_ptr) cinfo->entropy;
register unsigned char nl, nm;
register INT32 qe, temp;
register int sv, data;
/* Renormalization & data input per section D.2.6 */
while (e->a < 0x8000L) {
if (--e->ct < 0) {
/* Need to fetch next data byte */
if (cinfo->unread_marker)
data = 0; /* stuff zero data */
else {
data = get_byte(cinfo); /* read next input byte */
if (data == 0xFF) { /* zero stuff or marker code */
do data = get_byte(cinfo);
while (data == 0xFF); /* swallow extra 0xFF bytes */
if (data == 0)
data = 0xFF; /* discard stuffed zero byte */
else {
/* Note: Different from the Huffman decoder, hitting
* a marker while processing the compressed data
* segment is legal in arithmetic coding.
* The convention is to supply zero data
* then until decoding is complete.
*/
cinfo->unread_marker = data;
data = 0;
}
}
}
e->c = (e->c << 8) | data; /* insert data into C register */
if ((e->ct += 8) < 0) /* update bit shift counter */
/* Need more initial bytes */
if (++e->ct == 0)
/* Got 2 initial bytes -> re-init A and exit loop */
e->a = 0x8000L; /* => e->a = 0x10000L after loop exit */
}
e->a <<= 1;
}
/* Fetch values from our compact representation of Table D.2:
* Qe values and probability estimation state machine
*/
sv = *st;
qe = jpeg_aritab[sv & 0x7F]; /* => Qe_Value */
nl = qe & 0xFF; qe >>= 8; /* Next_Index_LPS + Switch_MPS */
nm = qe & 0xFF; qe >>= 8; /* Next_Index_MPS */
/* Decode & estimation procedures per sections D.2.4 & D.2.5 */
temp = e->a - qe;
e->a = temp;
temp <<= e->ct;
if (e->c >= temp) {
e->c -= temp;
/* Conditional LPS (less probable symbol) exchange */
if (e->a < qe) {
e->a = qe;
*st = (sv & 0x80) ^ nm; /* Estimate_after_MPS */
} else {
e->a = qe;
*st = (sv & 0x80) ^ nl; /* Estimate_after_LPS */
sv ^= 0x80; /* Exchange LPS/MPS */
}
} else if (e->a < 0x8000L) {
/* Conditional MPS (more probable symbol) exchange */
if (e->a < qe) {
*st = (sv & 0x80) ^ nl; /* Estimate_after_LPS */
sv ^= 0x80; /* Exchange LPS/MPS */
} else {
*st = (sv & 0x80) ^ nm; /* Estimate_after_MPS */
}
}
return sv >> 7;
}
/*
* Check for a restart marker & resynchronize decoder.
*/
LOCAL(void)
process_restart (j_decompress_ptr cinfo)
{
arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
int ci;
jpeg_component_info * compptr;
/* Advance past the RSTn marker */
if (! (*cinfo->marker->read_restart_marker) (cinfo))
ERREXIT(cinfo, JERR_CANT_SUSPEND);
/* Re-initialize statistics areas */
for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
compptr = cinfo->cur_comp_info[ci];
if (! cinfo->progressive_mode || (cinfo->Ss == 0 && cinfo->Ah == 0)) {
MEMZERO(entropy->dc_stats[compptr->dc_tbl_no], DC_STAT_BINS);
/* Reset DC predictions to 0 */
entropy->last_dc_val[ci] = 0;
entropy->dc_context[ci] = 0;
}
if (! cinfo->progressive_mode || cinfo->Ss) {
MEMZERO(entropy->ac_stats[compptr->ac_tbl_no], AC_STAT_BINS);
}
}
/* Reset arithmetic decoding variables */
entropy->c = 0;
entropy->a = 0;
entropy->ct = -16; /* force reading 2 initial bytes to fill C */
/* Reset restart counter */
entropy->restarts_to_go = cinfo->restart_interval;
}
/*
* Arithmetic MCU decoding.
* Each of these routines decodes and returns one MCU's worth of
* arithmetic-compressed coefficients.
* The coefficients are reordered from zigzag order into natural array order,
* but are not dequantized.
*
* The i'th block of the MCU is stored into the block pointed to by
* MCU_data[i]. WE ASSUME THIS AREA IS INITIALLY ZEROED BY THE CALLER.
*/
/*
* MCU decoding for DC initial scan (either spectral selection,
* or first pass of successive approximation).
*/
METHODDEF(boolean)
decode_mcu_DC_first (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
{
arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
JBLOCKROW block;
unsigned char *st;
int blkn, ci, tbl, sign;
int v, m;
/* Process restart marker if needed */
if (cinfo->restart_interval) {
if (entropy->restarts_to_go == 0)
process_restart(cinfo);
entropy->restarts_to_go--;
}
if (entropy->ct == -1) return TRUE; /* if error do nothing */
/* Outer loop handles each block in the MCU */
for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
block = MCU_data[blkn];
ci = cinfo->MCU_membership[blkn];
tbl = cinfo->cur_comp_info[ci]->dc_tbl_no;
/* Sections F.2.4.1 & F.1.4.4.1: Decoding of DC coefficients */
/* Table F.4: Point to statistics bin S0 for DC coefficient coding */
st = entropy->dc_stats[tbl] + entropy->dc_context[ci];
/* Figure F.19: Decode_DC_DIFF */
if (arith_decode(cinfo, st) == 0)
entropy->dc_context[ci] = 0;
else {
/* Figure F.21: Decoding nonzero value v */
/* Figure F.22: Decoding the sign of v */
sign = arith_decode(cinfo, st + 1);
st += 2; st += sign;
/* Figure F.23: Decoding the magnitude category of v */
if ((m = arith_decode(cinfo, st)) != 0) {
st = entropy->dc_stats[tbl] + 20; /* Table F.4: X1 = 20 */
while (arith_decode(cinfo, st)) {
if ((m <<= 1) == 0x8000) {
WARNMS(cinfo, JWRN_ARITH_BAD_CODE);
entropy->ct = -1; /* magnitude overflow */
return TRUE;
}
st += 1;
}
}
/* Section F.1.4.4.1.2: Establish dc_context conditioning category */
if (m < (int) ((1L << cinfo->arith_dc_L[tbl]) >> 1))
entropy->dc_context[ci] = 0; /* zero diff category */
else if (m > (int) ((1L << cinfo->arith_dc_U[tbl]) >> 1))
entropy->dc_context[ci] = 12 + (sign * 4); /* large diff category */
else
entropy->dc_context[ci] = 4 + (sign * 4); /* small diff category */
v = m;
/* Figure F.24: Decoding the magnitude bit pattern of v */
st += 14;
while (m >>= 1)
if (arith_decode(cinfo, st)) v |= m;
v += 1; if (sign) v = -v;
entropy->last_dc_val[ci] += v;
}
/* Scale and output the DC coefficient (assumes jpeg_natural_order[0]=0) */
(*block)[0] = (JCOEF) (entropy->last_dc_val[ci] << cinfo->Al);
}
return TRUE;
}
/*
* MCU decoding for AC initial scan (either spectral selection,
* or first pass of successive approximation).
*/
METHODDEF(boolean)
decode_mcu_AC_first (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
{
arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
JBLOCKROW block;
unsigned char *st;
int tbl, sign, k;
int v, m;
/* Process restart marker if needed */
if (cinfo->restart_interval) {
if (entropy->restarts_to_go == 0)
process_restart(cinfo);
entropy->restarts_to_go--;
}
if (entropy->ct == -1) return TRUE; /* if error do nothing */
/* There is always only one block per MCU */
block = MCU_data[0];
tbl = cinfo->cur_comp_info[0]->ac_tbl_no;
/* Sections F.2.4.2 & F.1.4.4.2: Decoding of AC coefficients */
/* Figure F.20: Decode_AC_coefficients */
for (k = cinfo->Ss; k <= cinfo->Se; k++) {
st = entropy->ac_stats[tbl] + 3 * (k - 1);
if (arith_decode(cinfo, st)) break; /* EOB flag */
while (arith_decode(cinfo, st + 1) == 0) {
st += 3; k++;
if (k > cinfo->Se) {
WARNMS(cinfo, JWRN_ARITH_BAD_CODE);
entropy->ct = -1; /* spectral overflow */
return TRUE;
}
}
/* Figure F.21: Decoding nonzero value v */
/* Figure F.22: Decoding the sign of v */
sign = arith_decode(cinfo, entropy->fixed_bin);
st += 2;
/* Figure F.23: Decoding the magnitude category of v */
if ((m = arith_decode(cinfo, st)) != 0) {
if (arith_decode(cinfo, st)) {
m <<= 1;
st = entropy->ac_stats[tbl] +
(k <= cinfo->arith_ac_K[tbl] ? 189 : 217);
while (arith_decode(cinfo, st)) {
if ((m <<= 1) == 0x8000) {
WARNMS(cinfo, JWRN_ARITH_BAD_CODE);
entropy->ct = -1; /* magnitude overflow */
return TRUE;
}
st += 1;
}
}
}
v = m;
/* Figure F.24: Decoding the magnitude bit pattern of v */
st += 14;
while (m >>= 1)
if (arith_decode(cinfo, st)) v |= m;
v += 1; if (sign) v = -v;
/* Scale and output coefficient in natural (dezigzagged) order */
(*block)[jpeg_natural_order[k]] = (JCOEF) (v << cinfo->Al);
}
return TRUE;
}
/*
* MCU decoding for DC successive approximation refinement scan.
*/
METHODDEF(boolean)
decode_mcu_DC_refine (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
{
arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
unsigned char *st;
int p1, blkn;
/* Process restart marker if needed */
if (cinfo->restart_interval) {
if (entropy->restarts_to_go == 0)
process_restart(cinfo);
entropy->restarts_to_go--;
}
st = entropy->fixed_bin; /* use fixed probability estimation */
p1 = 1 << cinfo->Al; /* 1 in the bit position being coded */
/* Outer loop handles each block in the MCU */
for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
/* Encoded data is simply the next bit of the two's-complement DC value */
if (arith_decode(cinfo, st))
MCU_data[blkn][0][0] |= p1;
}
return TRUE;
}
/*
* MCU decoding for AC successive approximation refinement scan.
*/
METHODDEF(boolean)
decode_mcu_AC_refine (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
{
arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
JBLOCKROW block;
JCOEFPTR thiscoef;
unsigned char *st;
int tbl, k, kex;
int p1, m1;
/* Process restart marker if needed */
if (cinfo->restart_interval) {
if (entropy->restarts_to_go == 0)
process_restart(cinfo);
entropy->restarts_to_go--;
}
if (entropy->ct == -1) return TRUE; /* if error do nothing */
/* There is always only one block per MCU */
block = MCU_data[0];
tbl = cinfo->cur_comp_info[0]->ac_tbl_no;
p1 = 1 << cinfo->Al; /* 1 in the bit position being coded */
m1 = (-1) << cinfo->Al; /* -1 in the bit position being coded */
/* Establish EOBx (previous stage end-of-block) index */
for (kex = cinfo->Se; kex > 0; kex--)
if ((*block)[jpeg_natural_order[kex]]) break;
for (k = cinfo->Ss; k <= cinfo->Se; k++) {
st = entropy->ac_stats[tbl] + 3 * (k - 1);
if (k > kex)
if (arith_decode(cinfo, st)) break; /* EOB flag */
for (;;) {
thiscoef = *block + jpeg_natural_order[k];
if (*thiscoef) { /* previously nonzero coef */
if (arith_decode(cinfo, st + 2)) {
if (*thiscoef < 0)
*thiscoef += m1;
else
*thiscoef += p1;
}
break;
}
if (arith_decode(cinfo, st + 1)) { /* newly nonzero coef */
if (arith_decode(cinfo, entropy->fixed_bin))
*thiscoef = m1;
else
*thiscoef = p1;
break;
}
st += 3; k++;
if (k > cinfo->Se) {
WARNMS(cinfo, JWRN_ARITH_BAD_CODE);
entropy->ct = -1; /* spectral overflow */
return TRUE;
}
}
}
return TRUE;
}
/*
* Decode one MCU's worth of arithmetic-compressed coefficients.
*/
METHODDEF(boolean)
decode_mcu (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
{
arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
jpeg_component_info * compptr;
JBLOCKROW block;
unsigned char *st;
int blkn, ci, tbl, sign, k;
int v, m;
/* Process restart marker if needed */
if (cinfo->restart_interval) {
if (entropy->restarts_to_go == 0)
process_restart(cinfo);
entropy->restarts_to_go--;
}
if (entropy->ct == -1) return TRUE; /* if error do nothing */
/* Outer loop handles each block in the MCU */
for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
block = MCU_data[blkn];
ci = cinfo->MCU_membership[blkn];
compptr = cinfo->cur_comp_info[ci];
/* Sections F.2.4.1 & F.1.4.4.1: Decoding of DC coefficients */
tbl = compptr->dc_tbl_no;
/* Table F.4: Point to statistics bin S0 for DC coefficient coding */
st = entropy->dc_stats[tbl] + entropy->dc_context[ci];
/* Figure F.19: Decode_DC_DIFF */
if (arith_decode(cinfo, st) == 0)
entropy->dc_context[ci] = 0;
else {
/* Figure F.21: Decoding nonzero value v */
/* Figure F.22: Decoding the sign of v */
sign = arith_decode(cinfo, st + 1);
st += 2; st += sign;
/* Figure F.23: Decoding the magnitude category of v */
if ((m = arith_decode(cinfo, st)) != 0) {
st = entropy->dc_stats[tbl] + 20; /* Table F.4: X1 = 20 */
while (arith_decode(cinfo, st)) {
if ((m <<= 1) == 0x8000) {
WARNMS(cinfo, JWRN_ARITH_BAD_CODE);
entropy->ct = -1; /* magnitude overflow */
return TRUE;
}
st += 1;
}
}
/* Section F.1.4.4.1.2: Establish dc_context conditioning category */
if (m < (int) ((1L << cinfo->arith_dc_L[tbl]) >> 1))
entropy->dc_context[ci] = 0; /* zero diff category */
else if (m > (int) ((1L << cinfo->arith_dc_U[tbl]) >> 1))
entropy->dc_context[ci] = 12 + (sign * 4); /* large diff category */
else
entropy->dc_context[ci] = 4 + (sign * 4); /* small diff category */
v = m;
/* Figure F.24: Decoding the magnitude bit pattern of v */
st += 14;
while (m >>= 1)
if (arith_decode(cinfo, st)) v |= m;
v += 1; if (sign) v = -v;
entropy->last_dc_val[ci] += v;
}
(*block)[0] = (JCOEF) entropy->last_dc_val[ci];
/* Sections F.2.4.2 & F.1.4.4.2: Decoding of AC coefficients */
tbl = compptr->ac_tbl_no;
/* Figure F.20: Decode_AC_coefficients */
for (k = 1; k <= DCTSIZE2 - 1; k++) {
st = entropy->ac_stats[tbl] + 3 * (k - 1);
if (arith_decode(cinfo, st)) break; /* EOB flag */
while (arith_decode(cinfo, st + 1) == 0) {
st += 3; k++;
if (k > DCTSIZE2 - 1) {
WARNMS(cinfo, JWRN_ARITH_BAD_CODE);
entropy->ct = -1; /* spectral overflow */
return TRUE;
}
}
/* Figure F.21: Decoding nonzero value v */
/* Figure F.22: Decoding the sign of v */
sign = arith_decode(cinfo, entropy->fixed_bin);
st += 2;
/* Figure F.23: Decoding the magnitude category of v */
if ((m = arith_decode(cinfo, st)) != 0) {
if (arith_decode(cinfo, st)) {
m <<= 1;
st = entropy->ac_stats[tbl] +
(k <= cinfo->arith_ac_K[tbl] ? 189 : 217);
while (arith_decode(cinfo, st)) {
if ((m <<= 1) == 0x8000) {
WARNMS(cinfo, JWRN_ARITH_BAD_CODE);
entropy->ct = -1; /* magnitude overflow */
return TRUE;
}
st += 1;
}
}
}
v = m;
/* Figure F.24: Decoding the magnitude bit pattern of v */
st += 14;
while (m >>= 1)
if (arith_decode(cinfo, st)) v |= m;
v += 1; if (sign) v = -v;
(*block)[jpeg_natural_order[k]] = (JCOEF) v;
}
}
return TRUE;
}
/*
* Initialize for an arithmetic-compressed scan.
*/
METHODDEF(void)
start_pass (j_decompress_ptr cinfo)
{
arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
int ci, tbl;
jpeg_component_info * compptr;
if (cinfo->progressive_mode) {
/* Validate progressive scan parameters */
if (cinfo->Ss == 0) {
if (cinfo->Se != 0)
goto bad;
} else {
/* need not check Ss/Se < 0 since they came from unsigned bytes */
if (cinfo->Se < cinfo->Ss || cinfo->Se > DCTSIZE2 - 1)
goto bad;
/* AC scans may have only one component */
if (cinfo->comps_in_scan != 1)
goto bad;
}
if (cinfo->Ah != 0) {
/* Successive approximation refinement scan: must have Al = Ah-1. */
if (cinfo->Ah-1 != cinfo->Al)
goto bad;
}
if (cinfo->Al > 13) { /* need not check for < 0 */
bad:
ERREXIT4(cinfo, JERR_BAD_PROGRESSION,
cinfo->Ss, cinfo->Se, cinfo->Ah, cinfo->Al);
}
/* Update progression status, and verify that scan order is legal.
* Note that inter-scan inconsistencies are treated as warnings
* not fatal errors ... not clear if this is right way to behave.
*/
for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
int coefi, cindex = cinfo->cur_comp_info[ci]->component_index;
int *coef_bit_ptr = & cinfo->coef_bits[cindex][0];
if (cinfo->Ss && coef_bit_ptr[0] < 0) /* AC without prior DC scan */
WARNMS2(cinfo, JWRN_BOGUS_PROGRESSION, cindex, 0);
for (coefi = cinfo->Ss; coefi <= cinfo->Se; coefi++) {
int expected = (coef_bit_ptr[coefi] < 0) ? 0 : coef_bit_ptr[coefi];
if (cinfo->Ah != expected)
WARNMS2(cinfo, JWRN_BOGUS_PROGRESSION, cindex, coefi);
coef_bit_ptr[coefi] = cinfo->Al;
}
}
/* Select MCU decoding routine */
if (cinfo->Ah == 0) {
if (cinfo->Ss == 0)
entropy->pub.decode_mcu = decode_mcu_DC_first;
else
entropy->pub.decode_mcu = decode_mcu_AC_first;
} else {
if (cinfo->Ss == 0)
entropy->pub.decode_mcu = decode_mcu_DC_refine;
else
entropy->pub.decode_mcu = decode_mcu_AC_refine;
}
} else {
/* Check that the scan parameters Ss, Se, Ah/Al are OK for sequential JPEG.
* This ought to be an error condition, but we make it a warning.
*/
if (cinfo->Ss != 0 || cinfo->Ah != 0 || cinfo->Al != 0 ||
(cinfo->Se < DCTSIZE2 && cinfo->Se != DCTSIZE2 - 1))
WARNMS(cinfo, JWRN_NOT_SEQUENTIAL);
/* Select MCU decoding routine */
entropy->pub.decode_mcu = decode_mcu;
}
/* Allocate & initialize requested statistics areas */
for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
compptr = cinfo->cur_comp_info[ci];
if (! cinfo->progressive_mode || (cinfo->Ss == 0 && cinfo->Ah == 0)) {
tbl = compptr->dc_tbl_no;
if (tbl < 0 || tbl >= NUM_ARITH_TBLS)
ERREXIT1(cinfo, JERR_NO_ARITH_TABLE, tbl);
if (entropy->dc_stats[tbl] == NULL)
entropy->dc_stats[tbl] = (unsigned char *) (*cinfo->mem->alloc_small)
((j_common_ptr) cinfo, JPOOL_IMAGE, DC_STAT_BINS);
MEMZERO(entropy->dc_stats[tbl], DC_STAT_BINS);
/* Initialize DC predictions to 0 */
entropy->last_dc_val[ci] = 0;
entropy->dc_context[ci] = 0;
}
if (! cinfo->progressive_mode || cinfo->Ss) {
tbl = compptr->ac_tbl_no;
if (tbl < 0 || tbl >= NUM_ARITH_TBLS)
ERREXIT1(cinfo, JERR_NO_ARITH_TABLE, tbl);
if (entropy->ac_stats[tbl] == NULL)
entropy->ac_stats[tbl] = (unsigned char *) (*cinfo->mem->alloc_small)
((j_common_ptr) cinfo, JPOOL_IMAGE, AC_STAT_BINS);
MEMZERO(entropy->ac_stats[tbl], AC_STAT_BINS);
}
}
/* Initialize arithmetic decoding variables */
entropy->c = 0;
entropy->a = 0;
entropy->ct = -16; /* force reading 2 initial bytes to fill C */
/* Initialize restart counter */
entropy->restarts_to_go = cinfo->restart_interval;
}
/*
* Module initialization routine for arithmetic entropy decoding.
*/
GLOBAL(void)
jinit_arith_decoder (j_decompress_ptr cinfo)
{
arith_entropy_ptr entropy;
int i;
entropy = (arith_entropy_ptr)
(*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
SIZEOF(arith_entropy_decoder));
cinfo->entropy = (struct jpeg_entropy_decoder *) entropy;
entropy->pub.start_pass = start_pass;
/* Mark tables unallocated */
for (i = 0; i < NUM_ARITH_TBLS; i++) {
entropy->dc_stats[i] = NULL;
entropy->ac_stats[i] = NULL;
}
/* Initialize index for fixed probability estimation */
entropy->fixed_bin[0] = 113;
if (cinfo->progressive_mode) {
/* Create progression status table */
int *coef_bit_ptr, ci;
cinfo->coef_bits = (int (*)[DCTSIZE2])
(*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
cinfo->num_components*DCTSIZE2*SIZEOF(int));
coef_bit_ptr = & cinfo->coef_bits[0][0];
for (ci = 0; ci < cinfo->num_components; ci++)
for (i = 0; i < DCTSIZE2; i++)
*coef_bit_ptr++ = -1;
}
}

Просмотреть файл

@ -2,13 +2,14 @@
* jdatadst.c
*
* Copyright (C) 1994-1996, Thomas G. Lane.
* Modified 2009 by Guido Vollbeding.
* This file is part of the Independent JPEG Group's software.
* For conditions of distribution and use, see the accompanying README file.
*
* This file contains compression data destination routines for the case of
* emitting JPEG data to a file (or any stdio stream). While these routines
* are sufficient for most applications, some will want to use a different
* destination manager.
* emitting JPEG data to memory or to a file (or any stdio stream).
* While these routines are sufficient for most applications,
* some will want to use a different destination manager.
* IMPORTANT: we assume that fwrite() will correctly transcribe an array of
* JOCTETs into 8-bit-wide elements on external storage. If char is wider
* than 8 bits on your machine, you may need to do some tweaking.
@ -19,6 +20,11 @@
#include "jpeglib.h"
#include "jerror.h"
#ifndef HAVE_STDLIB_H /* <stdlib.h> should declare malloc(),free() */
extern void * malloc JPP((size_t size));
extern void free JPP((void *ptr));
#endif
/* Expanded data destination object for stdio output */
@ -34,6 +40,23 @@ typedef my_destination_mgr * my_dest_ptr;
#define OUTPUT_BUF_SIZE 4096 /* choose an efficiently fwrite'able size */
#if JPEG_LIB_VERSION >= 80
/* Expanded data destination object for memory output */
typedef struct {
struct jpeg_destination_mgr pub; /* public fields */
unsigned char ** outbuffer; /* target buffer */
unsigned long * outsize;
unsigned char * newbuffer; /* newly allocated buffer */
JOCTET * buffer; /* start of buffer */
size_t bufsize;
} my_mem_destination_mgr;
typedef my_mem_destination_mgr * my_mem_dest_ptr;
#endif
/*
* Initialize destination --- called by jpeg_start_compress
* before any data is actually written.
@ -53,6 +76,14 @@ init_destination (j_compress_ptr cinfo)
dest->pub.free_in_buffer = OUTPUT_BUF_SIZE;
}
#if JPEG_LIB_VERSION >= 80
METHODDEF(void)
init_mem_destination (j_compress_ptr cinfo)
{
/* no work necessary here */
}
#endif
/*
* Empty the output buffer --- called whenever buffer fills up.
@ -92,6 +123,38 @@ empty_output_buffer (j_compress_ptr cinfo)
return TRUE;
}
#if JPEG_LIB_VERSION >= 80
METHODDEF(boolean)
empty_mem_output_buffer (j_compress_ptr cinfo)
{
size_t nextsize;
JOCTET * nextbuffer;
my_mem_dest_ptr dest = (my_mem_dest_ptr) cinfo->dest;
/* Try to allocate new buffer with double size */
nextsize = dest->bufsize * 2;
nextbuffer = malloc(nextsize);
if (nextbuffer == NULL)
ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, 10);
MEMCOPY(nextbuffer, dest->buffer, dest->bufsize);
if (dest->newbuffer != NULL)
free(dest->newbuffer);
dest->newbuffer = nextbuffer;
dest->pub.next_output_byte = nextbuffer + dest->bufsize;
dest->pub.free_in_buffer = dest->bufsize;
dest->buffer = nextbuffer;
dest->bufsize = nextsize;
return TRUE;
}
#endif
/*
* Terminate destination --- called by jpeg_finish_compress
@ -119,6 +182,17 @@ term_destination (j_compress_ptr cinfo)
ERREXIT(cinfo, JERR_FILE_WRITE);
}
#if JPEG_LIB_VERSION >= 80
METHODDEF(void)
term_mem_destination (j_compress_ptr cinfo)
{
my_mem_dest_ptr dest = (my_mem_dest_ptr) cinfo->dest;
*dest->outbuffer = dest->buffer;
*dest->outsize = dest->bufsize - dest->pub.free_in_buffer;
}
#endif
/*
* Prepare for output to a stdio stream.
@ -150,65 +224,54 @@ jpeg_stdio_dest (j_compress_ptr cinfo, FILE * outfile)
dest->outfile = outfile;
}
#if JPEG_LIB_VERSION >= 80
/*
* term_destination_file_close --- called by jpeg_finish_compress
* after all data has been written. Usually needs to flush buffer.
* also will need to close file
* NB: *not* called by jpeg_abort or jpeg_destroy; surrounding
* application must deal with any cleanup that should happen even
* for error exit.
*/
METHODDEF(void)
term_destination_file_close(j_compress_ptr cinfo)
{
my_dest_ptr dest = (my_dest_ptr) cinfo->dest;
size_t datacount = OUTPUT_BUF_SIZE - dest->pub.free_in_buffer;
/* Write any data remaining in the buffer */
if (datacount > 0) {
if (JFWRITE(dest->outfile, dest->buffer, datacount) != datacount)
ERREXIT(cinfo, JERR_FILE_WRITE);
}
fflush(dest->outfile);
/* Make sure we wrote the output file OK */
if (ferror(dest->outfile))
ERREXIT(cinfo, JERR_FILE_WRITE);
else
fclose(dest->outfile);
}
/*
* Prepare for output to a file from a char *
* The caller is responsible
* for closing it after finishing compression.
* Prepare for output to a memory buffer.
* The caller may supply an own initial buffer with appropriate size.
* Otherwise, or when the actual data output exceeds the given size,
* the library adapts the buffer size as necessary.
* The standard library functions malloc/free are used for allocating
* larger memory, so the buffer is available to the application after
* finishing compression, and then the application is responsible for
* freeing the requested memory.
*/
GLOBAL(void)
jpeg_file_dest (j_compress_ptr cinfo, char * outfile)
jpeg_mem_dest (j_compress_ptr cinfo,
unsigned char ** outbuffer, unsigned long * outsize)
{
my_dest_ptr dest;
my_mem_dest_ptr dest;
if (outbuffer == NULL || outsize == NULL) /* sanity check */
ERREXIT(cinfo, JERR_BUFFER_SIZE);
/* The destination object is made permanent so that multiple JPEG images
* can be written to the same file without re-executing jpeg_stdio_dest.
* This makes it dangerous to use this manager and a different destination
* manager serially with the same JPEG object, because their private object
* sizes may be different. Caveat programmer.
* can be written to the same buffer without re-executing jpeg_mem_dest.
*/
if (cinfo->dest == NULL) { /* first time for this JPEG object? */
cinfo->dest = (struct jpeg_destination_mgr *)
(*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT,
SIZEOF(my_destination_mgr));
SIZEOF(my_mem_destination_mgr));
}
dest = (my_dest_ptr) cinfo->dest;
dest->pub.init_destination = init_destination;
dest->pub.empty_output_buffer = empty_output_buffer;
dest->pub.term_destination = term_destination_file_close;
dest->outfile = fopen(outfile,"wb");
dest = (my_mem_dest_ptr) cinfo->dest;
dest->pub.init_destination = init_mem_destination;
dest->pub.empty_output_buffer = empty_mem_output_buffer;
dest->pub.term_destination = term_mem_destination;
dest->outbuffer = outbuffer;
dest->outsize = outsize;
dest->newbuffer = NULL;
if (*outbuffer == NULL || *outsize == 0) {
/* Allocate initial buffer */
dest->newbuffer = *outbuffer = malloc(OUTPUT_BUF_SIZE);
if (dest->newbuffer == NULL)
ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, 10);
*outsize = OUTPUT_BUF_SIZE;
}
dest->pub.next_output_byte = dest->buffer = *outbuffer;
dest->pub.free_in_buffer = dest->bufsize = *outsize;
}
#endif

Просмотреть файл

@ -2,13 +2,14 @@
* jdatasrc.c
*
* Copyright (C) 1994-1996, Thomas G. Lane.
* Modified 2009-2010 by Guido Vollbeding.
* This file is part of the Independent JPEG Group's software.
* For conditions of distribution and use, see the accompanying README file.
*
* This file contains decompression data source routines for the case of
* reading JPEG data from a file (or any stdio stream). While these routines
* are sufficient for most applications, some will want to use a different
* source manager.
* reading JPEG data from memory or from a file (or any stdio stream).
* While these routines are sufficient for most applications,
* some will want to use a different source manager.
* IMPORTANT: we assume that fread() will correctly transcribe an array of
* JOCTETs from 8-bit-wide elements on external storage. If char is wider
* than 8 bits on your machine, you may need to do some tweaking.
@ -52,6 +53,14 @@ init_source (j_decompress_ptr cinfo)
src->start_of_file = TRUE;
}
#if JPEG_LIB_VERSION >= 80
METHODDEF(void)
init_mem_source (j_decompress_ptr cinfo)
{
/* no work necessary here */
}
#endif
/*
* Fill the input buffer --- called whenever buffer is emptied.
@ -111,6 +120,28 @@ fill_input_buffer (j_decompress_ptr cinfo)
return TRUE;
}
#if JPEG_LIB_VERSION >= 80
METHODDEF(boolean)
fill_mem_input_buffer (j_decompress_ptr cinfo)
{
static JOCTET mybuffer[4];
/* The whole JPEG data is expected to reside in the supplied memory
* buffer, so any request for more data beyond the given buffer size
* is treated as an error.
*/
WARNMS(cinfo, JWRN_JPEG_EOF);
/* Insert a fake EOI marker */
mybuffer[0] = (JOCTET) 0xFF;
mybuffer[1] = (JOCTET) JPEG_EOI;
cinfo->src->next_input_byte = mybuffer;
cinfo->src->bytes_in_buffer = 2;
return TRUE;
}
#endif
/*
* Skip data --- used to skip over a potentially large amount of
@ -127,22 +158,22 @@ fill_input_buffer (j_decompress_ptr cinfo)
METHODDEF(void)
skip_input_data (j_decompress_ptr cinfo, long num_bytes)
{
my_src_ptr src = (my_src_ptr) cinfo->src;
struct jpeg_source_mgr * src = cinfo->src;
/* Just a dumb implementation for now. Could use fseek() except
* it doesn't work on pipes. Not clear that being smart is worth
* any trouble anyway --- large skips are infrequent.
*/
if (num_bytes > 0) {
while (num_bytes > (long) src->pub.bytes_in_buffer) {
num_bytes -= (long) src->pub.bytes_in_buffer;
(void) fill_input_buffer(cinfo);
while (num_bytes > (long) src->bytes_in_buffer) {
num_bytes -= (long) src->bytes_in_buffer;
(void) (*src->fill_input_buffer) (cinfo);
/* note we assume that fill_input_buffer will never return FALSE,
* so suspension need not be handled.
*/
}
src->pub.next_input_byte += (size_t) num_bytes;
src->pub.bytes_in_buffer -= (size_t) num_bytes;
src->next_input_byte += (size_t) num_bytes;
src->bytes_in_buffer -= (size_t) num_bytes;
}
}
@ -210,3 +241,40 @@ jpeg_stdio_src (j_decompress_ptr cinfo, FILE * infile)
src->pub.bytes_in_buffer = 0; /* forces fill_input_buffer on first read */
src->pub.next_input_byte = NULL; /* until buffer loaded */
}
#if JPEG_LIB_VERSION >= 80
/*
* Prepare for input from a supplied memory buffer.
* The buffer must contain the whole JPEG data.
*/
GLOBAL(void)
jpeg_mem_src (j_decompress_ptr cinfo,
unsigned char * inbuffer, unsigned long insize)
{
struct jpeg_source_mgr * src;
if (inbuffer == NULL || insize == 0) /* Treat empty input as fatal error */
ERREXIT(cinfo, JERR_INPUT_EMPTY);
/* The source object is made permanent so that a series of JPEG images
* can be read from the same buffer by calling jpeg_mem_src only before
* the first one.
*/
if (cinfo->src == NULL) { /* first time for this JPEG object? */
cinfo->src = (struct jpeg_source_mgr *)
(*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT,
SIZEOF(struct jpeg_source_mgr));
}
src = cinfo->src;
src->init_source = init_mem_source;
src->fill_input_buffer = fill_mem_input_buffer;
src->skip_input_data = skip_input_data;
src->resync_to_restart = jpeg_resync_to_restart; /* use default method */
src->term_source = term_source;
src->bytes_in_buffer = (size_t) insize;
src->next_input_byte = (JOCTET *) inbuffer;
}
#endif

Просмотреть файл

@ -2,6 +2,7 @@
* jdcoefct.c
*
* Copyright (C) 1994-1997, Thomas G. Lane.
* Copyright (C) 2010, D. R. Commander.
* This file is part of the Independent JPEG Group's software.
* For conditions of distribution and use, see the accompanying README file.
*
@ -17,6 +18,7 @@
#define JPEG_INTERNALS
#include "jinclude.h"
#include "jpeglib.h"
#include "jpegcomp.h"
/* Block smoothing is only applicable for progressive JPEG, so: */
#ifndef D_PROGRESSIVE_SUPPORTED
@ -47,6 +49,9 @@ typedef struct {
*/
JBLOCKROW MCU_buffer[D_MAX_BLOCKS_IN_MCU];
/* Temporary workspace for one MCU */
JCOEF * workspace;
#ifdef D_MULTISCAN_FILES_SUPPORTED
/* In multi-pass modes, we need a virtual block array for each component. */
jvirt_barray_ptr whole_image[MAX_COMPONENTS];
@ -187,7 +192,7 @@ decompress_onepass (j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
useful_width = (MCU_col_num < last_MCU_col) ? compptr->MCU_width
: compptr->last_col_width;
output_ptr = output_buf[compptr->component_index] +
yoffset * compptr->DCT_scaled_size;
yoffset * compptr->_DCT_scaled_size;
start_col = MCU_col_num * compptr->MCU_sample_width;
for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
if (cinfo->input_iMCU_row < last_iMCU_row ||
@ -197,11 +202,11 @@ decompress_onepass (j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
(*inverse_DCT) (cinfo, compptr,
(JCOEFPTR) coef->MCU_buffer[blkn+xindex],
output_ptr, output_col);
output_col += compptr->DCT_scaled_size;
output_col += compptr->_DCT_scaled_size;
}
}
blkn += compptr->MCU_width;
output_ptr += compptr->DCT_scaled_size;
output_ptr += compptr->_DCT_scaled_size;
}
}
}
@ -362,9 +367,9 @@ decompress_data (j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
(*inverse_DCT) (cinfo, compptr, (JCOEFPTR) buffer_ptr,
output_ptr, output_col);
buffer_ptr++;
output_col += compptr->DCT_scaled_size;
output_col += compptr->_DCT_scaled_size;
}
output_ptr += compptr->DCT_scaled_size;
output_ptr += compptr->_DCT_scaled_size;
}
}
@ -471,13 +476,16 @@ decompress_smooth_data (j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
jpeg_component_info *compptr;
inverse_DCT_method_ptr inverse_DCT;
boolean first_row, last_row;
JBLOCK workspace;
JCOEF * workspace;
int *coef_bits;
JQUANT_TBL *quanttbl;
INT32 Q00,Q01,Q02,Q10,Q11,Q20, num;
int DC1,DC2,DC3,DC4,DC5,DC6,DC7,DC8,DC9;
int Al, pred;
/* Keep a local variable to avoid looking it up more than once */
workspace = coef->workspace;
/* Force some input to be done if we are getting ahead of the input. */
while (cinfo->input_scan_number <= cinfo->output_scan_number &&
! cinfo->inputctl->eoi_reached) {
@ -654,9 +662,9 @@ decompress_smooth_data (j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
DC4 = DC5; DC5 = DC6;
DC7 = DC8; DC8 = DC9;
buffer_ptr++, prev_block_row++, next_block_row++;
output_col += compptr->DCT_scaled_size;
output_col += compptr->_DCT_scaled_size;
}
output_ptr += compptr->DCT_scaled_size;
output_ptr += compptr->_DCT_scaled_size;
}
}
@ -733,4 +741,9 @@ jinit_d_coef_controller (j_decompress_ptr cinfo, boolean need_full_buffer)
coef->pub.decompress_data = decompress_onepass;
coef->pub.coef_arrays = NULL; /* flag for no virtual arrays */
}
/* Allocate the workspace buffer */
coef->workspace = (JCOEF *)
(*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
SIZEOF(JCOEF) * DCTSIZE2);
}

Просмотреть файл

@ -2,6 +2,8 @@
* jdcolor.c
*
* Copyright (C) 1991-1997, Thomas G. Lane.
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
* Copyright (C) 2009, D. R. Commander.
* This file is part of the Independent JPEG Group's software.
* For conditions of distribution and use, see the accompanying README file.
*
@ -11,7 +13,7 @@
#define JPEG_INTERNALS
#include "jinclude.h"
#include "jpeglib.h"
#include "jconfig.h"
#include "jsimd.h"
/* Private subobject */
@ -19,15 +21,11 @@
typedef struct {
struct jpeg_color_deconverter pub; /* public fields */
/* These fields are not needed anymore as these are now static tables */
#if 0
/* Private state for YCC->RGB conversion */
int * Cr_r_tab; /* => table for Cr to R conversion */
int * Cb_b_tab; /* => table for Cb to B conversion */
INT32 * Cr_g_tab; /* => table for Cr to G conversion */
INT32 * Cb_g_tab; /* => table for Cb to G conversion */
#endif
} my_color_deconverter;
typedef my_color_deconverter * my_cconvert_ptr;
@ -66,191 +64,6 @@ typedef my_color_deconverter * my_cconvert_ptr;
#define ONE_HALF ((INT32) 1 << (SCALEBITS-1))
#define FIX(x) ((INT32) ((x) * (1L<<SCALEBITS) + 0.5))
/* Use static tables for color processing. */
const int Cr_r_tab[(MAXJSAMPLE+1) * SIZEOF(int)] ={
0xffffff4dUL, 0xffffff4eUL, 0xffffff4fUL, 0xffffff51UL, 0xffffff52UL, 0xffffff54UL,
0xffffff55UL, 0xffffff56UL, 0xffffff58UL, 0xffffff59UL, 0xffffff5bUL, 0xffffff5cUL,
0xffffff5dUL, 0xffffff5fUL, 0xffffff60UL, 0xffffff62UL, 0xffffff63UL, 0xffffff64UL,
0xffffff66UL, 0xffffff67UL, 0xffffff69UL, 0xffffff6aUL, 0xffffff6bUL, 0xffffff6dUL,
0xffffff6eUL, 0xffffff70UL, 0xffffff71UL, 0xffffff72UL, 0xffffff74UL, 0xffffff75UL,
0xffffff77UL, 0xffffff78UL, 0xffffff79UL, 0xffffff7bUL, 0xffffff7cUL, 0xffffff7eUL,
0xffffff7fUL, 0xffffff80UL, 0xffffff82UL, 0xffffff83UL, 0xffffff85UL, 0xffffff86UL,
0xffffff87UL, 0xffffff89UL, 0xffffff8aUL, 0xffffff8cUL, 0xffffff8dUL, 0xffffff8eUL,
0xffffff90UL, 0xffffff91UL, 0xffffff93UL, 0xffffff94UL, 0xffffff95UL, 0xffffff97UL,
0xffffff98UL, 0xffffff9aUL, 0xffffff9bUL, 0xffffff9cUL, 0xffffff9eUL, 0xffffff9fUL,
0xffffffa1UL, 0xffffffa2UL, 0xffffffa3UL, 0xffffffa5UL, 0xffffffa6UL, 0xffffffa8UL,
0xffffffa9UL, 0xffffffaaUL, 0xffffffacUL, 0xffffffadUL, 0xffffffafUL, 0xffffffb0UL,
0xffffffb1UL, 0xffffffb3UL, 0xffffffb4UL, 0xffffffb6UL, 0xffffffb7UL, 0xffffffb8UL,
0xffffffbaUL, 0xffffffbbUL, 0xffffffbdUL, 0xffffffbeUL, 0xffffffc0UL, 0xffffffc1UL,
0xffffffc2UL, 0xffffffc4UL, 0xffffffc5UL, 0xffffffc7UL, 0xffffffc8UL, 0xffffffc9UL,
0xffffffcbUL, 0xffffffccUL, 0xffffffceUL, 0xffffffcfUL, 0xffffffd0UL, 0xffffffd2UL,
0xffffffd3UL, 0xffffffd5UL, 0xffffffd6UL, 0xffffffd7UL, 0xffffffd9UL, 0xffffffdaUL,
0xffffffdcUL, 0xffffffddUL, 0xffffffdeUL, 0xffffffe0UL, 0xffffffe1UL, 0xffffffe3UL,
0xffffffe4UL, 0xffffffe5UL, 0xffffffe7UL, 0xffffffe8UL, 0xffffffeaUL, 0xffffffebUL,
0xffffffecUL, 0xffffffeeUL, 0xffffffefUL, 0xfffffff1UL, 0xfffffff2UL, 0xfffffff3UL,
0xfffffff5UL, 0xfffffff6UL, 0xfffffff8UL, 0xfffffff9UL, 0xfffffffaUL, 0xfffffffcUL,
0xfffffffdUL, 0xffffffffUL, 0x00UL, 0x01UL, 0x03UL, 0x04UL,
0x06UL, 0x07UL, 0x08UL, 0x0aUL, 0x0bUL, 0x0dUL,
0x0eUL, 0x0fUL, 0x11UL, 0x12UL, 0x14UL, 0x15UL,
0x16UL, 0x18UL, 0x19UL, 0x1bUL, 0x1cUL, 0x1dUL,
0x1fUL, 0x20UL, 0x22UL, 0x23UL, 0x24UL, 0x26UL,
0x27UL, 0x29UL, 0x2aUL, 0x2bUL, 0x2dUL, 0x2eUL,
0x30UL, 0x31UL, 0x32UL, 0x34UL, 0x35UL, 0x37UL,
0x38UL, 0x39UL, 0x3bUL, 0x3cUL, 0x3eUL, 0x3fUL,
0x40UL, 0x42UL, 0x43UL, 0x45UL, 0x46UL, 0x48UL,
0x49UL, 0x4aUL, 0x4cUL, 0x4dUL, 0x4fUL, 0x50UL,
0x51UL, 0x53UL, 0x54UL, 0x56UL, 0x57UL, 0x58UL,
0x5aUL, 0x5bUL, 0x5dUL, 0x5eUL, 0x5fUL, 0x61UL,
0x62UL, 0x64UL, 0x65UL, 0x66UL, 0x68UL, 0x69UL,
0x6bUL, 0x6cUL, 0x6dUL, 0x6fUL, 0x70UL, 0x72UL,
0x73UL, 0x74UL, 0x76UL, 0x77UL, 0x79UL, 0x7aUL,
0x7bUL, 0x7dUL, 0x7eUL, 0x80UL, 0x81UL, 0x82UL,
0x84UL, 0x85UL, 0x87UL, 0x88UL, 0x89UL, 0x8bUL,
0x8cUL, 0x8eUL, 0x8fUL, 0x90UL, 0x92UL, 0x93UL,
0x95UL, 0x96UL, 0x97UL, 0x99UL, 0x9aUL, 0x9cUL,
0x9dUL, 0x9eUL, 0xa0UL, 0xa1UL, 0xa3UL, 0xa4UL,
0xa5UL, 0xa7UL, 0xa8UL, 0xaaUL, 0xabUL, 0xacUL,
0xaeUL, 0xafUL, 0xb1UL, 0xb2UL
};
const int Cb_b_tab[(MAXJSAMPLE+1) * SIZEOF(int)] ={
0xffffff1dUL, 0xffffff1fUL, 0xffffff21UL, 0xffffff22UL, 0xffffff24UL, 0xffffff26UL,
0xffffff28UL, 0xffffff2aUL, 0xffffff2bUL, 0xffffff2dUL, 0xffffff2fUL, 0xffffff31UL,
0xffffff32UL, 0xffffff34UL, 0xffffff36UL, 0xffffff38UL, 0xffffff3aUL, 0xffffff3bUL,
0xffffff3dUL, 0xffffff3fUL, 0xffffff41UL, 0xffffff42UL, 0xffffff44UL, 0xffffff46UL,
0xffffff48UL, 0xffffff49UL, 0xffffff4bUL, 0xffffff4dUL, 0xffffff4fUL, 0xffffff51UL,
0xffffff52UL, 0xffffff54UL, 0xffffff56UL, 0xffffff58UL, 0xffffff59UL, 0xffffff5bUL,
0xffffff5dUL, 0xffffff5fUL, 0xffffff61UL, 0xffffff62UL, 0xffffff64UL, 0xffffff66UL,
0xffffff68UL, 0xffffff69UL, 0xffffff6bUL, 0xffffff6dUL, 0xffffff6fUL, 0xffffff70UL,
0xffffff72UL, 0xffffff74UL, 0xffffff76UL, 0xffffff78UL, 0xffffff79UL, 0xffffff7bUL,
0xffffff7dUL, 0xffffff7fUL, 0xffffff80UL, 0xffffff82UL, 0xffffff84UL, 0xffffff86UL,
0xffffff88UL, 0xffffff89UL, 0xffffff8bUL, 0xffffff8dUL, 0xffffff8fUL, 0xffffff90UL,
0xffffff92UL, 0xffffff94UL, 0xffffff96UL, 0xffffff97UL, 0xffffff99UL, 0xffffff9bUL,
0xffffff9dUL, 0xffffff9fUL, 0xffffffa0UL, 0xffffffa2UL, 0xffffffa4UL, 0xffffffa6UL,
0xffffffa7UL, 0xffffffa9UL, 0xffffffabUL, 0xffffffadUL, 0xffffffaeUL, 0xffffffb0UL,
0xffffffb2UL, 0xffffffb4UL, 0xffffffb6UL, 0xffffffb7UL, 0xffffffb9UL, 0xffffffbbUL,
0xffffffbdUL, 0xffffffbeUL, 0xffffffc0UL, 0xffffffc2UL, 0xffffffc4UL, 0xffffffc6UL,
0xffffffc7UL, 0xffffffc9UL, 0xffffffcbUL, 0xffffffcdUL, 0xffffffceUL, 0xffffffd0UL,
0xffffffd2UL, 0xffffffd4UL, 0xffffffd5UL, 0xffffffd7UL, 0xffffffd9UL, 0xffffffdbUL,
0xffffffddUL, 0xffffffdeUL, 0xffffffe0UL, 0xffffffe2UL, 0xffffffe4UL, 0xffffffe5UL,
0xffffffe7UL, 0xffffffe9UL, 0xffffffebUL, 0xffffffedUL, 0xffffffeeUL, 0xfffffff0UL,
0xfffffff2UL, 0xfffffff4UL, 0xfffffff5UL, 0xfffffff7UL, 0xfffffff9UL, 0xfffffffbUL,
0xfffffffcUL, 0xfffffffeUL, 0x00UL, 0x02UL, 0x04UL, 0x05UL,
0x07UL, 0x09UL, 0x0bUL, 0x0cUL, 0x0eUL, 0x10UL,
0x12UL, 0x13UL, 0x15UL, 0x17UL, 0x19UL, 0x1bUL,
0x1cUL, 0x1eUL, 0x20UL, 0x22UL, 0x23UL, 0x25UL,
0x27UL, 0x29UL, 0x2bUL, 0x2cUL, 0x2eUL, 0x30UL,
0x32UL, 0x33UL, 0x35UL, 0x37UL, 0x39UL, 0x3aUL,
0x3cUL, 0x3eUL, 0x40UL, 0x42UL, 0x43UL, 0x45UL,
0x47UL, 0x49UL, 0x4aUL, 0x4cUL, 0x4eUL, 0x50UL,
0x52UL, 0x53UL, 0x55UL, 0x57UL, 0x59UL, 0x5aUL,
0x5cUL, 0x5eUL, 0x60UL, 0x61UL, 0x63UL, 0x65UL,
0x67UL, 0x69UL, 0x6aUL, 0x6cUL, 0x6eUL, 0x70UL,
0x71UL, 0x73UL, 0x75UL, 0x77UL, 0x78UL, 0x7aUL,
0x7cUL, 0x7eUL, 0x80UL, 0x81UL, 0x83UL, 0x85UL,
0x87UL, 0x88UL, 0x8aUL, 0x8cUL, 0x8eUL, 0x90UL,
0x91UL, 0x93UL, 0x95UL, 0x97UL, 0x98UL, 0x9aUL,
0x9cUL, 0x9eUL, 0x9fUL, 0xa1UL, 0xa3UL, 0xa5UL,
0xa7UL, 0xa8UL, 0xaaUL, 0xacUL, 0xaeUL, 0xafUL,
0xb1UL, 0xb3UL, 0xb5UL, 0xb7UL, 0xb8UL, 0xbaUL,
0xbcUL, 0xbeUL, 0xbfUL, 0xc1UL, 0xc3UL, 0xc5UL,
0xc6UL, 0xc8UL, 0xcaUL, 0xccUL, 0xceUL, 0xcfUL,
0xd1UL, 0xd3UL, 0xd5UL, 0xd6UL, 0xd8UL, 0xdaUL,
0xdcUL, 0xdeUL, 0xdfUL, 0xe1UL
};
const int Cr_g_tab[(MAXJSAMPLE+1) * SIZEOF(int)] ={
0x5b6900UL, 0x5ab22eUL, 0x59fb5cUL, 0x59448aUL, 0x588db8UL, 0x57d6e6UL,
0x572014UL, 0x566942UL, 0x55b270UL, 0x54fb9eUL, 0x5444ccUL, 0x538dfaUL,
0x52d728UL, 0x522056UL, 0x516984UL, 0x50b2b2UL, 0x4ffbe0UL, 0x4f450eUL,
0x4e8e3cUL, 0x4dd76aUL, 0x4d2098UL, 0x4c69c6UL, 0x4bb2f4UL, 0x4afc22UL,
0x4a4550UL, 0x498e7eUL, 0x48d7acUL, 0x4820daUL, 0x476a08UL, 0x46b336UL,
0x45fc64UL, 0x454592UL, 0x448ec0UL, 0x43d7eeUL, 0x43211cUL, 0x426a4aUL,
0x41b378UL, 0x40fca6UL, 0x4045d4UL, 0x3f8f02UL, 0x3ed830UL, 0x3e215eUL,
0x3d6a8cUL, 0x3cb3baUL, 0x3bfce8UL, 0x3b4616UL, 0x3a8f44UL, 0x39d872UL,
0x3921a0UL, 0x386aceUL, 0x37b3fcUL, 0x36fd2aUL, 0x364658UL, 0x358f86UL,
0x34d8b4UL, 0x3421e2UL, 0x336b10UL, 0x32b43eUL, 0x31fd6cUL, 0x31469aUL,
0x308fc8UL, 0x2fd8f6UL, 0x2f2224UL, 0x2e6b52UL, 0x2db480UL, 0x2cfdaeUL,
0x2c46dcUL, 0x2b900aUL, 0x2ad938UL, 0x2a2266UL, 0x296b94UL, 0x28b4c2UL,
0x27fdf0UL, 0x27471eUL, 0x26904cUL, 0x25d97aUL, 0x2522a8UL, 0x246bd6UL,
0x23b504UL, 0x22fe32UL, 0x224760UL, 0x21908eUL, 0x20d9bcUL, 0x2022eaUL,
0x1f6c18UL, 0x1eb546UL, 0x1dfe74UL, 0x1d47a2UL, 0x1c90d0UL, 0x1bd9feUL,
0x1b232cUL, 0x1a6c5aUL, 0x19b588UL, 0x18feb6UL, 0x1847e4UL, 0x179112UL,
0x16da40UL, 0x16236eUL, 0x156c9cUL, 0x14b5caUL, 0x13fef8UL, 0x134826UL,
0x129154UL, 0x11da82UL, 0x1123b0UL, 0x106cdeUL, 0xfb60cUL, 0xeff3aUL,
0xe4868UL, 0xd9196UL, 0xcdac4UL, 0xc23f2UL, 0xb6d20UL, 0xab64eUL,
0x9ff7cUL, 0x948aaUL, 0x891d8UL, 0x7db06UL, 0x72434UL, 0x66d62UL,
0x5b690UL, 0x4ffbeUL, 0x448ecUL, 0x3921aUL, 0x2db48UL, 0x22476UL,
0x16da4UL, 0xb6d2UL, 0x0UL, 0xffff492eUL, 0xfffe925cUL, 0xfffddb8aUL,
0xfffd24b8UL, 0xfffc6de6UL, 0xfffbb714UL, 0xfffb0042UL, 0xfffa4970UL, 0xfff9929eUL,
0xfff8dbccUL, 0xfff824faUL, 0xfff76e28UL, 0xfff6b756UL, 0xfff60084UL, 0xfff549b2UL,
0xfff492e0UL, 0xfff3dc0eUL, 0xfff3253cUL, 0xfff26e6aUL, 0xfff1b798UL, 0xfff100c6UL,
0xfff049f4UL, 0xffef9322UL, 0xffeedc50UL, 0xffee257eUL, 0xffed6eacUL, 0xffecb7daUL,
0xffec0108UL, 0xffeb4a36UL, 0xffea9364UL, 0xffe9dc92UL, 0xffe925c0UL, 0xffe86eeeUL,
0xffe7b81cUL, 0xffe7014aUL, 0xffe64a78UL, 0xffe593a6UL, 0xffe4dcd4UL, 0xffe42602UL,
0xffe36f30UL, 0xffe2b85eUL, 0xffe2018cUL, 0xffe14abaUL, 0xffe093e8UL, 0xffdfdd16UL,
0xffdf2644UL, 0xffde6f72UL, 0xffddb8a0UL, 0xffdd01ceUL, 0xffdc4afcUL, 0xffdb942aUL,
0xffdadd58UL, 0xffda2686UL, 0xffd96fb4UL, 0xffd8b8e2UL, 0xffd80210UL, 0xffd74b3eUL,
0xffd6946cUL, 0xffd5dd9aUL, 0xffd526c8UL, 0xffd46ff6UL, 0xffd3b924UL, 0xffd30252UL,
0xffd24b80UL, 0xffd194aeUL, 0xffd0dddcUL, 0xffd0270aUL, 0xffcf7038UL, 0xffceb966UL,
0xffce0294UL, 0xffcd4bc2UL, 0xffcc94f0UL, 0xffcbde1eUL, 0xffcb274cUL, 0xffca707aUL,
0xffc9b9a8UL, 0xffc902d6UL, 0xffc84c04UL, 0xffc79532UL, 0xffc6de60UL, 0xffc6278eUL,
0xffc570bcUL, 0xffc4b9eaUL, 0xffc40318UL, 0xffc34c46UL, 0xffc29574UL, 0xffc1dea2UL,
0xffc127d0UL, 0xffc070feUL, 0xffbfba2cUL, 0xffbf035aUL, 0xffbe4c88UL, 0xffbd95b6UL,
0xffbcdee4UL, 0xffbc2812UL, 0xffbb7140UL, 0xffbaba6eUL, 0xffba039cUL, 0xffb94ccaUL,
0xffb895f8UL, 0xffb7df26UL, 0xffb72854UL, 0xffb67182UL, 0xffb5bab0UL, 0xffb503deUL,
0xffb44d0cUL, 0xffb3963aUL, 0xffb2df68UL, 0xffb22896UL, 0xffb171c4UL, 0xffb0baf2UL,
0xffb00420UL, 0xffaf4d4eUL, 0xffae967cUL, 0xffaddfaaUL, 0xffad28d8UL, 0xffac7206UL,
0xffabbb34UL, 0xffab0462UL, 0xffaa4d90UL, 0xffa996beUL, 0xffa8dfecUL, 0xffa8291aUL,
0xffa77248UL, 0xffa6bb76UL, 0xffa604a4UL, 0xffa54dd2UL
};
const int Cb_g_tab[(MAXJSAMPLE+1) * SIZEOF(int)] ={
0x2c8d00UL, 0x2c34e6UL, 0x2bdcccUL, 0x2b84b2UL, 0x2b2c98UL, 0x2ad47eUL,
0x2a7c64UL, 0x2a244aUL, 0x29cc30UL, 0x297416UL, 0x291bfcUL, 0x28c3e2UL,
0x286bc8UL, 0x2813aeUL, 0x27bb94UL, 0x27637aUL, 0x270b60UL, 0x26b346UL,
0x265b2cUL, 0x260312UL, 0x25aaf8UL, 0x2552deUL, 0x24fac4UL, 0x24a2aaUL,
0x244a90UL, 0x23f276UL, 0x239a5cUL, 0x234242UL, 0x22ea28UL, 0x22920eUL,
0x2239f4UL, 0x21e1daUL, 0x2189c0UL, 0x2131a6UL, 0x20d98cUL, 0x208172UL,
0x202958UL, 0x1fd13eUL, 0x1f7924UL, 0x1f210aUL, 0x1ec8f0UL, 0x1e70d6UL,
0x1e18bcUL, 0x1dc0a2UL, 0x1d6888UL, 0x1d106eUL, 0x1cb854UL, 0x1c603aUL,
0x1c0820UL, 0x1bb006UL, 0x1b57ecUL, 0x1affd2UL, 0x1aa7b8UL, 0x1a4f9eUL,
0x19f784UL, 0x199f6aUL, 0x194750UL, 0x18ef36UL, 0x18971cUL, 0x183f02UL,
0x17e6e8UL, 0x178eceUL, 0x1736b4UL, 0x16de9aUL, 0x168680UL, 0x162e66UL,
0x15d64cUL, 0x157e32UL, 0x152618UL, 0x14cdfeUL, 0x1475e4UL, 0x141dcaUL,
0x13c5b0UL, 0x136d96UL, 0x13157cUL, 0x12bd62UL, 0x126548UL, 0x120d2eUL,
0x11b514UL, 0x115cfaUL, 0x1104e0UL, 0x10acc6UL, 0x1054acUL, 0xffc92UL,
0xfa478UL, 0xf4c5eUL, 0xef444UL, 0xe9c2aUL, 0xe4410UL, 0xdebf6UL,
0xd93dcUL, 0xd3bc2UL, 0xce3a8UL, 0xc8b8eUL, 0xc3374UL, 0xbdb5aUL,
0xb8340UL, 0xb2b26UL, 0xad30cUL, 0xa7af2UL, 0xa22d8UL, 0x9cabeUL,
0x972a4UL, 0x91a8aUL, 0x8c270UL, 0x86a56UL, 0x8123cUL, 0x7ba22UL,
0x76208UL, 0x709eeUL, 0x6b1d4UL, 0x659baUL, 0x601a0UL, 0x5a986UL,
0x5516cUL, 0x4f952UL, 0x4a138UL, 0x4491eUL, 0x3f104UL, 0x398eaUL,
0x340d0UL, 0x2e8b6UL, 0x2909cUL, 0x23882UL, 0x1e068UL, 0x1884eUL,
0x13034UL, 0xd81aUL, 0x8000UL, 0x27e6UL, 0xffffcfccUL, 0xffff77b2UL,
0xffff1f98UL, 0xfffec77eUL, 0xfffe6f64UL, 0xfffe174aUL, 0xfffdbf30UL, 0xfffd6716UL,
0xfffd0efcUL, 0xfffcb6e2UL, 0xfffc5ec8UL, 0xfffc06aeUL, 0xfffbae94UL, 0xfffb567aUL,
0xfffafe60UL, 0xfffaa646UL, 0xfffa4e2cUL, 0xfff9f612UL, 0xfff99df8UL, 0xfff945deUL,
0xfff8edc4UL, 0xfff895aaUL, 0xfff83d90UL, 0xfff7e576UL, 0xfff78d5cUL, 0xfff73542UL,
0xfff6dd28UL, 0xfff6850eUL, 0xfff62cf4UL, 0xfff5d4daUL, 0xfff57cc0UL, 0xfff524a6UL,
0xfff4cc8cUL, 0xfff47472UL, 0xfff41c58UL, 0xfff3c43eUL, 0xfff36c24UL, 0xfff3140aUL,
0xfff2bbf0UL, 0xfff263d6UL, 0xfff20bbcUL, 0xfff1b3a2UL, 0xfff15b88UL, 0xfff1036eUL,
0xfff0ab54UL, 0xfff0533aUL, 0xffeffb20UL, 0xffefa306UL, 0xffef4aecUL, 0xffeef2d2UL,
0xffee9ab8UL, 0xffee429eUL, 0xffedea84UL, 0xffed926aUL, 0xffed3a50UL, 0xffece236UL,
0xffec8a1cUL, 0xffec3202UL, 0xffebd9e8UL, 0xffeb81ceUL, 0xffeb29b4UL, 0xffead19aUL,
0xffea7980UL, 0xffea2166UL, 0xffe9c94cUL, 0xffe97132UL, 0xffe91918UL, 0xffe8c0feUL,
0xffe868e4UL, 0xffe810caUL, 0xffe7b8b0UL, 0xffe76096UL, 0xffe7087cUL, 0xffe6b062UL,
0xffe65848UL, 0xffe6002eUL, 0xffe5a814UL, 0xffe54ffaUL, 0xffe4f7e0UL, 0xffe49fc6UL,
0xffe447acUL, 0xffe3ef92UL, 0xffe39778UL, 0xffe33f5eUL, 0xffe2e744UL, 0xffe28f2aUL,
0xffe23710UL, 0xffe1def6UL, 0xffe186dcUL, 0xffe12ec2UL, 0xffe0d6a8UL, 0xffe07e8eUL,
0xffe02674UL, 0xffdfce5aUL, 0xffdf7640UL, 0xffdf1e26UL, 0xffdec60cUL, 0xffde6df2UL,
0xffde15d8UL, 0xffddbdbeUL, 0xffdd65a4UL, 0xffdd0d8aUL, 0xffdcb570UL, 0xffdc5d56UL,
0xffdc053cUL, 0xffdbad22UL, 0xffdb5508UL, 0xffdafceeUL, 0xffdaa4d4UL, 0xffda4cbaUL,
0xffd9f4a0UL, 0xffd99c86UL, 0xffd9446cUL, 0xffd8ec52UL, 0xffd89438UL, 0xffd83c1eUL,
0xffd7e404UL, 0xffd78beaUL, 0xffd733d0UL, 0xffd6dbb6UL, 0xffd6839cUL, 0xffd62b82UL,
0xffd5d368UL, 0xffd57b4eUL, 0xffd52334UL, 0xffd4cb1aUL
};
/*
* Initialize tables for YCC->RGB colorspace conversion.
@ -259,10 +72,6 @@ const int Cb_g_tab[(MAXJSAMPLE+1) * SIZEOF(int)] ={
LOCAL(void)
build_ycc_rgb_table (j_decompress_ptr cinfo)
{
/* The code below was used to generate the static tables above */
#if 0
my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
int i;
INT32 x;
@ -296,7 +105,6 @@ build_ycc_rgb_table (j_decompress_ptr cinfo)
/* We also add in ONE_HALF so that need not do it in inner loop */
cconvert->Cb_g_tab[i] = (- FIX(0.34414)) * x + ONE_HALF;
}
#endif /* 0 */
}
@ -318,12 +126,16 @@ ycc_rgb_convert (j_decompress_ptr cinfo,
{
my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
register int y, cb, cr;
JSAMPLE * range_limit_y;
JSAMPROW outptr;
JSAMPROW inptr0, inptr1, inptr2;
JDIMENSION col;
register JSAMPROW outptr;
register JSAMPROW inptr0, inptr1, inptr2;
register JDIMENSION col;
JDIMENSION num_cols = cinfo->output_width;
JSAMPLE * range_limit = cinfo->sample_range_limit;
/* copy these pointers into registers if possible */
register JSAMPLE * range_limit = cinfo->sample_range_limit;
register int * Crrtab = cconvert->Cr_r_tab;
register int * Cbbtab = cconvert->Cb_b_tab;
register INT32 * Crgtab = cconvert->Cr_g_tab;
register INT32 * Cbgtab = cconvert->Cb_g_tab;
SHIFT_TEMPS
while (--num_rows >= 0) {
@ -336,14 +148,13 @@ ycc_rgb_convert (j_decompress_ptr cinfo,
y = GETJSAMPLE(inptr0[col]);
cb = GETJSAMPLE(inptr1[col]);
cr = GETJSAMPLE(inptr2[col]);
range_limit_y = range_limit + y;
/* Range-limiting is essential due to noise introduced by DCT losses. */
outptr[RGB_RED] = range_limit_y[Cr_r_tab[cr]];
outptr[RGB_GREEN] = range_limit_y[
((int) RIGHT_SHIFT(Cb_g_tab[cb] + Cr_g_tab[cr],
outptr[rgb_red[cinfo->out_color_space]] = range_limit[y + Crrtab[cr]];
outptr[rgb_green[cinfo->out_color_space]] = range_limit[y +
((int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
SCALEBITS))];
outptr[RGB_BLUE] = range_limit_y[Cb_b_tab[cb]];
outptr += RGB_PIXELSIZE;
outptr[rgb_blue[cinfo->out_color_space]] = range_limit[y + Cbbtab[cb]];
outptr += rgb_pixelsize[cinfo->out_color_space];
}
}
}
@ -411,16 +222,20 @@ gray_rgb_convert (j_decompress_ptr cinfo,
JSAMPARRAY output_buf, int num_rows)
{
register JSAMPROW inptr, outptr;
register JDIMENSION col;
JSAMPLE *maxinptr;
JDIMENSION num_cols = cinfo->output_width;
int rindex = rgb_red[cinfo->out_color_space];
int gindex = rgb_green[cinfo->out_color_space];
int bindex = rgb_blue[cinfo->out_color_space];
int rgbstride = rgb_pixelsize[cinfo->out_color_space];
while (--num_rows >= 0) {
inptr = input_buf[0][input_row++];
maxinptr = &inptr[num_cols];
outptr = *output_buf++;
for (col = 0; col < num_cols; col++) {
for (; inptr < maxinptr; inptr++, outptr += rgbstride) {
/* We can dispense with GETJSAMPLE() here */
outptr[RGB_RED] = outptr[RGB_GREEN] = outptr[RGB_BLUE] = inptr[col];
outptr += RGB_PIXELSIZE;
outptr[rindex] = outptr[gindex] = outptr[bindex] = *inptr;
}
}
}
@ -430,6 +245,7 @@ gray_rgb_convert (j_decompress_ptr cinfo,
* Adobe-style YCCK->CMYK conversion.
* We convert YCbCr to R=1-C, G=1-M, and B=1-Y using the same
* conversion as above, while passing K (black) unchanged.
* We assume build_ycc_rgb_table has been called.
*/
METHODDEF(void)
@ -445,6 +261,10 @@ ycck_cmyk_convert (j_decompress_ptr cinfo,
JDIMENSION num_cols = cinfo->output_width;
/* copy these pointers into registers if possible */
register JSAMPLE * range_limit = cinfo->sample_range_limit;
register int * Crrtab = cconvert->Cr_r_tab;
register int * Cbbtab = cconvert->Cb_b_tab;
register INT32 * Crgtab = cconvert->Cr_g_tab;
register INT32 * Cbgtab = cconvert->Cb_g_tab;
SHIFT_TEMPS
while (--num_rows >= 0) {
@ -459,11 +279,11 @@ ycck_cmyk_convert (j_decompress_ptr cinfo,
cb = GETJSAMPLE(inptr1[col]);
cr = GETJSAMPLE(inptr2[col]);
/* Range-limiting is essential due to noise introduced by DCT losses. */
outptr[0] = range_limit[MAXJSAMPLE - (y + Cr_r_tab[cr])]; /* red */
outptr[1] = range_limit[MAXJSAMPLE - (y + /* green */
((int) RIGHT_SHIFT(Cb_g_tab[cb] + Cr_g_tab[cr],
SCALEBITS)))];
outptr[2] = range_limit[MAXJSAMPLE - (y + Cb_b_tab[cb])]; /* blue */
outptr[0] = range_limit[MAXJSAMPLE - (y + Crrtab[cr])]; /* red */
outptr[1] = range_limit[MAXJSAMPLE - (y + /* green */
((int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
SCALEBITS)))];
outptr[2] = range_limit[MAXJSAMPLE - (y + Cbbtab[cb])]; /* blue */
/* K passes through unchanged */
outptr[3] = inptr3[col]; /* don't need GETJSAMPLE here */
outptr += 4;
@ -543,13 +363,24 @@ jinit_color_deconverter (j_decompress_ptr cinfo)
break;
case JCS_RGB:
cinfo->out_color_components = RGB_PIXELSIZE;
case JCS_EXT_RGB:
case JCS_EXT_RGBX:
case JCS_EXT_BGR:
case JCS_EXT_BGRX:
case JCS_EXT_XBGR:
case JCS_EXT_XRGB:
cinfo->out_color_components = rgb_pixelsize[cinfo->out_color_space];
if (cinfo->jpeg_color_space == JCS_YCbCr) {
cconvert->pub.color_convert = ycc_rgb_convert;
build_ycc_rgb_table(cinfo);
if (jsimd_can_ycc_rgb())
cconvert->pub.color_convert = jsimd_ycc_rgb_convert;
else {
cconvert->pub.color_convert = ycc_rgb_convert;
build_ycc_rgb_table(cinfo);
}
} else if (cinfo->jpeg_color_space == JCS_GRAYSCALE) {
cconvert->pub.color_convert = gray_rgb_convert;
} else if (cinfo->jpeg_color_space == JCS_RGB && RGB_PIXELSIZE == 3) {
} else if (cinfo->jpeg_color_space == cinfo->out_color_space &&
rgb_pixelsize[cinfo->out_color_space] == 3) {
cconvert->pub.color_convert = null_convert;
} else
ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);

Просмотреть файл

@ -23,18 +23,26 @@
* have a range of +-8K for 8-bit data, +-128K for 12-bit data. This
* convention improves accuracy in integer implementations and saves some
* work in floating-point ones.
* Quantization of the output coefficients is done by jcdctmgr.c.
* Quantization of the output coefficients is done by jcdctmgr.c. This
* step requires an unsigned type and also one with twice the bits.
*/
#if BITS_IN_JSAMPLE == 8
#ifndef WITH_SIMD
typedef int DCTELEM; /* 16 or 32 bits is fine */
typedef unsigned int UDCTELEM;
typedef unsigned long long UDCTELEM2;
#else
typedef short DCTELEM; /* prefer 16 bit with SIMD for parellelism */
typedef unsigned short UDCTELEM;
typedef unsigned int UDCTELEM2;
#endif
#else
typedef INT32 DCTELEM; /* must have 32 bits */
typedef UINT32 UDCTELEM;
typedef unsigned long long UDCTELEM2;
#endif
typedef JMETHOD(void, forward_DCT_method_ptr, (DCTELEM * data));
typedef JMETHOD(void, float_DCT_method_ptr, (FAST_FLOAT * data));
/*
* An inverse DCT routine is given a pointer to the input JBLOCK and a pointer

Просмотреть файл

@ -2,6 +2,8 @@
* jddctmgr.c
*
* Copyright (C) 1994-1996, Thomas G. Lane.
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
* Copyright (C) 2010, D. R. Commander.
* This file is part of the Independent JPEG Group's software.
* For conditions of distribution and use, see the accompanying README file.
*
@ -19,9 +21,9 @@
#include "jinclude.h"
#include "jpeglib.h"
#include "jdct.h" /* Private declarations for DCT subsystem */
#ifdef HAVE_SSE2_INTRINSICS
extern int SSE2Available;
#endif
#include "jsimddct.h"
#include "jpegcomp.h"
/*
* The decompressor input side (jdinput.c) saves away the appropriate
@ -80,14 +82,6 @@ typedef union {
#endif
#endif
GLOBAL(void)
jpeg_idct_islow_sse2 (
j_decompress_ptr cinfo,
jpeg_component_info * compptr,
JCOEFPTR coef_block,
JSAMPARRAY output_buf,
JDIMENSION output_col);
/*
* Prepare for an output pass.
@ -108,18 +102,24 @@ start_pass (j_decompress_ptr cinfo)
for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
ci++, compptr++) {
/* Select the proper IDCT routine for this component's scaling */
switch (compptr->DCT_scaled_size) {
switch (compptr->_DCT_scaled_size) {
#ifdef IDCT_SCALING_SUPPORTED
case 1:
method_ptr = jpeg_idct_1x1;
method = JDCT_ISLOW; /* jidctred uses islow-style table */
break;
case 2:
method_ptr = jpeg_idct_2x2;
if (jsimd_can_idct_2x2())
method_ptr = jsimd_idct_2x2;
else
method_ptr = jpeg_idct_2x2;
method = JDCT_ISLOW; /* jidctred uses islow-style table */
break;
case 4:
method_ptr = jpeg_idct_4x4;
if (jsimd_can_idct_4x4())
method_ptr = jsimd_idct_4x4;
else
method_ptr = jpeg_idct_4x4;
method = JDCT_ISLOW; /* jidctred uses islow-style table */
break;
#endif
@ -127,47 +127,28 @@ start_pass (j_decompress_ptr cinfo)
switch (cinfo->dct_method) {
#ifdef DCT_ISLOW_SUPPORTED
case JDCT_ISLOW:
#ifdef HAVE_SSE2_INTEL_MNEMONICS
if(SSE2Available == 1)
{
method_ptr = jpeg_idct_islow_sse2;
method = JDCT_ISLOW;
}
else
{
method_ptr = jpeg_idct_islow;
method = JDCT_ISLOW;
}
#else
method_ptr = jpeg_idct_islow;
method = JDCT_ISLOW;
#endif /* HAVE_SSE2_INTEL_MNEMONICS */
if (jsimd_can_idct_islow())
method_ptr = jsimd_idct_islow;
else
method_ptr = jpeg_idct_islow;
method = JDCT_ISLOW;
break;
#endif
#ifdef DCT_IFAST_SUPPORTED
case JDCT_IFAST:
#ifdef HAVE_SSE2_INTEL_MNEMONICS
if (SSE2Available==1)
{
method_ptr = jpeg_idct_islow_sse2;
method = JDCT_ISLOW;
}
else
{
method_ptr = jpeg_idct_ifast;
method = JDCT_IFAST;
}
#else
method_ptr = jpeg_idct_ifast;
method = JDCT_IFAST;
#endif /* HAVE_SSE2_INTEL_MNEMONICS */
if (jsimd_can_idct_ifast())
method_ptr = jsimd_idct_ifast;
else
method_ptr = jpeg_idct_ifast;
method = JDCT_IFAST;
break;
#endif
#ifdef DCT_FLOAT_SUPPORTED
case JDCT_FLOAT:
method_ptr = jpeg_idct_float;
if (jsimd_can_idct_float())
method_ptr = jsimd_idct_float;
else
method_ptr = jpeg_idct_float;
method = JDCT_FLOAT;
break;
#endif
@ -177,7 +158,7 @@ start_pass (j_decompress_ptr cinfo)
}
break;
default:
ERREXIT1(cinfo, JERR_BAD_DCTSIZE, compptr->DCT_scaled_size);
ERREXIT1(cinfo, JERR_BAD_DCTSIZE, compptr->_DCT_scaled_size);
break;
}
idct->pub.inverse_DCT[ci] = method_ptr;

Просмотреть файл

@ -2,6 +2,8 @@
* jdinput.c
*
* Copyright (C) 1991-1997, Thomas G. Lane.
* Modified 2002-2009 by Guido Vollbeding.
* Copyright (C) 2010, D. R. Commander.
* This file is part of the Independent JPEG Group's software.
* For conditions of distribution and use, see the accompanying README file.
*
@ -14,6 +16,7 @@
#define JPEG_INTERNALS
#include "jinclude.h"
#include "jpeglib.h"
#include "jpegcomp.h"
/* Private state */
@ -35,6 +38,79 @@ METHODDEF(int) consume_markers JPP((j_decompress_ptr cinfo));
* Routines to calculate various quantities related to the size of the image.
*/
#if JPEG_LIB_VERSION >= 80
/*
* Compute output image dimensions and related values.
* NOTE: this is exported for possible use by application.
* Hence it mustn't do anything that can't be done twice.
*/
GLOBAL(void)
jpeg_core_output_dimensions (j_decompress_ptr cinfo)
/* Do computations that are needed before master selection phase.
* This function is used for transcoding and full decompression.
*/
{
#ifdef IDCT_SCALING_SUPPORTED
int ci;
jpeg_component_info *compptr;
/* Compute actual output image dimensions and DCT scaling choices. */
if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom) {
/* Provide 1/block_size scaling */
cinfo->output_width = (JDIMENSION)
jdiv_round_up((long) cinfo->image_width, (long) cinfo->block_size);
cinfo->output_height = (JDIMENSION)
jdiv_round_up((long) cinfo->image_height, (long) cinfo->block_size);
cinfo->min_DCT_h_scaled_size = 1;
cinfo->min_DCT_v_scaled_size = 1;
} else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 2) {
/* Provide 2/block_size scaling */
cinfo->output_width = (JDIMENSION)
jdiv_round_up((long) cinfo->image_width * 2L, (long) cinfo->block_size);
cinfo->output_height = (JDIMENSION)
jdiv_round_up((long) cinfo->image_height * 2L, (long) cinfo->block_size);
cinfo->min_DCT_h_scaled_size = 2;
cinfo->min_DCT_v_scaled_size = 2;
} else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 4) {
/* Provide 4/block_size scaling */
cinfo->output_width = (JDIMENSION)
jdiv_round_up((long) cinfo->image_width * 4L, (long) cinfo->block_size);
cinfo->output_height = (JDIMENSION)
jdiv_round_up((long) cinfo->image_height * 4L, (long) cinfo->block_size);
cinfo->min_DCT_h_scaled_size = 4;
cinfo->min_DCT_v_scaled_size = 4;
} else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 8) {
/* Provide 8/block_size scaling */
cinfo->output_width = (JDIMENSION)
jdiv_round_up((long) cinfo->image_width * 8L, (long) cinfo->block_size);
cinfo->output_height = (JDIMENSION)
jdiv_round_up((long) cinfo->image_height * 8L, (long) cinfo->block_size);
cinfo->min_DCT_h_scaled_size = 8;
cinfo->min_DCT_v_scaled_size = 8;
}
/* Recompute dimensions of components */
for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
ci++, compptr++) {
compptr->DCT_h_scaled_size = cinfo->min_DCT_h_scaled_size;
compptr->DCT_v_scaled_size = cinfo->min_DCT_v_scaled_size;
}
#else /* !IDCT_SCALING_SUPPORTED */
/* Hardwire it to "no scaling" */
cinfo->output_width = cinfo->image_width;
cinfo->output_height = cinfo->image_height;
/* jdinput.c has already initialized DCT_scaled_size,
* and has computed unscaled downsampled_width and downsampled_height.
*/
#endif /* IDCT_SCALING_SUPPORTED */
}
#endif
LOCAL(void)
initial_setup (j_decompress_ptr cinfo)
/* Called once, when first SOS marker is reached */
@ -70,16 +146,30 @@ initial_setup (j_decompress_ptr cinfo)
compptr->v_samp_factor);
}
#if JPEG_LIB_VERSION >=80
cinfo->block_size = DCTSIZE;
cinfo->natural_order = jpeg_natural_order;
cinfo->lim_Se = DCTSIZE2-1;
#endif
/* We initialize DCT_scaled_size and min_DCT_scaled_size to DCTSIZE.
* In the full decompressor, this will be overridden by jdmaster.c;
* but in the transcoder, jdmaster.c is not used, so we must do it here.
*/
#if JPEG_LIB_VERSION >= 70
cinfo->min_DCT_h_scaled_size = cinfo->min_DCT_v_scaled_size = DCTSIZE;
#else
cinfo->min_DCT_scaled_size = DCTSIZE;
#endif
/* Compute dimensions of components */
for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
ci++, compptr++) {
#if JPEG_LIB_VERSION >= 70
compptr->DCT_h_scaled_size = compptr->DCT_v_scaled_size = DCTSIZE;
#else
compptr->DCT_scaled_size = DCTSIZE;
#endif
/* Size in DCT blocks */
compptr->width_in_blocks = (JDIMENSION)
jdiv_round_up((long) cinfo->image_width * (long) compptr->h_samp_factor,
@ -138,7 +228,7 @@ per_scan_setup (j_decompress_ptr cinfo)
compptr->MCU_width = 1;
compptr->MCU_height = 1;
compptr->MCU_blocks = 1;
compptr->MCU_sample_width = compptr->DCT_scaled_size;
compptr->MCU_sample_width = compptr->_DCT_scaled_size;
compptr->last_col_width = 1;
/* For noninterleaved scans, it is convenient to define last_row_height
* as the number of block rows present in the last iMCU row.
@ -174,7 +264,7 @@ per_scan_setup (j_decompress_ptr cinfo)
compptr->MCU_width = compptr->h_samp_factor;
compptr->MCU_height = compptr->v_samp_factor;
compptr->MCU_blocks = compptr->MCU_width * compptr->MCU_height;
compptr->MCU_sample_width = compptr->MCU_width * compptr->DCT_scaled_size;
compptr->MCU_sample_width = compptr->MCU_width * compptr->_DCT_scaled_size;
/* Figure number of non-dummy blocks in last MCU column & row */
tmp = (int) (compptr->width_in_blocks % compptr->MCU_width);
if (tmp == 0) tmp = compptr->MCU_width;

Просмотреть файл

@ -2,6 +2,7 @@
* jdmainct.c
*
* Copyright (C) 1994-1996, Thomas G. Lane.
* Copyright (C) 2010, D. R. Commander.
* This file is part of the Independent JPEG Group's software.
* For conditions of distribution and use, see the accompanying README file.
*
@ -16,6 +17,7 @@
#define JPEG_INTERNALS
#include "jinclude.h"
#include "jpeglib.h"
#include "jpegcomp.h"
/*
@ -161,7 +163,7 @@ alloc_funny_pointers (j_decompress_ptr cinfo)
{
my_main_ptr main = (my_main_ptr) cinfo->main;
int ci, rgroup;
int M = cinfo->min_DCT_scaled_size;
int M = cinfo->_min_DCT_scaled_size;
jpeg_component_info *compptr;
JSAMPARRAY xbuf;
@ -175,8 +177,8 @@ alloc_funny_pointers (j_decompress_ptr cinfo)
for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
ci++, compptr++) {
rgroup = (compptr->v_samp_factor * compptr->DCT_scaled_size) /
cinfo->min_DCT_scaled_size; /* height of a row group of component */
rgroup = (compptr->v_samp_factor * compptr->_DCT_scaled_size) /
cinfo->_min_DCT_scaled_size; /* height of a row group of component */
/* Get space for pointer lists --- M+4 row groups in each list.
* We alloc both pointer lists with one call to save a few cycles.
*/
@ -202,14 +204,14 @@ make_funny_pointers (j_decompress_ptr cinfo)
{
my_main_ptr main = (my_main_ptr) cinfo->main;
int ci, i, rgroup;
int M = cinfo->min_DCT_scaled_size;
int M = cinfo->_min_DCT_scaled_size;
jpeg_component_info *compptr;
JSAMPARRAY buf, xbuf0, xbuf1;
for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
ci++, compptr++) {
rgroup = (compptr->v_samp_factor * compptr->DCT_scaled_size) /
cinfo->min_DCT_scaled_size; /* height of a row group of component */
rgroup = (compptr->v_samp_factor * compptr->_DCT_scaled_size) /
cinfo->_min_DCT_scaled_size; /* height of a row group of component */
xbuf0 = main->xbuffer[0][ci];
xbuf1 = main->xbuffer[1][ci];
/* First copy the workspace pointers as-is */
@ -242,14 +244,14 @@ set_wraparound_pointers (j_decompress_ptr cinfo)
{
my_main_ptr main = (my_main_ptr) cinfo->main;
int ci, i, rgroup;
int M = cinfo->min_DCT_scaled_size;
int M = cinfo->_min_DCT_scaled_size;
jpeg_component_info *compptr;
JSAMPARRAY xbuf0, xbuf1;
for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
ci++, compptr++) {
rgroup = (compptr->v_samp_factor * compptr->DCT_scaled_size) /
cinfo->min_DCT_scaled_size; /* height of a row group of component */
rgroup = (compptr->v_samp_factor * compptr->_DCT_scaled_size) /
cinfo->_min_DCT_scaled_size; /* height of a row group of component */
xbuf0 = main->xbuffer[0][ci];
xbuf1 = main->xbuffer[1][ci];
for (i = 0; i < rgroup; i++) {
@ -277,8 +279,8 @@ set_bottom_pointers (j_decompress_ptr cinfo)
for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
ci++, compptr++) {
/* Count sample rows in one iMCU row and in one row group */
iMCUheight = compptr->v_samp_factor * compptr->DCT_scaled_size;
rgroup = iMCUheight / cinfo->min_DCT_scaled_size;
iMCUheight = compptr->v_samp_factor * compptr->_DCT_scaled_size;
rgroup = iMCUheight / cinfo->_min_DCT_scaled_size;
/* Count nondummy sample rows remaining for this component */
rows_left = (int) (compptr->downsampled_height % (JDIMENSION) iMCUheight);
if (rows_left == 0) rows_left = iMCUheight;
@ -357,7 +359,7 @@ process_data_simple_main (j_decompress_ptr cinfo,
}
/* There are always min_DCT_scaled_size row groups in an iMCU row. */
rowgroups_avail = (JDIMENSION) cinfo->min_DCT_scaled_size;
rowgroups_avail = (JDIMENSION) cinfo->_min_DCT_scaled_size;
/* Note: at the bottom of the image, we may pass extra garbage row groups
* to the postprocessor. The postprocessor has to check for bottom
* of image anyway (at row resolution), so no point in us doing it too.
@ -417,7 +419,7 @@ process_data_context_main (j_decompress_ptr cinfo,
case CTX_PREPARE_FOR_IMCU:
/* Prepare to process first M-1 row groups of this iMCU row */
main->rowgroup_ctr = 0;
main->rowgroups_avail = (JDIMENSION) (cinfo->min_DCT_scaled_size - 1);
main->rowgroups_avail = (JDIMENSION) (cinfo->_min_DCT_scaled_size - 1);
/* Check for bottom of image: if so, tweak pointers to "duplicate"
* the last sample row, and adjust rowgroups_avail to ignore padding rows.
*/
@ -440,8 +442,8 @@ process_data_context_main (j_decompress_ptr cinfo,
main->buffer_full = FALSE;
/* Still need to process last row group of this iMCU row, */
/* which is saved at index M+1 of the other xbuffer */
main->rowgroup_ctr = (JDIMENSION) (cinfo->min_DCT_scaled_size + 1);
main->rowgroups_avail = (JDIMENSION) (cinfo->min_DCT_scaled_size + 2);
main->rowgroup_ctr = (JDIMENSION) (cinfo->_min_DCT_scaled_size + 1);
main->rowgroups_avail = (JDIMENSION) (cinfo->_min_DCT_scaled_size + 2);
main->context_state = CTX_POSTPONED_ROW;
}
}
@ -492,21 +494,21 @@ jinit_d_main_controller (j_decompress_ptr cinfo, boolean need_full_buffer)
* ngroups is the number of row groups we need.
*/
if (cinfo->upsample->need_context_rows) {
if (cinfo->min_DCT_scaled_size < 2) /* unsupported, see comments above */
if (cinfo->_min_DCT_scaled_size < 2) /* unsupported, see comments above */
ERREXIT(cinfo, JERR_NOTIMPL);
alloc_funny_pointers(cinfo); /* Alloc space for xbuffer[] lists */
ngroups = cinfo->min_DCT_scaled_size + 2;
ngroups = cinfo->_min_DCT_scaled_size + 2;
} else {
ngroups = cinfo->min_DCT_scaled_size;
ngroups = cinfo->_min_DCT_scaled_size;
}
for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
ci++, compptr++) {
rgroup = (compptr->v_samp_factor * compptr->DCT_scaled_size) /
cinfo->min_DCT_scaled_size; /* height of a row group of component */
rgroup = (compptr->v_samp_factor * compptr->_DCT_scaled_size) /
cinfo->_min_DCT_scaled_size; /* height of a row group of component */
main->buffer[ci] = (*cinfo->mem->alloc_sarray)
((j_common_ptr) cinfo, JPOOL_IMAGE,
compptr->width_in_blocks * compptr->DCT_scaled_size,
compptr->width_in_blocks * compptr->_DCT_scaled_size,
(JDIMENSION) (rgroup * ngroups));
}
}

Просмотреть файл

@ -79,7 +79,9 @@ typedef enum { /* JPEG marker codes */
M_JPG13 = 0xfd,
M_COM = 0xfe,
M_TEM = 0x01
M_TEM = 0x01,
M_ERROR = 0x100
} JPEG_MARKER;

Просмотреть файл

@ -2,6 +2,7 @@
* jdmaster.c
*
* Copyright (C) 1991-1997, Thomas G. Lane.
* Copyright (C) 2009-2010, D. R. Commander.
* This file is part of the Independent JPEG Group's software.
* For conditions of distribution and use, see the accompanying README file.
*
@ -14,105 +15,8 @@
#define JPEG_INTERNALS
#include "jinclude.h"
#include "jpeglib.h"
#include "jpegcomp.h"
/* Use static array */
const JSAMPLE static_range_table[ (5 * (MAXJSAMPLE+1) + CENTERJSAMPLE) * SIZEOF(JSAMPLE) ]={
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d,
0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c,
0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a,
0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49,
0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76,
0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85,
0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94,
0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3,
0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0, 0xb1, 0xb2,
0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1,
0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee,
0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd,
0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09,
0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18,
0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 0x42, 0x43, 0x44, 0x45,
0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54,
0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63,
0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72,
0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f
};
/* Private state */
@ -147,8 +51,14 @@ use_merged_upsample (j_decompress_ptr cinfo)
return FALSE;
/* jdmerge.c only supports YCC=>RGB color conversion */
if (cinfo->jpeg_color_space != JCS_YCbCr || cinfo->num_components != 3 ||
cinfo->out_color_space != JCS_RGB ||
cinfo->out_color_components != RGB_PIXELSIZE)
(cinfo->out_color_space != JCS_RGB &&
cinfo->out_color_space != JCS_EXT_RGB &&
cinfo->out_color_space != JCS_EXT_RGBX &&
cinfo->out_color_space != JCS_EXT_BGR &&
cinfo->out_color_space != JCS_EXT_BGRX &&
cinfo->out_color_space != JCS_EXT_XBGR &&
cinfo->out_color_space != JCS_EXT_XRGB) ||
cinfo->out_color_components != rgb_pixelsize[cinfo->out_color_space])
return FALSE;
/* and it only handles 2h1v or 2h2v sampling ratios */
if (cinfo->comp_info[0].h_samp_factor != 2 ||
@ -159,9 +69,9 @@ use_merged_upsample (j_decompress_ptr cinfo)
cinfo->comp_info[2].v_samp_factor != 1)
return FALSE;
/* furthermore, it doesn't work if we've scaled the IDCTs differently */
if (cinfo->comp_info[0].DCT_scaled_size != cinfo->min_DCT_scaled_size ||
cinfo->comp_info[1].DCT_scaled_size != cinfo->min_DCT_scaled_size ||
cinfo->comp_info[2].DCT_scaled_size != cinfo->min_DCT_scaled_size)
if (cinfo->comp_info[0]._DCT_scaled_size != cinfo->_min_DCT_scaled_size ||
cinfo->comp_info[1]._DCT_scaled_size != cinfo->_min_DCT_scaled_size ||
cinfo->comp_info[2]._DCT_scaled_size != cinfo->_min_DCT_scaled_size)
return FALSE;
/* ??? also need to test for upsample-time rescaling, when & if supported */
return TRUE; /* by golly, it'll work... */
@ -200,26 +110,42 @@ jpeg_calc_output_dimensions (j_decompress_ptr cinfo)
jdiv_round_up((long) cinfo->image_width, 8L);
cinfo->output_height = (JDIMENSION)
jdiv_round_up((long) cinfo->image_height, 8L);
#if JPEG_LIB_VERSION >= 70
cinfo->min_DCT_h_scaled_size = cinfo->min_DCT_v_scaled_size = 1;
#else
cinfo->min_DCT_scaled_size = 1;
#endif
} else if (cinfo->scale_num * 4 <= cinfo->scale_denom) {
/* Provide 1/4 scaling */
cinfo->output_width = (JDIMENSION)
jdiv_round_up((long) cinfo->image_width, 4L);
cinfo->output_height = (JDIMENSION)
jdiv_round_up((long) cinfo->image_height, 4L);
#if JPEG_LIB_VERSION >= 70
cinfo->min_DCT_h_scaled_size = cinfo->min_DCT_v_scaled_size = 2;
#else
cinfo->min_DCT_scaled_size = 2;
#endif
} else if (cinfo->scale_num * 2 <= cinfo->scale_denom) {
/* Provide 1/2 scaling */
cinfo->output_width = (JDIMENSION)
jdiv_round_up((long) cinfo->image_width, 2L);
cinfo->output_height = (JDIMENSION)
jdiv_round_up((long) cinfo->image_height, 2L);
#if JPEG_LIB_VERSION >= 70
cinfo->min_DCT_h_scaled_size = cinfo->min_DCT_v_scaled_size = 4;
#else
cinfo->min_DCT_scaled_size = 4;
#endif
} else {
/* Provide 1/1 scaling */
cinfo->output_width = cinfo->image_width;
cinfo->output_height = cinfo->image_height;
#if JPEG_LIB_VERSION >= 70
cinfo->min_DCT_h_scaled_size = cinfo->min_DCT_v_scaled_size = DCTSIZE;
#else
cinfo->min_DCT_scaled_size = DCTSIZE;
#endif
}
/* In selecting the actual DCT scaling for each component, we try to
* scale up the chroma components via IDCT scaling rather than upsampling.
@ -228,15 +154,19 @@ jpeg_calc_output_dimensions (j_decompress_ptr cinfo)
*/
for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
ci++, compptr++) {
int ssize = cinfo->min_DCT_scaled_size;
int ssize = cinfo->_min_DCT_scaled_size;
while (ssize < DCTSIZE &&
(compptr->h_samp_factor * ssize * 2 <=
cinfo->max_h_samp_factor * cinfo->min_DCT_scaled_size) &&
cinfo->max_h_samp_factor * cinfo->_min_DCT_scaled_size) &&
(compptr->v_samp_factor * ssize * 2 <=
cinfo->max_v_samp_factor * cinfo->min_DCT_scaled_size)) {
cinfo->max_v_samp_factor * cinfo->_min_DCT_scaled_size)) {
ssize = ssize * 2;
}
#if JPEG_LIB_VERSION >= 70
compptr->DCT_h_scaled_size = compptr->DCT_v_scaled_size = ssize;
#else
compptr->DCT_scaled_size = ssize;
#endif
}
/* Recompute downsampled dimensions of components;
@ -247,11 +177,11 @@ jpeg_calc_output_dimensions (j_decompress_ptr cinfo)
/* Size in samples, after IDCT scaling */
compptr->downsampled_width = (JDIMENSION)
jdiv_round_up((long) cinfo->image_width *
(long) (compptr->h_samp_factor * compptr->DCT_scaled_size),
(long) (compptr->h_samp_factor * compptr->_DCT_scaled_size),
(long) (cinfo->max_h_samp_factor * DCTSIZE));
compptr->downsampled_height = (JDIMENSION)
jdiv_round_up((long) cinfo->image_height *
(long) (compptr->v_samp_factor * compptr->DCT_scaled_size),
(long) (compptr->v_samp_factor * compptr->_DCT_scaled_size),
(long) (cinfo->max_v_samp_factor * DCTSIZE));
}
@ -273,10 +203,14 @@ jpeg_calc_output_dimensions (j_decompress_ptr cinfo)
cinfo->out_color_components = 1;
break;
case JCS_RGB:
#if RGB_PIXELSIZE != 3
cinfo->out_color_components = RGB_PIXELSIZE;
case JCS_EXT_RGB:
case JCS_EXT_RGBX:
case JCS_EXT_BGR:
case JCS_EXT_BGRX:
case JCS_EXT_XBGR:
case JCS_EXT_XRGB:
cinfo->out_color_components = rgb_pixelsize[cinfo->out_color_space];
break;
#endif /* else share code with YCbCr */
case JCS_YCbCr:
cinfo->out_color_components = 3;
break;
@ -346,14 +280,6 @@ LOCAL(void)
prepare_range_limit_table (j_decompress_ptr cinfo)
/* Allocate and fill in the sample_range_limit table */
{
/* Use a static table and allow negative subscripts of simple table */
cinfo->sample_range_limit = (JSAMPLE *) static_range_table + (MAXJSAMPLE+1);
/* This code is used to create the values for the static table used above */
#if 0
JSAMPLE * table;
int i;
@ -376,8 +302,6 @@ prepare_range_limit_table (j_decompress_ptr cinfo)
(2 * (MAXJSAMPLE+1) - CENTERJSAMPLE) * SIZEOF(JSAMPLE));
MEMCOPY(table + (4 * (MAXJSAMPLE+1) - CENTERJSAMPLE),
cinfo->sample_range_limit, CENTERJSAMPLE * SIZEOF(JSAMPLE));
#endif /* 0 */
}
@ -481,7 +405,11 @@ master_selection (j_decompress_ptr cinfo)
jinit_inverse_dct(cinfo);
/* Entropy decoding: either Huffman or arithmetic coding. */
if (cinfo->arith_code) {
#ifdef D_ARITH_CODING_SUPPORTED
jinit_arith_decoder(cinfo);
#else
ERREXIT(cinfo, JERR_ARITH_NOTIMPL);
#endif
} else {
if (cinfo->progressive_mode) {
#ifdef D_PROGRESSIVE_SUPPORTED

Просмотреть файл

@ -2,6 +2,8 @@
* jdmerge.c
*
* Copyright (C) 1994-1996, Thomas G. Lane.
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
* Copyright (C) 2009, D. R. Commander.
* This file is part of the Independent JPEG Group's software.
* For conditions of distribution and use, see the accompanying README file.
*
@ -35,22 +37,10 @@
#define JPEG_INTERNALS
#include "jinclude.h"
#include "jpeglib.h"
#include "jsimd.h"
#ifdef UPSAMPLE_MERGING_SUPPORTED
#ifdef HAVE_MMX_INTEL_MNEMONICS
__int64 const1 = 0x59BA0000D24B59BA; // Cr_r Cr_b Cr_g Cr_r
__int64 const2 = 0x00007168E9FA0000; // Cb-r Cb_b Cb_g Cb_r
__int64 const5 = 0x0000D24B59BA0000; // Cr_b Cr_g Cr_r Cr_b
__int64 const6 = 0x7168E9FA00007168; // Cb_b Cb_g Cb_r Cb_b
// constants for factors (One_Half/fix(x)) << 2
__int64 const05 = 0x0001000000000001; // Cr_r Cr_b Cr_g Cr_r
__int64 const15 = 0x00000001FFFA0000; // Cb-r Cb_b Cb_g Cb_r
__int64 const45 = 0x0000000000010000; // Cr_b Cr_g Cr_r Cr_b
__int64 const55 = 0x0001FFFA00000001; // Cb_b Cb_g Cb_r Cb_b
#endif
/* Private subobject */
@ -240,9 +230,7 @@ h2v1_merged_upsample (j_decompress_ptr cinfo,
JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr,
JSAMPARRAY output_buf)
{
my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
register int y, cred, cgreen, cblue;
int cb, cr;
register JSAMPROW outptr;
@ -270,15 +258,15 @@ h2v1_merged_upsample (j_decompress_ptr cinfo,
cblue = Cbbtab[cb];
/* Fetch 2 Y values and emit 2 pixels */
y = GETJSAMPLE(*inptr0++);
outptr[RGB_RED] = range_limit[y + cred];
outptr[RGB_GREEN] = range_limit[y + cgreen];
outptr[RGB_BLUE] = range_limit[y + cblue];
outptr += RGB_PIXELSIZE;
outptr[rgb_red[cinfo->out_color_space]] = range_limit[y + cred];
outptr[rgb_green[cinfo->out_color_space]] = range_limit[y + cgreen];
outptr[rgb_blue[cinfo->out_color_space]] = range_limit[y + cblue];
outptr += rgb_pixelsize[cinfo->out_color_space];
y = GETJSAMPLE(*inptr0++);
outptr[RGB_RED] = range_limit[y + cred];
outptr[RGB_GREEN] = range_limit[y + cgreen];
outptr[RGB_BLUE] = range_limit[y + cblue];
outptr += RGB_PIXELSIZE;
outptr[rgb_red[cinfo->out_color_space]] = range_limit[y + cred];
outptr[rgb_green[cinfo->out_color_space]] = range_limit[y + cgreen];
outptr[rgb_blue[cinfo->out_color_space]] = range_limit[y + cblue];
outptr += rgb_pixelsize[cinfo->out_color_space];
}
/* If image width is odd, do the last output column separately */
if (cinfo->output_width & 1) {
@ -288,9 +276,9 @@ h2v1_merged_upsample (j_decompress_ptr cinfo,
cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
cblue = Cbbtab[cb];
y = GETJSAMPLE(*inptr0);
outptr[RGB_RED] = range_limit[y + cred];
outptr[RGB_GREEN] = range_limit[y + cgreen];
outptr[RGB_BLUE] = range_limit[y + cblue];
outptr[rgb_red[cinfo->out_color_space]] = range_limit[y + cred];
outptr[rgb_green[cinfo->out_color_space]] = range_limit[y + cgreen];
outptr[rgb_blue[cinfo->out_color_space]] = range_limit[y + cblue];
}
}
@ -299,614 +287,6 @@ h2v1_merged_upsample (j_decompress_ptr cinfo,
* Upsample and color convert for the case of 2:1 horizontal and 2:1 vertical.
*/
#ifdef HAVE_MMX_INTEL_MNEMONICS
__inline METHODDEF(void)
h2v2_merged_upsample_orig (j_decompress_ptr cinfo,
JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr,
JSAMPARRAY output_buf);
__inline METHODDEF(void)
h2v2_merged_upsample_mmx (j_decompress_ptr cinfo,
JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr,
JSAMPARRAY output_buf);
#endif
METHODDEF(void)
h2v2_merged_upsample (j_decompress_ptr cinfo,
JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr,
JSAMPARRAY output_buf);
#ifdef HAVE_MMX_INTEL_MNEMONICS
METHODDEF(void)
h2v2_merged_upsample (j_decompress_ptr cinfo,
JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr,
JSAMPARRAY output_buf)
{
if (MMXAvailable && (cinfo->image_width >= 8))
h2v2_merged_upsample_mmx (cinfo, input_buf, in_row_group_ctr, output_buf);
else
h2v2_merged_upsample_orig (cinfo, input_buf, in_row_group_ctr, output_buf);
}
__inline METHODDEF(void)
h2v2_merged_upsample_orig (j_decompress_ptr cinfo,
JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr,
JSAMPARRAY output_buf)
{
my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
register int y, cred, cgreen, cblue;
int cb, cr;
register JSAMPROW outptr0, outptr1;
JSAMPROW inptr00, inptr01, inptr1, inptr2;
JDIMENSION col;
/* copy these pointers into registers if possible */
register JSAMPLE * range_limit = cinfo->sample_range_limit;
int * Crrtab = upsample->Cr_r_tab;
int * Cbbtab = upsample->Cb_b_tab;
INT32 * Crgtab = upsample->Cr_g_tab;
INT32 * Cbgtab = upsample->Cb_g_tab;
SHIFT_TEMPS
inptr00 = input_buf[0][in_row_group_ctr*2];
inptr01 = input_buf[0][in_row_group_ctr*2 + 1];
inptr1 = input_buf[1][in_row_group_ctr];
inptr2 = input_buf[2][in_row_group_ctr];
outptr0 = output_buf[0];
outptr1 = output_buf[1];
/* Loop for each group of output pixels */
for (col = cinfo->output_width >> 1; col > 0; col--) {
/* Do the chroma part of the calculation */
cb = GETJSAMPLE(*inptr1++);
cr = GETJSAMPLE(*inptr2++);
cred = Crrtab[cr];
cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
cblue = Cbbtab[cb];
/* Fetch 4 Y values and emit 4 pixels */
y = GETJSAMPLE(*inptr00++);
outptr0[RGB_RED] = range_limit[y + cred];
outptr0[RGB_GREEN] = range_limit[y + cgreen];
outptr0[RGB_BLUE] = range_limit[y + cblue];
outptr0 += RGB_PIXELSIZE;
y = GETJSAMPLE(*inptr00++);
outptr0[RGB_RED] = range_limit[y + cred];
outptr0[RGB_GREEN] = range_limit[y + cgreen];
outptr0[RGB_BLUE] = range_limit[y + cblue];
outptr0 += RGB_PIXELSIZE;
y = GETJSAMPLE(*inptr01++);
outptr1[RGB_RED] = range_limit[y + cred];
outptr1[RGB_GREEN] = range_limit[y + cgreen];
outptr1[RGB_BLUE] = range_limit[y + cblue];
outptr1 += RGB_PIXELSIZE;
y = GETJSAMPLE(*inptr01++);
outptr1[RGB_RED] = range_limit[y + cred];
outptr1[RGB_GREEN] = range_limit[y + cgreen];
outptr1[RGB_BLUE] = range_limit[y + cblue];
outptr1 += RGB_PIXELSIZE;
}
/* If image width is odd, do the last output column separately */
if (cinfo->output_width & 1) {
cb = GETJSAMPLE(*inptr1);
cr = GETJSAMPLE(*inptr2);
cred = Crrtab[cr];
cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
cblue = Cbbtab[cb];
y = GETJSAMPLE(*inptr00);
outptr0[RGB_RED] = range_limit[y + cred];
outptr0[RGB_GREEN] = range_limit[y + cgreen];
outptr0[RGB_BLUE] = range_limit[y + cblue];
y = GETJSAMPLE(*inptr01);
outptr1[RGB_RED] = range_limit[y + cred];
outptr1[RGB_GREEN] = range_limit[y + cgreen];
outptr1[RGB_BLUE] = range_limit[y + cblue];
}
}
/*
* Upsample and color convert for the case of 2:1 horizontal and 2:1 vertical.
*/
__inline METHODDEF(void)
h2v2_merged_upsample_mmx (j_decompress_ptr cinfo,
JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr,
JSAMPARRAY output_buf)
{
// added for MMX
__int64 const128 = 0x0080008000800080;
__int64 empty = 0x0000000000000000;
__int64 davemask = 0x0000FFFFFFFF0000;
////////////////////////////////
my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
register int y, cred, cgreen, cblue;
int cb, cr;
register JSAMPROW outptr0, outptr1;
JSAMPROW inptr00, inptr01, inptr1, inptr2;
JDIMENSION col;
/* copy these pointers into registers if possible */
register JSAMPLE * range_limit = cinfo->sample_range_limit;
int * Crrtab = upsample->Cr_r_tab;
int * Cbbtab = upsample->Cb_b_tab;
INT32 * Crgtab = upsample->Cr_g_tab;
INT32 * Cbgtab = upsample->Cb_g_tab;
SHIFT_TEMPS
// Added for MMX
register int width = cinfo->image_width;
int cols = cinfo->output_width;
int cols_asm = (cols >> 3);
int diff = cols - (cols_asm<<3);
int cols_asm_copy = cols_asm;
///////////////////////////////////////
inptr00 = input_buf[0][in_row_group_ctr*2];
inptr01 = input_buf[0][in_row_group_ctr*2 + 1];
inptr1 = input_buf[1][in_row_group_ctr];
inptr2 = input_buf[2][in_row_group_ctr];
outptr0 = output_buf[0];
outptr1 = output_buf[1];
/* Loop for each group of output pixels */
_asm
{
mov esi, inptr00
mov eax, inptr01
mov ebx, inptr2
mov ecx, inptr1
mov edi, outptr0
mov edx, outptr1
do_next16:
movd mm0, [ebx] ; Cr7 Cr6.....Cr1 Cr0
pxor mm6, mm6
punpcklbw mm0, mm0 ; Cr3 Cr3 Cr2 Cr2 Cr1 Cr1 Cr0 Cr0
movq mm7, const128
punpcklwd mm0, mm0 ; Cr1 Cr1 Cr1 Cr1 Cr0 Cr0 Cr0 Cr0
movq mm4, mm0
punpcklbw mm0, mm6 ; Cr0 Cr0 Cr0 Cr0
psubsw mm0, mm7 ; Cr0 - 128:Cr0-128:Cr0-128:Cr0 -128
movd mm1, [ecx] ; Cb7 Cb6...... Cb1 Cb0
psllw mm0, 2 ; left shift by 2 bits
punpcklbw mm1, mm1 ; Cb3 Cb3 Cb2 Cb2 Cb1 Cb1 Cb0 Cb0
paddsw mm0, const05 ; add (one_half/fix(x)) << 2
punpcklwd mm1, mm1 ; Cb1 Cb1 Cb1 Cb1 Cb0 Cb0 Cb0 Cb0
movq mm5, mm1
pmulhw mm0, const1 ; multiply by (fix(x) >> 1)
punpcklbw mm1, mm6 ; Cb0 Cb0 Cb0 Cb0
punpckhbw mm4, mm6 ; Cr1 Cr1 Cr1 Cr1
psubsw mm1, mm7 ; Cb0 - 128:Cb0-128:Cb0-128:Cb0 -128
punpckhbw mm5, mm6 ; Cb1 Cb1 Cb1 Cb1
psllw mm1, 2 ; left shift by 2 bits
paddsw mm1, const15 ; add (one_half/fix(x)) << 2
psubsw mm4, mm7 ; Cr1 - 128:Cr1-128:Cr1-128:Cr1 -128
psubsw mm5, mm7 ; Cb1 - 128:Cb1-128:Cb1-128:Cb1 -128
pmulhw mm1, const2 ; multiply by (fix(x) >> 1)
psllw mm4, 2 ; left shift by 2 bits
psllw mm5, 2 ; left shift by 2 bits
paddsw mm4, const45 ; add (one_half/fix(x)) << 2
movd mm7, [esi] ; Y13 Y12 Y9 Y8 Y5 Y4 Y1 Y0
pmulhw mm4, const5 ; multiply by (fix(x) >> 1)
movq mm6, mm7
punpcklbw mm7, mm7 ; Y5 Y5 Y4 Y4 Y1 Y1 Y0 Y0
paddsw mm5, const55 ; add (one_half/fix(x)) << 2
paddsw mm0, mm1 ; cred0 cbl0 cgr0 cred0
movq mm1, mm7
pmulhw mm5, const6 ; multiply by (fix(x) >> 1)
movq mm2, mm0 ; cred0 cbl0 cgr0 cred0
punpcklwd mm7, mm6 ; Y5 Y4 Y1 Y1 Y1 Y0 Y0 Y0
pand mm2, davemask ; 0 cbl0 cgr0 0
psrlq mm1, 16 ; 0 0 Y5 Y5 Y4 Y4 Y1 Y1
psrlq mm2, 16 ; 0 0 cbl0 cgr0
punpcklbw mm7, empty ; Y1 Y0 Y0 Y0
paddsw mm4, mm5 ; cbl1 cgr1 cred1 cbl1
movq mm3, mm4 ; cbl1 cgr1 cred1 cbl1
pand mm3, davemask ; 0 cgr1 cred1 0
paddsw mm7, mm0 ; r1 b0 g0 r0
psllq mm3, 16 ; cgr1 cred1 0 0
movq mm6, mm1 ; 0 0 Y5 Y5 Y4 Y4 Y1 Y1
por mm2, mm3 ; cgr1 cred1 cbl0 cgr0
punpcklbw mm6, empty ; Y4 Y4 Y1 Y1
movd mm3, [eax] ; Y15 Y14 Y11 Y10 Y7 Y6 Y3 Y2
paddsw mm6, mm2 ; g4 r4 b1 g1
packuswb mm7, mm6 ; g4 r4 b1 g1 r1 b0 g0 r0
movq mm6, mm3 ; Y15 Y14 Y11 Y10 Y7 Y6 Y3 Y2
punpcklbw mm3, mm3 ; Y7 Y7 Y6 Y6 Y3 Y3 Y2 Y2
movq [edi], mm7 ; move to memory g4 r4 b1 g1 r1 b0 g0 r0
movq mm5, mm3 ; Y7 Y7 Y6 Y6 Y3 Y3 Y2 Y2
punpcklwd mm3, mm6 ; X X X X Y3 Y2 Y2 Y2
punpcklbw mm3, empty ; Y3 Y2 Y2 Y2
psrlq mm5, 16 ; 0 0 Y7 Y7 Y6 Y6 Y3 Y3
paddsw mm3, mm0 ; r3 b2 g2 r2
movq mm6, mm5 ; 0 0 Y7 Y7 Y6 Y6 Y3 Y3
movq mm0, mm1 ; 0 0 Y5 Y5 Y4 Y4 Y1 Y1
punpckldq mm6, mm6 ; X X X X Y6 Y6 Y3 Y3
punpcklbw mm6, empty ; Y6 Y6 Y3 Y3
psrlq mm1, 24 ; 0 0 0 0 0 Y5 Y5 Y4
paddsw mm6, mm2 ; g6 r6 b3 g3
packuswb mm3, mm6 ; g6 r6 b3 g3 r3 b2 g2 r2
movq mm2, mm5 ; 0 0 Y7 Y7 Y6 Y6 Y3 Y3
psrlq mm0, 32 ; 0 0 0 0 0 0 Y5 Y5
movq [edx], mm3 ; move to memory g6 r6 b3 g3 r3 b2 g2 r2
punpcklwd mm1, mm0 ; X X X X Y5 Y5 Y5 Y4
psrlq mm5, 24 ; 0 0 0 0 0 Y7 Y7 Y6
movd mm0, [ebx] ; Cr9 Cr8.....Cr3 Cr2
psrlq mm2, 32 ; 0 0 0 0 0 0 Y7 Y7
psrlq mm0, 16
punpcklbw mm1, empty ; Y5 Y5 Y5 Y4
punpcklwd mm5, mm2 ; X X X X Y7 Y7 Y7 Y6
paddsw mm1, mm4 ; b5 g5 r5 b4
punpcklbw mm5, empty ; Y7 Y7 Y7 Y6
pxor mm6, mm6 ; clear mm6 registr
punpcklbw mm0, mm0 ; X X X X Cr3 Cr3 Cr2 Cr2
paddsw mm5, mm4 ; b7 g7 r7 b6
punpcklwd mm0, mm0 ; Cr3 Cr3 Cr3 Cr3 Cr2 Cr2 Cr2 Cr2
movq mm4, mm0
movd mm3, [ecx] ; Cb9 Cb8...... Cb3 Cb2
punpcklbw mm0, mm6 ; Cr2 Cr2 Cr2 Cr2
psrlq mm3, 16
psubsw mm0, const128 ; Cr2 - 128:Cr2-128:Cr2-128:Cr2 -128
punpcklbw mm3, mm3 ; X X X X Cb3 Cb3 Cb2 Cb2
psllw mm0, 2 ; left shift by 2 bits
paddsw mm0, const05 ; add (one_half/fix(x)) << 2
punpcklwd mm3, mm3 ; Cb3 Cb3 Cb3 Cb3 Cb2 Cb2 Cb2 Cb2
movq mm7, mm3
pmulhw mm0, const1 ; multiply by (fix(x) >> 1)
punpcklbw mm3, mm6 ; Cb2 Cb2 Cb2 Cb2
psubsw mm3, const128 ; Cb0 - 128:Cb0-128:Cb0-128:Cb0 -128
punpckhbw mm4, mm6 ; Cr3 Cr3 Cr3 Cr3
psllw mm3, 2 ; left shift by 2 bits
paddsw mm3, const15 ; add (one_half/fix(x)) << 2
punpckhbw mm7, mm6 ; Cb3 Cb3 Cb3 Cb3
pmulhw mm3, const2 ; multiply by (fix(x) >> 1)
psubsw mm7, const128 ; Cb3 - 128:Cb3-128:Cb3-128:Cb3 -128
paddsw mm0, mm3 ; cred2 cbl2 cgr2 cred2
psllw mm7, 2 ; left shift by 2 bits
psubsw mm4, const128 ; Cr3 - 128:Cr3-128:Cr3-128:Cr3 -128
movd mm3, [esi+4] ; Y21 Y20 Y17 Y16 Y13 Y12 Y9 Y8
psllw mm4, 2 ; left shift by 2 bits
paddsw mm7, const55 ; add (one_half/fix(x)) << 2
movq mm6, mm3 ; Y21 Y20 Y17 Y16 Y13 Y12 Y9 Y8
movq mm2, mm0
pand mm2, davemask
punpcklbw mm3, mm3 ; Y13 Y13 Y12 Y12 Y9 Y9 Y8 Y8
psrlq mm2, 16
paddsw mm4, const45 ; add (one_half/fix(x)) << 2
punpcklwd mm3, mm6 ; X X X X Y9 Y8 Y8 Y8
pmulhw mm4, const5 ; multiply by (fix(x) >> 1)
pmulhw mm7, const6 ; multiply by (fix(x) >> 1)
punpcklbw mm3, empty ; Y9 Y8 Y8 Y8
paddsw mm4, mm7 ; cbl3 cgr3 cred3 cbl3
paddsw mm3, mm0 ; r9 b8 g8 r8
movq mm7, mm4
packuswb mm1, mm3 ; r9 b8 g8 r8 b5 g5 r5 b4
movd mm3, [eax+4] ; Y23 Y22 Y19 Y18 Y15 Y14 Y11 Y10
pand mm7, davemask
psrlq mm6, 8 ; 0 Y21 Y20 Y17 Y16 Y13 Y12 Y9
psllq mm7, 16
movq [edi+8], mm1 ; move to memory r9 b8 g8 r8 b5 g5 r5 b4
por mm2, mm7
movq mm7, mm3 ; Y23 Y22 Y19 Y18 Y15 Y14 Y11 Y10
punpcklbw mm3, mm3 ; X X X X Y11 Y11 Y10 Y10
pxor mm1, mm1
punpcklwd mm3, mm7 ; X X X X Y11 Y10 Y10 Y10
punpcklbw mm3, mm1 ; Y11 Y10 Y10 Y10
psrlq mm7, 8 ; 0 Y23 Y22 Y19 Y18 Y15 Y14 Y11
paddsw mm3, mm0 ; r11 b10 g10 r10
movq mm0, mm7 ; 0 Y23 Y22 Y19 Y18 Y15 Y14 Y11
packuswb mm5, mm3 ; r11 b10 g10 r10 b7 g7 r7 b6
punpcklbw mm7, mm7 ; X X X X Y14 Y14 Y11 Y11
movq [edx+8], mm5 ; move to memory r11 b10 g10 r10 b7 g7 r7 b6
movq mm3, mm6 ; 0 Y21 Y20 Y17 Y16 Y13 Y12 Y9
punpcklbw mm6, mm6 ; X X X X Y12 Y12 Y9 Y9
punpcklbw mm7, mm1 ; Y14 Y14 Y11 Y11
punpcklbw mm6, mm1 ; Y12 Y12 Y9 Y9
paddsw mm7, mm2 ; g14 r14 b11 g11
paddsw mm6, mm2 ; g12 r12 b9 g9
psrlq mm3, 8 ; 0 0 Y21 Y20 Y17 Y16 Y13 Y12
movq mm1, mm3 ; 0 0 Y21 Y20 Y17 Y16 Y13 Y12
punpcklbw mm3, mm3 ; X X X X Y13 Y13 Y12 Y12
add esi, 8
psrlq mm3, 16 ; X X X X X X Y13 Y13 modified on 09/24
punpcklwd mm1, mm3 ; X X X X Y13 Y13 Y13 Y12
add eax, 8
psrlq mm0, 8 ; 0 0 Y23 Y22 Y19 Y18 Y15 Y14
punpcklbw mm1, empty ; Y13 Y13 Y13 Y12
movq mm5, mm0 ; 0 0 Y23 Y22 Y19 Y18 Y15 Y14
punpcklbw mm0, mm0 ; X X X X Y15 Y15 Y14 Y14
paddsw mm1, mm4 ; b13 g13 r13 b12
psrlq mm0, 16 ; X X X X X X Y15 Y15
add edi, 24
punpcklwd mm5, mm0 ; X X X X Y15 Y15 Y15 Y14
packuswb mm6, mm1 ; b13 g13 r13 b12 g12 r12 b9 g9
add edx, 24
punpcklbw mm5, empty ; Y15 Y15 Y15 Y14
add ebx, 4
paddsw mm5, mm4 ; b15 g15 r15 b14
movq [edi-8], mm6 ; move to memory b13 g13 r13 b12 g12 r12 b9 g9
packuswb mm7, mm5 ; b15 g15 r15 b14 g14 r14 b11 g11
add ecx, 4
movq [edx-8], mm7 ; move to memory b15 g15 r15 b14 g14 r14 b11 g11
dec cols_asm
jnz do_next16
EMMS
}
inptr1 += (cols_asm_copy<<2);
inptr2 += (cols_asm_copy<<2);
inptr00 += (cols_asm_copy<<3);
inptr01 += (cols_asm_copy<<3);
outptr0 += cols_asm_copy*24;
outptr1 += cols_asm_copy*24;
//for (col = cinfo->output_width >> 1; col > 0; col--) {
/* Do the chroma part of the calculation */
/*cb = GETJSAMPLE(*inptr1++);
cr = GETJSAMPLE(*inptr2++);
cred = Crrtab[cr];
cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
cblue = Cbbtab[cb];*/
/* Fetch 4 Y values and emit 4 pixels */
/*y = GETJSAMPLE(*inptr00++);
outptr0[RGB_RED] = range_limit[y + cred];
outptr0[RGB_GREEN] = range_limit[y + cgreen];
outptr0[RGB_BLUE] = range_limit[y + cblue];
outptr0 += RGB_PIXELSIZE;
y = GETJSAMPLE(*inptr00++);
outptr0[RGB_RED] = range_limit[y + cred];
outptr0[RGB_GREEN] = range_limit[y + cgreen];
outptr0[RGB_BLUE] = range_limit[y + cblue];
outptr0 += RGB_PIXELSIZE;
y = GETJSAMPLE(*inptr01++);
outptr1[RGB_RED] = range_limit[y + cred];
outptr1[RGB_GREEN] = range_limit[y + cgreen];
outptr1[RGB_BLUE] = range_limit[y + cblue];
outptr1 += RGB_PIXELSIZE;
y = GETJSAMPLE(*inptr01++);
outptr1[RGB_RED] = range_limit[y + cred];
outptr1[RGB_GREEN] = range_limit[y + cgreen];
outptr1[RGB_BLUE] = range_limit[y + cblue];
outptr1 += RGB_PIXELSIZE;
} */
for (col = diff >> 1; col > 0; col--) {
/* Do the chroma part of the calculation */
cb = GETJSAMPLE(*inptr1++);
cr = GETJSAMPLE(*inptr2++);
cred = Crrtab[cr];
cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
cblue = Cbbtab[cb];
/* Fetch 4 Y values and emit 4 pixels */
y = GETJSAMPLE(*inptr00++);
outptr0[RGB_RED] = range_limit[y + cred];
outptr0[RGB_GREEN] = range_limit[y + cgreen];
outptr0[RGB_BLUE] = range_limit[y + cblue];
outptr0 += RGB_PIXELSIZE;
y = GETJSAMPLE(*inptr00++);
outptr0[RGB_RED] = range_limit[y + cred];
outptr0[RGB_GREEN] = range_limit[y + cgreen];
outptr0[RGB_BLUE] = range_limit[y + cblue];
outptr0 += RGB_PIXELSIZE;
y = GETJSAMPLE(*inptr01++);
outptr1[RGB_RED] = range_limit[y + cred];
outptr1[RGB_GREEN] = range_limit[y + cgreen];
outptr1[RGB_BLUE] = range_limit[y + cblue];
outptr1 += RGB_PIXELSIZE;
y = GETJSAMPLE(*inptr01++);
outptr1[RGB_RED] = range_limit[y + cred];
outptr1[RGB_GREEN] = range_limit[y + cgreen];
outptr1[RGB_BLUE] = range_limit[y + cblue];
outptr1 += RGB_PIXELSIZE;
}
/* If image width is odd, do the last output column separately */
//if (cinfo->output_width & 1) {
if (diff & 1) {
cb = GETJSAMPLE(*inptr1);
cr = GETJSAMPLE(*inptr2);
cred = Crrtab[cr];
cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
cblue = Cbbtab[cb];
y = GETJSAMPLE(*inptr00);
outptr0[RGB_RED] = range_limit[y + cred];
outptr0[RGB_GREEN] = range_limit[y + cgreen];
outptr0[RGB_BLUE] = range_limit[y + cblue];
y = GETJSAMPLE(*inptr01);
outptr1[RGB_RED] = range_limit[y + cred];
outptr1[RGB_GREEN] = range_limit[y + cgreen];
outptr1[RGB_BLUE] = range_limit[y + cblue];
}
}
#else
METHODDEF(void)
h2v2_merged_upsample (j_decompress_ptr cinfo,
JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr,
@ -942,24 +322,24 @@ h2v2_merged_upsample (j_decompress_ptr cinfo,
cblue = Cbbtab[cb];
/* Fetch 4 Y values and emit 4 pixels */
y = GETJSAMPLE(*inptr00++);
outptr0[RGB_RED] = range_limit[y + cred];
outptr0[RGB_GREEN] = range_limit[y + cgreen];
outptr0[RGB_BLUE] = range_limit[y + cblue];
outptr0[rgb_red[cinfo->out_color_space]] = range_limit[y + cred];
outptr0[rgb_green[cinfo->out_color_space]] = range_limit[y + cgreen];
outptr0[rgb_blue[cinfo->out_color_space]] = range_limit[y + cblue];
outptr0 += RGB_PIXELSIZE;
y = GETJSAMPLE(*inptr00++);
outptr0[RGB_RED] = range_limit[y + cred];
outptr0[RGB_GREEN] = range_limit[y + cgreen];
outptr0[RGB_BLUE] = range_limit[y + cblue];
outptr0[rgb_red[cinfo->out_color_space]] = range_limit[y + cred];
outptr0[rgb_green[cinfo->out_color_space]] = range_limit[y + cgreen];
outptr0[rgb_blue[cinfo->out_color_space]] = range_limit[y + cblue];
outptr0 += RGB_PIXELSIZE;
y = GETJSAMPLE(*inptr01++);
outptr1[RGB_RED] = range_limit[y + cred];
outptr1[RGB_GREEN] = range_limit[y + cgreen];
outptr1[RGB_BLUE] = range_limit[y + cblue];
outptr1[rgb_red[cinfo->out_color_space]] = range_limit[y + cred];
outptr1[rgb_green[cinfo->out_color_space]] = range_limit[y + cgreen];
outptr1[rgb_blue[cinfo->out_color_space]] = range_limit[y + cblue];
outptr1 += RGB_PIXELSIZE;
y = GETJSAMPLE(*inptr01++);
outptr1[RGB_RED] = range_limit[y + cred];
outptr1[RGB_GREEN] = range_limit[y + cgreen];
outptr1[RGB_BLUE] = range_limit[y + cblue];
outptr1[rgb_red[cinfo->out_color_space]] = range_limit[y + cred];
outptr1[rgb_green[cinfo->out_color_space]] = range_limit[y + cgreen];
outptr1[rgb_blue[cinfo->out_color_space]] = range_limit[y + cblue];
outptr1 += RGB_PIXELSIZE;
}
/* If image width is odd, do the last output column separately */
@ -970,16 +350,15 @@ h2v2_merged_upsample (j_decompress_ptr cinfo,
cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
cblue = Cbbtab[cb];
y = GETJSAMPLE(*inptr00);
outptr0[RGB_RED] = range_limit[y + cred];
outptr0[RGB_GREEN] = range_limit[y + cgreen];
outptr0[RGB_BLUE] = range_limit[y + cblue];
outptr0[rgb_red[cinfo->out_color_space]] = range_limit[y + cred];
outptr0[rgb_green[cinfo->out_color_space]] = range_limit[y + cgreen];
outptr0[rgb_blue[cinfo->out_color_space]] = range_limit[y + cblue];
y = GETJSAMPLE(*inptr01);
outptr1[RGB_RED] = range_limit[y + cred];
outptr1[RGB_GREEN] = range_limit[y + cgreen];
outptr1[RGB_BLUE] = range_limit[y + cblue];
outptr1[rgb_red[cinfo->out_color_space]] = range_limit[y + cred];
outptr1[rgb_green[cinfo->out_color_space]] = range_limit[y + cgreen];
outptr1[rgb_blue[cinfo->out_color_space]] = range_limit[y + cblue];
}
}
#endif
/*
@ -1006,14 +385,20 @@ jinit_merged_upsampler (j_decompress_ptr cinfo)
if (cinfo->max_v_samp_factor == 2) {
upsample->pub.upsample = merged_2v_upsample;
upsample->upmethod = h2v2_merged_upsample;
if (jsimd_can_h2v2_merged_upsample())
upsample->upmethod = jsimd_h2v2_merged_upsample;
else
upsample->upmethod = h2v2_merged_upsample;
/* Allocate a spare row buffer */
upsample->spare_row = (JSAMPROW)
(*cinfo->mem->alloc_large) ((j_common_ptr) cinfo, JPOOL_IMAGE,
(size_t) (upsample->out_row_width * SIZEOF(JSAMPLE)));
} else {
upsample->pub.upsample = merged_1v_upsample;
upsample->upmethod = h2v1_merged_upsample;
if (jsimd_can_h2v1_merged_upsample())
upsample->upmethod = jsimd_h2v1_merged_upsample;
else
upsample->upmethod = h2v1_merged_upsample;
/* No spare row needed */
upsample->spare_row = NULL;
}

Просмотреть файл

@ -2,6 +2,8 @@
* jdsample.c
*
* Copyright (C) 1991-1996, Thomas G. Lane.
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
* Copyright (C) 2010, D. R. Commander.
* This file is part of the Independent JPEG Group's software.
* For conditions of distribution and use, see the accompanying README file.
*
@ -21,6 +23,8 @@
#define JPEG_INTERNALS
#include "jinclude.h"
#include "jpeglib.h"
#include "jsimd.h"
#include "jpegcomp.h"
/* Pointer to routine to upsample a single component */
@ -418,7 +422,7 @@ jinit_upsampler (j_decompress_ptr cinfo)
/* jdmainct.c doesn't support context rows when min_DCT_scaled_size = 1,
* so don't ask for it.
*/
do_fancy = cinfo->do_fancy_upsampling && cinfo->min_DCT_scaled_size > 1;
do_fancy = cinfo->do_fancy_upsampling && cinfo->_min_DCT_scaled_size > 1;
/* Verify we can handle the sampling factors, select per-component methods,
* and create storage as needed.
@ -428,10 +432,10 @@ jinit_upsampler (j_decompress_ptr cinfo)
/* Compute size of an "input group" after IDCT scaling. This many samples
* are to be converted to max_h_samp_factor * max_v_samp_factor pixels.
*/
h_in_group = (compptr->h_samp_factor * compptr->DCT_scaled_size) /
cinfo->min_DCT_scaled_size;
v_in_group = (compptr->v_samp_factor * compptr->DCT_scaled_size) /
cinfo->min_DCT_scaled_size;
h_in_group = (compptr->h_samp_factor * compptr->_DCT_scaled_size) /
cinfo->_min_DCT_scaled_size;
v_in_group = (compptr->v_samp_factor * compptr->_DCT_scaled_size) /
cinfo->_min_DCT_scaled_size;
h_out_group = cinfo->max_h_samp_factor;
v_out_group = cinfo->max_v_samp_factor;
upsample->rowgroup_height[ci] = v_in_group; /* save for use later */
@ -447,18 +451,32 @@ jinit_upsampler (j_decompress_ptr cinfo)
} else if (h_in_group * 2 == h_out_group &&
v_in_group == v_out_group) {
/* Special cases for 2h1v upsampling */
if (do_fancy && compptr->downsampled_width > 2)
upsample->methods[ci] = h2v1_fancy_upsample;
else
upsample->methods[ci] = h2v1_upsample;
if (do_fancy && compptr->downsampled_width > 2) {
if (jsimd_can_h2v1_fancy_upsample())
upsample->methods[ci] = jsimd_h2v1_fancy_upsample;
else
upsample->methods[ci] = h2v1_fancy_upsample;
} else {
if (jsimd_can_h2v1_upsample())
upsample->methods[ci] = jsimd_h2v1_upsample;
else
upsample->methods[ci] = h2v1_upsample;
}
} else if (h_in_group * 2 == h_out_group &&
v_in_group * 2 == v_out_group) {
/* Special cases for 2h2v upsampling */
if (do_fancy && compptr->downsampled_width > 2) {
upsample->methods[ci] = h2v2_fancy_upsample;
if (jsimd_can_h2v2_fancy_upsample())
upsample->methods[ci] = jsimd_h2v2_fancy_upsample;
else
upsample->methods[ci] = h2v2_fancy_upsample;
upsample->pub.need_context_rows = TRUE;
} else
upsample->methods[ci] = h2v2_upsample;
} else {
if (jsimd_can_h2v2_upsample())
upsample->methods[ci] = jsimd_h2v2_upsample;
else
upsample->methods[ci] = h2v2_upsample;
}
} else if ((h_out_group % h_in_group) == 0 &&
(v_out_group % v_in_group) == 0) {
/* Generic integral-factors upsampling method */

147
jpeg/jdtrans.c Normal file
Просмотреть файл

@ -0,0 +1,147 @@
/*
* jdtrans.c
*
* Copyright (C) 1995-1997, Thomas G. Lane.
* This file is part of the Independent JPEG Group's software.
* For conditions of distribution and use, see the accompanying README file.
*
* This file contains library routines for transcoding decompression,
* that is, reading raw DCT coefficient arrays from an input JPEG file.
* The routines in jdapimin.c will also be needed by a transcoder.
*/
#define JPEG_INTERNALS
#include "jinclude.h"
#include "jpeglib.h"
/* Forward declarations */
LOCAL(void) transdecode_master_selection JPP((j_decompress_ptr cinfo));
/*
* Read the coefficient arrays from a JPEG file.
* jpeg_read_header must be completed before calling this.
*
* The entire image is read into a set of virtual coefficient-block arrays,
* one per component. The return value is a pointer to the array of
* virtual-array descriptors. These can be manipulated directly via the
* JPEG memory manager, or handed off to jpeg_write_coefficients().
* To release the memory occupied by the virtual arrays, call
* jpeg_finish_decompress() when done with the data.
*
* An alternative usage is to simply obtain access to the coefficient arrays
* during a buffered-image-mode decompression operation. This is allowed
* after any jpeg_finish_output() call. The arrays can be accessed until
* jpeg_finish_decompress() is called. (Note that any call to the library
* may reposition the arrays, so don't rely on access_virt_barray() results
* to stay valid across library calls.)
*
* Returns NULL if suspended. This case need be checked only if
* a suspending data source is used.
*/
GLOBAL(jvirt_barray_ptr *)
jpeg_read_coefficients (j_decompress_ptr cinfo)
{
if (cinfo->global_state == DSTATE_READY) {
/* First call: initialize active modules */
transdecode_master_selection(cinfo);
cinfo->global_state = DSTATE_RDCOEFS;
}
if (cinfo->global_state == DSTATE_RDCOEFS) {
/* Absorb whole file into the coef buffer */
for (;;) {
int retcode;
/* Call progress monitor hook if present */
if (cinfo->progress != NULL)
(*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
/* Absorb some more input */
retcode = (*cinfo->inputctl->consume_input) (cinfo);
if (retcode == JPEG_SUSPENDED)
return NULL;
if (retcode == JPEG_REACHED_EOI)
break;
/* Advance progress counter if appropriate */
if (cinfo->progress != NULL &&
(retcode == JPEG_ROW_COMPLETED || retcode == JPEG_REACHED_SOS)) {
if (++cinfo->progress->pass_counter >= cinfo->progress->pass_limit) {
/* startup underestimated number of scans; ratchet up one scan */
cinfo->progress->pass_limit += (long) cinfo->total_iMCU_rows;
}
}
}
/* Set state so that jpeg_finish_decompress does the right thing */
cinfo->global_state = DSTATE_STOPPING;
}
/* At this point we should be in state DSTATE_STOPPING if being used
* standalone, or in state DSTATE_BUFIMAGE if being invoked to get access
* to the coefficients during a full buffered-image-mode decompression.
*/
if ((cinfo->global_state == DSTATE_STOPPING ||
cinfo->global_state == DSTATE_BUFIMAGE) && cinfo->buffered_image) {
return cinfo->coef->coef_arrays;
}
/* Oops, improper usage */
ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
return NULL; /* keep compiler happy */
}
/*
* Master selection of decompression modules for transcoding.
* This substitutes for jdmaster.c's initialization of the full decompressor.
*/
LOCAL(void)
transdecode_master_selection (j_decompress_ptr cinfo)
{
/* This is effectively a buffered-image operation. */
cinfo->buffered_image = TRUE;
/* Entropy decoding: either Huffman or arithmetic coding. */
if (cinfo->arith_code) {
#ifdef D_ARITH_CODING_SUPPORTED
jinit_arith_decoder(cinfo);
#else
ERREXIT(cinfo, JERR_ARITH_NOTIMPL);
#endif
} else {
if (cinfo->progressive_mode) {
#ifdef D_PROGRESSIVE_SUPPORTED
jinit_phuff_decoder(cinfo);
#else
ERREXIT(cinfo, JERR_NOT_COMPILED);
#endif
} else
jinit_huff_decoder(cinfo);
}
/* Always get a full-image coefficient buffer. */
jinit_d_coef_controller(cinfo, TRUE);
/* We can now tell the memory manager to allocate virtual arrays. */
(*cinfo->mem->realize_virt_arrays) ((j_common_ptr) cinfo);
/* Initialize input side of decompressor to consume first scan. */
(*cinfo->inputctl->start_input_pass) (cinfo);
/* Initialize progress monitoring. */
if (cinfo->progress != NULL) {
int nscans;
/* Estimate number of scans to set pass_limit. */
if (cinfo->progressive_mode) {
/* Arbitrarily estimate 2 interleaved DC scans + 3 AC scans/component. */
nscans = 2 + 3 * cinfo->num_components;
} else if (cinfo->inputctl->has_multiple_scans) {
/* For a nonprogressive multiscan file, estimate 1 scan per component. */
nscans = cinfo->num_components;
} else {
nscans = 1;
}
cinfo->progress->pass_counter = 0L;
cinfo->progress->pass_limit = (long) cinfo->total_iMCU_rows * nscans;
cinfo->progress->completed_passes = 0;
cinfo->progress->total_passes = 1;
}
}

Просмотреть файл

@ -18,13 +18,6 @@
* These routines are used by both the compression and decompression code.
*/
/*
* This file has been modified for the Mozilla/Netscape environment.
* Modifications are distributed under the mozilla.org tri-license and are
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
* Reserved. See http://www.mozilla.org/MPL/
*/
/* this is not a core library module, so it doesn't define JPEG_INTERNALS */
#include "jinclude.h"
#include "jpeglib.h"
@ -82,15 +75,7 @@ error_exit (j_common_ptr cinfo)
/* Let the memory manager delete any temp files before we die */
jpeg_destroy(cinfo);
/* Mozilla mod: in some Windows environments, the exit() function doesn't
* even exist, so don't compile a reference to it. Heaven help you if
* you fail to provide a replacement error_exit function, because the
* IJG library will NOT handle control returning from error_exit!
*/
#ifndef XP_WIN
exit(EXIT_FAILURE);
#endif
}
@ -116,6 +101,15 @@ output_message (j_common_ptr cinfo)
/* Create the message */
(*cinfo->err->format_message) (cinfo, buffer);
#ifdef USE_WINDOWS_MESSAGEBOX
/* Display it in a message dialog box */
MessageBox(GetActiveWindow(), buffer, "JPEG Library Error",
MB_OK | MB_ICONERROR);
#else
/* Send it to stderr, adding a newline */
fprintf(stderr, "%s\n", buffer);
#endif
}

Просмотреть файл

@ -2,6 +2,7 @@
* jerror.h
*
* Copyright (C) 1994-1997, Thomas G. Lane.
* Modified 1997-2009 by Guido Vollbeding.
* This file is part of the Independent JPEG Group's software.
* For conditions of distribution and use, see the accompanying README file.
*
@ -40,11 +41,12 @@ JMESSAGE(JMSG_NOMESSAGE, "Bogus message code %d") /* Must be first entry! */
/* For maintenance convenience, list is alphabetical by message code name */
JMESSAGE(JERR_ARITH_NOTIMPL,
"Sorry, there are legal restrictions on arithmetic coding")
"Sorry, arithmetic coding is not implemented")
JMESSAGE(JERR_BAD_ALIGN_TYPE, "ALIGN_TYPE is wrong, please fix")
JMESSAGE(JERR_BAD_ALLOC_CHUNK, "MAX_ALLOC_CHUNK is wrong, please fix")
JMESSAGE(JERR_BAD_BUFFER_MODE, "Bogus buffer control mode")
JMESSAGE(JERR_BAD_COMPONENT_ID, "Invalid component ID %d in SOS")
JMESSAGE(JERR_BAD_CROP_SPEC, "Invalid crop request")
JMESSAGE(JERR_BAD_DCT_COEF, "DCT coefficient out of range")
JMESSAGE(JERR_BAD_DCTSIZE, "IDCT output block size %d not supported")
JMESSAGE(JERR_BAD_HUFF_TABLE, "Bogus Huffman table definition")
@ -93,6 +95,7 @@ JMESSAGE(JERR_MISSING_DATA, "Scan script does not transmit all data")
JMESSAGE(JERR_MODE_CHANGE, "Invalid color quantization mode change")
JMESSAGE(JERR_NOTIMPL, "Not implemented yet")
JMESSAGE(JERR_NOT_COMPILED, "Requested feature was omitted at compile time")
JMESSAGE(JERR_NO_ARITH_TABLE, "Arithmetic table 0x%02x was not defined")
JMESSAGE(JERR_NO_BACKING_STORE, "Backing store not supported")
JMESSAGE(JERR_NO_HUFF_TABLE, "Huffman table 0x%02x was not defined")
JMESSAGE(JERR_NO_IMAGE, "JPEG datastream contains no image")
@ -170,6 +173,7 @@ JMESSAGE(JTRC_UNKNOWN_IDS,
JMESSAGE(JTRC_XMS_CLOSE, "Freed XMS handle %u")
JMESSAGE(JTRC_XMS_OPEN, "Obtained XMS handle %u")
JMESSAGE(JWRN_ADOBE_XFORM, "Unknown Adobe color transform code %d")
JMESSAGE(JWRN_ARITH_BAD_CODE, "Corrupt JPEG data: bad arithmetic code")
JMESSAGE(JWRN_BOGUS_PROGRESSION,
"Inconsistent progression sequence for component %d coefficient %d")
JMESSAGE(JWRN_EXTRANEOUS_DATA,

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -386,578 +386,4 @@ jpeg_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
}
}
#ifdef HAVE_SSE2_INTEL_MNEMONICS
/*
* Intel SSE2 optimized Inverse Discrete Cosine Transform
*
*
* Copyright (c) 2001-2002 Intel Corporation
* All Rights Reserved
*
*
* Authors:
* Danilov G.
*
*
*-----------------------------------------------------------------------------
*
* References:
* K.R. Rao and P. Yip
* Discrete Cosine Transform.
* Algorithms, Advantages, Applications.
* Academic Press, Inc, London, 1990.
* JPEG Group's software.
* This implementation is based on Appendix A.2 of the book (R&Y) ...
*
*-----------------------------------------------------------------------------
*/
typedef unsigned char Ipp8u;
typedef unsigned short Ipp16u;
typedef unsigned int Ipp32u;
typedef signed char Ipp8s;
typedef signed short Ipp16s;
typedef signed int Ipp32s;
#define BITS_INV_ACC 4
#define SHIFT_INV_ROW 16 - BITS_INV_ACC
#define SHIFT_INV_COL 1 + BITS_INV_ACC
#define RND_INV_ROW 1024 * (6 - BITS_INV_ACC) /* 1 << (SHIFT_INV_ROW-1) */
#define RND_INV_COL = 16 * (BITS_INV_ACC - 3) /* 1 << (SHIFT_INV_COL-1) */
#define RND_INV_CORR = RND_INV_COL - 1 /* correction -1.0 and round */
#define c_inv_corr_0 -1024 * (6 - BITS_INV_ACC) + 65536 /* -0.5 + (16.0 or 32.0) */
#define c_inv_corr_1 1877 * (6 - BITS_INV_ACC) /* 0.9167 */
#define c_inv_corr_2 1236 * (6 - BITS_INV_ACC) /* 0.6035 */
#define c_inv_corr_3 680 * (6 - BITS_INV_ACC) /* 0.3322 */
#define c_inv_corr_4 0 * (6 - BITS_INV_ACC) /* 0.0 */
#define c_inv_corr_5 -569 * (6 - BITS_INV_ACC) /* -0.278 */
#define c_inv_corr_6 -512 * (6 - BITS_INV_ACC) /* -0.25 */
#define c_inv_corr_7 -651 * (6 - BITS_INV_ACC) /* -0.3176 */
#define RND_INV_ROW_0 RND_INV_ROW + c_inv_corr_0
#define RND_INV_ROW_1 RND_INV_ROW + c_inv_corr_1
#define RND_INV_ROW_2 RND_INV_ROW + c_inv_corr_2
#define RND_INV_ROW_3 RND_INV_ROW + c_inv_corr_3
#define RND_INV_ROW_4 RND_INV_ROW + c_inv_corr_4
#define RND_INV_ROW_5 RND_INV_ROW + c_inv_corr_5
#define RND_INV_ROW_6 RND_INV_ROW + c_inv_corr_6
#define RND_INV_ROW_7 RND_INV_ROW + c_inv_corr_7
/* Table for rows 0,4 - constants are multiplied on cos_4_16 */
__declspec(align(16)) short tab_i_04[] = {
16384, 21407, 16384, 8867,
-16384, 21407, 16384, -8867,
16384, -8867, 16384, -21407,
16384, 8867, -16384, -21407,
22725, 19266, 19266, -4520,
4520, 19266, 19266, -22725,
12873, -22725, 4520, -12873,
12873, 4520, -22725, -12873};
/* Table for rows 1,7 - constants are multiplied on cos_1_16 */
__declspec(align(16)) short tab_i_17[] = {
22725, 29692, 22725, 12299,
-22725, 29692, 22725, -12299,
22725, -12299, 22725, -29692,
22725, 12299, -22725, -29692,
31521, 26722, 26722, -6270,
6270, 26722, 26722, -31521,
17855, -31521, 6270, -17855,
17855, 6270, -31521, -17855};
/* Table for rows 2,6 - constants are multiplied on cos_2_16 */
__declspec(align(16)) short tab_i_26[] = {
21407, 27969, 21407, 11585,
-21407, 27969, 21407, -11585,
21407, -11585, 21407, -27969,
21407, 11585, -21407, -27969,
29692, 25172, 25172, -5906,
5906, 25172, 25172, -29692,
16819, -29692, 5906, -16819,
16819, 5906, -29692, -16819};
/* Table for rows 3,5 - constants are multiplied on cos_3_16 */
__declspec(align(16)) short tab_i_35[] = {
19266, 25172, 19266, 10426,
-19266, 25172, 19266, -10426,
19266, -10426, 19266, -25172,
19266, 10426, -19266, -25172,
26722, 22654, 22654, -5315,
5315, 22654, 22654, -26722,
15137, -26722, 5315, -15137,
15137, 5315, -26722, -15137};
__declspec(align(16)) long round_i_0[] = {RND_INV_ROW_0,RND_INV_ROW_0,
RND_INV_ROW_0,RND_INV_ROW_0};
__declspec(align(16)) long round_i_1[] = {RND_INV_ROW_1,RND_INV_ROW_1,
RND_INV_ROW_1,RND_INV_ROW_1};
__declspec(align(16)) long round_i_2[] = {RND_INV_ROW_2,RND_INV_ROW_2,
RND_INV_ROW_2,RND_INV_ROW_2};
__declspec(align(16)) long round_i_3[] = {RND_INV_ROW_3,RND_INV_ROW_3,
RND_INV_ROW_3,RND_INV_ROW_3};
__declspec(align(16)) long round_i_4[] = {RND_INV_ROW_4,RND_INV_ROW_4,
RND_INV_ROW_4,RND_INV_ROW_4};
__declspec(align(16)) long round_i_5[] = {RND_INV_ROW_5,RND_INV_ROW_5,
RND_INV_ROW_5,RND_INV_ROW_5};
__declspec(align(16)) long round_i_6[] = {RND_INV_ROW_6,RND_INV_ROW_6,
RND_INV_ROW_6,RND_INV_ROW_6};
__declspec(align(16)) long round_i_7[] = {RND_INV_ROW_7,RND_INV_ROW_7,
RND_INV_ROW_7,RND_INV_ROW_7};
__declspec(align(16)) short tg_1_16[] = {
13036, 13036, 13036, 13036, /* tg * (2<<16) + 0.5 */
13036, 13036, 13036, 13036};
__declspec(align(16)) short tg_2_16[] = {
27146, 27146, 27146, 27146, /* tg * (2<<16) + 0.5 */
27146, 27146, 27146, 27146};
__declspec(align(16)) short tg_3_16[] = {
-21746, -21746, -21746, -21746, /* tg * (2<<16) + 0.5 */
-21746, -21746, -21746, -21746};
__declspec(align(16)) short cos_4_16[] = {
-19195, -19195, -19195, -19195, /* cos * (2<<16) + 0.5 */
-19195, -19195, -19195, -19195};
/*
* In this implementation the outputs of the iDCT-1D are multiplied
* for rows 0,4 - on cos_4_16,
* for rows 1,7 - on cos_1_16,
* for rows 2,6 - on cos_2_16,
* for rows 3,5 - on cos_3_16
* and are shifted to the left for rise of accuracy
*
* For used constants
* FIX(float_const) = (short) (float_const * (1<<15) + 0.5)
*
*-----------------------------------------------------------------------------
*
* On the first stage the calculation is executed at once for two rows.
* The permutation for each output row is done on second stage
* t7 t6 t5 t4 t3 t2 t1 t0 -> t4 t5 t6 t7 t3 t2 t1 t0
*
*-----------------------------------------------------------------------------
*/
#define DCT_8_INV_ROW_2R(TABLE, ROUND1, ROUND2) __asm { \
__asm pshuflw xmm1, xmm0, 10001000b \
__asm pshuflw xmm0, xmm0, 11011101b \
__asm pshufhw xmm1, xmm1, 10001000b \
__asm pshufhw xmm0, xmm0, 11011101b \
__asm movdqa xmm2, XMMWORD PTR [TABLE] \
__asm pmaddwd xmm2, xmm1 \
__asm movdqa xmm3, XMMWORD PTR [TABLE + 32] \
__asm pmaddwd xmm3, xmm0 \
__asm pmaddwd xmm1, XMMWORD PTR [TABLE + 16] \
__asm pmaddwd xmm0, XMMWORD PTR [TABLE + 48] \
__asm pshuflw xmm5, xmm4, 10001000b \
__asm pshuflw xmm4, xmm4, 11011101b \
__asm pshufhw xmm5, xmm5, 10001000b \
__asm pshufhw xmm4, xmm4, 11011101b \
__asm movdqa xmm6, XMMWORD PTR [TABLE] \
__asm pmaddwd xmm6, xmm5 \
__asm movdqa xmm7, XMMWORD PTR [TABLE + 32] \
__asm pmaddwd xmm7, xmm4 \
__asm pmaddwd xmm5, XMMWORD PTR [TABLE + 16] \
__asm pmaddwd xmm4, XMMWORD PTR [TABLE + 48] \
__asm pshufd xmm1, xmm1, 01001110b \
__asm pshufd xmm0, xmm0, 01001110b \
__asm paddd xmm2, XMMWORD PTR [ROUND1] \
__asm paddd xmm3, xmm0 \
__asm paddd xmm1, xmm2 \
__asm pshufd xmm5, xmm5, 01001110b \
__asm pshufd xmm4, xmm4, 01001110b \
__asm movdqa xmm2, xmm1 \
__asm psubd xmm2, xmm3 \
__asm psrad xmm2, SHIFT_INV_ROW \
__asm paddd xmm1, xmm3 \
__asm psrad xmm1, SHIFT_INV_ROW \
__asm packssdw xmm1, xmm2 \
__asm paddd xmm6, XMMWORD PTR [ROUND2] \
__asm paddd xmm7, xmm4 \
__asm paddd xmm5, xmm6 \
__asm movdqa xmm6, xmm5 \
__asm psubd xmm6, xmm7 \
__asm psrad xmm6, SHIFT_INV_ROW \
__asm paddd xmm5, xmm7 \
__asm psrad xmm5, SHIFT_INV_ROW \
__asm packssdw xmm5, xmm6 \
}
/*
*
* The second stage - inverse DCTs of columns
*
* The inputs are multiplied
* for rows 0,4 - on cos_4_16,
* for rows 1,7 - on cos_1_16,
* for rows 2,6 - on cos_2_16,
* for rows 3,5 - on cos_3_16
* and are shifted to the left for rise of accuracy
*/
#define DCT_8_INV_COL_8R(INP, OUTP) __asm { \
__asm movdqa xmm0, [INP + 5*16] \
__asm movdqa xmm1, XMMWORD PTR tg_3_16 \
__asm movdqa xmm2, xmm0 \
__asm movdqa xmm3, [INP + 3*16] \
__asm pmulhw xmm0, xmm1 \
__asm movdqa xmm4, [INP + 7*16] \
__asm pmulhw xmm1, xmm3 \
__asm movdqa xmm5, XMMWORD PTR tg_1_16 \
__asm movdqa xmm6, xmm4 \
__asm pmulhw xmm4, xmm5 \
__asm paddsw xmm0, xmm2 \
__asm pmulhw xmm5, [INP + 1*16] \
__asm paddsw xmm1, xmm3 \
__asm movdqa xmm7, [INP + 6*16] \
__asm paddsw xmm0, xmm3 \
__asm movdqa xmm3, XMMWORD PTR tg_2_16 \
__asm psubsw xmm2, xmm1 \
__asm pmulhw xmm7, xmm3 \
__asm movdqa xmm1, xmm0 \
__asm pmulhw xmm3, [INP + 2*16] \
__asm psubsw xmm5, xmm6 \
__asm paddsw xmm4, [INP + 1*16] \
__asm paddsw xmm0, xmm4 \
__asm psubsw xmm4, xmm1 \
__asm pshufhw xmm0, xmm0, 00011011b \
__asm paddsw xmm7, [INP + 2*16] \
__asm movdqa xmm6, xmm5 \
__asm psubsw xmm3, [INP + 6*16] \
__asm psubsw xmm5, xmm2 \
__asm paddsw xmm6, xmm2 \
__asm movdqa [OUTP + 7*16], xmm0 \
__asm movdqa xmm1, xmm4 \
__asm movdqa xmm2, XMMWORD PTR cos_4_16 \
__asm paddsw xmm4, xmm5 \
__asm movdqa xmm0, XMMWORD PTR cos_4_16 \
__asm pmulhw xmm2, xmm4 \
__asm pshufhw xmm6, xmm6, 00011011b \
__asm movdqa [OUTP + 3*16], xmm6 \
__asm psubsw xmm1, xmm5 \
__asm movdqa xmm6, [INP + 0*16] \
__asm pmulhw xmm0, xmm1 \
__asm movdqa xmm5, [INP + 4*16] \
__asm paddsw xmm4, xmm2 \
__asm paddsw xmm5, xmm6 \
__asm psubsw xmm6, [INP + 4*16] \
__asm paddsw xmm0, xmm1 \
__asm pshufhw xmm4, xmm4, 00011011b \
__asm movdqa xmm2, xmm5 \
__asm paddsw xmm5, xmm7 \
__asm movdqa xmm1, xmm6 \
__asm psubsw xmm2, xmm7 \
__asm movdqa xmm7, [OUTP + 7*16] \
__asm paddsw xmm6, xmm3 \
__asm pshufhw xmm5, xmm5, 00011011b \
__asm paddsw xmm7, xmm5 \
__asm psubsw xmm1, xmm3 \
__asm pshufhw xmm6, xmm6, 00011011b \
__asm movdqa xmm3, xmm6 \
__asm paddsw xmm6, xmm4 \
__asm pshufhw xmm2, xmm2, 00011011b \
__asm psraw xmm7, SHIFT_INV_COL \
__asm movdqa [OUTP + 0*16], xmm7 \
__asm movdqa xmm7, xmm1 \
__asm paddsw xmm1, xmm0 \
__asm psraw xmm6, SHIFT_INV_COL \
__asm movdqa [OUTP + 1*16], xmm6 \
__asm pshufhw xmm1, xmm1, 00011011b \
__asm movdqa xmm6, [OUTP + 3*16] \
__asm psubsw xmm7, xmm0 \
__asm psraw xmm1, SHIFT_INV_COL \
__asm movdqa [OUTP + 2*16], xmm1 \
__asm psubsw xmm5, [OUTP + 7*16] \
__asm paddsw xmm6, xmm2 \
__asm psubsw xmm2, [OUTP + 3*16] \
__asm psubsw xmm3, xmm4 \
__asm psraw xmm7, SHIFT_INV_COL \
__asm pshufhw xmm7, xmm7, 00011011b \
__asm movdqa [OUTP + 5*16], xmm7 \
__asm psraw xmm5, SHIFT_INV_COL \
__asm movdqa [OUTP + 7*16], xmm5 \
__asm psraw xmm6, SHIFT_INV_COL \
__asm movdqa [OUTP + 3*16], xmm6 \
__asm psraw xmm2, SHIFT_INV_COL \
__asm movdqa [OUTP + 4*16], xmm2 \
__asm psraw xmm3, SHIFT_INV_COL \
__asm movdqa [OUTP + 6*16], xmm3 \
}
/*
*
* Name: dct_8x8_inv_16s
* Purpose: Inverse Discrete Cosine Transform 8x8 with
* 2D buffer of short int data
* Context:
* void dct_8x8_inv_16s ( short *src, short *dst )
* Parameters:
* src - Pointer to the source buffer
* dst - Pointer to the destination buffer
*
*/
GLOBAL(void)
dct_8x8_inv_16s ( short *src, short *dst ) {
__asm {
mov ecx, src
mov edx, dst
movdqa xmm0, [ecx+0*16]
movdqa xmm4, [ecx+4*16]
DCT_8_INV_ROW_2R(tab_i_04, round_i_0, round_i_4)
movdqa [edx+0*16], xmm1
movdqa [edx+4*16], xmm5
movdqa xmm0, [ecx+1*16]
movdqa xmm4, [ecx+7*16]
DCT_8_INV_ROW_2R(tab_i_17, round_i_1, round_i_7)
movdqa [edx+1*16], xmm1
movdqa [edx+7*16], xmm5
movdqa xmm0, [ecx+3*16]
movdqa xmm4, [ecx+5*16]
DCT_8_INV_ROW_2R(tab_i_35, round_i_3, round_i_5);
movdqa [edx+3*16], xmm1
movdqa [edx+5*16], xmm5
movdqa xmm0, [ecx+2*16]
movdqa xmm4, [ecx+6*16]
DCT_8_INV_ROW_2R(tab_i_26, round_i_2, round_i_6);
movdqa [edx+2*16], xmm1
movdqa [edx+6*16], xmm5
DCT_8_INV_COL_8R(edx+0, edx+0);
}
}
/*
* Name:
* ownpj_QuantInv_8x8_16s
*
* Purpose:
* Dequantize 8x8 block of DCT coefficients
*
* Context:
* void ownpj_QuantInv_8x8_16s
* Ipp16s* pSrc,
* Ipp16s* pDst,
* const Ipp16u* pQTbl)*
*
*/
GLOBAL(void)
ownpj_QuantInv_8x8_16s(short * pSrc, short * pDst, const unsigned short * pQTbl)
{
__asm {
push ebx
push ecx
push edx
push esi
push edi
mov esi, pSrc
mov edi, pDst
mov edx, pQTbl
mov ecx, 4
mov ebx, 32
again:
movq mm0, QWORD PTR [esi+0]
movq mm1, QWORD PTR [esi+8]
movq mm2, QWORD PTR [esi+16]
movq mm3, QWORD PTR [esi+24]
prefetcht0 [esi+ebx] ; fetch next cache line
pmullw mm0, QWORD PTR [edx+0]
pmullw mm1, QWORD PTR [edx+8]
pmullw mm2, QWORD PTR [edx+16]
pmullw mm3, QWORD PTR [edx+24]
movq QWORD PTR [edi+0], mm0
movq QWORD PTR [edi+8], mm1
movq QWORD PTR [edi+16], mm2
movq QWORD PTR [edi+24], mm3
add esi, ebx
add edi, ebx
add edx, ebx
dec ecx
jnz again
emms
pop edi
pop esi
pop edx
pop ecx
pop ebx
}
}
/*
* Name:
* ownpj_Add128_8x8_16s8u
*
* Purpose:
* signed to unsigned conversion (level shift)
* for 8x8 block of DCT coefficients
*
* Context:
* void ownpj_Add128_8x8_16s8u
* const Ipp16s* pSrc,
* Ipp8u* pDst,
* int DstStep);
*
*/
__declspec(align(16)) long const_128[]= {0x00800080, 0x00800080, 0x00800080, 0x00800080};
GLOBAL(void)
ownpj_Add128_8x8_16s8u(const short * pSrc, unsigned char * pDst, int DstStep)
{
__asm {
push eax
push ebx
push ecx
push edx
push esi
push edi
mov esi, pSrc
mov edi, pDst
mov edx, DstStep
mov ecx, 2
mov ebx, edx
mov eax, edx
sal ebx, 1
add eax, ebx
movdqa xmm7, XMMWORD PTR const_128
again:
movdqa xmm0, XMMWORD PTR [esi+0] ; line 0
movdqa xmm1, XMMWORD PTR [esi+16] ; line 1
movdqa xmm2, XMMWORD PTR [esi+32] ; line 2
movdqa xmm3, XMMWORD PTR [esi+48] ; line 3
paddw xmm0, xmm7
paddw xmm1, xmm7
paddw xmm2, xmm7
paddw xmm3, xmm7
packuswb xmm0, xmm1
packuswb xmm2, xmm3
movq QWORD PTR [edi], xmm0 ;0*DstStep
movq QWORD PTR [edi+ebx], xmm2 ;2*DstStep
psrldq xmm0, 8
psrldq xmm2, 8
movq QWORD PTR [edi+edx], xmm0 ;1*DstStep
movq QWORD PTR [edi+eax], xmm2 ;3*DstStep
add edi, ebx
add esi, 64
add edi, ebx
dec ecx
jnz again
pop edi
pop esi
pop edx
pop ecx
pop ebx
pop eax
}
}
/*
* Name:
* ippiDCTQuantInv8x8LS_JPEG_16s8u_C1R
*
* Purpose:
* Inverse DCT transform, de-quantization and level shift
*
* Parameters:
* pSrc - pointer to source
* pDst - pointer to output array
* DstStep - line offset for output data
* pEncoderQuantTable - pointer to Quantization table
*
*/
GLOBAL(void)
ippiDCTQuantInv8x8LS_JPEG_16s8u_C1R(
short * pSrc,
unsigned char * pDst,
int DstStep,
const unsigned short * pQuantInvTable)
{
__declspec(align(16)) Ipp8u buf[DCTSIZE2*sizeof(Ipp16s)];
Ipp16s * workbuf = (Ipp16s *)buf;
ownpj_QuantInv_8x8_16s(pSrc,workbuf,pQuantInvTable);
dct_8x8_inv_16s(workbuf,workbuf);
ownpj_Add128_8x8_16s8u(workbuf,pDst,DstStep);
}
GLOBAL(void)
jpeg_idct_islow_sse2 (
j_decompress_ptr cinfo,
jpeg_component_info * compptr,
JCOEFPTR coef_block,
JSAMPARRAY output_buf,
JDIMENSION output_col)
{
int ctr;
JCOEFPTR inptr;
Ipp16u* quantptr;
Ipp8u* wsptr;
__declspec(align(16)) Ipp8u workspace[DCTSIZE2];
JSAMPROW outptr;
inptr = coef_block;
quantptr = (Ipp16u*)compptr->dct_table;
wsptr = workspace;
ippiDCTQuantInv8x8LS_JPEG_16s8u_C1R(inptr, workspace, 8, quantptr);
for(ctr = 0; ctr < DCTSIZE; ctr++)
{
outptr = output_buf[ctr] + output_col;
outptr[0] = wsptr[0];
outptr[1] = wsptr[1];
outptr[2] = wsptr[2];
outptr[3] = wsptr[3];
outptr[4] = wsptr[4];
outptr[5] = wsptr[5];
outptr[6] = wsptr[6];
outptr[7] = wsptr[7];
wsptr += DCTSIZE;
}
}
#endif /* HAVE_SSE2_INTEL_MNEMONICS */
#endif /* DCT_ISLOW_SUPPORTED */

Просмотреть файл

@ -1,167 +0,0 @@
/*
* jmemansi.c
*
* Copyright (C) 1992-1996, Thomas G. Lane.
* This file is part of the Independent JPEG Group's software.
* For conditions of distribution and use, see the accompanying README file.
*
* This file provides a simple generic implementation of the system-
* dependent portion of the JPEG memory manager. This implementation
* assumes that you have the ANSI-standard library routine tmpfile().
* Also, the problem of determining the amount of memory available
* is shoved onto the user.
*/
#define JPEG_INTERNALS
#include "jinclude.h"
#include "jpeglib.h"
#include "jmemsys.h" /* import the system-dependent declarations */
#ifndef HAVE_STDLIB_H /* <stdlib.h> should declare malloc(),free() */
extern void * malloc JPP((size_t size));
extern void free JPP((void *ptr));
#endif
#ifndef SEEK_SET /* pre-ANSI systems may not define this; */
#define SEEK_SET 0 /* if not, assume 0 is correct */
#endif
/*
* Memory allocation and freeing are controlled by the regular library
* routines malloc() and free().
*/
GLOBAL(void *)
jpeg_get_small (j_common_ptr cinfo, size_t sizeofobject)
{
return (void *) malloc(sizeofobject);
}
GLOBAL(void)
jpeg_free_small (j_common_ptr cinfo, void * object, size_t sizeofobject)
{
free(object);
}
/*
* "Large" objects are treated the same as "small" ones.
* NB: although we include FAR keywords in the routine declarations,
* this file won't actually work in 80x86 small/medium model; at least,
* you probably won't be able to process useful-size images in only 64KB.
*/
GLOBAL(void FAR *)
jpeg_get_large (j_common_ptr cinfo, size_t sizeofobject)
{
return (void FAR *) malloc(sizeofobject);
}
GLOBAL(void)
jpeg_free_large (j_common_ptr cinfo, void FAR * object, size_t sizeofobject)
{
free(object);
}
/*
* This routine computes the total memory space available for allocation.
* It's impossible to do this in a portable way; our current solution is
* to make the user tell us (with a default value set at compile time).
* If you can actually get the available space, it's a good idea to subtract
* a slop factor of 5% or so.
*/
#ifndef DEFAULT_MAX_MEM /* so can override from makefile */
#define DEFAULT_MAX_MEM 1000000L /* default: one megabyte */
#endif
GLOBAL(long)
jpeg_mem_available (j_common_ptr cinfo, long min_bytes_needed,
long max_bytes_needed, long already_allocated)
{
return cinfo->mem->max_memory_to_use - already_allocated;
}
/*
* Backing store (temporary file) management.
* Backing store objects are only used when the value returned by
* jpeg_mem_available is less than the total space needed. You can dispense
* with these routines if you have plenty of virtual memory; see jmemnobs.c.
*/
METHODDEF(void)
read_backing_store (j_common_ptr cinfo, backing_store_ptr info,
void FAR * buffer_address,
long file_offset, long byte_count)
{
if (fseek(info->temp_file, file_offset, SEEK_SET))
ERREXIT(cinfo, JERR_TFILE_SEEK);
if (JFREAD(info->temp_file, buffer_address, byte_count)
!= (size_t) byte_count)
ERREXIT(cinfo, JERR_TFILE_READ);
}
METHODDEF(void)
write_backing_store (j_common_ptr cinfo, backing_store_ptr info,
void FAR * buffer_address,
long file_offset, long byte_count)
{
if (fseek(info->temp_file, file_offset, SEEK_SET))
ERREXIT(cinfo, JERR_TFILE_SEEK);
if (JFWRITE(info->temp_file, buffer_address, byte_count)
!= (size_t) byte_count)
ERREXIT(cinfo, JERR_TFILE_WRITE);
}
METHODDEF(void)
close_backing_store (j_common_ptr cinfo, backing_store_ptr info)
{
fclose(info->temp_file);
/* Since this implementation uses tmpfile() to create the file,
* no explicit file deletion is needed.
*/
}
/*
* Initial opening of a backing-store object.
*
* This version uses tmpfile(), which constructs a suitable file name
* behind the scenes. We don't have to use info->temp_name[] at all;
* indeed, we can't even find out the actual name of the temp file.
*/
GLOBAL(void)
jpeg_open_backing_store (j_common_ptr cinfo, backing_store_ptr info,
long total_bytes_needed)
{
if ((info->temp_file = tmpfile()) == NULL)
ERREXITS(cinfo, JERR_TFILE_CREATE, "");
info->read_backing_store = read_backing_store;
info->write_backing_store = write_backing_store;
info->close_backing_store = close_backing_store;
}
/*
* These routines take care of any system-dependent initialization and
* cleanup required.
*/
GLOBAL(long)
jpeg_mem_init (j_common_ptr cinfo)
{
return DEFAULT_MAX_MEM; /* default for max_memory_to_use */
}
GLOBAL(void)
jpeg_mem_term (j_common_ptr cinfo)
{
/* no work */
}

Просмотреть файл

@ -1,638 +0,0 @@
/*
* jmemdos.c
*
* Copyright (C) 1992-1997, Thomas G. Lane.
* This file is part of the Independent JPEG Group's software.
* For conditions of distribution and use, see the accompanying README file.
*
* This file provides an MS-DOS-compatible implementation of the system-
* dependent portion of the JPEG memory manager. Temporary data can be
* stored in extended or expanded memory as well as in regular DOS files.
*
* If you use this file, you must be sure that NEED_FAR_POINTERS is defined
* if you compile in a small-data memory model; it should NOT be defined if
* you use a large-data memory model. This file is not recommended if you
* are using a flat-memory-space 386 environment such as DJGCC or Watcom C.
* Also, this code will NOT work if struct fields are aligned on greater than
* 2-byte boundaries.
*
* Based on code contributed by Ge' Weijers.
*/
/*
* If you have both extended and expanded memory, you may want to change the
* order in which they are tried in jopen_backing_store. On a 286 machine
* expanded memory is usually faster, since extended memory access involves
* an expensive protected-mode-and-back switch. On 386 and better, extended
* memory is usually faster. As distributed, the code tries extended memory
* first (what? not everyone has a 386? :-).
*
* You can disable use of extended/expanded memory entirely by altering these
* definitions or overriding them from the Makefile (eg, -DEMS_SUPPORTED=0).
*/
#ifndef XMS_SUPPORTED
#define XMS_SUPPORTED 1
#endif
#ifndef EMS_SUPPORTED
#define EMS_SUPPORTED 1
#endif
#define JPEG_INTERNALS
#include "jinclude.h"
#include "jpeglib.h"
#include "jmemsys.h" /* import the system-dependent declarations */
#ifndef HAVE_STDLIB_H /* <stdlib.h> should declare these */
extern void * malloc JPP((size_t size));
extern void free JPP((void *ptr));
extern char * getenv JPP((const char * name));
#endif
#ifdef NEED_FAR_POINTERS
#ifdef __TURBOC__
/* These definitions work for Borland C (Turbo C) */
#include <alloc.h> /* need farmalloc(), farfree() */
#define far_malloc(x) farmalloc(x)
#define far_free(x) farfree(x)
#else
/* These definitions work for Microsoft C and compatible compilers */
#include <malloc.h> /* need _fmalloc(), _ffree() */
#define far_malloc(x) _fmalloc(x)
#define far_free(x) _ffree(x)
#endif
#else /* not NEED_FAR_POINTERS */
#define far_malloc(x) malloc(x)
#define far_free(x) free(x)
#endif /* NEED_FAR_POINTERS */
#ifdef DONT_USE_B_MODE /* define mode parameters for fopen() */
#define READ_BINARY "r"
#else
#define READ_BINARY "rb"
#endif
#ifndef USE_MSDOS_MEMMGR /* make sure user got configuration right */
You forgot to define USE_MSDOS_MEMMGR in jconfig.h. /* deliberate syntax error */
#endif
#if MAX_ALLOC_CHUNK >= 65535L /* make sure jconfig.h got this right */
MAX_ALLOC_CHUNK should be less than 64K. /* deliberate syntax error */
#endif
/*
* Declarations for assembly-language support routines (see jmemdosa.asm).
*
* The functions are declared "far" as are all their pointer arguments;
* this ensures the assembly source code will work regardless of the
* compiler memory model. We assume "short" is 16 bits, "long" is 32.
*/
typedef void far * XMSDRIVER; /* actually a pointer to code */
typedef struct { /* registers for calling XMS driver */
unsigned short ax, dx, bx;
void far * ds_si;
} XMScontext;
typedef struct { /* registers for calling EMS driver */
unsigned short ax, dx, bx;
void far * ds_si;
} EMScontext;
extern short far jdos_open JPP((short far * handle, char far * filename));
extern short far jdos_close JPP((short handle));
extern short far jdos_seek JPP((short handle, long offset));
extern short far jdos_read JPP((short handle, void far * buffer,
unsigned short count));
extern short far jdos_write JPP((short handle, void far * buffer,
unsigned short count));
extern void far jxms_getdriver JPP((XMSDRIVER far *));
extern void far jxms_calldriver JPP((XMSDRIVER, XMScontext far *));
extern short far jems_available JPP((void));
extern void far jems_calldriver JPP((EMScontext far *));
/*
* Selection of a file name for a temporary file.
* This is highly system-dependent, and you may want to customize it.
*/
static int next_file_num; /* to distinguish among several temp files */
LOCAL(void)
select_file_name (char * fname)
{
const char * env;
char * ptr;
FILE * tfile;
/* Keep generating file names till we find one that's not in use */
for (;;) {
/* Get temp directory name from environment TMP or TEMP variable;
* if none, use "."
*/
if ((env = (const char *) getenv("TMP")) == NULL)
if ((env = (const char *) getenv("TEMP")) == NULL)
env = ".";
if (*env == '\0') /* null string means "." */
env = ".";
ptr = fname; /* copy name to fname */
while (*env != '\0')
*ptr++ = *env++;
if (ptr[-1] != '\\' && ptr[-1] != '/')
*ptr++ = '\\'; /* append backslash if not in env variable */
/* Append a suitable file name */
next_file_num++; /* advance counter */
sprintf(ptr, "JPG%03d.TMP", next_file_num);
/* Probe to see if file name is already in use */
if ((tfile = fopen(fname, READ_BINARY)) == NULL)
break;
fclose(tfile); /* oops, it's there; close tfile & try again */
}
}
/*
* Near-memory allocation and freeing are controlled by the regular library
* routines malloc() and free().
*/
GLOBAL(void *)
jpeg_get_small (j_common_ptr cinfo, size_t sizeofobject)
{
return (void *) malloc(sizeofobject);
}
GLOBAL(void)
jpeg_free_small (j_common_ptr cinfo, void * object, size_t sizeofobject)
{
free(object);
}
/*
* "Large" objects are allocated in far memory, if possible
*/
GLOBAL(void FAR *)
jpeg_get_large (j_common_ptr cinfo, size_t sizeofobject)
{
return (void FAR *) far_malloc(sizeofobject);
}
GLOBAL(void)
jpeg_free_large (j_common_ptr cinfo, void FAR * object, size_t sizeofobject)
{
far_free(object);
}
/*
* This routine computes the total memory space available for allocation.
* It's impossible to do this in a portable way; our current solution is
* to make the user tell us (with a default value set at compile time).
* If you can actually get the available space, it's a good idea to subtract
* a slop factor of 5% or so.
*/
#ifndef DEFAULT_MAX_MEM /* so can override from makefile */
#define DEFAULT_MAX_MEM 300000L /* for total usage about 450K */
#endif
GLOBAL(long)
jpeg_mem_available (j_common_ptr cinfo, long min_bytes_needed,
long max_bytes_needed, long already_allocated)
{
return cinfo->mem->max_memory_to_use - already_allocated;
}
/*
* Backing store (temporary file) management.
* Backing store objects are only used when the value returned by
* jpeg_mem_available is less than the total space needed. You can dispense
* with these routines if you have plenty of virtual memory; see jmemnobs.c.
*/
/*
* For MS-DOS we support three types of backing storage:
* 1. Conventional DOS files. We access these by direct DOS calls rather
* than via the stdio package. This provides a bit better performance,
* but the real reason is that the buffers to be read or written are FAR.
* The stdio library for small-data memory models can't cope with that.
* 2. Extended memory, accessed per the XMS V2.0 specification.
* 3. Expanded memory, accessed per the LIM/EMS 4.0 specification.
* You'll need copies of those specs to make sense of the related code.
* The specs are available by Internet FTP from the SIMTEL archives
* (oak.oakland.edu and its various mirror sites). See files
* pub/msdos/microsoft/xms20.arc and pub/msdos/info/limems41.zip.
*/
/*
* Access methods for a DOS file.
*/
METHODDEF(void)
read_file_store (j_common_ptr cinfo, backing_store_ptr info,
void FAR * buffer_address,
long file_offset, long byte_count)
{
if (jdos_seek(info->handle.file_handle, file_offset))
ERREXIT(cinfo, JERR_TFILE_SEEK);
/* Since MAX_ALLOC_CHUNK is less than 64K, byte_count will be too. */
if (byte_count > 65535L) /* safety check */
ERREXIT(cinfo, JERR_BAD_ALLOC_CHUNK);
if (jdos_read(info->handle.file_handle, buffer_address,
(unsigned short) byte_count))
ERREXIT(cinfo, JERR_TFILE_READ);
}
METHODDEF(void)
write_file_store (j_common_ptr cinfo, backing_store_ptr info,
void FAR * buffer_address,
long file_offset, long byte_count)
{
if (jdos_seek(info->handle.file_handle, file_offset))
ERREXIT(cinfo, JERR_TFILE_SEEK);
/* Since MAX_ALLOC_CHUNK is less than 64K, byte_count will be too. */
if (byte_count > 65535L) /* safety check */
ERREXIT(cinfo, JERR_BAD_ALLOC_CHUNK);
if (jdos_write(info->handle.file_handle, buffer_address,
(unsigned short) byte_count))
ERREXIT(cinfo, JERR_TFILE_WRITE);
}
METHODDEF(void)
close_file_store (j_common_ptr cinfo, backing_store_ptr info)
{
jdos_close(info->handle.file_handle); /* close the file */
remove(info->temp_name); /* delete the file */
/* If your system doesn't have remove(), try unlink() instead.
* remove() is the ANSI-standard name for this function, but
* unlink() was more common in pre-ANSI systems.
*/
TRACEMSS(cinfo, 1, JTRC_TFILE_CLOSE, info->temp_name);
}
LOCAL(boolean)
open_file_store (j_common_ptr cinfo, backing_store_ptr info,
long total_bytes_needed)
{
short handle;
select_file_name(info->temp_name);
if (jdos_open((short far *) & handle, (char far *) info->temp_name)) {
/* might as well exit since jpeg_open_backing_store will fail anyway */
ERREXITS(cinfo, JERR_TFILE_CREATE, info->temp_name);
return FALSE;
}
info->handle.file_handle = handle;
info->read_backing_store = read_file_store;
info->write_backing_store = write_file_store;
info->close_backing_store = close_file_store;
TRACEMSS(cinfo, 1, JTRC_TFILE_OPEN, info->temp_name);
return TRUE; /* succeeded */
}
/*
* Access methods for extended memory.
*/
#if XMS_SUPPORTED
static XMSDRIVER xms_driver; /* saved address of XMS driver */
typedef union { /* either long offset or real-mode pointer */
long offset;
void far * ptr;
} XMSPTR;
typedef struct { /* XMS move specification structure */
long length;
XMSH src_handle;
XMSPTR src;
XMSH dst_handle;
XMSPTR dst;
} XMSspec;
#define ODD(X) (((X) & 1L) != 0)
METHODDEF(void)
read_xms_store (j_common_ptr cinfo, backing_store_ptr info,
void FAR * buffer_address,
long file_offset, long byte_count)
{
XMScontext ctx;
XMSspec spec;
char endbuffer[2];
/* The XMS driver can't cope with an odd length, so handle the last byte
* specially if byte_count is odd. We don't expect this to be common.
*/
spec.length = byte_count & (~ 1L);
spec.src_handle = info->handle.xms_handle;
spec.src.offset = file_offset;
spec.dst_handle = 0;
spec.dst.ptr = buffer_address;
ctx.ds_si = (void far *) & spec;
ctx.ax = 0x0b00; /* EMB move */
jxms_calldriver(xms_driver, (XMScontext far *) & ctx);
if (ctx.ax != 1)
ERREXIT(cinfo, JERR_XMS_READ);
if (ODD(byte_count)) {
read_xms_store(cinfo, info, (void FAR *) endbuffer,
file_offset + byte_count - 1L, 2L);
((char FAR *) buffer_address)[byte_count - 1L] = endbuffer[0];
}
}
METHODDEF(void)
write_xms_store (j_common_ptr cinfo, backing_store_ptr info,
void FAR * buffer_address,
long file_offset, long byte_count)
{
XMScontext ctx;
XMSspec spec;
char endbuffer[2];
/* The XMS driver can't cope with an odd length, so handle the last byte
* specially if byte_count is odd. We don't expect this to be common.
*/
spec.length = byte_count & (~ 1L);
spec.src_handle = 0;
spec.src.ptr = buffer_address;
spec.dst_handle = info->handle.xms_handle;
spec.dst.offset = file_offset;
ctx.ds_si = (void far *) & spec;
ctx.ax = 0x0b00; /* EMB move */
jxms_calldriver(xms_driver, (XMScontext far *) & ctx);
if (ctx.ax != 1)
ERREXIT(cinfo, JERR_XMS_WRITE);
if (ODD(byte_count)) {
read_xms_store(cinfo, info, (void FAR *) endbuffer,
file_offset + byte_count - 1L, 2L);
endbuffer[0] = ((char FAR *) buffer_address)[byte_count - 1L];
write_xms_store(cinfo, info, (void FAR *) endbuffer,
file_offset + byte_count - 1L, 2L);
}
}
METHODDEF(void)
close_xms_store (j_common_ptr cinfo, backing_store_ptr info)
{
XMScontext ctx;
ctx.dx = info->handle.xms_handle;
ctx.ax = 0x0a00;
jxms_calldriver(xms_driver, (XMScontext far *) & ctx);
TRACEMS1(cinfo, 1, JTRC_XMS_CLOSE, info->handle.xms_handle);
/* we ignore any error return from the driver */
}
LOCAL(boolean)
open_xms_store (j_common_ptr cinfo, backing_store_ptr info,
long total_bytes_needed)
{
XMScontext ctx;
/* Get address of XMS driver */
jxms_getdriver((XMSDRIVER far *) & xms_driver);
if (xms_driver == NULL)
return FALSE; /* no driver to be had */
/* Get version number, must be >= 2.00 */
ctx.ax = 0x0000;
jxms_calldriver(xms_driver, (XMScontext far *) & ctx);
if (ctx.ax < (unsigned short) 0x0200)
return FALSE;
/* Try to get space (expressed in kilobytes) */
ctx.dx = (unsigned short) ((total_bytes_needed + 1023L) >> 10);
ctx.ax = 0x0900;
jxms_calldriver(xms_driver, (XMScontext far *) & ctx);
if (ctx.ax != 1)
return FALSE;
/* Succeeded, save the handle and away we go */
info->handle.xms_handle = ctx.dx;
info->read_backing_store = read_xms_store;
info->write_backing_store = write_xms_store;
info->close_backing_store = close_xms_store;
TRACEMS1(cinfo, 1, JTRC_XMS_OPEN, ctx.dx);
return TRUE; /* succeeded */
}
#endif /* XMS_SUPPORTED */
/*
* Access methods for expanded memory.
*/
#if EMS_SUPPORTED
/* The EMS move specification structure requires word and long fields aligned
* at odd byte boundaries. Some compilers will align struct fields at even
* byte boundaries. While it's usually possible to force byte alignment,
* that causes an overall performance penalty and may pose problems in merging
* JPEG into a larger application. Instead we accept some rather dirty code
* here. Note this code would fail if the hardware did not allow odd-byte
* word & long accesses, but all 80x86 CPUs do.
*/
typedef void far * EMSPTR;
typedef union { /* EMS move specification structure */
long length; /* It's easy to access first 4 bytes */
char bytes[18]; /* Misaligned fields in here! */
} EMSspec;
/* Macros for accessing misaligned fields */
#define FIELD_AT(spec,offset,type) (*((type *) &(spec.bytes[offset])))
#define SRC_TYPE(spec) FIELD_AT(spec,4,char)
#define SRC_HANDLE(spec) FIELD_AT(spec,5,EMSH)
#define SRC_OFFSET(spec) FIELD_AT(spec,7,unsigned short)
#define SRC_PAGE(spec) FIELD_AT(spec,9,unsigned short)
#define SRC_PTR(spec) FIELD_AT(spec,7,EMSPTR)
#define DST_TYPE(spec) FIELD_AT(spec,11,char)
#define DST_HANDLE(spec) FIELD_AT(spec,12,EMSH)
#define DST_OFFSET(spec) FIELD_AT(spec,14,unsigned short)
#define DST_PAGE(spec) FIELD_AT(spec,16,unsigned short)
#define DST_PTR(spec) FIELD_AT(spec,14,EMSPTR)
#define EMSPAGESIZE 16384L /* gospel, see the EMS specs */
#define HIBYTE(W) (((W) >> 8) & 0xFF)
#define LOBYTE(W) ((W) & 0xFF)
METHODDEF(void)
read_ems_store (j_common_ptr cinfo, backing_store_ptr info,
void FAR * buffer_address,
long file_offset, long byte_count)
{
EMScontext ctx;
EMSspec spec;
spec.length = byte_count;
SRC_TYPE(spec) = 1;
SRC_HANDLE(spec) = info->handle.ems_handle;
SRC_PAGE(spec) = (unsigned short) (file_offset / EMSPAGESIZE);
SRC_OFFSET(spec) = (unsigned short) (file_offset % EMSPAGESIZE);
DST_TYPE(spec) = 0;
DST_HANDLE(spec) = 0;
DST_PTR(spec) = buffer_address;
ctx.ds_si = (void far *) & spec;
ctx.ax = 0x5700; /* move memory region */
jems_calldriver((EMScontext far *) & ctx);
if (HIBYTE(ctx.ax) != 0)
ERREXIT(cinfo, JERR_EMS_READ);
}
METHODDEF(void)
write_ems_store (j_common_ptr cinfo, backing_store_ptr info,
void FAR * buffer_address,
long file_offset, long byte_count)
{
EMScontext ctx;
EMSspec spec;
spec.length = byte_count;
SRC_TYPE(spec) = 0;
SRC_HANDLE(spec) = 0;
SRC_PTR(spec) = buffer_address;
DST_TYPE(spec) = 1;
DST_HANDLE(spec) = info->handle.ems_handle;
DST_PAGE(spec) = (unsigned short) (file_offset / EMSPAGESIZE);
DST_OFFSET(spec) = (unsigned short) (file_offset % EMSPAGESIZE);
ctx.ds_si = (void far *) & spec;
ctx.ax = 0x5700; /* move memory region */
jems_calldriver((EMScontext far *) & ctx);
if (HIBYTE(ctx.ax) != 0)
ERREXIT(cinfo, JERR_EMS_WRITE);
}
METHODDEF(void)
close_ems_store (j_common_ptr cinfo, backing_store_ptr info)
{
EMScontext ctx;
ctx.ax = 0x4500;
ctx.dx = info->handle.ems_handle;
jems_calldriver((EMScontext far *) & ctx);
TRACEMS1(cinfo, 1, JTRC_EMS_CLOSE, info->handle.ems_handle);
/* we ignore any error return from the driver */
}
LOCAL(boolean)
open_ems_store (j_common_ptr cinfo, backing_store_ptr info,
long total_bytes_needed)
{
EMScontext ctx;
/* Is EMS driver there? */
if (! jems_available())
return FALSE;
/* Get status, make sure EMS is OK */
ctx.ax = 0x4000;
jems_calldriver((EMScontext far *) & ctx);
if (HIBYTE(ctx.ax) != 0)
return FALSE;
/* Get version, must be >= 4.0 */
ctx.ax = 0x4600;
jems_calldriver((EMScontext far *) & ctx);
if (HIBYTE(ctx.ax) != 0 || LOBYTE(ctx.ax) < 0x40)
return FALSE;
/* Try to allocate requested space */
ctx.ax = 0x4300;
ctx.bx = (unsigned short) ((total_bytes_needed + EMSPAGESIZE-1L) / EMSPAGESIZE);
jems_calldriver((EMScontext far *) & ctx);
if (HIBYTE(ctx.ax) != 0)
return FALSE;
/* Succeeded, save the handle and away we go */
info->handle.ems_handle = ctx.dx;
info->read_backing_store = read_ems_store;
info->write_backing_store = write_ems_store;
info->close_backing_store = close_ems_store;
TRACEMS1(cinfo, 1, JTRC_EMS_OPEN, ctx.dx);
return TRUE; /* succeeded */
}
#endif /* EMS_SUPPORTED */
/*
* Initial opening of a backing-store object.
*/
GLOBAL(void)
jpeg_open_backing_store (j_common_ptr cinfo, backing_store_ptr info,
long total_bytes_needed)
{
/* Try extended memory, then expanded memory, then regular file. */
#if XMS_SUPPORTED
if (open_xms_store(cinfo, info, total_bytes_needed))
return;
#endif
#if EMS_SUPPORTED
if (open_ems_store(cinfo, info, total_bytes_needed))
return;
#endif
if (open_file_store(cinfo, info, total_bytes_needed))
return;
ERREXITS(cinfo, JERR_TFILE_CREATE, "");
}
/*
* These routines take care of any system-dependent initialization and
* cleanup required.
*/
GLOBAL(long)
jpeg_mem_init (j_common_ptr cinfo)
{
next_file_num = 0; /* initialize temp file name generator */
return DEFAULT_MAX_MEM; /* default for max_memory_to_use */
}
GLOBAL(void)
jpeg_mem_term (j_common_ptr cinfo)
{
/* Microsoft C, at least in v6.00A, will not successfully reclaim freed
* blocks of size > 32Kbytes unless we give it a kick in the rear, like so:
*/
#ifdef NEED_FHEAPMIN
_fheapmin();
#endif
}

Просмотреть файл

@ -1,379 +0,0 @@
;
; jmemdosa.asm
;
; Copyright (C) 1992, Thomas G. Lane.
; This file is part of the Independent JPEG Group's software.
; For conditions of distribution and use, see the accompanying README file.
;
; This file contains low-level interface routines to support the MS-DOS
; backing store manager (jmemdos.c). Routines are provided to access disk
; files through direct DOS calls, and to access XMS and EMS drivers.
;
; This file should assemble with Microsoft's MASM or any compatible
; assembler (including Borland's Turbo Assembler). If you haven't got
; a compatible assembler, better fall back to jmemansi.c or jmemname.c.
;
; To minimize dependence on the C compiler's register usage conventions,
; we save and restore all 8086 registers, even though most compilers only
; require SI,DI,DS to be preserved. Also, we use only 16-bit-wide return
; values, which everybody returns in AX.
;
; Based on code contributed by Ge' Weijers.
;
JMEMDOSA_TXT segment byte public 'CODE'
assume cs:JMEMDOSA_TXT
public _jdos_open
public _jdos_close
public _jdos_seek
public _jdos_read
public _jdos_write
public _jxms_getdriver
public _jxms_calldriver
public _jems_available
public _jems_calldriver
;
; short far jdos_open (short far * handle, char far * filename)
;
; Create and open a temporary file
;
_jdos_open proc far
push bp ; linkage
mov bp,sp
push si ; save all registers for safety
push di
push bx
push cx
push dx
push es
push ds
mov cx,0 ; normal file attributes
lds dx,dword ptr [bp+10] ; get filename pointer
mov ah,3ch ; create file
int 21h
jc open_err ; if failed, return error code
lds bx,dword ptr [bp+6] ; get handle pointer
mov word ptr [bx],ax ; save the handle
xor ax,ax ; return zero for OK
open_err: pop ds ; restore registers and exit
pop es
pop dx
pop cx
pop bx
pop di
pop si
pop bp
ret
_jdos_open endp
;
; short far jdos_close (short handle)
;
; Close the file handle
;
_jdos_close proc far
push bp ; linkage
mov bp,sp
push si ; save all registers for safety
push di
push bx
push cx
push dx
push es
push ds
mov bx,word ptr [bp+6] ; file handle
mov ah,3eh ; close file
int 21h
jc close_err ; if failed, return error code
xor ax,ax ; return zero for OK
close_err: pop ds ; restore registers and exit
pop es
pop dx
pop cx
pop bx
pop di
pop si
pop bp
ret
_jdos_close endp
;
; short far jdos_seek (short handle, long offset)
;
; Set file position
;
_jdos_seek proc far
push bp ; linkage
mov bp,sp
push si ; save all registers for safety
push di
push bx
push cx
push dx
push es
push ds
mov bx,word ptr [bp+6] ; file handle
mov dx,word ptr [bp+8] ; LS offset
mov cx,word ptr [bp+10] ; MS offset
mov ax,4200h ; absolute seek
int 21h
jc seek_err ; if failed, return error code
xor ax,ax ; return zero for OK
seek_err: pop ds ; restore registers and exit
pop es
pop dx
pop cx
pop bx
pop di
pop si
pop bp
ret
_jdos_seek endp
;
; short far jdos_read (short handle, void far * buffer, unsigned short count)
;
; Read from file
;
_jdos_read proc far
push bp ; linkage
mov bp,sp
push si ; save all registers for safety
push di
push bx
push cx
push dx
push es
push ds
mov bx,word ptr [bp+6] ; file handle
lds dx,dword ptr [bp+8] ; buffer address
mov cx,word ptr [bp+12] ; number of bytes
mov ah,3fh ; read file
int 21h
jc read_err ; if failed, return error code
cmp ax,word ptr [bp+12] ; make sure all bytes were read
je read_ok
mov ax,1 ; else return 1 for not OK
jmp short read_err
read_ok: xor ax,ax ; return zero for OK
read_err: pop ds ; restore registers and exit
pop es
pop dx
pop cx
pop bx
pop di
pop si
pop bp
ret
_jdos_read endp
;
; short far jdos_write (short handle, void far * buffer, unsigned short count)
;
; Write to file
;
_jdos_write proc far
push bp ; linkage
mov bp,sp
push si ; save all registers for safety
push di
push bx
push cx
push dx
push es
push ds
mov bx,word ptr [bp+6] ; file handle
lds dx,dword ptr [bp+8] ; buffer address
mov cx,word ptr [bp+12] ; number of bytes
mov ah,40h ; write file
int 21h
jc write_err ; if failed, return error code
cmp ax,word ptr [bp+12] ; make sure all bytes written
je write_ok
mov ax,1 ; else return 1 for not OK
jmp short write_err
write_ok: xor ax,ax ; return zero for OK
write_err: pop ds ; restore registers and exit
pop es
pop dx
pop cx
pop bx
pop di
pop si
pop bp
ret
_jdos_write endp
;
; void far jxms_getdriver (XMSDRIVER far *)
;
; Get the address of the XMS driver, or NULL if not available
;
_jxms_getdriver proc far
push bp ; linkage
mov bp,sp
push si ; save all registers for safety
push di
push bx
push cx
push dx
push es
push ds
mov ax,4300h ; call multiplex interrupt with
int 2fh ; a magic cookie, hex 4300
cmp al,80h ; AL should contain hex 80
je xmsavail
xor dx,dx ; no XMS driver available
xor ax,ax ; return a nil pointer
jmp short xmsavail_done
xmsavail: mov ax,4310h ; fetch driver address with
int 2fh ; another magic cookie
mov dx,es ; copy address to dx:ax
mov ax,bx
xmsavail_done: les bx,dword ptr [bp+6] ; get pointer to return value
mov word ptr es:[bx],ax
mov word ptr es:[bx+2],dx
pop ds ; restore registers and exit
pop es
pop dx
pop cx
pop bx
pop di
pop si
pop bp
ret
_jxms_getdriver endp
;
; void far jxms_calldriver (XMSDRIVER, XMScontext far *)
;
; The XMScontext structure contains values for the AX,DX,BX,SI,DS registers.
; These are loaded, the XMS call is performed, and the new values of the
; AX,DX,BX registers are written back to the context structure.
;
_jxms_calldriver proc far
push bp ; linkage
mov bp,sp
push si ; save all registers for safety
push di
push bx
push cx
push dx
push es
push ds
les bx,dword ptr [bp+10] ; get XMScontext pointer
mov ax,word ptr es:[bx] ; load registers
mov dx,word ptr es:[bx+2]
mov si,word ptr es:[bx+6]
mov ds,word ptr es:[bx+8]
mov bx,word ptr es:[bx+4]
call dword ptr [bp+6] ; call the driver
mov cx,bx ; save returned BX for a sec
les bx,dword ptr [bp+10] ; get XMScontext pointer
mov word ptr es:[bx],ax ; put back ax,dx,bx
mov word ptr es:[bx+2],dx
mov word ptr es:[bx+4],cx
pop ds ; restore registers and exit
pop es
pop dx
pop cx
pop bx
pop di
pop si
pop bp
ret
_jxms_calldriver endp
;
; short far jems_available (void)
;
; Have we got an EMS driver? (this comes straight from the EMS 4.0 specs)
;
_jems_available proc far
push si ; save all registers for safety
push di
push bx
push cx
push dx
push es
push ds
mov ax,3567h ; get interrupt vector 67h
int 21h
push cs
pop ds
mov di,000ah ; check offs 10 in returned seg
lea si,ASCII_device_name ; against literal string
mov cx,8
cld
repe cmpsb
jne no_ems
mov ax,1 ; match, it's there
jmp short avail_done
no_ems: xor ax,ax ; it's not there
avail_done: pop ds ; restore registers and exit
pop es
pop dx
pop cx
pop bx
pop di
pop si
ret
ASCII_device_name db "EMMXXXX0"
_jems_available endp
;
; void far jems_calldriver (EMScontext far *)
;
; The EMScontext structure contains values for the AX,DX,BX,SI,DS registers.
; These are loaded, the EMS trap is performed, and the new values of the
; AX,DX,BX registers are written back to the context structure.
;
_jems_calldriver proc far
push bp ; linkage
mov bp,sp
push si ; save all registers for safety
push di
push bx
push cx
push dx
push es
push ds
les bx,dword ptr [bp+6] ; get EMScontext pointer
mov ax,word ptr es:[bx] ; load registers
mov dx,word ptr es:[bx+2]
mov si,word ptr es:[bx+6]
mov ds,word ptr es:[bx+8]
mov bx,word ptr es:[bx+4]
int 67h ; call the EMS driver
mov cx,bx ; save returned BX for a sec
les bx,dword ptr [bp+6] ; get EMScontext pointer
mov word ptr es:[bx],ax ; put back ax,dx,bx
mov word ptr es:[bx+2],dx
mov word ptr es:[bx+4],cx
pop ds ; restore registers and exit
pop es
pop dx
pop cx
pop bx
pop di
pop si
pop bp
ret
_jems_calldriver endp
JMEMDOSA_TXT ends
end

Просмотреть файл

@ -57,22 +57,25 @@ extern char * getenv JPP((const char * name));
* requirement, and we had better do so too.
* There isn't any really portable way to determine the worst-case alignment
* requirement. This module assumes that the alignment requirement is
* multiples of sizeof(ALIGN_TYPE).
* By default, we define ALIGN_TYPE as double. This is necessary on some
* multiples of ALIGN_SIZE.
* By default, we define ALIGN_SIZE as sizeof(double). This is necessary on some
* workstations (where doubles really do need 8-byte alignment) and will work
* fine on nearly everything. If your machine has lesser alignment needs,
* you can save a few bytes by making ALIGN_TYPE smaller.
* you can save a few bytes by making ALIGN_SIZE smaller.
* The only place I know of where this will NOT work is certain Macintosh
* 680x0 compilers that define double as a 10-byte IEEE extended float.
* Doing 10-byte alignment is counterproductive because longwords won't be
* aligned well. Put "#define ALIGN_TYPE long" in jconfig.h if you have
* aligned well. Put "#define ALIGN_SIZE 4" in jconfig.h if you have
* such a compiler.
*/
#ifndef ALIGN_TYPE /* so can override from jconfig.h */
#define ALIGN_TYPE double
#ifndef ALIGN_SIZE /* so can override from jconfig.h */
#ifndef WITH_SIMD
#define ALIGN_SIZE SIZEOF(double)
#else
#define ALIGN_SIZE 16 /* Most SIMD implementations require this */
#endif
#endif
/*
* We allocate objects from "pools", where each pool is gotten with a single
@ -81,34 +84,24 @@ extern char * getenv JPP((const char * name));
* header with a link to the next pool of the same class.
* Small and large pool headers are identical except that the latter's
* link pointer must be FAR on 80x86 machines.
* Notice that the "real" header fields are union'ed with a dummy ALIGN_TYPE
* field. This forces the compiler to make SIZEOF(small_pool_hdr) a multiple
* of the alignment requirement of ALIGN_TYPE.
*/
typedef union small_pool_struct * small_pool_ptr;
typedef struct small_pool_struct * small_pool_ptr;
typedef union small_pool_struct {
struct {
small_pool_ptr next; /* next in list of pools */
size_t bytes_used; /* how many bytes already used within pool */
size_t bytes_left; /* bytes still available in this pool */
} hdr;
ALIGN_TYPE dummy; /* included in union to ensure alignment */
typedef struct small_pool_struct {
small_pool_ptr next; /* next in list of pools */
size_t bytes_used; /* how many bytes already used within pool */
size_t bytes_left; /* bytes still available in this pool */
} small_pool_hdr;
typedef union large_pool_struct FAR * large_pool_ptr;
typedef struct large_pool_struct FAR * large_pool_ptr;
typedef union large_pool_struct {
struct {
large_pool_ptr next; /* next in list of pools */
size_t bytes_used; /* how many bytes already used within pool */
size_t bytes_left; /* bytes still available in this pool */
} hdr;
ALIGN_TYPE dummy; /* included in union to ensure alignment */
typedef struct large_pool_struct {
large_pool_ptr next; /* next in list of pools */
size_t bytes_used; /* how many bytes already used within pool */
size_t bytes_left; /* bytes still available in this pool */
} large_pool_hdr;
/*
* Here is the full definition of a memory manager object.
*/
@ -129,7 +122,7 @@ typedef struct {
jvirt_barray_ptr virt_barray_list;
/* This counts total space obtained from jpeg_get_small/large */
long total_space_allocated;
size_t total_space_allocated;
/* alloc_sarray and alloc_barray set this value for use by virtual
* array routines.
@ -197,16 +190,16 @@ print_mem_stats (j_common_ptr cinfo, int pool_id)
pool_id, mem->total_space_allocated);
for (lhdr_ptr = mem->large_list[pool_id]; lhdr_ptr != NULL;
lhdr_ptr = lhdr_ptr->hdr.next) {
lhdr_ptr = lhdr_ptr->next) {
fprintf(stderr, " Large chunk used %ld\n",
(long) lhdr_ptr->hdr.bytes_used);
(long) lhdr_ptr->bytes_used);
}
for (shdr_ptr = mem->small_list[pool_id]; shdr_ptr != NULL;
shdr_ptr = shdr_ptr->hdr.next) {
shdr_ptr = shdr_ptr->next) {
fprintf(stderr, " Small chunk used %ld free %ld\n",
(long) shdr_ptr->hdr.bytes_used,
(long) shdr_ptr->hdr.bytes_left);
(long) shdr_ptr->bytes_used,
(long) shdr_ptr->bytes_left);
}
}
@ -236,6 +229,10 @@ out_of_memory (j_common_ptr cinfo, int which)
* and we also distinguish the first pool of a class from later ones.
* NOTE: the values given work fairly well on both 16- and 32-bit-int
* machines, but may be too small if longs are 64 bits or more.
*
* Since we do not know what alignment malloc() gives us, we have to
* allocate ALIGN_SIZE-1 extra space per pool to have room for alignment
* adjustment.
*/
static const size_t first_pool_slop[JPOOL_NUMPOOLS] =
@ -260,33 +257,36 @@ alloc_small (j_common_ptr cinfo, int pool_id, size_t sizeofobject)
my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
small_pool_ptr hdr_ptr, prev_hdr_ptr;
char * data_ptr;
size_t odd_bytes, min_request, slop;
size_t min_request, slop;
/*
* Round up the requested size to a multiple of ALIGN_SIZE in order
* to assure alignment for the next object allocated in the same pool
* and so that algorithms can straddle outside the proper area up
* to the next alignment.
*/
sizeofobject = jround_up(sizeofobject, ALIGN_SIZE);
/* Check for unsatisfiable request (do now to ensure no overflow below) */
if (sizeofobject > (size_t) (MAX_ALLOC_CHUNK-SIZEOF(small_pool_hdr)))
if ((SIZEOF(small_pool_hdr) + sizeofobject + ALIGN_SIZE - 1) > MAX_ALLOC_CHUNK)
out_of_memory(cinfo, 1); /* request exceeds malloc's ability */
/* Round up the requested size to a multiple of SIZEOF(ALIGN_TYPE) */
odd_bytes = sizeofobject % SIZEOF(ALIGN_TYPE);
if (odd_bytes > 0)
sizeofobject += SIZEOF(ALIGN_TYPE) - odd_bytes;
/* See if space is available in any existing pool */
if (pool_id < 0 || pool_id >= JPOOL_NUMPOOLS)
ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id); /* safety check */
prev_hdr_ptr = NULL;
hdr_ptr = mem->small_list[pool_id];
while (hdr_ptr != NULL) {
if (hdr_ptr->hdr.bytes_left >= sizeofobject)
if (hdr_ptr->bytes_left >= sizeofobject)
break; /* found pool with enough space */
prev_hdr_ptr = hdr_ptr;
hdr_ptr = hdr_ptr->hdr.next;
hdr_ptr = hdr_ptr->next;
}
/* Time to make a new pool? */
if (hdr_ptr == NULL) {
/* min_request is what we need now, slop is what will be leftover */
min_request = sizeofobject + SIZEOF(small_pool_hdr);
min_request = SIZEOF(small_pool_hdr) + sizeofobject + ALIGN_SIZE - 1;
if (prev_hdr_ptr == NULL) /* first pool in class? */
slop = first_pool_slop[pool_id];
else
@ -305,20 +305,23 @@ alloc_small (j_common_ptr cinfo, int pool_id, size_t sizeofobject)
}
mem->total_space_allocated += min_request + slop;
/* Success, initialize the new pool header and add to end of list */
hdr_ptr->hdr.next = NULL;
hdr_ptr->hdr.bytes_used = 0;
hdr_ptr->hdr.bytes_left = sizeofobject + slop;
hdr_ptr->next = NULL;
hdr_ptr->bytes_used = 0;
hdr_ptr->bytes_left = sizeofobject + slop;
if (prev_hdr_ptr == NULL) /* first pool in class? */
mem->small_list[pool_id] = hdr_ptr;
else
prev_hdr_ptr->hdr.next = hdr_ptr;
prev_hdr_ptr->next = hdr_ptr;
}
/* OK, allocate the object from the current pool */
data_ptr = (char *) (hdr_ptr + 1); /* point to first data byte in pool */
data_ptr += hdr_ptr->hdr.bytes_used; /* point to place for object */
hdr_ptr->hdr.bytes_used += sizeofobject;
hdr_ptr->hdr.bytes_left -= sizeofobject;
data_ptr = (char *) hdr_ptr; /* point to first data byte in pool... */
data_ptr += SIZEOF(small_pool_hdr); /* ...by skipping the header... */
if ((size_t)data_ptr % ALIGN_SIZE) /* ...and adjust for alignment */
data_ptr += ALIGN_SIZE - (size_t)data_ptr % ALIGN_SIZE;
data_ptr += hdr_ptr->bytes_used; /* point to place for object */
hdr_ptr->bytes_used += sizeofobject;
hdr_ptr->bytes_left -= sizeofobject;
return (void *) data_ptr;
}
@ -344,37 +347,45 @@ alloc_large (j_common_ptr cinfo, int pool_id, size_t sizeofobject)
{
my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
large_pool_ptr hdr_ptr;
size_t odd_bytes;
char FAR * data_ptr;
/*
* Round up the requested size to a multiple of ALIGN_SIZE so that
* algorithms can straddle outside the proper area up to the next
* alignment.
*/
sizeofobject = jround_up(sizeofobject, ALIGN_SIZE);
/* Check for unsatisfiable request (do now to ensure no overflow below) */
if (sizeofobject > (size_t) (MAX_ALLOC_CHUNK-SIZEOF(large_pool_hdr)))
if ((SIZEOF(large_pool_hdr) + sizeofobject + ALIGN_SIZE - 1) > MAX_ALLOC_CHUNK)
out_of_memory(cinfo, 3); /* request exceeds malloc's ability */
/* Round up the requested size to a multiple of SIZEOF(ALIGN_TYPE) */
odd_bytes = sizeofobject % SIZEOF(ALIGN_TYPE);
if (odd_bytes > 0)
sizeofobject += SIZEOF(ALIGN_TYPE) - odd_bytes;
/* Always make a new pool */
if (pool_id < 0 || pool_id >= JPOOL_NUMPOOLS)
ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id); /* safety check */
hdr_ptr = (large_pool_ptr) jpeg_get_large(cinfo, sizeofobject +
SIZEOF(large_pool_hdr));
SIZEOF(large_pool_hdr) +
ALIGN_SIZE - 1);
if (hdr_ptr == NULL)
out_of_memory(cinfo, 4); /* jpeg_get_large failed */
mem->total_space_allocated += sizeofobject + SIZEOF(large_pool_hdr);
mem->total_space_allocated += sizeofobject + SIZEOF(large_pool_hdr) + ALIGN_SIZE - 1;
/* Success, initialize the new pool header and add to list */
hdr_ptr->hdr.next = mem->large_list[pool_id];
hdr_ptr->next = mem->large_list[pool_id];
/* We maintain space counts in each pool header for statistical purposes,
* even though they are not needed for allocation.
*/
hdr_ptr->hdr.bytes_used = sizeofobject;
hdr_ptr->hdr.bytes_left = 0;
hdr_ptr->bytes_used = sizeofobject;
hdr_ptr->bytes_left = 0;
mem->large_list[pool_id] = hdr_ptr;
return (void FAR *) (hdr_ptr + 1); /* point to first data byte in pool */
data_ptr = (char *) hdr_ptr; /* point to first data byte in pool... */
data_ptr += SIZEOF(small_pool_hdr); /* ...by skipping the header... */
if ((size_t)data_ptr % ALIGN_SIZE) /* ...and adjust for alignment */
data_ptr += ALIGN_SIZE - (size_t)data_ptr % ALIGN_SIZE;
return (void FAR *) data_ptr;
}
@ -389,6 +400,10 @@ alloc_large (j_common_ptr cinfo, int pool_id, size_t sizeofobject)
* this chunking of rows. The rowsperchunk value is left in the mem manager
* object so that it can be saved away if this sarray is the workspace for
* a virtual array.
*
* Since we are often upsampling with a factor 2, we align the size (not
* the start) to 2 * ALIGN_SIZE so that the upsampling routines don't have
* to be as careful about size.
*/
METHODDEF(JSAMPARRAY)
@ -402,6 +417,11 @@ alloc_sarray (j_common_ptr cinfo, int pool_id,
JDIMENSION rowsperchunk, currow, i;
long ltemp;
/* Make sure each row is properly aligned */
if ((ALIGN_SIZE % SIZEOF(JSAMPLE)) != 0)
out_of_memory(cinfo, 5); /* safety check */
samplesperrow = (JDIMENSION)jround_up(samplesperrow, (2 * ALIGN_SIZE) / SIZEOF(JSAMPLE));
/* Calculate max # of rows allowed in one allocation chunk */
ltemp = (MAX_ALLOC_CHUNK-SIZEOF(large_pool_hdr)) /
((long) samplesperrow * SIZEOF(JSAMPLE));
@ -450,6 +470,10 @@ alloc_barray (j_common_ptr cinfo, int pool_id,
JDIMENSION rowsperchunk, currow, i;
long ltemp;
/* Make sure each row is properly aligned */
if ((SIZEOF(JBLOCK) % ALIGN_SIZE) != 0)
out_of_memory(cinfo, 6); /* safety check */
/* Calculate max # of rows allowed in one allocation chunk */
ltemp = (MAX_ALLOC_CHUNK-SIZEOF(large_pool_hdr)) /
((long) blocksperrow * SIZEOF(JBLOCK));
@ -584,8 +608,8 @@ realize_virt_arrays (j_common_ptr cinfo)
/* Allocate the in-memory buffers for any unrealized virtual arrays */
{
my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
long space_per_minheight, maximum_space, avail_mem;
long minheights, max_minheights;
size_t space_per_minheight, maximum_space, avail_mem;
size_t minheights, max_minheights;
jvirt_sarray_ptr sptr;
jvirt_barray_ptr bptr;
@ -968,9 +992,9 @@ free_pool (j_common_ptr cinfo, int pool_id)
mem->large_list[pool_id] = NULL;
while (lhdr_ptr != NULL) {
large_pool_ptr next_lhdr_ptr = lhdr_ptr->hdr.next;
space_freed = lhdr_ptr->hdr.bytes_used +
lhdr_ptr->hdr.bytes_left +
large_pool_ptr next_lhdr_ptr = lhdr_ptr->next;
space_freed = lhdr_ptr->bytes_used +
lhdr_ptr->bytes_left +
SIZEOF(large_pool_hdr);
jpeg_free_large(cinfo, (void FAR *) lhdr_ptr, space_freed);
mem->total_space_allocated -= space_freed;
@ -982,9 +1006,9 @@ free_pool (j_common_ptr cinfo, int pool_id)
mem->small_list[pool_id] = NULL;
while (shdr_ptr != NULL) {
small_pool_ptr next_shdr_ptr = shdr_ptr->hdr.next;
space_freed = shdr_ptr->hdr.bytes_used +
shdr_ptr->hdr.bytes_left +
small_pool_ptr next_shdr_ptr = shdr_ptr->next;
space_freed = shdr_ptr->bytes_used +
shdr_ptr->bytes_left +
SIZEOF(small_pool_hdr);
jpeg_free_small(cinfo, (void *) shdr_ptr, space_freed);
mem->total_space_allocated -= space_freed;
@ -1041,16 +1065,16 @@ jinit_memory_mgr (j_common_ptr cinfo)
* in common if and only if X is a power of 2, ie has only one one-bit.
* Some compilers may give an "unreachable code" warning here; ignore it.
*/
if ((SIZEOF(ALIGN_TYPE) & (SIZEOF(ALIGN_TYPE)-1)) != 0)
if ((ALIGN_SIZE & (ALIGN_SIZE-1)) != 0)
ERREXIT(cinfo, JERR_BAD_ALIGN_TYPE);
/* MAX_ALLOC_CHUNK must be representable as type size_t, and must be
* a multiple of SIZEOF(ALIGN_TYPE).
* a multiple of ALIGN_SIZE.
* Again, an "unreachable code" warning may be ignored here.
* But a "constant too large" warning means you need to fix MAX_ALLOC_CHUNK.
*/
test_mac = (size_t) MAX_ALLOC_CHUNK;
if ((long) test_mac != MAX_ALLOC_CHUNK ||
(MAX_ALLOC_CHUNK % SIZEOF(ALIGN_TYPE)) != 0)
(MAX_ALLOC_CHUNK % ALIGN_SIZE) != 0)
ERREXIT(cinfo, JERR_BAD_ALLOC_CHUNK);
max_to_use = jpeg_mem_init(cinfo); /* system-dependent initialization */

Просмотреть файл

@ -1,276 +0,0 @@
/*
* jmemname.c
*
* Copyright (C) 1992-1997, Thomas G. Lane.
* This file is part of the Independent JPEG Group's software.
* For conditions of distribution and use, see the accompanying README file.
*
* This file provides a generic implementation of the system-dependent
* portion of the JPEG memory manager. This implementation assumes that
* you must explicitly construct a name for each temp file.
* Also, the problem of determining the amount of memory available
* is shoved onto the user.
*/
#define JPEG_INTERNALS
#include "jinclude.h"
#include "jpeglib.h"
#include "jmemsys.h" /* import the system-dependent declarations */
#ifndef HAVE_STDLIB_H /* <stdlib.h> should declare malloc(),free() */
extern void * malloc JPP((size_t size));
extern void free JPP((void *ptr));
#endif
#ifndef SEEK_SET /* pre-ANSI systems may not define this; */
#define SEEK_SET 0 /* if not, assume 0 is correct */
#endif
#ifdef DONT_USE_B_MODE /* define mode parameters for fopen() */
#define READ_BINARY "r"
#define RW_BINARY "w+"
#else
#ifdef VMS /* VMS is very nonstandard */
#define READ_BINARY "rb", "ctx=stm"
#define RW_BINARY "w+b", "ctx=stm"
#else /* standard ANSI-compliant case */
#define READ_BINARY "rb"
#define RW_BINARY "w+b"
#endif
#endif
/*
* Selection of a file name for a temporary file.
* This is system-dependent!
*
* The code as given is suitable for most Unix systems, and it is easily
* modified for most non-Unix systems. Some notes:
* 1. The temp file is created in the directory named by TEMP_DIRECTORY.
* The default value is /usr/tmp, which is the conventional place for
* creating large temp files on Unix. On other systems you'll probably
* want to change the file location. You can do this by editing the
* #define, or (preferred) by defining TEMP_DIRECTORY in jconfig.h.
*
* 2. If you need to change the file name as well as its location,
* you can override the TEMP_FILE_NAME macro. (Note that this is
* actually a printf format string; it must contain %s and %d.)
* Few people should need to do this.
*
* 3. mktemp() is used to ensure that multiple processes running
* simultaneously won't select the same file names. If your system
* doesn't have mktemp(), define NO_MKTEMP to do it the hard way.
* (If you don't have <errno.h>, also define NO_ERRNO_H.)
*
* 4. You probably want to define NEED_SIGNAL_CATCHER so that cjpeg.c/djpeg.c
* will cause the temp files to be removed if you stop the program early.
*/
#ifndef TEMP_DIRECTORY /* can override from jconfig.h or Makefile */
#define TEMP_DIRECTORY "/usr/tmp/" /* recommended setting for Unix */
#endif
static int next_file_num; /* to distinguish among several temp files */
#ifdef NO_MKTEMP
#ifndef TEMP_FILE_NAME /* can override from jconfig.h or Makefile */
#define TEMP_FILE_NAME "%sJPG%03d.TMP"
#endif
#ifndef NO_ERRNO_H
#include <errno.h> /* to define ENOENT */
#endif
/* ANSI C specifies that errno is a macro, but on older systems it's more
* likely to be a plain int variable. And not all versions of errno.h
* bother to declare it, so we have to in order to be most portable. Thus:
*/
#ifndef errno
extern int errno;
#endif
LOCAL(void)
select_file_name (char * fname)
{
FILE * tfile;
/* Keep generating file names till we find one that's not in use */
for (;;) {
next_file_num++; /* advance counter */
sprintf(fname, TEMP_FILE_NAME, TEMP_DIRECTORY, next_file_num);
if ((tfile = fopen(fname, READ_BINARY)) == NULL) {
/* fopen could have failed for a reason other than the file not
* being there; for example, file there but unreadable.
* If <errno.h> isn't available, then we cannot test the cause.
*/
#ifdef ENOENT
if (errno != ENOENT)
continue;
#endif
break;
}
fclose(tfile); /* oops, it's there; close tfile & try again */
}
}
#else /* ! NO_MKTEMP */
/* Note that mktemp() requires the initial filename to end in six X's */
#ifndef TEMP_FILE_NAME /* can override from jconfig.h or Makefile */
#define TEMP_FILE_NAME "%sJPG%dXXXXXX"
#endif
LOCAL(void)
select_file_name (char * fname)
{
next_file_num++; /* advance counter */
sprintf(fname, TEMP_FILE_NAME, TEMP_DIRECTORY, next_file_num);
mktemp(fname); /* make sure file name is unique */
/* mktemp replaces the trailing XXXXXX with a unique string of characters */
}
#endif /* NO_MKTEMP */
/*
* Memory allocation and freeing are controlled by the regular library
* routines malloc() and free().
*/
GLOBAL(void *)
jpeg_get_small (j_common_ptr cinfo, size_t sizeofobject)
{
return (void *) malloc(sizeofobject);
}
GLOBAL(void)
jpeg_free_small (j_common_ptr cinfo, void * object, size_t sizeofobject)
{
free(object);
}
/*
* "Large" objects are treated the same as "small" ones.
* NB: although we include FAR keywords in the routine declarations,
* this file won't actually work in 80x86 small/medium model; at least,
* you probably won't be able to process useful-size images in only 64KB.
*/
GLOBAL(void FAR *)
jpeg_get_large (j_common_ptr cinfo, size_t sizeofobject)
{
return (void FAR *) malloc(sizeofobject);
}
GLOBAL(void)
jpeg_free_large (j_common_ptr cinfo, void FAR * object, size_t sizeofobject)
{
free(object);
}
/*
* This routine computes the total memory space available for allocation.
* It's impossible to do this in a portable way; our current solution is
* to make the user tell us (with a default value set at compile time).
* If you can actually get the available space, it's a good idea to subtract
* a slop factor of 5% or so.
*/
#ifndef DEFAULT_MAX_MEM /* so can override from makefile */
#define DEFAULT_MAX_MEM 1000000L /* default: one megabyte */
#endif
GLOBAL(long)
jpeg_mem_available (j_common_ptr cinfo, long min_bytes_needed,
long max_bytes_needed, long already_allocated)
{
return cinfo->mem->max_memory_to_use - already_allocated;
}
/*
* Backing store (temporary file) management.
* Backing store objects are only used when the value returned by
* jpeg_mem_available is less than the total space needed. You can dispense
* with these routines if you have plenty of virtual memory; see jmemnobs.c.
*/
METHODDEF(void)
read_backing_store (j_common_ptr cinfo, backing_store_ptr info,
void FAR * buffer_address,
long file_offset, long byte_count)
{
if (fseek(info->temp_file, file_offset, SEEK_SET))
ERREXIT(cinfo, JERR_TFILE_SEEK);
if (JFREAD(info->temp_file, buffer_address, byte_count)
!= (size_t) byte_count)
ERREXIT(cinfo, JERR_TFILE_READ);
}
METHODDEF(void)
write_backing_store (j_common_ptr cinfo, backing_store_ptr info,
void FAR * buffer_address,
long file_offset, long byte_count)
{
if (fseek(info->temp_file, file_offset, SEEK_SET))
ERREXIT(cinfo, JERR_TFILE_SEEK);
if (JFWRITE(info->temp_file, buffer_address, byte_count)
!= (size_t) byte_count)
ERREXIT(cinfo, JERR_TFILE_WRITE);
}
METHODDEF(void)
close_backing_store (j_common_ptr cinfo, backing_store_ptr info)
{
fclose(info->temp_file); /* close the file */
unlink(info->temp_name); /* delete the file */
/* If your system doesn't have unlink(), use remove() instead.
* remove() is the ANSI-standard name for this function, but if
* your system was ANSI you'd be using jmemansi.c, right?
*/
TRACEMSS(cinfo, 1, JTRC_TFILE_CLOSE, info->temp_name);
}
/*
* Initial opening of a backing-store object.
*/
GLOBAL(void)
jpeg_open_backing_store (j_common_ptr cinfo, backing_store_ptr info,
long total_bytes_needed)
{
select_file_name(info->temp_name);
if ((info->temp_file = fopen(info->temp_name, RW_BINARY)) == NULL)
ERREXITS(cinfo, JERR_TFILE_CREATE, info->temp_name);
info->read_backing_store = read_backing_store;
info->write_backing_store = write_backing_store;
info->close_backing_store = close_backing_store;
TRACEMSS(cinfo, 1, JTRC_TFILE_OPEN, info->temp_name);
}
/*
* These routines take care of any system-dependent initialization and
* cleanup required.
*/
GLOBAL(long)
jpeg_mem_init (j_common_ptr cinfo)
{
next_file_num = 0; /* initialize temp file name generator */
return DEFAULT_MAX_MEM; /* default for max_memory_to_use */
}
GLOBAL(void)
jpeg_mem_term (j_common_ptr cinfo)
{
/* no work */
}

Просмотреть файл

@ -69,9 +69,9 @@ jpeg_free_large (j_common_ptr cinfo, void FAR * object, size_t sizeofobject)
* Here we always say, "we got all you want bud!"
*/
GLOBAL(long)
jpeg_mem_available (j_common_ptr cinfo, long min_bytes_needed,
long max_bytes_needed, long already_allocated)
GLOBAL(size_t)
jpeg_mem_available (j_common_ptr cinfo, size_t min_bytes_needed,
size_t max_bytes_needed, size_t already_allocated)
{
return max_bytes_needed;
}

Просмотреть файл

@ -100,10 +100,10 @@ EXTERN(void) jpeg_free_large JPP((j_common_ptr cinfo, void FAR * object,
* Conversely, zero may be returned to always use the minimum amount of memory.
*/
EXTERN(long) jpeg_mem_available JPP((j_common_ptr cinfo,
long min_bytes_needed,
long max_bytes_needed,
long already_allocated));
EXTERN(size_t) jpeg_mem_available JPP((j_common_ptr cinfo,
size_t min_bytes_needed,
size_t max_bytes_needed,
size_t already_allocated));
/*

Просмотреть файл

@ -2,6 +2,7 @@
* jmorecfg.h
*
* Copyright (C) 1991-1997, Thomas G. Lane.
* Copyright (C) 2009, D. R. Commander.
* This file is part of the Independent JPEG Group's software.
* For conditions of distribution and use, see the accompanying README file.
*
@ -10,13 +11,7 @@
* optimizations. Most users will not need to touch this file.
*/
/*
* This file has been modified for the Mozilla/Netscape environment.
* Modifications are distributed under the mozilla.org tri-license and are
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
* Reserved. See http://www.mozilla.org/MPL/
*/
#include "prtypes.h"
/*
* Define BITS_IN_JSAMPLE as either
@ -69,11 +64,11 @@ typedef unsigned char JSAMPLE;
#else /* not HAVE_UNSIGNED_CHAR */
typedef char JSAMPLE;
#ifdef CHAR_IS_UNSIGNED
#ifdef __CHAR_UNSIGNED__
#define GETJSAMPLE(value) ((int) (value))
#else
#define GETJSAMPLE(value) ((int) (value) & 0xFF)
#endif /* CHAR_IS_UNSIGNED */
#endif /* __CHAR_UNSIGNED__ */
#endif /* HAVE_UNSIGNED_CHAR */
@ -105,24 +100,6 @@ typedef short JSAMPLE;
typedef short JCOEF;
/* Defines for MMX/SSE2 support. */
#if defined(XP_WIN32) && defined(_M_IX86) && !defined(__GNUC__)
#define HAVE_MMX_INTEL_MNEMONICS
/* SSE2 code appears broken for some cpus (bug 247437) */
#define HAVE_SSE2_INTEL_MNEMONICS
#define HAVE_SSE2_INTRINSICS
#endif
#if defined(__GNUC__) && defined(__i386__)
#if defined(XP_MACOSX)
#define HAVE_SSE2_INTRINSICS
#endif /* ! XP_MACOSX */
#endif /* ! GNUC && i386 */
/* Add support for other platforms here */
/* Compressed datastreams are represented as arrays of JOCTET.
* These must be EXACTLY 8 bits wide, at least once they are written to
@ -138,11 +115,11 @@ typedef unsigned char JOCTET;
#else /* not HAVE_UNSIGNED_CHAR */
typedef char JOCTET;
#ifdef CHAR_IS_UNSIGNED
#ifdef __CHAR_UNSIGNED__
#define GETJOCTET(value) (value)
#else
#define GETJOCTET(value) ((value) & 0xFF)
#endif /* CHAR_IS_UNSIGNED */
#endif /* __CHAR_UNSIGNED__ */
#endif /* HAVE_UNSIGNED_CHAR */
@ -156,39 +133,19 @@ typedef char JOCTET;
/* UINT8 must hold at least the values 0..255. */
#ifdef HAVE_UNSIGNED_CHAR
typedef unsigned char UINT8;
#else /* not HAVE_UNSIGNED_CHAR */
#ifdef CHAR_IS_UNSIGNED
typedef char UINT8;
#else /* not CHAR_IS_UNSIGNED */
typedef short UINT8;
#endif /* CHAR_IS_UNSIGNED */
#endif /* HAVE_UNSIGNED_CHAR */
typedef PRUint8 UINT8;
/* UINT16 must hold at least the values 0..65535. */
#ifdef HAVE_UNSIGNED_SHORT
typedef unsigned short UINT16;
#else /* not HAVE_UNSIGNED_SHORT */
typedef unsigned int UINT16;
#endif /* HAVE_UNSIGNED_SHORT */
typedef PRUint16 UINT16;
/* INT16 must hold at least the values -32768..32767. */
#ifndef XMD_H /* X11/xmd.h correctly defines INT16 */
typedef short INT16;
#endif
typedef PRInt16 INT16;
/* INT32 must hold at least signed 32-bit values. */
#ifndef XMD_H /* X11/xmd.h correctly defines INT32 */
#ifndef _BASETSD_H_ /* basetsd.h correctly defines INT32 */
#ifndef _BASETSD_H
typedef long INT32;
#endif
#endif
#endif
typedef PRInt32 INT32;
/* Datatype used for image dimensions. The JPEG standard only supports
* images up to 64K*64K due to 16-bit fields in SOF markers. Therefore
@ -209,21 +166,14 @@ typedef unsigned int JDIMENSION;
* or code profilers that require it.
*/
/* Mozilla mod: make external functions be DLL-able via JRI_PUBLIC_API(),
* and supply extern "C" for C++ users of the C-compiled IJG library.
* (Well, not anymore, but there's still a modification here.)
*/
#include "prtypes.h"
/* a function called through method pointers: */
#define METHODDEF(type) static type
/* a function used only in its module: */
#define LOCAL(type) static type
PR_BEGIN_EXTERN_C
#define GLOBAL(type) type
#define EXTERN(type) extern type
PR_END_EXTERN_C
/* a function referenced thru EXTERNs: */
#define GLOBAL(type) type
/* a reference to a GLOBAL function: */
#define EXTERN(type) extern type
/* This macro is used to declare a "method", that is, a function pointer.
@ -245,13 +195,11 @@ PR_END_EXTERN_C
* explicit coding is needed; see uses of the NEED_FAR_POINTERS symbol.
*/
#ifndef FAR
#ifdef NEED_FAR_POINTERS
#define FAR far
#else
#define FAR
#endif
#endif
/*
@ -261,20 +209,8 @@ PR_END_EXTERN_C
* Defining HAVE_BOOLEAN before including jpeglib.h should make it work.
*/
/* Mozilla mod: IJG distribution makes boolean = int, but on Windows
* it's far safer to define boolean = unsigned char. Easier to switch
* than fight.
*/
/* For some reason, on SunOS 5.3 HAVE_BOOLEAN gets defined when using
* gcc, but boolean doesn't. Even if you use -UHAVE_BOOLEAN, it still
* gets reset somewhere.
*/
#if defined(MUST_UNDEF_HAVE_BOOLEAN_AFTER_INCLUDES) && defined(HAVE_BOOLEAN)
#undef HAVE_BOOLEAN
#endif
#ifndef HAVE_BOOLEAN
typedef unsigned char boolean;
typedef int boolean;
#endif
#ifndef FALSE /* in case these macros already exist */
#define FALSE 0 /* values of boolean */
@ -306,22 +242,13 @@ typedef unsigned char boolean;
* (You may HAVE to do that if your compiler doesn't like null source files.)
*/
/*
* Mozilla mods here: undef some features not actually used by the browser.
* This reduces object code size and more importantly allows us to compile
* even with broken compilers that crash when fed certain modules of the
* IJG sources. Currently we undef:
* DCT_FLOAT_SUPPORTED INPUT_SMOOTHING_SUPPORTED IDCT_SCALING_SUPPORTED
* QUANT_1PASS_SUPPORTED QUANT_2PASS_SUPPORTED
*/
/* Arithmetic coding is unsupported for legal reasons. Complaints to IBM. */
/* Capability options common to encoder and decoder: */
#define DCT_ISLOW_SUPPORTED /* slow but accurate integer algorithm */
#undef DCT_IFAST_SUPPORTED /* faster, less accurate integer method */
#undef DCT_FLOAT_SUPPORTED /* floating-point: accurate, fast on fast HW */
#define DCT_IFAST_SUPPORTED /* faster, less accurate integer method */
#define DCT_FLOAT_SUPPORTED /* floating-point: accurate, fast on fast HW */
/* Encoder capability options: */
@ -337,7 +264,7 @@ typedef unsigned char boolean;
* The exact same statements apply for progressive JPEG: the default tables
* don't work for progressive mode. (This may get fixed, however.)
*/
#undef INPUT_SMOOTHING_SUPPORTED /* Input image smoothing option? */
#define INPUT_SMOOTHING_SUPPORTED /* Input image smoothing option? */
/* Decoder capability options: */
@ -346,11 +273,11 @@ typedef unsigned char boolean;
#define D_PROGRESSIVE_SUPPORTED /* Progressive JPEG? (Requires MULTISCAN)*/
#define SAVE_MARKERS_SUPPORTED /* jpeg_save_markers() needed? */
#define BLOCK_SMOOTHING_SUPPORTED /* Block smoothing? (Progressive only) */
#undef IDCT_SCALING_SUPPORTED /* Output rescaling via IDCT? */
#define IDCT_SCALING_SUPPORTED /* Output rescaling via IDCT? */
#undef UPSAMPLE_SCALING_SUPPORTED /* Output rescaling at upsample stage? */
#define UPSAMPLE_MERGING_SUPPORTED /* Fast path for sloppy upsampling? */
#undef QUANT_1PASS_SUPPORTED /* 1-pass color quantization? */
#undef QUANT_2PASS_SUPPORTED /* 2-pass color quantization? */
#define QUANT_1PASS_SUPPORTED /* 1-pass color quantization? */
#define QUANT_2PASS_SUPPORTED /* 2-pass color quantization? */
/* more capability options later, no doubt */
@ -375,40 +302,37 @@ typedef unsigned char boolean;
#define RGB_BLUE 2 /* Offset of Blue */
#define RGB_PIXELSIZE 3 /* JSAMPLEs per RGB scanline element */
#define JPEG_NUMCS 12
static const int rgb_red[JPEG_NUMCS] = {
-1, -1, RGB_RED, -1, -1, -1, 0, 0, 2, 2, 3, 1
};
static const int rgb_green[JPEG_NUMCS] = {
-1, -1, RGB_GREEN, -1, -1, -1, 1, 1, 1, 1, 2, 2
};
static const int rgb_blue[JPEG_NUMCS] = {
-1, -1, RGB_BLUE, -1, -1, -1, 2, 2, 0, 0, 1, 3
};
static const int rgb_pixelsize[JPEG_NUMCS] = {
-1, -1, RGB_PIXELSIZE, -1, -1, -1, 3, 4, 3, 4, 4, 4
};
/* Definitions for speed-related optimizations. */
/* If your compiler supports inline functions, define INLINE
* as the inline keyword; otherwise define it as empty.
*/
/* Mozilla mods here: add more ways of defining INLINE */
#ifndef INLINE
#ifdef __GNUC__ /* for instance, GNU C knows about inline */
#define INLINE __inline__
#endif
#if defined( __IBMC__ ) || defined (__IBMCPP__)
#define INLINE _Inline
#endif
#ifndef INLINE
#ifdef __cplusplus
#define INLINE inline /* a C++ compiler should have it too */
#else
#define INLINE /* default is to define it as empty */
#endif
#endif
#endif
/* On some machines (notably 68000 series) "int" is 32 bits, but multiplying
* two 16-bit shorts is faster than multiplying two ints. Define MULTIPLIER
* as short on such a machine. MULTIPLIER must be at least 16 bits wide.
*/
#ifndef MULTIPLIER
#define MULTIPLIER int16 /* type for fastest integer multiply */
#ifndef WITH_SIMD
#define MULTIPLIER int /* type for fastest integer multiply */
#else
#define MULTIPLIER short /* prefer 16-bit with SIMD for parellelism */
#endif
#endif

Просмотреть файл

@ -1,45 +0,0 @@
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is mozilla.org code.
*
* The Initial Developer of the Original Code is
* Netscape Communications Corporation.
* Portions created by the Initial Developer are Copyright (C) 1998
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
#ifndef __jos2fig_h__
#define __jos2fig_h__
/*
** Place holder for the OS/2 code that might actually make it into the trunk someday. Maybe.
*/
#endif /* __jos2fig_h__ */

26
jpeg/jpegcomp.h Normal file
Просмотреть файл

@ -0,0 +1,26 @@
/*
* jpegcomp.h
*
* Copyright (C) 2010, D. R. Commander
* For conditions of distribution and use, see the accompanying README file.
*
* JPEG compatibility macros
* These declarations are considered internal to the JPEG library; most
* applications using the library shouldn't need to include this file.
*/
#if JPEG_LIB_VERSION >= 70
#define _DCT_scaled_size DCT_h_scaled_size
#define _min_DCT_scaled_size min_DCT_h_scaled_size
#define _min_DCT_h_scaled_size min_DCT_h_scaled_size
#define _min_DCT_v_scaled_size min_DCT_v_scaled_size
#define _jpeg_width jpeg_width
#define _jpeg_height jpeg_height
#else
#define _DCT_scaled_size DCT_scaled_size
#define _min_DCT_scaled_size min_DCT_scaled_size
#define _min_DCT_h_scaled_size min_DCT_scaled_size
#define _min_DCT_v_scaled_size min_DCT_scaled_size
#define _jpeg_width image_width
#define _jpeg_height image_height
#endif

Просмотреть файл

@ -2,6 +2,7 @@
* jpegint.h
*
* Copyright (C) 1991-1997, Thomas G. Lane.
* Modified 1997-2009 by Guido Vollbeding.
* This file is part of the Independent JPEG Group's software.
* For conditions of distribution and use, see the accompanying README file.
*
@ -304,6 +305,7 @@ struct jpeg_color_quantizer {
#define jinit_forward_dct jIFDCT
#define jinit_huff_encoder jIHEncoder
#define jinit_phuff_encoder jIPHEncoder
#define jinit_arith_encoder jIAEncoder
#define jinit_marker_writer jIMWriter
#define jinit_master_decompress jIDMaster
#define jinit_d_main_controller jIDMainC
@ -313,6 +315,7 @@ struct jpeg_color_quantizer {
#define jinit_marker_reader jIMReader
#define jinit_huff_decoder jIHDecoder
#define jinit_phuff_decoder jIPHDecoder
#define jinit_arith_decoder jIADecoder
#define jinit_inverse_dct jIIDCT
#define jinit_upsampler jIUpsampler
#define jinit_color_deconverter jIDColor
@ -327,6 +330,7 @@ struct jpeg_color_quantizer {
#define jzero_far jZeroFar
#define jpeg_zigzag_order jZIGTable
#define jpeg_natural_order jZAGTable
#define jpeg_aritab jAriTab
#endif /* NEED_SHORT_EXTERNAL_NAMES */
@ -345,6 +349,7 @@ EXTERN(void) jinit_downsampler JPP((j_compress_ptr cinfo));
EXTERN(void) jinit_forward_dct JPP((j_compress_ptr cinfo));
EXTERN(void) jinit_huff_encoder JPP((j_compress_ptr cinfo));
EXTERN(void) jinit_phuff_encoder JPP((j_compress_ptr cinfo));
EXTERN(void) jinit_arith_encoder JPP((j_compress_ptr cinfo));
EXTERN(void) jinit_marker_writer JPP((j_compress_ptr cinfo));
/* Decompression module initialization routines */
EXTERN(void) jinit_master_decompress JPP((j_decompress_ptr cinfo));
@ -358,6 +363,7 @@ EXTERN(void) jinit_input_controller JPP((j_decompress_ptr cinfo));
EXTERN(void) jinit_marker_reader JPP((j_decompress_ptr cinfo));
EXTERN(void) jinit_huff_decoder JPP((j_decompress_ptr cinfo));
EXTERN(void) jinit_phuff_decoder JPP((j_decompress_ptr cinfo));
EXTERN(void) jinit_arith_decoder JPP((j_decompress_ptr cinfo));
EXTERN(void) jinit_inverse_dct JPP((j_decompress_ptr cinfo));
EXTERN(void) jinit_upsampler JPP((j_decompress_ptr cinfo));
EXTERN(void) jinit_color_deconverter JPP((j_decompress_ptr cinfo));
@ -369,7 +375,7 @@ EXTERN(void) jinit_memory_mgr JPP((j_common_ptr cinfo));
/* Utility routines in jutils.c */
EXTERN(long) jdiv_round_up JPP((long a, long b));
EXTERN(long) jround_up JPP((long a, long b));
EXTERN(size_t) jround_up JPP((size_t a, size_t b));
EXTERN(void) jcopy_sample_rows JPP((JSAMPARRAY input_array, int source_row,
JSAMPARRAY output_array, int dest_row,
int num_rows, JDIMENSION num_cols));
@ -382,6 +388,9 @@ extern const int jpeg_zigzag_order[]; /* natural coef order to zigzag order */
#endif
extern const int jpeg_natural_order[]; /* zigzag coef order to natural order */
/* Arithmetic coding probability estimation tables in jaricom.c */
extern const INT32 jpeg_aritab[];
/* Suppress undefined-structure complaints if necessary. */
#ifdef INCOMPLETE_TYPES_BROKEN

Просмотреть файл

@ -2,6 +2,8 @@
* jpeglib.h
*
* Copyright (C) 1991-1998, Thomas G. Lane.
* Modified 2002-2009 by Guido Vollbeding.
* Copyright (C) 2009-2010, D. R. Commander.
* This file is part of the Independent JPEG Group's software.
* For conditions of distribution and use, see the accompanying README file.
*
@ -13,23 +15,6 @@
#ifndef JPEGLIB_H
#define JPEGLIB_H
#ifdef XP_OS2
/*
* On OS/2, the system will have defined RGB_* so we #undef 'em to avoid warnings
* from jmorecfg.h.
*/
#ifdef RGB_RED
#undef RGB_RED
#endif
#ifdef RGB_GREEN
#undef RGB_GREEN
#endif
#ifdef RGB_BLUE
#undef RGB_BLUE
#endif
#endif
/*
* First we include the configuration files that record how this
* installation of the JPEG library is set up. jconfig.h can be
@ -43,16 +28,11 @@
#include "jmorecfg.h" /* seldom changed options */
#ifdef HAVE_MMX_INTEL_MNEMONICS
extern int MMXAvailable;
#ifdef __cplusplus
#ifndef DONT_USE_EXTERN_C
extern "C" {
#endif
#endif
/* Version ID for the JPEG library.
* Might be useful for tests like "#if JPEG_LIB_VERSION >= 60".
*/
#define JPEG_LIB_VERSION 62 /* Version 6b */
/* Various constants determining the sizes of things.
@ -166,12 +146,17 @@ typedef struct {
* Values of 1,2,4,8 are likely to be supported. Note that different
* components may receive different IDCT scalings.
*/
#if JPEG_LIB_VERSION >= 70
int DCT_h_scaled_size;
int DCT_v_scaled_size;
#else
int DCT_scaled_size;
#endif
/* The downsampled dimensions are the component's actual, unpadded number
* of samples at the main buffer (preprocessing/compression interface), thus
* downsampled_width = ceil(image_width * Hi/Hmax)
* and similarly for height. For decompression, IDCT scaling is included, so
* downsampled_width = ceil(image_width * Hi/Hmax * DCT_scaled_size/DCTSIZE)
* downsampled_width = ceil(image_width * Hi/Hmax * DCT_[h_]scaled_size/DCTSIZE)
*/
JDIMENSION downsampled_width; /* actual width in samples */
JDIMENSION downsampled_height; /* actual height in samples */
@ -186,7 +171,7 @@ typedef struct {
int MCU_width; /* number of blocks per MCU, horizontally */
int MCU_height; /* number of blocks per MCU, vertically */
int MCU_blocks; /* MCU_width * MCU_height */
int MCU_sample_width; /* MCU width in samples, MCU_width*DCT_scaled_size */
int MCU_sample_width; /* MCU width in samples, MCU_width*DCT_[h_]scaled_size */
int last_col_width; /* # of non-dummy blocks across in last MCU */
int last_row_height; /* # of non-dummy blocks down in last MCU */
@ -225,13 +210,22 @@ struct jpeg_marker_struct {
/* Known color spaces. */
#define JCS_EXTENSIONS 1
typedef enum {
JCS_UNKNOWN, /* error/unspecified */
JCS_GRAYSCALE, /* monochrome */
JCS_RGB, /* red/green/blue */
JCS_RGB, /* red/green/blue as specified by the RGB_RED, RGB_GREEN,
RGB_BLUE, and RGB_PIXELSIZE macros */
JCS_YCbCr, /* Y/Cb/Cr (also known as YUV) */
JCS_CMYK, /* C/M/Y/K */
JCS_YCCK /* Y/Cb/Cr/K */
JCS_YCCK, /* Y/Cb/Cr/K */
JCS_EXT_RGB, /* red/green/blue */
JCS_EXT_RGBX, /* red/green/blue/x */
JCS_EXT_BGR, /* blue/green/red */
JCS_EXT_BGRX, /* blue/green/red/x */
JCS_EXT_XBGR, /* x/blue/green/red */
JCS_EXT_XRGB /* x/red/green/blue */
} J_COLOR_SPACE;
/* DCT/IDCT algorithm options. */
@ -313,6 +307,19 @@ struct jpeg_compress_struct {
* helper routines to simplify changing parameters.
*/
#if JPEG_LIB_VERSION >= 70
unsigned int scale_num, scale_denom; /* fraction by which to scale image */
JDIMENSION jpeg_width; /* scaled JPEG image width */
JDIMENSION jpeg_height; /* scaled JPEG image height */
/* Dimensions of actual JPEG image that will be written to file,
* derived from input dimensions by scaling factors above.
* These fields are computed by jpeg_start_compress().
* You can also use jpeg_calc_jpeg_dimensions() to determine these values
* in advance of calling jpeg_start_compress().
*/
#endif
int data_precision; /* bits of precision in image data */
int num_components; /* # of color components in JPEG image */
@ -320,14 +327,19 @@ struct jpeg_compress_struct {
jpeg_component_info * comp_info;
/* comp_info[i] describes component that appears i'th in SOF */
JQUANT_TBL * quant_tbl_ptrs[NUM_QUANT_TBLS];
/* ptrs to coefficient quantization tables, or NULL if not defined */
#if JPEG_LIB_VERSION >= 70
int q_scale_factor[NUM_QUANT_TBLS];
#endif
/* ptrs to coefficient quantization tables, or NULL if not defined,
* and corresponding scale factors (percentage, initialized 100).
*/
JHUFF_TBL * dc_huff_tbl_ptrs[NUM_HUFF_TBLS];
JHUFF_TBL * ac_huff_tbl_ptrs[NUM_HUFF_TBLS];
/* ptrs to Huffman coding tables, or NULL if not defined */
UINT8 arith_dc_L[NUM_ARITH_TBLS]; /* L values for DC arith-coding tables */
UINT8 arith_dc_U[NUM_ARITH_TBLS]; /* U values for DC arith-coding tables */
UINT8 arith_ac_K[NUM_ARITH_TBLS]; /* Kx values for AC arith-coding tables */
@ -343,6 +355,9 @@ struct jpeg_compress_struct {
boolean arith_code; /* TRUE=arithmetic coding, FALSE=Huffman */
boolean optimize_coding; /* TRUE=optimize entropy encoding parms */
boolean CCIR601_sampling; /* TRUE=first samples are cosited */
#if JPEG_LIB_VERSION >= 70
boolean do_fancy_downsampling; /* TRUE=apply fancy downsampling */
#endif
int smoothing_factor; /* 1..100, or 0 for no input smoothing */
J_DCT_METHOD dct_method; /* DCT algorithm selector */
@ -386,6 +401,11 @@ struct jpeg_compress_struct {
int max_h_samp_factor; /* largest h_samp_factor */
int max_v_samp_factor; /* largest v_samp_factor */
#if JPEG_LIB_VERSION >= 70
int min_DCT_h_scaled_size; /* smallest DCT_h_scaled_size of any component */
int min_DCT_v_scaled_size; /* smallest DCT_v_scaled_size of any component */
#endif
JDIMENSION total_iMCU_rows; /* # of iMCU rows to be input to coef ctlr */
/* The coefficient controller receives data in units of MCU rows as defined
* for fully interleaved scans (whether the JPEG file is interleaved or not).
@ -411,6 +431,12 @@ struct jpeg_compress_struct {
int Ss, Se, Ah, Al; /* progressive JPEG parameters for scan */
#if JPEG_LIB_VERSION >= 80
int block_size; /* the basic DCT block size: 1..16 */
const int * natural_order; /* natural-order position array */
int lim_Se; /* min( Se, DCTSIZE2-1 ) */
#endif
/*
* Links to compression subobjects (methods and private variables of modules)
*/
@ -557,6 +583,9 @@ struct jpeg_decompress_struct {
jpeg_component_info * comp_info;
/* comp_info[i] describes component that appears i'th in SOF */
#if JPEG_LIB_VERSION >= 80
boolean is_baseline; /* TRUE if Baseline SOF0 encountered */
#endif
boolean progressive_mode; /* TRUE if SOFn specifies progressive mode */
boolean arith_code; /* TRUE=arithmetic coding, FALSE=Huffman */
@ -597,7 +626,12 @@ struct jpeg_decompress_struct {
int max_h_samp_factor; /* largest h_samp_factor */
int max_v_samp_factor; /* largest v_samp_factor */
#if JPEG_LIB_VERSION >= 70
int min_DCT_h_scaled_size; /* smallest DCT_h_scaled_size of any component */
int min_DCT_v_scaled_size; /* smallest DCT_v_scaled_size of any component */
#else
int min_DCT_scaled_size; /* smallest DCT_scaled_size of any component */
#endif
JDIMENSION total_iMCU_rows; /* # of iMCU rows in image */
/* The coefficient controller's input and output progress is measured in
@ -605,7 +639,7 @@ struct jpeg_decompress_struct {
* in fully interleaved JPEG scans, but are used whether the scan is
* interleaved or not. We define an iMCU row as v_samp_factor DCT block
* rows of each component. Therefore, the IDCT output contains
* v_samp_factor*DCT_scaled_size sample rows of a component per iMCU row.
* v_samp_factor*DCT_[v_]scaled_size sample rows of a component per iMCU row.
*/
JSAMPLE * sample_range_limit; /* table for fast range-limiting */
@ -629,6 +663,14 @@ struct jpeg_decompress_struct {
int Ss, Se, Ah, Al; /* progressive JPEG parameters for scan */
#if JPEG_LIB_VERSION >= 80
/* These fields are derived from Se of first SOS marker.
*/
int block_size; /* the basic DCT block size: 1..16 */
const int * natural_order; /* natural-order position array for entropy decode */
int lim_Se; /* min( Se, DCTSIZE2-1 ) for entropy decode */
#endif
/* This field is shared between entropy decoder and marker parser.
* It is either zero or the code of a JPEG marker that has been
* read from the data source, but has not yet been processed.
@ -858,11 +900,18 @@ typedef JMETHOD(boolean, jpeg_marker_parser_method, (j_decompress_ptr cinfo));
#define jpeg_destroy_decompress jDestDecompress
#define jpeg_stdio_dest jStdDest
#define jpeg_stdio_src jStdSrc
#if JPEG_LIB_VERSION >= 80
#define jpeg_mem_dest jMemDest
#define jpeg_mem_src jMemSrc
#endif
#define jpeg_set_defaults jSetDefaults
#define jpeg_set_colorspace jSetColorspace
#define jpeg_default_colorspace jDefColorspace
#define jpeg_set_quality jSetQuality
#define jpeg_set_linear_quality jSetLQuality
#if JPEG_LIB_VERSION >= 70
#define jpeg_default_qtables jDefQTables
#endif
#define jpeg_add_quant_table jAddQuantTable
#define jpeg_quality_scaling jQualityScaling
#define jpeg_simple_progression jSimProgress
@ -872,6 +921,9 @@ typedef JMETHOD(boolean, jpeg_marker_parser_method, (j_decompress_ptr cinfo));
#define jpeg_start_compress jStrtCompress
#define jpeg_write_scanlines jWrtScanlines
#define jpeg_finish_compress jFinCompress
#if JPEG_LIB_VERSION >= 70
#define jpeg_calc_jpeg_dimensions jCjpegDimensions
#endif
#define jpeg_write_raw_data jWrtRawData
#define jpeg_write_marker jWrtMarker
#define jpeg_write_m_header jWrtMHeader
@ -888,6 +940,9 @@ typedef JMETHOD(boolean, jpeg_marker_parser_method, (j_decompress_ptr cinfo));
#define jpeg_input_complete jInComplete
#define jpeg_new_colormap jNewCMap
#define jpeg_consume_input jConsumeInput
#if JPEG_LIB_VERSION >= 80
#define jpeg_core_output_dimensions jCoreDimensions
#endif
#define jpeg_calc_output_dimensions jCalcDimensions
#define jpeg_save_markers jSaveMarkers
#define jpeg_set_marker_processor jSetMarker
@ -901,9 +956,6 @@ typedef JMETHOD(boolean, jpeg_marker_parser_method, (j_decompress_ptr cinfo));
#define jpeg_resync_to_restart jResyncRestart
#endif /* NEED_SHORT_EXTERNAL_NAMES */
#ifdef __cplusplus
extern "C" {
#endif
/* Default error-management setup */
EXTERN(struct jpeg_error_mgr *) jpeg_std_error
@ -935,6 +987,16 @@ EXTERN(void) jpeg_destroy_decompress JPP((j_decompress_ptr cinfo));
EXTERN(void) jpeg_stdio_dest JPP((j_compress_ptr cinfo, FILE * outfile));
EXTERN(void) jpeg_stdio_src JPP((j_decompress_ptr cinfo, FILE * infile));
#if JPEG_LIB_VERSION >= 80
/* Data source and destination managers: memory buffers. */
EXTERN(void) jpeg_mem_dest JPP((j_compress_ptr cinfo,
unsigned char ** outbuffer,
unsigned long * outsize));
EXTERN(void) jpeg_mem_src JPP((j_decompress_ptr cinfo,
unsigned char * inbuffer,
unsigned long insize));
#endif
/* Default parameter setup for compression */
EXTERN(void) jpeg_set_defaults JPP((j_compress_ptr cinfo));
/* Compression parameter setup aids */
@ -946,6 +1008,10 @@ EXTERN(void) jpeg_set_quality JPP((j_compress_ptr cinfo, int quality,
EXTERN(void) jpeg_set_linear_quality JPP((j_compress_ptr cinfo,
int scale_factor,
boolean force_baseline));
#if JPEG_LIB_VERSION >= 70
EXTERN(void) jpeg_default_qtables JPP((j_compress_ptr cinfo,
boolean force_baseline));
#endif
EXTERN(void) jpeg_add_quant_table JPP((j_compress_ptr cinfo, int which_tbl,
const unsigned int *basic_table,
int scale_factor,
@ -965,12 +1031,17 @@ EXTERN(JDIMENSION) jpeg_write_scanlines JPP((j_compress_ptr cinfo,
JDIMENSION num_lines));
EXTERN(void) jpeg_finish_compress JPP((j_compress_ptr cinfo));
#if JPEG_LIB_VERSION >= 70
/* Precalculate JPEG dimensions for current compression parameters. */
EXTERN(void) jpeg_calc_jpeg_dimensions JPP((j_compress_ptr cinfo));
#endif
/* Replaces jpeg_write_scanlines when writing raw downsampled data. */
EXTERN(JDIMENSION) jpeg_write_raw_data JPP((j_compress_ptr cinfo,
JSAMPIMAGE data,
JDIMENSION num_lines));
/* Write a special marker. See libjpeg.doc concerning safe usage. */
/* Write a special marker. See libjpeg.txt concerning safe usage. */
EXTERN(void) jpeg_write_marker
JPP((j_compress_ptr cinfo, int marker,
const JOCTET * dataptr, unsigned int datalen));
@ -1024,6 +1095,9 @@ EXTERN(int) jpeg_consume_input JPP((j_decompress_ptr cinfo));
#define JPEG_SCAN_COMPLETED 4 /* Completed last iMCU row of a scan */
/* Precalculate output dimensions for current decompression parameters. */
#if JPEG_LIB_VERSION >= 80
EXTERN(void) jpeg_core_output_dimensions JPP((j_decompress_ptr cinfo));
#endif
EXTERN(void) jpeg_calc_output_dimensions JPP((j_decompress_ptr cinfo));
/* Control saving of COM and APPn markers into marker_list. */
@ -1062,9 +1136,6 @@ EXTERN(void) jpeg_destroy JPP((j_common_ptr cinfo));
EXTERN(boolean) jpeg_resync_to_restart JPP((j_decompress_ptr cinfo,
int desired));
#ifdef __cplusplus
} /* extern "C" */
#endif /* __cplusplus */
/* These marker codes are exported since applications and data source modules
* are likely to want to use them.
@ -1121,4 +1192,10 @@ struct jpeg_color_quantizer { long dummy; };
#include "jerror.h" /* fetch error codes too */
#endif
#ifdef __cplusplus
#ifndef DONT_USE_EXTERN_C
}
#endif
#endif
#endif /* JPEGLIB_H */

Просмотреть файл

@ -2,6 +2,7 @@
* jquant1.c
*
* Copyright (C) 1991-1996, Thomas G. Lane.
* Copyright (C) 2009, D. R. Commander
* This file is part of the Independent JPEG Group's software.
* For conditions of distribution and use, see the accompanying README file.
*
@ -193,7 +194,10 @@ select_ncolors (j_decompress_ptr cinfo, int Ncolors[])
int total_colors, iroot, i, j;
boolean changed;
long temp;
static const int RGB_order[3] = { RGB_GREEN, RGB_RED, RGB_BLUE };
int RGB_order[3] = { RGB_GREEN, RGB_RED, RGB_BLUE };
RGB_order[0] = rgb_green[cinfo->out_color_space];
RGB_order[1] = rgb_red[cinfo->out_color_space];
RGB_order[2] = rgb_blue[cinfo->out_color_space];
/* We can allocate at least the nc'th root of max_colors per component. */
/* Compute floor(nc'th root of max_colors). */

Просмотреть файл

@ -2,6 +2,7 @@
* jquant2.c
*
* Copyright (C) 1991-1996, Thomas G. Lane.
* Copyright (C) 2009, D. R. Commander.
* This file is part of the Independent JPEG Group's software.
* For conditions of distribution and use, see the accompanying README file.
*
@ -74,29 +75,10 @@
#define G_SCALE 3 /* scale G distances by this much */
#define B_SCALE 1 /* and B by this much */
/* Relabel R/G/B as components 0/1/2, respecting the RGB ordering defined
* in jmorecfg.h. As the code stands, it will do the right thing for R,G,B
* and B,G,R orders. If you define some other weird order in jmorecfg.h,
* you'll get compile errors until you extend this logic. In that case
* you'll probably want to tweak the histogram sizes too.
*/
#if RGB_RED == 0
#define C0_SCALE R_SCALE
#endif
#if RGB_BLUE == 0
#define C0_SCALE B_SCALE
#endif
#if RGB_GREEN == 1
#define C1_SCALE G_SCALE
#endif
#if RGB_RED == 2
#define C2_SCALE R_SCALE
#endif
#if RGB_BLUE == 2
#define C2_SCALE B_SCALE
#endif
static const int c_scales[3]={R_SCALE, G_SCALE, B_SCALE};
#define C0_SCALE c_scales[rgb_red[cinfo->out_color_space]]
#define C1_SCALE c_scales[rgb_green[cinfo->out_color_space]]
#define C2_SCALE c_scales[rgb_blue[cinfo->out_color_space]]
/*
* First we have the histogram data structure and routines for creating it.
@ -454,15 +436,16 @@ median_cut (j_decompress_ptr cinfo, boxptr boxlist, int numboxes,
/* We want to break any ties in favor of green, then red, blue last.
* This code does the right thing for R,G,B or B,G,R color orders only.
*/
#if RGB_RED == 0
cmax = c1; n = 1;
if (c0 > cmax) { cmax = c0; n = 0; }
if (c2 > cmax) { n = 2; }
#else
cmax = c1; n = 1;
if (c2 > cmax) { cmax = c2; n = 2; }
if (c0 > cmax) { n = 0; }
#endif
if (rgb_red[cinfo->out_color_space] == 0) {
cmax = c1; n = 1;
if (c0 > cmax) { cmax = c0; n = 0; }
if (c2 > cmax) { n = 2; }
}
else {
cmax = c1; n = 1;
if (c2 > cmax) { cmax = c2; n = 2; }
if (c0 > cmax) { n = 0; }
}
/* Choose split point along selected axis, and update box bounds.
* Current algorithm: split at halfway point.
* (Since the box has been shrunk to minimum volume,

90
jpeg/jsimd.h Normal file
Просмотреть файл

@ -0,0 +1,90 @@
/*
* jsimd.h
*
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
*
* Based on the x86 SIMD extension for IJG JPEG library,
* Copyright (C) 1999-2006, MIYASAKA Masaru.
* For conditions of distribution and use, see copyright notice in jsimdext.inc
*
*/
/* Short forms of external names for systems with brain-damaged linkers. */
#ifdef NEED_SHORT_EXTERNAL_NAMES
#define jsimd_can_rgb_ycc jSCanRgbYcc
#define jsimd_can_ycc_rgb jSCanYccRgb
#define jsimd_rgb_ycc_convert jSRgbYccConv
#define jsimd_ycc_rgb_convert jSYccRgbConv
#define jsimd_can_h2v2_downsample jSCanH2V2Down
#define jsimd_can_h2v1_downsample jSCanH2V1Down
#define jsimd_h2v2_downsample jSH2V2Down
#define jsimd_h2v1_downsample jSH2V1Down
#define jsimd_can_h2v2_upsample jSCanH2V2Up
#define jsimd_can_h2v1_upsample jSCanH2V1Up
#define jsimd_h2v2_upsample jSH2V2Up
#define jsimd_h2v1_upsample jSH2V1Up
#define jsimd_can_h2v2_fancy_upsample jSCanH2V2FUp
#define jsimd_can_h2v1_fancy_upsample jSCanH2V1FUp
#define jsimd_h2v2_fancy_upsample jSH2V2FUp
#define jsimd_h2v1_fancy_upsample jSH2V1FUp
#define jsimd_can_h2v2_merged_upsample jSCanH2V2MUp
#define jsimd_can_h2v1_merged_upsample jSCanH2V1MUp
#define jsimd_h2v2_merged_upsample jSH2V2MUp
#define jsimd_h2v1_merged_upsample jSH2V1MUp
#endif /* NEED_SHORT_EXTERNAL_NAMES */
EXTERN(int) jsimd_can_rgb_ycc JPP((void));
EXTERN(int) jsimd_can_ycc_rgb JPP((void));
EXTERN(void) jsimd_rgb_ycc_convert
JPP((j_compress_ptr cinfo,
JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
JDIMENSION output_row, int num_rows));
EXTERN(void) jsimd_ycc_rgb_convert
JPP((j_decompress_ptr cinfo,
JSAMPIMAGE input_buf, JDIMENSION input_row,
JSAMPARRAY output_buf, int num_rows));
EXTERN(int) jsimd_can_h2v2_downsample JPP((void));
EXTERN(int) jsimd_can_h2v1_downsample JPP((void));
EXTERN(void) jsimd_h2v2_downsample
JPP((j_compress_ptr cinfo, jpeg_component_info * compptr,
JSAMPARRAY input_data, JSAMPARRAY output_data));
EXTERN(void) jsimd_h2v1_downsample
JPP((j_compress_ptr cinfo, jpeg_component_info * compptr,
JSAMPARRAY input_data, JSAMPARRAY output_data));
EXTERN(int) jsimd_can_h2v2_upsample JPP((void));
EXTERN(int) jsimd_can_h2v1_upsample JPP((void));
EXTERN(void) jsimd_h2v2_upsample
JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr));
EXTERN(void) jsimd_h2v1_upsample
JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr));
EXTERN(int) jsimd_can_h2v2_fancy_upsample JPP((void));
EXTERN(int) jsimd_can_h2v1_fancy_upsample JPP((void));
EXTERN(void) jsimd_h2v2_fancy_upsample
JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr));
EXTERN(void) jsimd_h2v1_fancy_upsample
JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr));
EXTERN(int) jsimd_can_h2v2_merged_upsample JPP((void));
EXTERN(int) jsimd_can_h2v1_merged_upsample JPP((void));
EXTERN(void) jsimd_h2v2_merged_upsample
JPP((j_decompress_ptr cinfo,
JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr,
JSAMPARRAY output_buf));
EXTERN(void) jsimd_h2v1_merged_upsample
JPP((j_decompress_ptr cinfo,
JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr,
JSAMPARRAY output_buf));

300
jpeg/jsimd_none.c Normal file
Просмотреть файл

@ -0,0 +1,300 @@
/*
* jsimd_none.c
*
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
* Copyright 2009 D. R. Commander
*
* Based on the x86 SIMD extension for IJG JPEG library,
* Copyright (C) 1999-2006, MIYASAKA Masaru.
* For conditions of distribution and use, see copyright notice in jsimdext.inc
*
* This file contains stubs for when there is no SIMD support available.
*/
#define JPEG_INTERNALS
#include "jinclude.h"
#include "jpeglib.h"
#include "jsimd.h"
#include "jdct.h"
#include "jsimddct.h"
GLOBAL(int)
jsimd_can_rgb_ycc (void)
{
return 0;
}
GLOBAL(int)
jsimd_can_ycc_rgb (void)
{
return 0;
}
GLOBAL(void)
jsimd_rgb_ycc_convert (j_compress_ptr cinfo,
JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
JDIMENSION output_row, int num_rows)
{
}
GLOBAL(void)
jsimd_ycc_rgb_convert (j_decompress_ptr cinfo,
JSAMPIMAGE input_buf, JDIMENSION input_row,
JSAMPARRAY output_buf, int num_rows)
{
}
GLOBAL(int)
jsimd_can_h2v2_downsample (void)
{
return 0;
}
GLOBAL(int)
jsimd_can_h2v1_downsample (void)
{
return 0;
}
GLOBAL(void)
jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
JSAMPARRAY input_data, JSAMPARRAY output_data)
{
}
GLOBAL(void)
jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
JSAMPARRAY input_data, JSAMPARRAY output_data)
{
}
GLOBAL(int)
jsimd_can_h2v2_upsample (void)
{
return 0;
}
GLOBAL(int)
jsimd_can_h2v1_upsample (void)
{
return 0;
}
GLOBAL(void)
jsimd_h2v2_upsample (j_decompress_ptr cinfo,
jpeg_component_info * compptr,
JSAMPARRAY input_data,
JSAMPARRAY * output_data_ptr)
{
}
GLOBAL(void)
jsimd_h2v1_upsample (j_decompress_ptr cinfo,
jpeg_component_info * compptr,
JSAMPARRAY input_data,
JSAMPARRAY * output_data_ptr)
{
}
GLOBAL(int)
jsimd_can_h2v2_fancy_upsample (void)
{
return 0;
}
GLOBAL(int)
jsimd_can_h2v1_fancy_upsample (void)
{
return 0;
}
GLOBAL(void)
jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo,
jpeg_component_info * compptr,
JSAMPARRAY input_data,
JSAMPARRAY * output_data_ptr)
{
}
GLOBAL(void)
jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo,
jpeg_component_info * compptr,
JSAMPARRAY input_data,
JSAMPARRAY * output_data_ptr)
{
}
GLOBAL(int)
jsimd_can_h2v2_merged_upsample (void)
{
return 0;
}
GLOBAL(int)
jsimd_can_h2v1_merged_upsample (void)
{
return 0;
}
GLOBAL(void)
jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo,
JSAMPIMAGE input_buf,
JDIMENSION in_row_group_ctr,
JSAMPARRAY output_buf)
{
}
GLOBAL(void)
jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo,
JSAMPIMAGE input_buf,
JDIMENSION in_row_group_ctr,
JSAMPARRAY output_buf)
{
}
GLOBAL(int)
jsimd_can_convsamp (void)
{
return 0;
}
GLOBAL(int)
jsimd_can_convsamp_float (void)
{
return 0;
}
GLOBAL(void)
jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col,
DCTELEM * workspace)
{
}
GLOBAL(void)
jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col,
FAST_FLOAT * workspace)
{
}
GLOBAL(int)
jsimd_can_fdct_islow (void)
{
return 0;
}
GLOBAL(int)
jsimd_can_fdct_ifast (void)
{
return 0;
}
GLOBAL(int)
jsimd_can_fdct_float (void)
{
return 0;
}
GLOBAL(void)
jsimd_fdct_islow (DCTELEM * data)
{
}
GLOBAL(void)
jsimd_fdct_ifast (DCTELEM * data)
{
}
GLOBAL(void)
jsimd_fdct_float (FAST_FLOAT * data)
{
}
GLOBAL(int)
jsimd_can_quantize (void)
{
return 0;
}
GLOBAL(int)
jsimd_can_quantize_float (void)
{
return 0;
}
GLOBAL(void)
jsimd_quantize (JCOEFPTR coef_block, DCTELEM * divisors,
DCTELEM * workspace)
{
}
GLOBAL(void)
jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors,
FAST_FLOAT * workspace)
{
}
GLOBAL(int)
jsimd_can_idct_2x2 (void)
{
return 0;
}
GLOBAL(int)
jsimd_can_idct_4x4 (void)
{
return 0;
}
GLOBAL(void)
jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
JCOEFPTR coef_block, JSAMPARRAY output_buf,
JDIMENSION output_col)
{
}
GLOBAL(void)
jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
JCOEFPTR coef_block, JSAMPARRAY output_buf,
JDIMENSION output_col)
{
}
GLOBAL(int)
jsimd_can_idct_islow (void)
{
return 0;
}
GLOBAL(int)
jsimd_can_idct_ifast (void)
{
return 0;
}
GLOBAL(int)
jsimd_can_idct_float (void)
{
return 0;
}
GLOBAL(void)
jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
JCOEFPTR coef_block, JSAMPARRAY output_buf,
JDIMENSION output_col)
{
}
GLOBAL(void)
jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr,
JCOEFPTR coef_block, JSAMPARRAY output_buf,
JDIMENSION output_col)
{
}
GLOBAL(void)
jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr,
JCOEFPTR coef_block, JSAMPARRAY output_buf,
JDIMENSION output_col)
{
}

102
jpeg/jsimddct.h Normal file
Просмотреть файл

@ -0,0 +1,102 @@
/*
* jsimddct.h
*
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
*
* Based on the x86 SIMD extension for IJG JPEG library,
* Copyright (C) 1999-2006, MIYASAKA Masaru.
* For conditions of distribution and use, see copyright notice in jsimdext.inc
*
*/
/* Short forms of external names for systems with brain-damaged linkers. */
#ifdef NEED_SHORT_EXTERNAL_NAMES
#define jsimd_can_convsamp jSCanConv
#define jsimd_can_convsamp_float jSCanConvF
#define jsimd_convsamp jSConv
#define jsimd_convsamp_float jSConvF
#define jsimd_can_fdct_islow jSCanFDCTIS
#define jsimd_can_fdct_ifast jSCanFDCTIF
#define jsimd_can_fdct_float jSCanFDCTFl
#define jsimd_fdct_islow jSFDCTIS
#define jsimd_fdct_ifast jSFDCTIF
#define jsimd_fdct_float jSFDCTFl
#define jsimd_can_quantize jSCanQuant
#define jsimd_can_quantize_float jSCanQuantF
#define jsimd_quantize jSQuant
#define jsimd_quantize_float jSQuantF
#define jsimd_can_idct_2x2 jSCanIDCT22
#define jsimd_can_idct_4x4 jSCanIDCT44
#define jsimd_idct_2x2 jSIDCT22
#define jsimd_idct_4x4 jSIDCT44
#define jsimd_can_idct_islow jSCanIDCTIS
#define jsimd_can_idct_ifast jSCanIDCTIF
#define jsimd_can_idct_float jSCanIDCTFl
#define jsimd_idct_islow jSIDCTIS
#define jsimd_idct_ifast jSIDCTIF
#define jsimd_idct_float jSIDCTFl
#endif /* NEED_SHORT_EXTERNAL_NAMES */
EXTERN(int) jsimd_can_convsamp JPP((void));
EXTERN(int) jsimd_can_convsamp_float JPP((void));
EXTERN(void) jsimd_convsamp JPP((JSAMPARRAY sample_data,
JDIMENSION start_col,
DCTELEM * workspace));
EXTERN(void) jsimd_convsamp_float JPP((JSAMPARRAY sample_data,
JDIMENSION start_col,
FAST_FLOAT * workspace));
EXTERN(int) jsimd_can_fdct_islow JPP((void));
EXTERN(int) jsimd_can_fdct_ifast JPP((void));
EXTERN(int) jsimd_can_fdct_float JPP((void));
EXTERN(void) jsimd_fdct_islow JPP((DCTELEM * data));
EXTERN(void) jsimd_fdct_ifast JPP((DCTELEM * data));
EXTERN(void) jsimd_fdct_float JPP((FAST_FLOAT * data));
EXTERN(int) jsimd_can_quantize JPP((void));
EXTERN(int) jsimd_can_quantize_float JPP((void));
EXTERN(void) jsimd_quantize JPP((JCOEFPTR coef_block,
DCTELEM * divisors,
DCTELEM * workspace));
EXTERN(void) jsimd_quantize_float JPP((JCOEFPTR coef_block,
FAST_FLOAT * divisors,
FAST_FLOAT * workspace));
EXTERN(int) jsimd_can_idct_2x2 JPP((void));
EXTERN(int) jsimd_can_idct_4x4 JPP((void));
EXTERN(void) jsimd_idct_2x2 JPP((j_decompress_ptr cinfo,
jpeg_component_info * compptr,
JCOEFPTR coef_block,
JSAMPARRAY output_buf,
JDIMENSION output_col));
EXTERN(void) jsimd_idct_4x4 JPP((j_decompress_ptr cinfo,
jpeg_component_info * compptr,
JCOEFPTR coef_block,
JSAMPARRAY output_buf,
JDIMENSION output_col));
EXTERN(int) jsimd_can_idct_islow JPP((void));
EXTERN(int) jsimd_can_idct_ifast JPP((void));
EXTERN(int) jsimd_can_idct_float JPP((void));
EXTERN(void) jsimd_idct_islow JPP((j_decompress_ptr cinfo,
jpeg_component_info * compptr,
JCOEFPTR coef_block,
JSAMPARRAY output_buf,
JDIMENSION output_col));
EXTERN(void) jsimd_idct_ifast JPP((j_decompress_ptr cinfo,
jpeg_component_info * compptr,
JCOEFPTR coef_block,
JSAMPARRAY output_buf,
JDIMENSION output_col));
EXTERN(void) jsimd_idct_float JPP((j_decompress_ptr cinfo,
jpeg_component_info * compptr,
JCOEFPTR coef_block,
JSAMPARRAY output_buf,
JDIMENSION output_col));

Просмотреть файл

@ -77,8 +77,8 @@ jdiv_round_up (long a, long b)
}
GLOBAL(long)
jround_up (long a, long b)
GLOBAL(size_t)
jround_up (size_t a, size_t b)
/* Compute a rounded up to next multiple of b, ie, ceil(a/b)*b */
/* Assumes a >= 0, b > 0 */
{

Просмотреть файл

@ -1,7 +1,8 @@
/*
* jversion.h
*
* Copyright (C) 1991-1998, Thomas G. Lane.
* Copyright (C) 1991-2010, Thomas G. Lane, Guido Vollbeding.
* Copyright (C) 2010, D. R. Commander.
* This file is part of the Independent JPEG Group's software.
* For conditions of distribution and use, see the accompanying README file.
*
@ -9,6 +10,28 @@
*/
#if JPEG_LIB_VERSION >= 80
#define JVERSION "8b 16-May-2010"
#define JCOPYRIGHT "Copyright (C) 2010, Thomas G. Lane, Guido Vollbeding"
#elif JPEG_LIB_VERSION >= 70
#define JVERSION "7 27-Jun-2009"
#define JCOPYRIGHT "Copyright (C) 2009, Thomas G. Lane, Guido Vollbeding"
#else
#define JVERSION "6b 27-Mar-1998"
#define JCOPYRIGHT "Copyright (C) 1998, Thomas G. Lane"
#endif
#define LJTCOPYRIGHT "Copyright (C) 1999-2006 MIYASAKA Masaru\n" \
"Copyright (C) 2004 Landmark Graphics Corporation\n" \
"Copyright (C) 2005-2007 Sun Microsystems, Inc.\n" \
"Copyright (C) 2009 Pierre Ossman for Cendio AB\n" \
"Copyright (C) 2009-2011 D. R. Commander"

Просмотреть файл

@ -1,48 +0,0 @@
/* jconfig.mc6 --- jconfig.h for Microsoft C on MS-DOS, version 6.00A & up. */
/* see jconfig.doc for explanations */
/* this is a hack */
#define HAVE_BOOLEAN
#ifndef __RPCNDR_H__
typedef unsigned char boolean;
#endif
#define HAVE_PROTOTYPES
#define HAVE_UNSIGNED_CHAR
#define HAVE_UNSIGNED_SHORT
/* #define void char */
/* #define const */
#undef CHAR_IS_UNSIGNED
#define HAVE_STDDEF_H
#define HAVE_STDLIB_H
#undef NEED_BSD_STRINGS
#undef NEED_SYS_TYPES_H
#undef NEED_FAR_POINTERS /* for small or medium memory model */
#undef NEED_SHORT_EXTERNAL_NAMES
#undef INCOMPLETE_TYPES_BROKEN
#ifdef JPEG_INTERNALS
#undef RIGHT_SHIFT_IS_UNSIGNED
#define USE_MSDOS_MEMANSI
#define MAX_ALLOC_CHUNK 65520L /* Maximum request to malloc() */
#endif /* JPEG_INTERNALS */
#ifdef JPEG_CJPEG_DJPEG
#define BMP_SUPPORTED /* BMP image file format */
#define GIF_SUPPORTED /* GIF image file format */
#define PPM_SUPPORTED /* PBMPLUS PPM/PGM image file format */
#undef RLE_SUPPORTED /* Utah RLE image file format */
#define TARGA_SUPPORTED /* Targa image file format */
#define TWO_FILE_COMMANDLINE
#define USE_SETMODE /* Microsoft has setmode() */
#define NEED_SIGNAL_CATCHER /* Define this if you use jmemdos.c */
#undef DONT_USE_B_MODE
#undef PROGRESS_REPORT /* optional */
#endif /* JPEG_CJPEG_DJPEG */

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -1,274 +0,0 @@
# Generated automatically from makefile.cfg by configure.
# Makefile for Independent JPEG Group's software
# makefile.cfg is edited by configure to produce a custom Makefile.
# Read installation instructions before saying "make" !!
# For compiling with source and object files in different directories.
srcdir = $(VPATH)
# Where to install the programs and man pages.
prefix = /usr/local
exec_prefix = ${prefix}
bindir = $(exec_prefix)/bin
libdir = $(exec_prefix)/lib
includedir = $(prefix)/include
binprefix =
manprefix =
manext = 1
mandir = $(prefix)/man/man$(manext)
# The name of your C compiler:
CC= cc
# You may need to adjust these cc options:
CFLAGS= -O3 -I$(srcdir)
# Generally, we recommend defining any configuration symbols in jconfig.h,
# NOT via -D switches here.
# However, any special defines for ansi2knr.c may be included here:
ANSI2KNRFLAGS=
# Link-time cc options:
LDFLAGS=
# To link any special libraries, add the necessary -l commands here.
LDLIBS=
# Put here the object file name for the correct system-dependent memory
# manager file. For Unix this is usually jmemnobs.o, but you may want
# to use jmemansi.o or jmemname.o if you have limited swap space.
SYSDEPMEM= jmemnobs.o
# miscellaneous OS-dependent stuff
SHELL= /bin/sh
# linker
LN= $(CC)
# file deletion command
RM= rm -f
# file rename command
MV= mv
# library (.a) file creation command
AR= ar rc
# second step in .a creation (use "touch" if not needed)
AR2= ranlib
# installation program
INSTALL= cp
INSTALL_PROGRAM= ${INSTALL}
INSTALL_DATA= ${INSTALL}
# End of configurable options.
# source files: JPEG library proper
LIBSOURCES= jcapimin.c jcapistd.c jccoefct.c jccolor.c jcdctmgr.c jchuff.c \
jcinit.c jcmainct.c jcmarker.c jcmaster.c jcomapi.c jcparam.c \
jcphuff.c jcprepct.c jcsample.c jctrans.c jdapimin.c jdapistd.c \
jdatadst.c jdatasrc.c jdcoefct.c jdcolor.c jddctmgr.c jdhuff.c \
jdinput.c jdmainct.c jdmarker.c jdmaster.c jdmerge.c jdphuff.c \
jdpostct.c jdsample.c jdtrans.c jerror.c jfdctflt.c jfdctfst.c \
jfdctint.c jidctflt.c jidctfst.c jidctint.c jidctred.c jquant1.c \
jquant2.c jutils.c jmemmgr.c jmemansi.c jmemname.c jmemnobs.c \
jmemdos.c
# source files: cjpeg/djpeg/jpegtran applications, also rdjpgcom/wrjpgcom
APPSOURCES= cjpeg.c djpeg.c jpegtran.c cdjpeg.c rdcolmap.c rdswitch.c \
rdjpgcom.c wrjpgcom.c rdppm.c wrppm.c rdgif.c wrgif.c rdtarga.c \
wrtarga.c rdbmp.c wrbmp.c rdrle.c wrrle.c
SOURCES= $(LIBSOURCES) $(APPSOURCES)
# files included by source files
INCLUDES= jchuff.h jdhuff.h jdct.h jerror.h jinclude.h jmemsys.h jmorecfg.h \
jpegint.h jpeglib.h jversion.h cdjpeg.h cderror.h
# documentation, test, and support files
DOCS= README install.doc usage.doc cjpeg.1 djpeg.1 jpegtran.1 rdjpgcom.1 \
wrjpgcom.1 wizard.doc example.c libjpeg.doc structure.doc \
coderules.doc filelist.doc change.log
MKFILES= configure makefile.cfg makefile.ansi makefile.unix makefile.bcc \
makefile.mc6 makefile.dj makefile.wat makcjpeg.st makdjpeg.st \
makljpeg.st maktjpeg.st makefile.manx makefile.sas makefile.mms \
makefile.vms makvms.opt
CONFIGFILES= jconfig.cfg jconfig.manx jconfig.sas jconfig.st jconfig.bcc \
jconfig.mc6 jconfig.dj jconfig.wat jconfig.vms
OTHERFILES= jconfig.doc ckconfig.c ansi2knr.c ansi2knr.1 jmemdosa.asm
TESTFILES= testorig.jpg testimg.ppm testimg.gif testimg.jpg testprog.jpg \
testimgp.jpg
DISTFILES= $(DOCS) $(MKFILES) $(CONFIGFILES) $(SOURCES) $(INCLUDES) \
$(OTHERFILES) $(TESTFILES)
# library object files common to compression and decompression
COMOBJECTS= jcomapi.o jutils.o jerror.o jmemmgr.o $(SYSDEPMEM)
# compression library object files
CLIBOBJECTS= jcapimin.o jcapistd.o jctrans.o jcparam.o jdatadst.o jcinit.o \
jcmaster.o jcmarker.o jcmainct.o jcprepct.o jccoefct.o jccolor.o \
jcsample.o jchuff.o jcphuff.o jcdctmgr.o jfdctfst.o jfdctflt.o \
jfdctint.o
# decompression library object files
DLIBOBJECTS= jdapimin.o jdapistd.o jdatasrc.o jdmaster.o \
jdinput.o jdmarker.o jdhuff.o jdphuff.o jdmainct.o jdcoefct.o \
jdpostct.o jddctmgr.o jidctfst.o jidctflt.o jidctint.o jidctred.o \
jdsample.o jdcolor.o jquant1.o jquant2.o jdmerge.o
# These objectfiles are included in libjpeg.a
LIBOBJECTS= $(CLIBOBJECTS) $(DLIBOBJECTS) $(COMOBJECTS)
# object files for sample applications (excluding library files)
COBJECTS= cjpeg.o rdppm.o rdgif.o rdtarga.o rdrle.o rdbmp.o rdswitch.o \
cdjpeg.o
DOBJECTS= djpeg.o wrppm.o wrgif.o wrtarga.o wrrle.o wrbmp.o rdcolmap.o \
cdjpeg.o
TROBJECTS= jpegtran.o rdswitch.o cdjpeg.o
all: libjpeg.a
realall: libjpeg.a cjpeg djpeg jpegtran rdjpgcom wrjpgcom
# This rule causes ansi2knr to be invoked.
# .c.o:
# ./ansi2knr $(srcdir)/$*.c T$*.c
# $(CC) $(CFLAGS) -c T$*.c
# $(RM) T$*.c $*.o
# $(MV) T$*.o $*.o
ansi2knr: ansi2knr.c
$(CC) $(CFLAGS) $(ANSI2KNRFLAGS) -o ansi2knr ansi2knr.c
# Decompression-only library
libjpeg.a: $(DLIBOBJECTS) $(COMOBJECTS)
$(RM) libjpeg.a
$(AR) libjpeg.a $(DLIBOBJECTS) $(COMOBJECTS)
$(AR2) libjpeg.a
cjpeg: $(COBJECTS) libjpeg.a
$(LN) $(LDFLAGS) -o cjpeg $(COBJECTS) libjpeg.a $(LDLIBS)
djpeg: $(DOBJECTS) libjpeg.a
$(LN) $(LDFLAGS) -o djpeg $(DOBJECTS) libjpeg.a $(LDLIBS)
jpegtran: $(TROBJECTS) libjpeg.a
$(LN) $(LDFLAGS) -o jpegtran $(TROBJECTS) libjpeg.a $(LDLIBS)
rdjpgcom: rdjpgcom.o
$(LN) $(LDFLAGS) -o rdjpgcom rdjpgcom.o $(LDLIBS)
wrjpgcom: wrjpgcom.o
$(LN) $(LDFLAGS) -o wrjpgcom wrjpgcom.o $(LDLIBS)
jconfig.h: jconfig.doc
echo You must prepare a system-dependent jconfig.h file.
echo Please read the installation directions in install.doc.
exit 1
install: cjpeg djpeg jpegtran rdjpgcom wrjpgcom
$(INSTALL_PROGRAM) cjpeg $(bindir)/$(binprefix)cjpeg
$(INSTALL_PROGRAM) djpeg $(bindir)/$(binprefix)djpeg
$(INSTALL_PROGRAM) jpegtran $(bindir)/$(binprefix)jpegtran
$(INSTALL_PROGRAM) rdjpgcom $(bindir)/$(binprefix)rdjpgcom
$(INSTALL_PROGRAM) wrjpgcom $(bindir)/$(binprefix)wrjpgcom
$(INSTALL_DATA) $(srcdir)/cjpeg.1 $(mandir)/$(manprefix)cjpeg.$(manext)
$(INSTALL_DATA) $(srcdir)/djpeg.1 $(mandir)/$(manprefix)djpeg.$(manext)
$(INSTALL_DATA) $(srcdir)/jpegtran.1 $(mandir)/$(manprefix)jpegtran.$(manext)
$(INSTALL_DATA) $(srcdir)/rdjpgcom.1 $(mandir)/$(manprefix)rdjpgcom.$(manext)
$(INSTALL_DATA) $(srcdir)/wrjpgcom.1 $(mandir)/$(manprefix)wrjpgcom.$(manext)
install-lib: libjpeg.a install-headers
$(INSTALL_DATA) libjpeg.a $(libdir)/$(binprefix)libjpeg.a
install-headers: jconfig.h
$(INSTALL_DATA) jconfig.h $(includedir)/jconfig.h
$(INSTALL_DATA) $(srcdir)/jpeglib.h $(includedir)/jpeglib.h
$(INSTALL_DATA) $(srcdir)/jmorecfg.h $(includedir)/jmorecfg.h
$(INSTALL_DATA) $(srcdir)/jerror.h $(includedir)/jerror.h
clean:
$(RM) *.o cjpeg djpeg jpegtran libjpeg.a rdjpgcom wrjpgcom
$(RM) ansi2knr core testout* config.log config.status
distribute:
$(RM) jpegsrc.tar*
tar cvf jpegsrc.tar $(DISTFILES)
compress -v jpegsrc.tar
test: cjpeg djpeg jpegtran
$(RM) testout*
./djpeg -dct int -ppm -outfile testout.ppm $(srcdir)/testorig.jpg
./djpeg -dct int -gif -outfile testout.gif $(srcdir)/testorig.jpg
./cjpeg -dct int -outfile testout.jpg $(srcdir)/testimg.ppm
./djpeg -dct int -ppm -outfile testoutp.ppm $(srcdir)/testprog.jpg
./cjpeg -dct int -progressive -opt -outfile testoutp.jpg $(srcdir)/testimg.ppm
./jpegtran -outfile testoutt.jpg $(srcdir)/testprog.jpg
cmp $(srcdir)/testimg.ppm testout.ppm
cmp $(srcdir)/testimg.gif testout.gif
cmp $(srcdir)/testimg.jpg testout.jpg
cmp $(srcdir)/testimg.ppm testoutp.ppm
cmp $(srcdir)/testimgp.jpg testoutp.jpg
cmp $(srcdir)/testorig.jpg testoutt.jpg
check: test
# GNU Make likes to know which target names are not really files to be made:
.PHONY: all install install-lib install-headers clean distribute test check
jcapimin.o: jcapimin.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
jcapistd.o: jcapistd.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
jccoefct.o: jccoefct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
jccolor.o: jccolor.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
jcdctmgr.o: jcdctmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
jchuff.o: jchuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jchuff.h
jcinit.o: jcinit.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
jcmainct.o: jcmainct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
jcmarker.o: jcmarker.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
jcmaster.o: jcmaster.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
jcomapi.o: jcomapi.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
jcparam.o: jcparam.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
jcphuff.o: jcphuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jchuff.h
jcprepct.o: jcprepct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
jcsample.o: jcsample.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
jctrans.o: jctrans.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
jdapimin.o: jdapimin.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
jdapistd.o: jdapistd.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
jdatadst.o: jdatadst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h
jdatasrc.o: jdatasrc.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h
jdcoefct.o: jdcoefct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
jdcolor.o: jdcolor.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
jddctmgr.o: jddctmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
jdhuff.o: jdhuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdhuff.h
jdinput.o: jdinput.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
jdmainct.o: jdmainct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
jdmarker.o: jdmarker.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
jdmaster.o: jdmaster.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
jdmerge.o: jdmerge.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
jdphuff.o: jdphuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdhuff.h
jdpostct.o: jdpostct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
jdsample.o: jdsample.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
jdtrans.o: jdtrans.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
jerror.o: jerror.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jversion.h jerror.h
jfdctflt.o: jfdctflt.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
jfdctfst.o: jfdctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
jfdctint.o: jfdctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
jidctflt.o: jidctflt.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
jidctfst.o: jidctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
jidctint.o: jidctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
jidctred.o: jidctred.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
jquant1.o: jquant1.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
jquant2.o: jquant2.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
jutils.o: jutils.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
jmemmgr.o: jmemmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
jmemansi.o: jmemansi.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
jmemname.o: jmemname.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
jmemnobs.o: jmemnobs.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
jmemdos.o: jmemdos.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
cjpeg.o: cjpeg.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h jversion.h
djpeg.o: djpeg.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h jversion.h
jpegtran.o: jpegtran.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h jversion.h
cdjpeg.o: cdjpeg.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
rdcolmap.o: rdcolmap.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
rdswitch.o: rdswitch.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
rdjpgcom.o: rdjpgcom.c jinclude.h jconfig.h
wrjpgcom.o: wrjpgcom.c jinclude.h jconfig.h
rdppm.o: rdppm.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
wrppm.o: wrppm.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
rdgif.o: rdgif.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
wrgif.o: wrgif.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
rdtarga.o: rdtarga.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
wrtarga.o: wrtarga.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
rdbmp.o: rdbmp.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
wrbmp.o: wrbmp.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
rdrle.o: rdrle.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
wrrle.o: wrrle.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h

Просмотреть файл

@ -1,52 +0,0 @@
***** BEGIN LICENSE BLOCK *****
Version: MPL 1.1/GPL 2.0/LGPL 2.1
The contents of this file are subject to the Mozilla Public License Version
1.1 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.mozilla.org/MPL/
Software distributed under the License is distributed on an "AS IS" basis,
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
for the specific language governing rights and limitations under the
License.
The Original Code is mozilla.org code.
The Initial Developer of the Original Code is
Netscape Communications Corporation
Portions created by the Initial Developer are Copyright (C) 1998
the Initial Developer. All Rights Reserved.
Contributor(s):
Alternatively, the contents of this file may be used under the terms of
either the GNU General Public License Version 2 or later (the "GPL"), or
the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
in which case the provisions of the GPL or the LGPL are applicable instead
of those above. If you wish to allow use of your version of this file only
under the terms of either the GPL or the LGPL, and not to allow others to
use your version of this file under the terms of the MPL, indicate your
decision by deleting the provisions above and replace them with the notice
and other provisions required by the GPL or the LGPL. If you do not delete
the provisions above, a recipient may use your version of this file under
the terms of any one of the MPL, the GPL or the LGPL.
***** END LICENSE BLOCK *****
This directory contains a subset of the IJG JPEG library. Among other
omissions, most of the original IJG documentation has been deleted.
You can find the full IJG distribution at the archive sites mentioned in
the README file. Please note that the IJG code does not fall under the
Netscape NPL, but is freely distributable under its own copyright terms
(see README).
Several files have been modified to allow incorporation of the IJG code
into the Netscape environment. As of this writing, jconfig.h, jmorecfg.h,
and jerror.c contain Netscape-specific changes. In addition, we have created
our own makefiles following Netscape conventions, rather than using any of
those provided by IJG.
There are some other changes herein, such as MMX-specific optimizations,
which should eventually make their way back into the standard IJG
distribution.

48
jpeg/simd/Makefile.in Normal file
Просмотреть файл

@ -0,0 +1,48 @@
#
# ***** BEGIN LICENSE BLOCK *****
# Version: MPL 1.1/GPL 2.0/LGPL 2.1
#
# The contents of this file are subject to the Mozilla Public License Version
# 1.1 (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
# http://www.mozilla.org/MPL/
#
# Software distributed under the License is distributed on an "AS IS" basis,
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
# for the specific language governing rights and limitations under the
# License.
#
# The Original Code is mozilla.org code.
#
# The Initial Developer of the Original Code is
# Mozilla Corporation
# Portions created by the Initial Developer are Copyright (C) 2010
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Justin Lebar <justin.lebar@gmail.com>
#
# Alternatively, the contents of this file may be used under the terms of
# either the GNU General Public License Version 2 or later (the "GPL"), or
# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
# in which case the provisions of the GPL or the LGPL are applicable instead
# of those above. If you wish to allow use of your version of this file only
# under the terms of either the GPL or the LGPL, and not to allow others to
# use your version of this file under the terms of the MPL, indicate your
# decision by deleting the provisions above and replace them with the notice
# and other provisions required by the GPL or the LGPL. If you do not delete
# the provisions above, a recipient may use your version of this file under
# the terms of any one of the MPL, the GPL or the LGPL.
#
# ***** END LICENSE BLOCK *****
DEPTH = ../..
topsrcdir = @top_srcdir@
srcdir = @srcdir@
VPATH = @srcdir@
include $(DEPTH)/config/autoconf.mk
# empty makefile so this directory gets created in the objdir.
include $(topsrcdir)/config/rules.mk

479
jpeg/simd/jcclrmmx.asm Normal file
Просмотреть файл

@ -0,0 +1,479 @@
;
; jcclrmmx.asm - colorspace conversion (MMX)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
;
; Based on
; x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
; This file should be assembled with NASM (Netwide Assembler),
; can *not* be assembled with Microsoft's MASM or any compatible
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
;
; [TAB8]
%include "jcolsamp.inc"
; --------------------------------------------------------------------------
SECTION SEG_TEXT
BITS 32
;
; Convert some rows of samples to the output colorspace.
;
; GLOBAL(void)
; jsimd_rgb_ycc_convert_mmx (JDIMENSION img_width,
; JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
; JDIMENSION output_row, int num_rows);
;
%define img_width(b) (b)+8 ; JDIMENSION img_width
%define input_buf(b) (b)+12 ; JSAMPARRAY input_buf
%define output_buf(b) (b)+16 ; JSAMPIMAGE output_buf
%define output_row(b) (b)+20 ; JDIMENSION output_row
%define num_rows(b) (b)+24 ; int num_rows
%define original_ebp ebp+0
%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_MMWORD ; mmword wk[WK_NUM]
%define WK_NUM 8
%define gotptr wk(0)-SIZEOF_POINTER ; void * gotptr
align 16
global EXTN(jsimd_rgb_ycc_convert_mmx)
EXTN(jsimd_rgb_ycc_convert_mmx):
push ebp
mov eax,esp ; eax = original ebp
sub esp, byte 4
and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits
mov [esp],eax
mov ebp,esp ; ebp = aligned ebp
lea esp, [wk(0)]
pushpic eax ; make a room for GOT address
push ebx
; push ecx ; need not be preserved
; push edx ; need not be preserved
push esi
push edi
get_GOT ebx ; get GOT address
movpic POINTER [gotptr], ebx ; save GOT address
mov ecx, JDIMENSION [img_width(eax)] ; num_cols
test ecx,ecx
jz near .return
push ecx
mov esi, JSAMPIMAGE [output_buf(eax)]
mov ecx, JDIMENSION [output_row(eax)]
mov edi, JSAMPARRAY [esi+0*SIZEOF_JSAMPARRAY]
mov ebx, JSAMPARRAY [esi+1*SIZEOF_JSAMPARRAY]
mov edx, JSAMPARRAY [esi+2*SIZEOF_JSAMPARRAY]
lea edi, [edi+ecx*SIZEOF_JSAMPROW]
lea ebx, [ebx+ecx*SIZEOF_JSAMPROW]
lea edx, [edx+ecx*SIZEOF_JSAMPROW]
pop ecx
mov esi, JSAMPARRAY [input_buf(eax)]
mov eax, INT [num_rows(eax)]
test eax,eax
jle near .return
alignx 16,7
.rowloop:
pushpic eax
push edx
push ebx
push edi
push esi
push ecx ; col
mov esi, JSAMPROW [esi] ; inptr
mov edi, JSAMPROW [edi] ; outptr0
mov ebx, JSAMPROW [ebx] ; outptr1
mov edx, JSAMPROW [edx] ; outptr2
movpic eax, POINTER [gotptr] ; load GOT address (eax)
cmp ecx, byte SIZEOF_MMWORD
jae short .columnloop
alignx 16,7
%if RGB_PIXELSIZE == 3 ; ---------------
.column_ld1:
push eax
push edx
lea ecx,[ecx+ecx*2] ; imul ecx,RGB_PIXELSIZE
test cl, SIZEOF_BYTE
jz short .column_ld2
sub ecx, byte SIZEOF_BYTE
xor eax,eax
mov al, BYTE [esi+ecx]
.column_ld2:
test cl, SIZEOF_WORD
jz short .column_ld4
sub ecx, byte SIZEOF_WORD
xor edx,edx
mov dx, WORD [esi+ecx]
shl eax, WORD_BIT
or eax,edx
.column_ld4:
movd mmA,eax
pop edx
pop eax
test cl, SIZEOF_DWORD
jz short .column_ld8
sub ecx, byte SIZEOF_DWORD
movd mmG, DWORD [esi+ecx]
psllq mmA, DWORD_BIT
por mmA,mmG
.column_ld8:
test cl, SIZEOF_MMWORD
jz short .column_ld16
movq mmG,mmA
movq mmA, MMWORD [esi+0*SIZEOF_MMWORD]
mov ecx, SIZEOF_MMWORD
jmp short .rgb_ycc_cnv
.column_ld16:
test cl, 2*SIZEOF_MMWORD
mov ecx, SIZEOF_MMWORD
jz short .rgb_ycc_cnv
movq mmF,mmA
movq mmA, MMWORD [esi+0*SIZEOF_MMWORD]
movq mmG, MMWORD [esi+1*SIZEOF_MMWORD]
jmp short .rgb_ycc_cnv
alignx 16,7
.columnloop:
movq mmA, MMWORD [esi+0*SIZEOF_MMWORD]
movq mmG, MMWORD [esi+1*SIZEOF_MMWORD]
movq mmF, MMWORD [esi+2*SIZEOF_MMWORD]
.rgb_ycc_cnv:
; mmA=(00 10 20 01 11 21 02 12)
; mmG=(22 03 13 23 04 14 24 05)
; mmF=(15 25 06 16 26 07 17 27)
movq mmD,mmA
psllq mmA,4*BYTE_BIT ; mmA=(-- -- -- -- 00 10 20 01)
psrlq mmD,4*BYTE_BIT ; mmD=(11 21 02 12 -- -- -- --)
punpckhbw mmA,mmG ; mmA=(00 04 10 14 20 24 01 05)
psllq mmG,4*BYTE_BIT ; mmG=(-- -- -- -- 22 03 13 23)
punpcklbw mmD,mmF ; mmD=(11 15 21 25 02 06 12 16)
punpckhbw mmG,mmF ; mmG=(22 26 03 07 13 17 23 27)
movq mmE,mmA
psllq mmA,4*BYTE_BIT ; mmA=(-- -- -- -- 00 04 10 14)
psrlq mmE,4*BYTE_BIT ; mmE=(20 24 01 05 -- -- -- --)
punpckhbw mmA,mmD ; mmA=(00 02 04 06 10 12 14 16)
psllq mmD,4*BYTE_BIT ; mmD=(-- -- -- -- 11 15 21 25)
punpcklbw mmE,mmG ; mmE=(20 22 24 26 01 03 05 07)
punpckhbw mmD,mmG ; mmD=(11 13 15 17 21 23 25 27)
pxor mmH,mmH
movq mmC,mmA
punpcklbw mmA,mmH ; mmA=(00 02 04 06)
punpckhbw mmC,mmH ; mmC=(10 12 14 16)
movq mmB,mmE
punpcklbw mmE,mmH ; mmE=(20 22 24 26)
punpckhbw mmB,mmH ; mmB=(01 03 05 07)
movq mmF,mmD
punpcklbw mmD,mmH ; mmD=(11 13 15 17)
punpckhbw mmF,mmH ; mmF=(21 23 25 27)
%else ; RGB_PIXELSIZE == 4 ; -----------
.column_ld1:
test cl, SIZEOF_MMWORD/8
jz short .column_ld2
sub ecx, byte SIZEOF_MMWORD/8
movd mmA, DWORD [esi+ecx*RGB_PIXELSIZE]
.column_ld2:
test cl, SIZEOF_MMWORD/4
jz short .column_ld4
sub ecx, byte SIZEOF_MMWORD/4
movq mmF,mmA
movq mmA, MMWORD [esi+ecx*RGB_PIXELSIZE]
.column_ld4:
test cl, SIZEOF_MMWORD/2
mov ecx, SIZEOF_MMWORD
jz short .rgb_ycc_cnv
movq mmD,mmA
movq mmC,mmF
movq mmA, MMWORD [esi+0*SIZEOF_MMWORD]
movq mmF, MMWORD [esi+1*SIZEOF_MMWORD]
jmp short .rgb_ycc_cnv
alignx 16,7
.columnloop:
movq mmA, MMWORD [esi+0*SIZEOF_MMWORD]
movq mmF, MMWORD [esi+1*SIZEOF_MMWORD]
movq mmD, MMWORD [esi+2*SIZEOF_MMWORD]
movq mmC, MMWORD [esi+3*SIZEOF_MMWORD]
.rgb_ycc_cnv:
; mmA=(00 10 20 30 01 11 21 31)
; mmF=(02 12 22 32 03 13 23 33)
; mmD=(04 14 24 34 05 15 25 35)
; mmC=(06 16 26 36 07 17 27 37)
movq mmB,mmA
punpcklbw mmA,mmF ; mmA=(00 02 10 12 20 22 30 32)
punpckhbw mmB,mmF ; mmB=(01 03 11 13 21 23 31 33)
movq mmG,mmD
punpcklbw mmD,mmC ; mmD=(04 06 14 16 24 26 34 36)
punpckhbw mmG,mmC ; mmG=(05 07 15 17 25 27 35 37)
movq mmE,mmA
punpcklwd mmA,mmD ; mmA=(00 02 04 06 10 12 14 16)
punpckhwd mmE,mmD ; mmE=(20 22 24 26 30 32 34 36)
movq mmH,mmB
punpcklwd mmB,mmG ; mmB=(01 03 05 07 11 13 15 17)
punpckhwd mmH,mmG ; mmH=(21 23 25 27 31 33 35 37)
pxor mmF,mmF
movq mmC,mmA
punpcklbw mmA,mmF ; mmA=(00 02 04 06)
punpckhbw mmC,mmF ; mmC=(10 12 14 16)
movq mmD,mmB
punpcklbw mmB,mmF ; mmB=(01 03 05 07)
punpckhbw mmD,mmF ; mmD=(11 13 15 17)
movq mmG,mmE
punpcklbw mmE,mmF ; mmE=(20 22 24 26)
punpckhbw mmG,mmF ; mmG=(30 32 34 36)
punpcklbw mmF,mmH
punpckhbw mmH,mmH
psrlw mmF,BYTE_BIT ; mmF=(21 23 25 27)
psrlw mmH,BYTE_BIT ; mmH=(31 33 35 37)
%endif ; RGB_PIXELSIZE ; ---------------
; mm0=(R0 R2 R4 R6)=RE, mm2=(G0 G2 G4 G6)=GE, mm4=(B0 B2 B4 B6)=BE
; mm1=(R1 R3 R5 R7)=RO, mm3=(G1 G3 G5 G7)=GO, mm5=(B1 B3 B5 B7)=BO
; (Original)
; Y = 0.29900 * R + 0.58700 * G + 0.11400 * B
; Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTERJSAMPLE
; Cr = 0.50000 * R - 0.41869 * G - 0.08131 * B + CENTERJSAMPLE
;
; (This implementation)
; Y = 0.29900 * R + 0.33700 * G + 0.11400 * B + 0.25000 * G
; Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTERJSAMPLE
; Cr = 0.50000 * R - 0.41869 * G - 0.08131 * B + CENTERJSAMPLE
movq MMWORD [wk(0)], mm0 ; wk(0)=RE
movq MMWORD [wk(1)], mm1 ; wk(1)=RO
movq MMWORD [wk(2)], mm4 ; wk(2)=BE
movq MMWORD [wk(3)], mm5 ; wk(3)=BO
movq mm6,mm1
punpcklwd mm1,mm3
punpckhwd mm6,mm3
movq mm7,mm1
movq mm4,mm6
pmaddwd mm1,[GOTOFF(eax,PW_F0299_F0337)] ; mm1=ROL*FIX(0.299)+GOL*FIX(0.337)
pmaddwd mm6,[GOTOFF(eax,PW_F0299_F0337)] ; mm6=ROH*FIX(0.299)+GOH*FIX(0.337)
pmaddwd mm7,[GOTOFF(eax,PW_MF016_MF033)] ; mm7=ROL*-FIX(0.168)+GOL*-FIX(0.331)
pmaddwd mm4,[GOTOFF(eax,PW_MF016_MF033)] ; mm4=ROH*-FIX(0.168)+GOH*-FIX(0.331)
movq MMWORD [wk(4)], mm1 ; wk(4)=ROL*FIX(0.299)+GOL*FIX(0.337)
movq MMWORD [wk(5)], mm6 ; wk(5)=ROH*FIX(0.299)+GOH*FIX(0.337)
pxor mm1,mm1
pxor mm6,mm6
punpcklwd mm1,mm5 ; mm1=BOL
punpckhwd mm6,mm5 ; mm6=BOH
psrld mm1,1 ; mm1=BOL*FIX(0.500)
psrld mm6,1 ; mm6=BOH*FIX(0.500)
movq mm5,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; mm5=[PD_ONEHALFM1_CJ]
paddd mm7,mm1
paddd mm4,mm6
paddd mm7,mm5
paddd mm4,mm5
psrld mm7,SCALEBITS ; mm7=CbOL
psrld mm4,SCALEBITS ; mm4=CbOH
packssdw mm7,mm4 ; mm7=CbO
movq mm1, MMWORD [wk(2)] ; mm1=BE
movq mm6,mm0
punpcklwd mm0,mm2
punpckhwd mm6,mm2
movq mm5,mm0
movq mm4,mm6
pmaddwd mm0,[GOTOFF(eax,PW_F0299_F0337)] ; mm0=REL*FIX(0.299)+GEL*FIX(0.337)
pmaddwd mm6,[GOTOFF(eax,PW_F0299_F0337)] ; mm6=REH*FIX(0.299)+GEH*FIX(0.337)
pmaddwd mm5,[GOTOFF(eax,PW_MF016_MF033)] ; mm5=REL*-FIX(0.168)+GEL*-FIX(0.331)
pmaddwd mm4,[GOTOFF(eax,PW_MF016_MF033)] ; mm4=REH*-FIX(0.168)+GEH*-FIX(0.331)
movq MMWORD [wk(6)], mm0 ; wk(6)=REL*FIX(0.299)+GEL*FIX(0.337)
movq MMWORD [wk(7)], mm6 ; wk(7)=REH*FIX(0.299)+GEH*FIX(0.337)
pxor mm0,mm0
pxor mm6,mm6
punpcklwd mm0,mm1 ; mm0=BEL
punpckhwd mm6,mm1 ; mm6=BEH
psrld mm0,1 ; mm0=BEL*FIX(0.500)
psrld mm6,1 ; mm6=BEH*FIX(0.500)
movq mm1,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; mm1=[PD_ONEHALFM1_CJ]
paddd mm5,mm0
paddd mm4,mm6
paddd mm5,mm1
paddd mm4,mm1
psrld mm5,SCALEBITS ; mm5=CbEL
psrld mm4,SCALEBITS ; mm4=CbEH
packssdw mm5,mm4 ; mm5=CbE
psllw mm7,BYTE_BIT
por mm5,mm7 ; mm5=Cb
movq MMWORD [ebx], mm5 ; Save Cb
movq mm0, MMWORD [wk(3)] ; mm0=BO
movq mm6, MMWORD [wk(2)] ; mm6=BE
movq mm1, MMWORD [wk(1)] ; mm1=RO
movq mm4,mm0
punpcklwd mm0,mm3
punpckhwd mm4,mm3
movq mm7,mm0
movq mm5,mm4
pmaddwd mm0,[GOTOFF(eax,PW_F0114_F0250)] ; mm0=BOL*FIX(0.114)+GOL*FIX(0.250)
pmaddwd mm4,[GOTOFF(eax,PW_F0114_F0250)] ; mm4=BOH*FIX(0.114)+GOH*FIX(0.250)
pmaddwd mm7,[GOTOFF(eax,PW_MF008_MF041)] ; mm7=BOL*-FIX(0.081)+GOL*-FIX(0.418)
pmaddwd mm5,[GOTOFF(eax,PW_MF008_MF041)] ; mm5=BOH*-FIX(0.081)+GOH*-FIX(0.418)
movq mm3,[GOTOFF(eax,PD_ONEHALF)] ; mm3=[PD_ONEHALF]
paddd mm0, MMWORD [wk(4)]
paddd mm4, MMWORD [wk(5)]
paddd mm0,mm3
paddd mm4,mm3
psrld mm0,SCALEBITS ; mm0=YOL
psrld mm4,SCALEBITS ; mm4=YOH
packssdw mm0,mm4 ; mm0=YO
pxor mm3,mm3
pxor mm4,mm4
punpcklwd mm3,mm1 ; mm3=ROL
punpckhwd mm4,mm1 ; mm4=ROH
psrld mm3,1 ; mm3=ROL*FIX(0.500)
psrld mm4,1 ; mm4=ROH*FIX(0.500)
movq mm1,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; mm1=[PD_ONEHALFM1_CJ]
paddd mm7,mm3
paddd mm5,mm4
paddd mm7,mm1
paddd mm5,mm1
psrld mm7,SCALEBITS ; mm7=CrOL
psrld mm5,SCALEBITS ; mm5=CrOH
packssdw mm7,mm5 ; mm7=CrO
movq mm3, MMWORD [wk(0)] ; mm3=RE
movq mm4,mm6
punpcklwd mm6,mm2
punpckhwd mm4,mm2
movq mm1,mm6
movq mm5,mm4
pmaddwd mm6,[GOTOFF(eax,PW_F0114_F0250)] ; mm6=BEL*FIX(0.114)+GEL*FIX(0.250)
pmaddwd mm4,[GOTOFF(eax,PW_F0114_F0250)] ; mm4=BEH*FIX(0.114)+GEH*FIX(0.250)
pmaddwd mm1,[GOTOFF(eax,PW_MF008_MF041)] ; mm1=BEL*-FIX(0.081)+GEL*-FIX(0.418)
pmaddwd mm5,[GOTOFF(eax,PW_MF008_MF041)] ; mm5=BEH*-FIX(0.081)+GEH*-FIX(0.418)
movq mm2,[GOTOFF(eax,PD_ONEHALF)] ; mm2=[PD_ONEHALF]
paddd mm6, MMWORD [wk(6)]
paddd mm4, MMWORD [wk(7)]
paddd mm6,mm2
paddd mm4,mm2
psrld mm6,SCALEBITS ; mm6=YEL
psrld mm4,SCALEBITS ; mm4=YEH
packssdw mm6,mm4 ; mm6=YE
psllw mm0,BYTE_BIT
por mm6,mm0 ; mm6=Y
movq MMWORD [edi], mm6 ; Save Y
pxor mm2,mm2
pxor mm4,mm4
punpcklwd mm2,mm3 ; mm2=REL
punpckhwd mm4,mm3 ; mm4=REH
psrld mm2,1 ; mm2=REL*FIX(0.500)
psrld mm4,1 ; mm4=REH*FIX(0.500)
movq mm0,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; mm0=[PD_ONEHALFM1_CJ]
paddd mm1,mm2
paddd mm5,mm4
paddd mm1,mm0
paddd mm5,mm0
psrld mm1,SCALEBITS ; mm1=CrEL
psrld mm5,SCALEBITS ; mm5=CrEH
packssdw mm1,mm5 ; mm1=CrE
psllw mm7,BYTE_BIT
por mm1,mm7 ; mm1=Cr
movq MMWORD [edx], mm1 ; Save Cr
sub ecx, byte SIZEOF_MMWORD
add esi, byte RGB_PIXELSIZE*SIZEOF_MMWORD ; inptr
add edi, byte SIZEOF_MMWORD ; outptr0
add ebx, byte SIZEOF_MMWORD ; outptr1
add edx, byte SIZEOF_MMWORD ; outptr2
cmp ecx, byte SIZEOF_MMWORD
jae near .columnloop
test ecx,ecx
jnz near .column_ld1
pop ecx ; col
pop esi
pop edi
pop ebx
pop edx
poppic eax
add esi, byte SIZEOF_JSAMPROW ; input_buf
add edi, byte SIZEOF_JSAMPROW
add ebx, byte SIZEOF_JSAMPROW
add edx, byte SIZEOF_JSAMPROW
dec eax ; num_rows
jg near .rowloop
emms ; empty MMX state
.return:
pop edi
pop esi
; pop edx ; need not be preserved
; pop ecx ; need not be preserved
pop ebx
mov esp,ebp ; esp <- aligned ebp
pop esp ; esp <- original ebp
pop ebp
ret
; For some reason, the OS X linker does not honor the request to align the
; segment unless we do this.
align 16

487
jpeg/simd/jcclrss2-64.asm Normal file
Просмотреть файл

@ -0,0 +1,487 @@
;
; jcclrss2-64.asm - colorspace conversion (64-bit SSE2)
;
; x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; Copyright (C) 2009, D. R. Commander.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
; This file should be assembled with NASM (Netwide Assembler),
; can *not* be assembled with Microsoft's MASM or any compatible
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
;
; [TAB8]
%include "jcolsamp.inc"
; --------------------------------------------------------------------------
SECTION SEG_TEXT
BITS 64
;
; Convert some rows of samples to the output colorspace.
;
; GLOBAL(void)
; jsimd_rgb_ycc_convert_sse2 (JDIMENSION img_width,
; JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
; JDIMENSION output_row, int num_rows);
;
; r10 = JDIMENSION img_width
; r11 = JSAMPARRAY input_buf
; r12 = JSAMPIMAGE output_buf
; r13 = JDIMENSION output_row
; r14 = int num_rows
%define wk(i) rbp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
%define WK_NUM 8
align 16
global EXTN(jsimd_rgb_ycc_convert_sse2)
EXTN(jsimd_rgb_ycc_convert_sse2):
push rbp
mov rax,rsp ; rax = original rbp
sub rsp, byte 4
and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits
mov [rsp],rax
mov rbp,rsp ; rbp = aligned rbp
lea rsp, [wk(0)]
collect_args
push rbx
mov rcx, r10
test rcx,rcx
jz near .return
push rcx
mov rsi, r12
mov rcx, r13
mov rdi, JSAMPARRAY [rsi+0*SIZEOF_JSAMPARRAY]
mov rbx, JSAMPARRAY [rsi+1*SIZEOF_JSAMPARRAY]
mov rdx, JSAMPARRAY [rsi+2*SIZEOF_JSAMPARRAY]
lea rdi, [rdi+rcx*SIZEOF_JSAMPROW]
lea rbx, [rbx+rcx*SIZEOF_JSAMPROW]
lea rdx, [rdx+rcx*SIZEOF_JSAMPROW]
pop rcx
mov rsi, r11
mov eax, r14d
test rax,rax
jle near .return
.rowloop:
push rdx
push rbx
push rdi
push rsi
push rcx ; col
mov rsi, JSAMPROW [rsi] ; inptr
mov rdi, JSAMPROW [rdi] ; outptr0
mov rbx, JSAMPROW [rbx] ; outptr1
mov rdx, JSAMPROW [rdx] ; outptr2
cmp rcx, byte SIZEOF_XMMWORD
jae near .columnloop
%if RGB_PIXELSIZE == 3 ; ---------------
.column_ld1:
push rax
push rdx
lea rcx,[rcx+rcx*2] ; imul ecx,RGB_PIXELSIZE
test cl, SIZEOF_BYTE
jz short .column_ld2
sub rcx, byte SIZEOF_BYTE
movzx rax, BYTE [rsi+rcx]
.column_ld2:
test cl, SIZEOF_WORD
jz short .column_ld4
sub rcx, byte SIZEOF_WORD
movzx rdx, WORD [rsi+rcx]
shl rax, WORD_BIT
or rax,rdx
.column_ld4:
movd xmmA,eax
pop rdx
pop rax
test cl, SIZEOF_DWORD
jz short .column_ld8
sub rcx, byte SIZEOF_DWORD
movd xmmF, XMM_DWORD [rsi+rcx]
pslldq xmmA, SIZEOF_DWORD
por xmmA,xmmF
.column_ld8:
test cl, SIZEOF_MMWORD
jz short .column_ld16
sub rcx, byte SIZEOF_MMWORD
movq xmmB, XMM_MMWORD [rsi+rcx]
pslldq xmmA, SIZEOF_MMWORD
por xmmA,xmmB
.column_ld16:
test cl, SIZEOF_XMMWORD
jz short .column_ld32
movdqa xmmF,xmmA
movdqu xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD]
mov rcx, SIZEOF_XMMWORD
jmp short .rgb_ycc_cnv
.column_ld32:
test cl, 2*SIZEOF_XMMWORD
mov rcx, SIZEOF_XMMWORD
jz short .rgb_ycc_cnv
movdqa xmmB,xmmA
movdqu xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD]
movdqu xmmF, XMMWORD [rsi+1*SIZEOF_XMMWORD]
jmp short .rgb_ycc_cnv
.columnloop:
movdqu xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD]
movdqu xmmF, XMMWORD [rsi+1*SIZEOF_XMMWORD]
movdqu xmmB, XMMWORD [rsi+2*SIZEOF_XMMWORD]
.rgb_ycc_cnv:
; xmmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05)
; xmmF=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A)
; xmmB=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F)
movdqa xmmG,xmmA
pslldq xmmA,8 ; xmmA=(-- -- -- -- -- -- -- -- 00 10 20 01 11 21 02 12)
psrldq xmmG,8 ; xmmG=(22 03 13 23 04 14 24 05 -- -- -- -- -- -- -- --)
punpckhbw xmmA,xmmF ; xmmA=(00 08 10 18 20 28 01 09 11 19 21 29 02 0A 12 1A)
pslldq xmmF,8 ; xmmF=(-- -- -- -- -- -- -- -- 15 25 06 16 26 07 17 27)
punpcklbw xmmG,xmmB ; xmmG=(22 2A 03 0B 13 1B 23 2B 04 0C 14 1C 24 2C 05 0D)
punpckhbw xmmF,xmmB ; xmmF=(15 1D 25 2D 06 0E 16 1E 26 2E 07 0F 17 1F 27 2F)
movdqa xmmD,xmmA
pslldq xmmA,8 ; xmmA=(-- -- -- -- -- -- -- -- 00 08 10 18 20 28 01 09)
psrldq xmmD,8 ; xmmD=(11 19 21 29 02 0A 12 1A -- -- -- -- -- -- -- --)
punpckhbw xmmA,xmmG ; xmmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 01 05 09 0D)
pslldq xmmG,8 ; xmmG=(-- -- -- -- -- -- -- -- 22 2A 03 0B 13 1B 23 2B)
punpcklbw xmmD,xmmF ; xmmD=(11 15 19 1D 21 25 29 2D 02 06 0A 0E 12 16 1A 1E)
punpckhbw xmmG,xmmF ; xmmG=(22 26 2A 2E 03 07 0B 0F 13 17 1B 1F 23 27 2B 2F)
movdqa xmmE,xmmA
pslldq xmmA,8 ; xmmA=(-- -- -- -- -- -- -- -- 00 04 08 0C 10 14 18 1C)
psrldq xmmE,8 ; xmmE=(20 24 28 2C 01 05 09 0D -- -- -- -- -- -- -- --)
punpckhbw xmmA,xmmD ; xmmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E)
pslldq xmmD,8 ; xmmD=(-- -- -- -- -- -- -- -- 11 15 19 1D 21 25 29 2D)
punpcklbw xmmE,xmmG ; xmmE=(20 22 24 26 28 2A 2C 2E 01 03 05 07 09 0B 0D 0F)
punpckhbw xmmD,xmmG ; xmmD=(11 13 15 17 19 1B 1D 1F 21 23 25 27 29 2B 2D 2F)
pxor xmmH,xmmH
movdqa xmmC,xmmA
punpcklbw xmmA,xmmH ; xmmA=(00 02 04 06 08 0A 0C 0E)
punpckhbw xmmC,xmmH ; xmmC=(10 12 14 16 18 1A 1C 1E)
movdqa xmmB,xmmE
punpcklbw xmmE,xmmH ; xmmE=(20 22 24 26 28 2A 2C 2E)
punpckhbw xmmB,xmmH ; xmmB=(01 03 05 07 09 0B 0D 0F)
movdqa xmmF,xmmD
punpcklbw xmmD,xmmH ; xmmD=(11 13 15 17 19 1B 1D 1F)
punpckhbw xmmF,xmmH ; xmmF=(21 23 25 27 29 2B 2D 2F)
%else ; RGB_PIXELSIZE == 4 ; -----------
.column_ld1:
test cl, SIZEOF_XMMWORD/16
jz short .column_ld2
sub rcx, byte SIZEOF_XMMWORD/16
movd xmmA, XMM_DWORD [rsi+rcx*RGB_PIXELSIZE]
.column_ld2:
test cl, SIZEOF_XMMWORD/8
jz short .column_ld4
sub rcx, byte SIZEOF_XMMWORD/8
movq xmmE, XMM_MMWORD [rsi+rcx*RGB_PIXELSIZE]
pslldq xmmA, SIZEOF_MMWORD
por xmmA,xmmE
.column_ld4:
test cl, SIZEOF_XMMWORD/4
jz short .column_ld8
sub rcx, byte SIZEOF_XMMWORD/4
movdqa xmmE,xmmA
movdqu xmmA, XMMWORD [rsi+rcx*RGB_PIXELSIZE]
.column_ld8:
test cl, SIZEOF_XMMWORD/2
mov rcx, SIZEOF_XMMWORD
jz short .rgb_ycc_cnv
movdqa xmmF,xmmA
movdqa xmmH,xmmE
movdqu xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD]
movdqu xmmE, XMMWORD [rsi+1*SIZEOF_XMMWORD]
jmp short .rgb_ycc_cnv
.columnloop:
movdqu xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD]
movdqu xmmE, XMMWORD [rsi+1*SIZEOF_XMMWORD]
movdqu xmmF, XMMWORD [rsi+2*SIZEOF_XMMWORD]
movdqu xmmH, XMMWORD [rsi+3*SIZEOF_XMMWORD]
.rgb_ycc_cnv:
; xmmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33)
; xmmE=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37)
; xmmF=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B)
; xmmH=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F)
movdqa xmmD,xmmA
punpcklbw xmmA,xmmE ; xmmA=(00 04 10 14 20 24 30 34 01 05 11 15 21 25 31 35)
punpckhbw xmmD,xmmE ; xmmD=(02 06 12 16 22 26 32 36 03 07 13 17 23 27 33 37)
movdqa xmmC,xmmF
punpcklbw xmmF,xmmH ; xmmF=(08 0C 18 1C 28 2C 38 3C 09 0D 19 1D 29 2D 39 3D)
punpckhbw xmmC,xmmH ; xmmC=(0A 0E 1A 1E 2A 2E 3A 3E 0B 0F 1B 1F 2B 2F 3B 3F)
movdqa xmmB,xmmA
punpcklwd xmmA,xmmF ; xmmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 30 34 38 3C)
punpckhwd xmmB,xmmF ; xmmB=(01 05 09 0D 11 15 19 1D 21 25 29 2D 31 35 39 3D)
movdqa xmmG,xmmD
punpcklwd xmmD,xmmC ; xmmD=(02 06 0A 0E 12 16 1A 1E 22 26 2A 2E 32 36 3A 3E)
punpckhwd xmmG,xmmC ; xmmG=(03 07 0B 0F 13 17 1B 1F 23 27 2B 2F 33 37 3B 3F)
movdqa xmmE,xmmA
punpcklbw xmmA,xmmD ; xmmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E)
punpckhbw xmmE,xmmD ; xmmE=(20 22 24 26 28 2A 2C 2E 30 32 34 36 38 3A 3C 3E)
movdqa xmmH,xmmB
punpcklbw xmmB,xmmG ; xmmB=(01 03 05 07 09 0B 0D 0F 11 13 15 17 19 1B 1D 1F)
punpckhbw xmmH,xmmG ; xmmH=(21 23 25 27 29 2B 2D 2F 31 33 35 37 39 3B 3D 3F)
pxor xmmF,xmmF
movdqa xmmC,xmmA
punpcklbw xmmA,xmmF ; xmmA=(00 02 04 06 08 0A 0C 0E)
punpckhbw xmmC,xmmF ; xmmC=(10 12 14 16 18 1A 1C 1E)
movdqa xmmD,xmmB
punpcklbw xmmB,xmmF ; xmmB=(01 03 05 07 09 0B 0D 0F)
punpckhbw xmmD,xmmF ; xmmD=(11 13 15 17 19 1B 1D 1F)
movdqa xmmG,xmmE
punpcklbw xmmE,xmmF ; xmmE=(20 22 24 26 28 2A 2C 2E)
punpckhbw xmmG,xmmF ; xmmG=(30 32 34 36 38 3A 3C 3E)
punpcklbw xmmF,xmmH
punpckhbw xmmH,xmmH
psrlw xmmF,BYTE_BIT ; xmmF=(21 23 25 27 29 2B 2D 2F)
psrlw xmmH,BYTE_BIT ; xmmH=(31 33 35 37 39 3B 3D 3F)
%endif ; RGB_PIXELSIZE ; ---------------
; xmm0=R(02468ACE)=RE, xmm2=G(02468ACE)=GE, xmm4=B(02468ACE)=BE
; xmm1=R(13579BDF)=RO, xmm3=G(13579BDF)=GO, xmm5=B(13579BDF)=BO
; (Original)
; Y = 0.29900 * R + 0.58700 * G + 0.11400 * B
; Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTERJSAMPLE
; Cr = 0.50000 * R - 0.41869 * G - 0.08131 * B + CENTERJSAMPLE
;
; (This implementation)
; Y = 0.29900 * R + 0.33700 * G + 0.11400 * B + 0.25000 * G
; Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTERJSAMPLE
; Cr = 0.50000 * R - 0.41869 * G - 0.08131 * B + CENTERJSAMPLE
movdqa XMMWORD [wk(0)], xmm0 ; wk(0)=RE
movdqa XMMWORD [wk(1)], xmm1 ; wk(1)=RO
movdqa XMMWORD [wk(2)], xmm4 ; wk(2)=BE
movdqa XMMWORD [wk(3)], xmm5 ; wk(3)=BO
movdqa xmm6,xmm1
punpcklwd xmm1,xmm3
punpckhwd xmm6,xmm3
movdqa xmm7,xmm1
movdqa xmm4,xmm6
pmaddwd xmm1,[rel PW_F0299_F0337] ; xmm1=ROL*FIX(0.299)+GOL*FIX(0.337)
pmaddwd xmm6,[rel PW_F0299_F0337] ; xmm6=ROH*FIX(0.299)+GOH*FIX(0.337)
pmaddwd xmm7,[rel PW_MF016_MF033] ; xmm7=ROL*-FIX(0.168)+GOL*-FIX(0.331)
pmaddwd xmm4,[rel PW_MF016_MF033] ; xmm4=ROH*-FIX(0.168)+GOH*-FIX(0.331)
movdqa XMMWORD [wk(4)], xmm1 ; wk(4)=ROL*FIX(0.299)+GOL*FIX(0.337)
movdqa XMMWORD [wk(5)], xmm6 ; wk(5)=ROH*FIX(0.299)+GOH*FIX(0.337)
pxor xmm1,xmm1
pxor xmm6,xmm6
punpcklwd xmm1,xmm5 ; xmm1=BOL
punpckhwd xmm6,xmm5 ; xmm6=BOH
psrld xmm1,1 ; xmm1=BOL*FIX(0.500)
psrld xmm6,1 ; xmm6=BOH*FIX(0.500)
movdqa xmm5,[rel PD_ONEHALFM1_CJ] ; xmm5=[PD_ONEHALFM1_CJ]
paddd xmm7,xmm1
paddd xmm4,xmm6
paddd xmm7,xmm5
paddd xmm4,xmm5
psrld xmm7,SCALEBITS ; xmm7=CbOL
psrld xmm4,SCALEBITS ; xmm4=CbOH
packssdw xmm7,xmm4 ; xmm7=CbO
movdqa xmm1, XMMWORD [wk(2)] ; xmm1=BE
movdqa xmm6,xmm0
punpcklwd xmm0,xmm2
punpckhwd xmm6,xmm2
movdqa xmm5,xmm0
movdqa xmm4,xmm6
pmaddwd xmm0,[rel PW_F0299_F0337] ; xmm0=REL*FIX(0.299)+GEL*FIX(0.337)
pmaddwd xmm6,[rel PW_F0299_F0337] ; xmm6=REH*FIX(0.299)+GEH*FIX(0.337)
pmaddwd xmm5,[rel PW_MF016_MF033] ; xmm5=REL*-FIX(0.168)+GEL*-FIX(0.331)
pmaddwd xmm4,[rel PW_MF016_MF033] ; xmm4=REH*-FIX(0.168)+GEH*-FIX(0.331)
movdqa XMMWORD [wk(6)], xmm0 ; wk(6)=REL*FIX(0.299)+GEL*FIX(0.337)
movdqa XMMWORD [wk(7)], xmm6 ; wk(7)=REH*FIX(0.299)+GEH*FIX(0.337)
pxor xmm0,xmm0
pxor xmm6,xmm6
punpcklwd xmm0,xmm1 ; xmm0=BEL
punpckhwd xmm6,xmm1 ; xmm6=BEH
psrld xmm0,1 ; xmm0=BEL*FIX(0.500)
psrld xmm6,1 ; xmm6=BEH*FIX(0.500)
movdqa xmm1,[rel PD_ONEHALFM1_CJ] ; xmm1=[PD_ONEHALFM1_CJ]
paddd xmm5,xmm0
paddd xmm4,xmm6
paddd xmm5,xmm1
paddd xmm4,xmm1
psrld xmm5,SCALEBITS ; xmm5=CbEL
psrld xmm4,SCALEBITS ; xmm4=CbEH
packssdw xmm5,xmm4 ; xmm5=CbE
psllw xmm7,BYTE_BIT
por xmm5,xmm7 ; xmm5=Cb
movdqa XMMWORD [rbx], xmm5 ; Save Cb
movdqa xmm0, XMMWORD [wk(3)] ; xmm0=BO
movdqa xmm6, XMMWORD [wk(2)] ; xmm6=BE
movdqa xmm1, XMMWORD [wk(1)] ; xmm1=RO
movdqa xmm4,xmm0
punpcklwd xmm0,xmm3
punpckhwd xmm4,xmm3
movdqa xmm7,xmm0
movdqa xmm5,xmm4
pmaddwd xmm0,[rel PW_F0114_F0250] ; xmm0=BOL*FIX(0.114)+GOL*FIX(0.250)
pmaddwd xmm4,[rel PW_F0114_F0250] ; xmm4=BOH*FIX(0.114)+GOH*FIX(0.250)
pmaddwd xmm7,[rel PW_MF008_MF041] ; xmm7=BOL*-FIX(0.081)+GOL*-FIX(0.418)
pmaddwd xmm5,[rel PW_MF008_MF041] ; xmm5=BOH*-FIX(0.081)+GOH*-FIX(0.418)
movdqa xmm3,[rel PD_ONEHALF] ; xmm3=[PD_ONEHALF]
paddd xmm0, XMMWORD [wk(4)]
paddd xmm4, XMMWORD [wk(5)]
paddd xmm0,xmm3
paddd xmm4,xmm3
psrld xmm0,SCALEBITS ; xmm0=YOL
psrld xmm4,SCALEBITS ; xmm4=YOH
packssdw xmm0,xmm4 ; xmm0=YO
pxor xmm3,xmm3
pxor xmm4,xmm4
punpcklwd xmm3,xmm1 ; xmm3=ROL
punpckhwd xmm4,xmm1 ; xmm4=ROH
psrld xmm3,1 ; xmm3=ROL*FIX(0.500)
psrld xmm4,1 ; xmm4=ROH*FIX(0.500)
movdqa xmm1,[rel PD_ONEHALFM1_CJ] ; xmm1=[PD_ONEHALFM1_CJ]
paddd xmm7,xmm3
paddd xmm5,xmm4
paddd xmm7,xmm1
paddd xmm5,xmm1
psrld xmm7,SCALEBITS ; xmm7=CrOL
psrld xmm5,SCALEBITS ; xmm5=CrOH
packssdw xmm7,xmm5 ; xmm7=CrO
movdqa xmm3, XMMWORD [wk(0)] ; xmm3=RE
movdqa xmm4,xmm6
punpcklwd xmm6,xmm2
punpckhwd xmm4,xmm2
movdqa xmm1,xmm6
movdqa xmm5,xmm4
pmaddwd xmm6,[rel PW_F0114_F0250] ; xmm6=BEL*FIX(0.114)+GEL*FIX(0.250)
pmaddwd xmm4,[rel PW_F0114_F0250] ; xmm4=BEH*FIX(0.114)+GEH*FIX(0.250)
pmaddwd xmm1,[rel PW_MF008_MF041] ; xmm1=BEL*-FIX(0.081)+GEL*-FIX(0.418)
pmaddwd xmm5,[rel PW_MF008_MF041] ; xmm5=BEH*-FIX(0.081)+GEH*-FIX(0.418)
movdqa xmm2,[rel PD_ONEHALF] ; xmm2=[PD_ONEHALF]
paddd xmm6, XMMWORD [wk(6)]
paddd xmm4, XMMWORD [wk(7)]
paddd xmm6,xmm2
paddd xmm4,xmm2
psrld xmm6,SCALEBITS ; xmm6=YEL
psrld xmm4,SCALEBITS ; xmm4=YEH
packssdw xmm6,xmm4 ; xmm6=YE
psllw xmm0,BYTE_BIT
por xmm6,xmm0 ; xmm6=Y
movdqa XMMWORD [rdi], xmm6 ; Save Y
pxor xmm2,xmm2
pxor xmm4,xmm4
punpcklwd xmm2,xmm3 ; xmm2=REL
punpckhwd xmm4,xmm3 ; xmm4=REH
psrld xmm2,1 ; xmm2=REL*FIX(0.500)
psrld xmm4,1 ; xmm4=REH*FIX(0.500)
movdqa xmm0,[rel PD_ONEHALFM1_CJ] ; xmm0=[PD_ONEHALFM1_CJ]
paddd xmm1,xmm2
paddd xmm5,xmm4
paddd xmm1,xmm0
paddd xmm5,xmm0
psrld xmm1,SCALEBITS ; xmm1=CrEL
psrld xmm5,SCALEBITS ; xmm5=CrEH
packssdw xmm1,xmm5 ; xmm1=CrE
psllw xmm7,BYTE_BIT
por xmm1,xmm7 ; xmm1=Cr
movdqa XMMWORD [rdx], xmm1 ; Save Cr
sub rcx, byte SIZEOF_XMMWORD
add rsi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; inptr
add rdi, byte SIZEOF_XMMWORD ; outptr0
add rbx, byte SIZEOF_XMMWORD ; outptr1
add rdx, byte SIZEOF_XMMWORD ; outptr2
cmp rcx, byte SIZEOF_XMMWORD
jae near .columnloop
test rcx,rcx
jnz near .column_ld1
pop rcx ; col
pop rsi
pop rdi
pop rbx
pop rdx
add rsi, byte SIZEOF_JSAMPROW ; input_buf
add rdi, byte SIZEOF_JSAMPROW
add rbx, byte SIZEOF_JSAMPROW
add rdx, byte SIZEOF_JSAMPROW
dec rax ; num_rows
jg near .rowloop
.return:
pop rbx
uncollect_args
mov rsp,rbp ; rsp <- aligned rbp
pop rsp ; rsp <- original rbp
pop rbp
ret
; For some reason, the OS X linker does not honor the request to align the
; segment unless we do this.
align 16

505
jpeg/simd/jcclrss2.asm Normal file
Просмотреть файл

@ -0,0 +1,505 @@
;
; jcclrss2.asm - colorspace conversion (SSE2)
;
; x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
; This file should be assembled with NASM (Netwide Assembler),
; can *not* be assembled with Microsoft's MASM or any compatible
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
;
; [TAB8]
%include "jcolsamp.inc"
; --------------------------------------------------------------------------
SECTION SEG_TEXT
BITS 32
;
; Convert some rows of samples to the output colorspace.
;
; GLOBAL(void)
; jsimd_rgb_ycc_convert_sse2 (JDIMENSION img_width,
; JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
; JDIMENSION output_row, int num_rows);
;
%define img_width(b) (b)+8 ; JDIMENSION img_width
%define input_buf(b) (b)+12 ; JSAMPARRAY input_buf
%define output_buf(b) (b)+16 ; JSAMPIMAGE output_buf
%define output_row(b) (b)+20 ; JDIMENSION output_row
%define num_rows(b) (b)+24 ; int num_rows
%define original_ebp ebp+0
%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
%define WK_NUM 8
%define gotptr wk(0)-SIZEOF_POINTER ; void * gotptr
align 16
global EXTN(jsimd_rgb_ycc_convert_sse2)
EXTN(jsimd_rgb_ycc_convert_sse2):
push ebp
mov eax,esp ; eax = original ebp
sub esp, byte 4
and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits
mov [esp],eax
mov ebp,esp ; ebp = aligned ebp
lea esp, [wk(0)]
pushpic eax ; make a room for GOT address
push ebx
; push ecx ; need not be preserved
; push edx ; need not be preserved
push esi
push edi
get_GOT ebx ; get GOT address
movpic POINTER [gotptr], ebx ; save GOT address
mov ecx, JDIMENSION [img_width(eax)]
test ecx,ecx
jz near .return
push ecx
mov esi, JSAMPIMAGE [output_buf(eax)]
mov ecx, JDIMENSION [output_row(eax)]
mov edi, JSAMPARRAY [esi+0*SIZEOF_JSAMPARRAY]
mov ebx, JSAMPARRAY [esi+1*SIZEOF_JSAMPARRAY]
mov edx, JSAMPARRAY [esi+2*SIZEOF_JSAMPARRAY]
lea edi, [edi+ecx*SIZEOF_JSAMPROW]
lea ebx, [ebx+ecx*SIZEOF_JSAMPROW]
lea edx, [edx+ecx*SIZEOF_JSAMPROW]
pop ecx
mov esi, JSAMPARRAY [input_buf(eax)]
mov eax, INT [num_rows(eax)]
test eax,eax
jle near .return
alignx 16,7
.rowloop:
pushpic eax
push edx
push ebx
push edi
push esi
push ecx ; col
mov esi, JSAMPROW [esi] ; inptr
mov edi, JSAMPROW [edi] ; outptr0
mov ebx, JSAMPROW [ebx] ; outptr1
mov edx, JSAMPROW [edx] ; outptr2
movpic eax, POINTER [gotptr] ; load GOT address (eax)
cmp ecx, byte SIZEOF_XMMWORD
jae near .columnloop
alignx 16,7
%if RGB_PIXELSIZE == 3 ; ---------------
.column_ld1:
push eax
push edx
lea ecx,[ecx+ecx*2] ; imul ecx,RGB_PIXELSIZE
test cl, SIZEOF_BYTE
jz short .column_ld2
sub ecx, byte SIZEOF_BYTE
movzx eax, BYTE [esi+ecx]
.column_ld2:
test cl, SIZEOF_WORD
jz short .column_ld4
sub ecx, byte SIZEOF_WORD
movzx edx, WORD [esi+ecx]
shl eax, WORD_BIT
or eax,edx
.column_ld4:
movd xmmA,eax
pop edx
pop eax
test cl, SIZEOF_DWORD
jz short .column_ld8
sub ecx, byte SIZEOF_DWORD
movd xmmF, XMM_DWORD [esi+ecx]
pslldq xmmA, SIZEOF_DWORD
por xmmA,xmmF
.column_ld8:
test cl, SIZEOF_MMWORD
jz short .column_ld16
sub ecx, byte SIZEOF_MMWORD
movq xmmB, XMM_MMWORD [esi+ecx]
pslldq xmmA, SIZEOF_MMWORD
por xmmA,xmmB
.column_ld16:
test cl, SIZEOF_XMMWORD
jz short .column_ld32
movdqa xmmF,xmmA
movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
mov ecx, SIZEOF_XMMWORD
jmp short .rgb_ycc_cnv
.column_ld32:
test cl, 2*SIZEOF_XMMWORD
mov ecx, SIZEOF_XMMWORD
jz short .rgb_ycc_cnv
movdqa xmmB,xmmA
movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
movdqu xmmF, XMMWORD [esi+1*SIZEOF_XMMWORD]
jmp short .rgb_ycc_cnv
alignx 16,7
.columnloop:
movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
movdqu xmmF, XMMWORD [esi+1*SIZEOF_XMMWORD]
movdqu xmmB, XMMWORD [esi+2*SIZEOF_XMMWORD]
.rgb_ycc_cnv:
; xmmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05)
; xmmF=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A)
; xmmB=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F)
movdqa xmmG,xmmA
pslldq xmmA,8 ; xmmA=(-- -- -- -- -- -- -- -- 00 10 20 01 11 21 02 12)
psrldq xmmG,8 ; xmmG=(22 03 13 23 04 14 24 05 -- -- -- -- -- -- -- --)
punpckhbw xmmA,xmmF ; xmmA=(00 08 10 18 20 28 01 09 11 19 21 29 02 0A 12 1A)
pslldq xmmF,8 ; xmmF=(-- -- -- -- -- -- -- -- 15 25 06 16 26 07 17 27)
punpcklbw xmmG,xmmB ; xmmG=(22 2A 03 0B 13 1B 23 2B 04 0C 14 1C 24 2C 05 0D)
punpckhbw xmmF,xmmB ; xmmF=(15 1D 25 2D 06 0E 16 1E 26 2E 07 0F 17 1F 27 2F)
movdqa xmmD,xmmA
pslldq xmmA,8 ; xmmA=(-- -- -- -- -- -- -- -- 00 08 10 18 20 28 01 09)
psrldq xmmD,8 ; xmmD=(11 19 21 29 02 0A 12 1A -- -- -- -- -- -- -- --)
punpckhbw xmmA,xmmG ; xmmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 01 05 09 0D)
pslldq xmmG,8 ; xmmG=(-- -- -- -- -- -- -- -- 22 2A 03 0B 13 1B 23 2B)
punpcklbw xmmD,xmmF ; xmmD=(11 15 19 1D 21 25 29 2D 02 06 0A 0E 12 16 1A 1E)
punpckhbw xmmG,xmmF ; xmmG=(22 26 2A 2E 03 07 0B 0F 13 17 1B 1F 23 27 2B 2F)
movdqa xmmE,xmmA
pslldq xmmA,8 ; xmmA=(-- -- -- -- -- -- -- -- 00 04 08 0C 10 14 18 1C)
psrldq xmmE,8 ; xmmE=(20 24 28 2C 01 05 09 0D -- -- -- -- -- -- -- --)
punpckhbw xmmA,xmmD ; xmmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E)
pslldq xmmD,8 ; xmmD=(-- -- -- -- -- -- -- -- 11 15 19 1D 21 25 29 2D)
punpcklbw xmmE,xmmG ; xmmE=(20 22 24 26 28 2A 2C 2E 01 03 05 07 09 0B 0D 0F)
punpckhbw xmmD,xmmG ; xmmD=(11 13 15 17 19 1B 1D 1F 21 23 25 27 29 2B 2D 2F)
pxor xmmH,xmmH
movdqa xmmC,xmmA
punpcklbw xmmA,xmmH ; xmmA=(00 02 04 06 08 0A 0C 0E)
punpckhbw xmmC,xmmH ; xmmC=(10 12 14 16 18 1A 1C 1E)
movdqa xmmB,xmmE
punpcklbw xmmE,xmmH ; xmmE=(20 22 24 26 28 2A 2C 2E)
punpckhbw xmmB,xmmH ; xmmB=(01 03 05 07 09 0B 0D 0F)
movdqa xmmF,xmmD
punpcklbw xmmD,xmmH ; xmmD=(11 13 15 17 19 1B 1D 1F)
punpckhbw xmmF,xmmH ; xmmF=(21 23 25 27 29 2B 2D 2F)
%else ; RGB_PIXELSIZE == 4 ; -----------
.column_ld1:
test cl, SIZEOF_XMMWORD/16
jz short .column_ld2
sub ecx, byte SIZEOF_XMMWORD/16
movd xmmA, XMM_DWORD [esi+ecx*RGB_PIXELSIZE]
.column_ld2:
test cl, SIZEOF_XMMWORD/8
jz short .column_ld4
sub ecx, byte SIZEOF_XMMWORD/8
movq xmmE, XMM_MMWORD [esi+ecx*RGB_PIXELSIZE]
pslldq xmmA, SIZEOF_MMWORD
por xmmA,xmmE
.column_ld4:
test cl, SIZEOF_XMMWORD/4
jz short .column_ld8
sub ecx, byte SIZEOF_XMMWORD/4
movdqa xmmE,xmmA
movdqu xmmA, XMMWORD [esi+ecx*RGB_PIXELSIZE]
.column_ld8:
test cl, SIZEOF_XMMWORD/2
mov ecx, SIZEOF_XMMWORD
jz short .rgb_ycc_cnv
movdqa xmmF,xmmA
movdqa xmmH,xmmE
movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
movdqu xmmE, XMMWORD [esi+1*SIZEOF_XMMWORD]
jmp short .rgb_ycc_cnv
alignx 16,7
.columnloop:
movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
movdqu xmmE, XMMWORD [esi+1*SIZEOF_XMMWORD]
movdqu xmmF, XMMWORD [esi+2*SIZEOF_XMMWORD]
movdqu xmmH, XMMWORD [esi+3*SIZEOF_XMMWORD]
.rgb_ycc_cnv:
; xmmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33)
; xmmE=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37)
; xmmF=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B)
; xmmH=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F)
movdqa xmmD,xmmA
punpcklbw xmmA,xmmE ; xmmA=(00 04 10 14 20 24 30 34 01 05 11 15 21 25 31 35)
punpckhbw xmmD,xmmE ; xmmD=(02 06 12 16 22 26 32 36 03 07 13 17 23 27 33 37)
movdqa xmmC,xmmF
punpcklbw xmmF,xmmH ; xmmF=(08 0C 18 1C 28 2C 38 3C 09 0D 19 1D 29 2D 39 3D)
punpckhbw xmmC,xmmH ; xmmC=(0A 0E 1A 1E 2A 2E 3A 3E 0B 0F 1B 1F 2B 2F 3B 3F)
movdqa xmmB,xmmA
punpcklwd xmmA,xmmF ; xmmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 30 34 38 3C)
punpckhwd xmmB,xmmF ; xmmB=(01 05 09 0D 11 15 19 1D 21 25 29 2D 31 35 39 3D)
movdqa xmmG,xmmD
punpcklwd xmmD,xmmC ; xmmD=(02 06 0A 0E 12 16 1A 1E 22 26 2A 2E 32 36 3A 3E)
punpckhwd xmmG,xmmC ; xmmG=(03 07 0B 0F 13 17 1B 1F 23 27 2B 2F 33 37 3B 3F)
movdqa xmmE,xmmA
punpcklbw xmmA,xmmD ; xmmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E)
punpckhbw xmmE,xmmD ; xmmE=(20 22 24 26 28 2A 2C 2E 30 32 34 36 38 3A 3C 3E)
movdqa xmmH,xmmB
punpcklbw xmmB,xmmG ; xmmB=(01 03 05 07 09 0B 0D 0F 11 13 15 17 19 1B 1D 1F)
punpckhbw xmmH,xmmG ; xmmH=(21 23 25 27 29 2B 2D 2F 31 33 35 37 39 3B 3D 3F)
pxor xmmF,xmmF
movdqa xmmC,xmmA
punpcklbw xmmA,xmmF ; xmmA=(00 02 04 06 08 0A 0C 0E)
punpckhbw xmmC,xmmF ; xmmC=(10 12 14 16 18 1A 1C 1E)
movdqa xmmD,xmmB
punpcklbw xmmB,xmmF ; xmmB=(01 03 05 07 09 0B 0D 0F)
punpckhbw xmmD,xmmF ; xmmD=(11 13 15 17 19 1B 1D 1F)
movdqa xmmG,xmmE
punpcklbw xmmE,xmmF ; xmmE=(20 22 24 26 28 2A 2C 2E)
punpckhbw xmmG,xmmF ; xmmG=(30 32 34 36 38 3A 3C 3E)
punpcklbw xmmF,xmmH
punpckhbw xmmH,xmmH
psrlw xmmF,BYTE_BIT ; xmmF=(21 23 25 27 29 2B 2D 2F)
psrlw xmmH,BYTE_BIT ; xmmH=(31 33 35 37 39 3B 3D 3F)
%endif ; RGB_PIXELSIZE ; ---------------
; xmm0=R(02468ACE)=RE, xmm2=G(02468ACE)=GE, xmm4=B(02468ACE)=BE
; xmm1=R(13579BDF)=RO, xmm3=G(13579BDF)=GO, xmm5=B(13579BDF)=BO
; (Original)
; Y = 0.29900 * R + 0.58700 * G + 0.11400 * B
; Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTERJSAMPLE
; Cr = 0.50000 * R - 0.41869 * G - 0.08131 * B + CENTERJSAMPLE
;
; (This implementation)
; Y = 0.29900 * R + 0.33700 * G + 0.11400 * B + 0.25000 * G
; Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTERJSAMPLE
; Cr = 0.50000 * R - 0.41869 * G - 0.08131 * B + CENTERJSAMPLE
movdqa XMMWORD [wk(0)], xmm0 ; wk(0)=RE
movdqa XMMWORD [wk(1)], xmm1 ; wk(1)=RO
movdqa XMMWORD [wk(2)], xmm4 ; wk(2)=BE
movdqa XMMWORD [wk(3)], xmm5 ; wk(3)=BO
movdqa xmm6,xmm1
punpcklwd xmm1,xmm3
punpckhwd xmm6,xmm3
movdqa xmm7,xmm1
movdqa xmm4,xmm6
pmaddwd xmm1,[GOTOFF(eax,PW_F0299_F0337)] ; xmm1=ROL*FIX(0.299)+GOL*FIX(0.337)
pmaddwd xmm6,[GOTOFF(eax,PW_F0299_F0337)] ; xmm6=ROH*FIX(0.299)+GOH*FIX(0.337)
pmaddwd xmm7,[GOTOFF(eax,PW_MF016_MF033)] ; xmm7=ROL*-FIX(0.168)+GOL*-FIX(0.331)
pmaddwd xmm4,[GOTOFF(eax,PW_MF016_MF033)] ; xmm4=ROH*-FIX(0.168)+GOH*-FIX(0.331)
movdqa XMMWORD [wk(4)], xmm1 ; wk(4)=ROL*FIX(0.299)+GOL*FIX(0.337)
movdqa XMMWORD [wk(5)], xmm6 ; wk(5)=ROH*FIX(0.299)+GOH*FIX(0.337)
pxor xmm1,xmm1
pxor xmm6,xmm6
punpcklwd xmm1,xmm5 ; xmm1=BOL
punpckhwd xmm6,xmm5 ; xmm6=BOH
psrld xmm1,1 ; xmm1=BOL*FIX(0.500)
psrld xmm6,1 ; xmm6=BOH*FIX(0.500)
movdqa xmm5,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; xmm5=[PD_ONEHALFM1_CJ]
paddd xmm7,xmm1
paddd xmm4,xmm6
paddd xmm7,xmm5
paddd xmm4,xmm5
psrld xmm7,SCALEBITS ; xmm7=CbOL
psrld xmm4,SCALEBITS ; xmm4=CbOH
packssdw xmm7,xmm4 ; xmm7=CbO
movdqa xmm1, XMMWORD [wk(2)] ; xmm1=BE
movdqa xmm6,xmm0
punpcklwd xmm0,xmm2
punpckhwd xmm6,xmm2
movdqa xmm5,xmm0
movdqa xmm4,xmm6
pmaddwd xmm0,[GOTOFF(eax,PW_F0299_F0337)] ; xmm0=REL*FIX(0.299)+GEL*FIX(0.337)
pmaddwd xmm6,[GOTOFF(eax,PW_F0299_F0337)] ; xmm6=REH*FIX(0.299)+GEH*FIX(0.337)
pmaddwd xmm5,[GOTOFF(eax,PW_MF016_MF033)] ; xmm5=REL*-FIX(0.168)+GEL*-FIX(0.331)
pmaddwd xmm4,[GOTOFF(eax,PW_MF016_MF033)] ; xmm4=REH*-FIX(0.168)+GEH*-FIX(0.331)
movdqa XMMWORD [wk(6)], xmm0 ; wk(6)=REL*FIX(0.299)+GEL*FIX(0.337)
movdqa XMMWORD [wk(7)], xmm6 ; wk(7)=REH*FIX(0.299)+GEH*FIX(0.337)
pxor xmm0,xmm0
pxor xmm6,xmm6
punpcklwd xmm0,xmm1 ; xmm0=BEL
punpckhwd xmm6,xmm1 ; xmm6=BEH
psrld xmm0,1 ; xmm0=BEL*FIX(0.500)
psrld xmm6,1 ; xmm6=BEH*FIX(0.500)
movdqa xmm1,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; xmm1=[PD_ONEHALFM1_CJ]
paddd xmm5,xmm0
paddd xmm4,xmm6
paddd xmm5,xmm1
paddd xmm4,xmm1
psrld xmm5,SCALEBITS ; xmm5=CbEL
psrld xmm4,SCALEBITS ; xmm4=CbEH
packssdw xmm5,xmm4 ; xmm5=CbE
psllw xmm7,BYTE_BIT
por xmm5,xmm7 ; xmm5=Cb
movdqa XMMWORD [ebx], xmm5 ; Save Cb
movdqa xmm0, XMMWORD [wk(3)] ; xmm0=BO
movdqa xmm6, XMMWORD [wk(2)] ; xmm6=BE
movdqa xmm1, XMMWORD [wk(1)] ; xmm1=RO
movdqa xmm4,xmm0
punpcklwd xmm0,xmm3
punpckhwd xmm4,xmm3
movdqa xmm7,xmm0
movdqa xmm5,xmm4
pmaddwd xmm0,[GOTOFF(eax,PW_F0114_F0250)] ; xmm0=BOL*FIX(0.114)+GOL*FIX(0.250)
pmaddwd xmm4,[GOTOFF(eax,PW_F0114_F0250)] ; xmm4=BOH*FIX(0.114)+GOH*FIX(0.250)
pmaddwd xmm7,[GOTOFF(eax,PW_MF008_MF041)] ; xmm7=BOL*-FIX(0.081)+GOL*-FIX(0.418)
pmaddwd xmm5,[GOTOFF(eax,PW_MF008_MF041)] ; xmm5=BOH*-FIX(0.081)+GOH*-FIX(0.418)
movdqa xmm3,[GOTOFF(eax,PD_ONEHALF)] ; xmm3=[PD_ONEHALF]
paddd xmm0, XMMWORD [wk(4)]
paddd xmm4, XMMWORD [wk(5)]
paddd xmm0,xmm3
paddd xmm4,xmm3
psrld xmm0,SCALEBITS ; xmm0=YOL
psrld xmm4,SCALEBITS ; xmm4=YOH
packssdw xmm0,xmm4 ; xmm0=YO
pxor xmm3,xmm3
pxor xmm4,xmm4
punpcklwd xmm3,xmm1 ; xmm3=ROL
punpckhwd xmm4,xmm1 ; xmm4=ROH
psrld xmm3,1 ; xmm3=ROL*FIX(0.500)
psrld xmm4,1 ; xmm4=ROH*FIX(0.500)
movdqa xmm1,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; xmm1=[PD_ONEHALFM1_CJ]
paddd xmm7,xmm3
paddd xmm5,xmm4
paddd xmm7,xmm1
paddd xmm5,xmm1
psrld xmm7,SCALEBITS ; xmm7=CrOL
psrld xmm5,SCALEBITS ; xmm5=CrOH
packssdw xmm7,xmm5 ; xmm7=CrO
movdqa xmm3, XMMWORD [wk(0)] ; xmm3=RE
movdqa xmm4,xmm6
punpcklwd xmm6,xmm2
punpckhwd xmm4,xmm2
movdqa xmm1,xmm6
movdqa xmm5,xmm4
pmaddwd xmm6,[GOTOFF(eax,PW_F0114_F0250)] ; xmm6=BEL*FIX(0.114)+GEL*FIX(0.250)
pmaddwd xmm4,[GOTOFF(eax,PW_F0114_F0250)] ; xmm4=BEH*FIX(0.114)+GEH*FIX(0.250)
pmaddwd xmm1,[GOTOFF(eax,PW_MF008_MF041)] ; xmm1=BEL*-FIX(0.081)+GEL*-FIX(0.418)
pmaddwd xmm5,[GOTOFF(eax,PW_MF008_MF041)] ; xmm5=BEH*-FIX(0.081)+GEH*-FIX(0.418)
movdqa xmm2,[GOTOFF(eax,PD_ONEHALF)] ; xmm2=[PD_ONEHALF]
paddd xmm6, XMMWORD [wk(6)]
paddd xmm4, XMMWORD [wk(7)]
paddd xmm6,xmm2
paddd xmm4,xmm2
psrld xmm6,SCALEBITS ; xmm6=YEL
psrld xmm4,SCALEBITS ; xmm4=YEH
packssdw xmm6,xmm4 ; xmm6=YE
psllw xmm0,BYTE_BIT
por xmm6,xmm0 ; xmm6=Y
movdqa XMMWORD [edi], xmm6 ; Save Y
pxor xmm2,xmm2
pxor xmm4,xmm4
punpcklwd xmm2,xmm3 ; xmm2=REL
punpckhwd xmm4,xmm3 ; xmm4=REH
psrld xmm2,1 ; xmm2=REL*FIX(0.500)
psrld xmm4,1 ; xmm4=REH*FIX(0.500)
movdqa xmm0,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; xmm0=[PD_ONEHALFM1_CJ]
paddd xmm1,xmm2
paddd xmm5,xmm4
paddd xmm1,xmm0
paddd xmm5,xmm0
psrld xmm1,SCALEBITS ; xmm1=CrEL
psrld xmm5,SCALEBITS ; xmm5=CrEH
packssdw xmm1,xmm5 ; xmm1=CrE
psllw xmm7,BYTE_BIT
por xmm1,xmm7 ; xmm1=Cr
movdqa XMMWORD [edx], xmm1 ; Save Cr
sub ecx, byte SIZEOF_XMMWORD
add esi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; inptr
add edi, byte SIZEOF_XMMWORD ; outptr0
add ebx, byte SIZEOF_XMMWORD ; outptr1
add edx, byte SIZEOF_XMMWORD ; outptr2
cmp ecx, byte SIZEOF_XMMWORD
jae near .columnloop
test ecx,ecx
jnz near .column_ld1
pop ecx ; col
pop esi
pop edi
pop ebx
pop edx
poppic eax
add esi, byte SIZEOF_JSAMPROW ; input_buf
add edi, byte SIZEOF_JSAMPROW
add ebx, byte SIZEOF_JSAMPROW
add edx, byte SIZEOF_JSAMPROW
dec eax ; num_rows
jg near .rowloop
.return:
pop edi
pop esi
; pop edx ; need not be preserved
; pop ecx ; need not be preserved
pop ebx
mov esp,ebp ; esp <- aligned ebp
pop esp ; esp <- original ebp
pop ebp
ret
; For some reason, the OS X linker does not honor the request to align the
; segment unless we do this.
align 16

120
jpeg/simd/jccolmmx.asm Normal file
Просмотреть файл

@ -0,0 +1,120 @@
;
; jccolmmx.asm - colorspace conversion (MMX)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright 2009 D. R. Commander
;
; Based on
; x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
; This file should be assembled with NASM (Netwide Assembler),
; can *not* be assembled with Microsoft's MASM or any compatible
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
;
; [TAB8]
%include "jsimdext.inc"
; --------------------------------------------------------------------------
%define SCALEBITS 16
F_0_081 equ 5329 ; FIX(0.08131)
F_0_114 equ 7471 ; FIX(0.11400)
F_0_168 equ 11059 ; FIX(0.16874)
F_0_250 equ 16384 ; FIX(0.25000)
F_0_299 equ 19595 ; FIX(0.29900)
F_0_331 equ 21709 ; FIX(0.33126)
F_0_418 equ 27439 ; FIX(0.41869)
F_0_587 equ 38470 ; FIX(0.58700)
F_0_337 equ (F_0_587 - F_0_250) ; FIX(0.58700) - FIX(0.25000)
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 16
global EXTN(jconst_rgb_ycc_convert_mmx)
EXTN(jconst_rgb_ycc_convert_mmx):
PW_F0299_F0337 times 2 dw F_0_299, F_0_337
PW_F0114_F0250 times 2 dw F_0_114, F_0_250
PW_MF016_MF033 times 2 dw -F_0_168,-F_0_331
PW_MF008_MF041 times 2 dw -F_0_081,-F_0_418
PD_ONEHALFM1_CJ times 2 dd (1 << (SCALEBITS-1)) - 1 + (CENTERJSAMPLE << SCALEBITS)
PD_ONEHALF times 2 dd (1 << (SCALEBITS-1))
alignz 16
; --------------------------------------------------------------------------
%include "jcclrmmx.asm"
%undef RGB_RED
%undef RGB_GREEN
%undef RGB_BLUE
%undef RGB_PIXELSIZE
%define RGB_RED 0
%define RGB_GREEN 1
%define RGB_BLUE 2
%define RGB_PIXELSIZE 3
%define jsimd_rgb_ycc_convert_mmx jsimd_extrgb_ycc_convert_mmx
%include "jcclrmmx.asm"
%undef RGB_RED
%undef RGB_GREEN
%undef RGB_BLUE
%undef RGB_PIXELSIZE
%define RGB_RED 0
%define RGB_GREEN 1
%define RGB_BLUE 2
%define RGB_PIXELSIZE 4
%define jsimd_rgb_ycc_convert_mmx jsimd_extrgbx_ycc_convert_mmx
%include "jcclrmmx.asm"
%undef RGB_RED
%undef RGB_GREEN
%undef RGB_BLUE
%undef RGB_PIXELSIZE
%define RGB_RED 2
%define RGB_GREEN 1
%define RGB_BLUE 0
%define RGB_PIXELSIZE 3
%define jsimd_rgb_ycc_convert_mmx jsimd_extbgr_ycc_convert_mmx
%include "jcclrmmx.asm"
%undef RGB_RED
%undef RGB_GREEN
%undef RGB_BLUE
%undef RGB_PIXELSIZE
%define RGB_RED 2
%define RGB_GREEN 1
%define RGB_BLUE 0
%define RGB_PIXELSIZE 4
%define jsimd_rgb_ycc_convert_mmx jsimd_extbgrx_ycc_convert_mmx
%include "jcclrmmx.asm"
%undef RGB_RED
%undef RGB_GREEN
%undef RGB_BLUE
%undef RGB_PIXELSIZE
%define RGB_RED 3
%define RGB_GREEN 2
%define RGB_BLUE 1
%define RGB_PIXELSIZE 4
%define jsimd_rgb_ycc_convert_mmx jsimd_extxbgr_ycc_convert_mmx
%include "jcclrmmx.asm"
%undef RGB_RED
%undef RGB_GREEN
%undef RGB_BLUE
%undef RGB_PIXELSIZE
%define RGB_RED 1
%define RGB_GREEN 2
%define RGB_BLUE 3
%define RGB_PIXELSIZE 4
%define jsimd_rgb_ycc_convert_mmx jsimd_extxrgb_ycc_convert_mmx
%include "jcclrmmx.asm"

117
jpeg/simd/jccolss2-64.asm Normal file
Просмотреть файл

@ -0,0 +1,117 @@
;
; jccolss2-64.asm - colorspace conversion (64-bit SSE2)
;
; x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; Copyright (C) 2009, D. R. Commander.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
; This file should be assembled with NASM (Netwide Assembler),
; can *not* be assembled with Microsoft's MASM or any compatible
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
;
; [TAB8]
%include "jsimdext.inc"
; --------------------------------------------------------------------------
%define SCALEBITS 16
F_0_081 equ 5329 ; FIX(0.08131)
F_0_114 equ 7471 ; FIX(0.11400)
F_0_168 equ 11059 ; FIX(0.16874)
F_0_250 equ 16384 ; FIX(0.25000)
F_0_299 equ 19595 ; FIX(0.29900)
F_0_331 equ 21709 ; FIX(0.33126)
F_0_418 equ 27439 ; FIX(0.41869)
F_0_587 equ 38470 ; FIX(0.58700)
F_0_337 equ (F_0_587 - F_0_250) ; FIX(0.58700) - FIX(0.25000)
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 16
global EXTN(jconst_rgb_ycc_convert_sse2)
EXTN(jconst_rgb_ycc_convert_sse2):
PW_F0299_F0337 times 4 dw F_0_299, F_0_337
PW_F0114_F0250 times 4 dw F_0_114, F_0_250
PW_MF016_MF033 times 4 dw -F_0_168,-F_0_331
PW_MF008_MF041 times 4 dw -F_0_081,-F_0_418
PD_ONEHALFM1_CJ times 4 dd (1 << (SCALEBITS-1)) - 1 + (CENTERJSAMPLE << SCALEBITS)
PD_ONEHALF times 4 dd (1 << (SCALEBITS-1))
alignz 16
; --------------------------------------------------------------------------
%include "jcclrss2-64.asm"
%undef RGB_RED
%undef RGB_GREEN
%undef RGB_BLUE
%undef RGB_PIXELSIZE
%define RGB_RED 0
%define RGB_GREEN 1
%define RGB_BLUE 2
%define RGB_PIXELSIZE 3
%define jsimd_rgb_ycc_convert_sse2 jsimd_extrgb_ycc_convert_sse2
%include "jcclrss2-64.asm"
%undef RGB_RED
%undef RGB_GREEN
%undef RGB_BLUE
%undef RGB_PIXELSIZE
%define RGB_RED 0
%define RGB_GREEN 1
%define RGB_BLUE 2
%define RGB_PIXELSIZE 4
%define jsimd_rgb_ycc_convert_sse2 jsimd_extrgbx_ycc_convert_sse2
%include "jcclrss2-64.asm"
%undef RGB_RED
%undef RGB_GREEN
%undef RGB_BLUE
%undef RGB_PIXELSIZE
%define RGB_RED 2
%define RGB_GREEN 1
%define RGB_BLUE 0
%define RGB_PIXELSIZE 3
%define jsimd_rgb_ycc_convert_sse2 jsimd_extbgr_ycc_convert_sse2
%include "jcclrss2-64.asm"
%undef RGB_RED
%undef RGB_GREEN
%undef RGB_BLUE
%undef RGB_PIXELSIZE
%define RGB_RED 2
%define RGB_GREEN 1
%define RGB_BLUE 0
%define RGB_PIXELSIZE 4
%define jsimd_rgb_ycc_convert_sse2 jsimd_extbgrx_ycc_convert_sse2
%include "jcclrss2-64.asm"
%undef RGB_RED
%undef RGB_GREEN
%undef RGB_BLUE
%undef RGB_PIXELSIZE
%define RGB_RED 3
%define RGB_GREEN 2
%define RGB_BLUE 1
%define RGB_PIXELSIZE 4
%define jsimd_rgb_ycc_convert_sse2 jsimd_extxbgr_ycc_convert_sse2
%include "jcclrss2-64.asm"
%undef RGB_RED
%undef RGB_GREEN
%undef RGB_BLUE
%undef RGB_PIXELSIZE
%define RGB_RED 1
%define RGB_GREEN 2
%define RGB_BLUE 3
%define RGB_PIXELSIZE 4
%define jsimd_rgb_ycc_convert_sse2 jsimd_extxrgb_ycc_convert_sse2
%include "jcclrss2-64.asm"

117
jpeg/simd/jccolss2.asm Normal file
Просмотреть файл

@ -0,0 +1,117 @@
;
; jccolss2.asm - colorspace conversion (SSE2)
;
; x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; Copyright (C) 2009, D. R. Commander.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
; This file should be assembled with NASM (Netwide Assembler),
; can *not* be assembled with Microsoft's MASM or any compatible
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
;
; [TAB8]
%include "jsimdext.inc"
; --------------------------------------------------------------------------
%define SCALEBITS 16
F_0_081 equ 5329 ; FIX(0.08131)
F_0_114 equ 7471 ; FIX(0.11400)
F_0_168 equ 11059 ; FIX(0.16874)
F_0_250 equ 16384 ; FIX(0.25000)
F_0_299 equ 19595 ; FIX(0.29900)
F_0_331 equ 21709 ; FIX(0.33126)
F_0_418 equ 27439 ; FIX(0.41869)
F_0_587 equ 38470 ; FIX(0.58700)
F_0_337 equ (F_0_587 - F_0_250) ; FIX(0.58700) - FIX(0.25000)
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 16
global EXTN(jconst_rgb_ycc_convert_sse2)
EXTN(jconst_rgb_ycc_convert_sse2):
PW_F0299_F0337 times 4 dw F_0_299, F_0_337
PW_F0114_F0250 times 4 dw F_0_114, F_0_250
PW_MF016_MF033 times 4 dw -F_0_168,-F_0_331
PW_MF008_MF041 times 4 dw -F_0_081,-F_0_418
PD_ONEHALFM1_CJ times 4 dd (1 << (SCALEBITS-1)) - 1 + (CENTERJSAMPLE << SCALEBITS)
PD_ONEHALF times 4 dd (1 << (SCALEBITS-1))
alignz 16
; --------------------------------------------------------------------------
%include "jcclrss2.asm"
%undef RGB_RED
%undef RGB_GREEN
%undef RGB_BLUE
%undef RGB_PIXELSIZE
%define RGB_RED 0
%define RGB_GREEN 1
%define RGB_BLUE 2
%define RGB_PIXELSIZE 3
%define jsimd_rgb_ycc_convert_sse2 jsimd_extrgb_ycc_convert_sse2
%include "jcclrss2.asm"
%undef RGB_RED
%undef RGB_GREEN
%undef RGB_BLUE
%undef RGB_PIXELSIZE
%define RGB_RED 0
%define RGB_GREEN 1
%define RGB_BLUE 2
%define RGB_PIXELSIZE 4
%define jsimd_rgb_ycc_convert_sse2 jsimd_extrgbx_ycc_convert_sse2
%include "jcclrss2.asm"
%undef RGB_RED
%undef RGB_GREEN
%undef RGB_BLUE
%undef RGB_PIXELSIZE
%define RGB_RED 2
%define RGB_GREEN 1
%define RGB_BLUE 0
%define RGB_PIXELSIZE 3
%define jsimd_rgb_ycc_convert_sse2 jsimd_extbgr_ycc_convert_sse2
%include "jcclrss2.asm"
%undef RGB_RED
%undef RGB_GREEN
%undef RGB_BLUE
%undef RGB_PIXELSIZE
%define RGB_RED 2
%define RGB_GREEN 1
%define RGB_BLUE 0
%define RGB_PIXELSIZE 4
%define jsimd_rgb_ycc_convert_sse2 jsimd_extbgrx_ycc_convert_sse2
%include "jcclrss2.asm"
%undef RGB_RED
%undef RGB_GREEN
%undef RGB_BLUE
%undef RGB_PIXELSIZE
%define RGB_RED 3
%define RGB_GREEN 2
%define RGB_BLUE 1
%define RGB_PIXELSIZE 4
%define jsimd_rgb_ycc_convert_sse2 jsimd_extxbgr_ycc_convert_sse2
%include "jcclrss2.asm"
%undef RGB_RED
%undef RGB_GREEN
%undef RGB_BLUE
%undef RGB_PIXELSIZE
%define RGB_RED 1
%define RGB_GREEN 2
%define RGB_BLUE 3
%define RGB_PIXELSIZE 4
%define jsimd_rgb_ycc_convert_sse2 jsimd_extxrgb_ycc_convert_sse2
%include "jcclrss2.asm"

105
jpeg/simd/jcolsamp.inc Normal file
Просмотреть файл

@ -0,0 +1,105 @@
;
; jcolsamp.inc - private declarations for color conversion & up/downsampling
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
;
; Based on
; x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
; [TAB8]
; --------------------------------------------------------------------------
; pseudo-resisters to make ordering of RGB configurable
;
%if RGB_RED == 0
%define mmA mm0
%define mmB mm1
%define xmmA xmm0
%define xmmB xmm1
%elif RGB_GREEN == 0
%define mmA mm2
%define mmB mm3
%define xmmA xmm2
%define xmmB xmm3
%elif RGB_BLUE == 0
%define mmA mm4
%define mmB mm5
%define xmmA xmm4
%define xmmB xmm5
%else
%define mmA mm6
%define mmB mm7
%define xmmA xmm6
%define xmmB xmm7
%endif
%if RGB_RED == 1
%define mmC mm0
%define mmD mm1
%define xmmC xmm0
%define xmmD xmm1
%elif RGB_GREEN == 1
%define mmC mm2
%define mmD mm3
%define xmmC xmm2
%define xmmD xmm3
%elif RGB_BLUE == 1
%define mmC mm4
%define mmD mm5
%define xmmC xmm4
%define xmmD xmm5
%else
%define mmC mm6
%define mmD mm7
%define xmmC xmm6
%define xmmD xmm7
%endif
%if RGB_RED == 2
%define mmE mm0
%define mmF mm1
%define xmmE xmm0
%define xmmF xmm1
%elif RGB_GREEN == 2
%define mmE mm2
%define mmF mm3
%define xmmE xmm2
%define xmmF xmm3
%elif RGB_BLUE == 2
%define mmE mm4
%define mmF mm5
%define xmmE xmm4
%define xmmF xmm5
%else
%define mmE mm6
%define mmF mm7
%define xmmE xmm6
%define xmmF xmm7
%endif
%if RGB_RED == 3
%define mmG mm0
%define mmH mm1
%define xmmG xmm0
%define xmmH xmm1
%elif RGB_GREEN == 3
%define mmG mm2
%define mmH mm3
%define xmmG xmm2
%define xmmH xmm3
%elif RGB_BLUE == 3
%define mmG mm4
%define mmH mm5
%define xmmG xmm4
%define xmmH xmm5
%else
%define mmG mm6
%define mmH mm7
%define xmmG xmm6
%define xmmH xmm7
%endif
; --------------------------------------------------------------------------

233
jpeg/simd/jcqnt3dn.asm Normal file
Просмотреть файл

@ -0,0 +1,233 @@
;
; jcqnt3dn.asm - sample data conversion and quantization (3DNow! & MMX)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
;
; Based on
; x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
; This file should be assembled with NASM (Netwide Assembler),
; can *not* be assembled with Microsoft's MASM or any compatible
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
;
; [TAB8]
%include "jsimdext.inc"
%include "jdct.inc"
; --------------------------------------------------------------------------
SECTION SEG_TEXT
BITS 32
;
; Load data into workspace, applying unsigned->signed conversion
;
; GLOBAL(void)
; jsimd_convsamp_float_3dnow (JSAMPARRAY sample_data, JDIMENSION start_col,
; FAST_FLOAT * workspace);
;
%define sample_data ebp+8 ; JSAMPARRAY sample_data
%define start_col ebp+12 ; JDIMENSION start_col
%define workspace ebp+16 ; FAST_FLOAT * workspace
align 16
global EXTN(jsimd_convsamp_float_3dnow)
EXTN(jsimd_convsamp_float_3dnow):
push ebp
mov ebp,esp
push ebx
; push ecx ; need not be preserved
; push edx ; need not be preserved
push esi
push edi
pcmpeqw mm7,mm7
psllw mm7,7
packsswb mm7,mm7 ; mm7 = PB_CENTERJSAMPLE (0x808080..)
mov esi, JSAMPARRAY [sample_data] ; (JSAMPROW *)
mov eax, JDIMENSION [start_col]
mov edi, POINTER [workspace] ; (DCTELEM *)
mov ecx, DCTSIZE/2
alignx 16,7
.convloop:
mov ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; (JSAMPLE *)
mov edx, JSAMPROW [esi+1*SIZEOF_JSAMPROW] ; (JSAMPLE *)
movq mm0, MMWORD [ebx+eax*SIZEOF_JSAMPLE]
movq mm1, MMWORD [edx+eax*SIZEOF_JSAMPLE]
psubb mm0,mm7 ; mm0=(01234567)
psubb mm1,mm7 ; mm1=(89ABCDEF)
punpcklbw mm2,mm0 ; mm2=(*0*1*2*3)
punpckhbw mm0,mm0 ; mm0=(*4*5*6*7)
punpcklbw mm3,mm1 ; mm3=(*8*9*A*B)
punpckhbw mm1,mm1 ; mm1=(*C*D*E*F)
punpcklwd mm4,mm2 ; mm4=(***0***1)
punpckhwd mm2,mm2 ; mm2=(***2***3)
punpcklwd mm5,mm0 ; mm5=(***4***5)
punpckhwd mm0,mm0 ; mm0=(***6***7)
psrad mm4,(DWORD_BIT-BYTE_BIT) ; mm4=(01)
psrad mm2,(DWORD_BIT-BYTE_BIT) ; mm2=(23)
pi2fd mm4,mm4
pi2fd mm2,mm2
psrad mm5,(DWORD_BIT-BYTE_BIT) ; mm5=(45)
psrad mm0,(DWORD_BIT-BYTE_BIT) ; mm0=(67)
pi2fd mm5,mm5
pi2fd mm0,mm0
movq MMWORD [MMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], mm4
movq MMWORD [MMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], mm2
movq MMWORD [MMBLOCK(0,2,edi,SIZEOF_FAST_FLOAT)], mm5
movq MMWORD [MMBLOCK(0,3,edi,SIZEOF_FAST_FLOAT)], mm0
punpcklwd mm6,mm3 ; mm6=(***8***9)
punpckhwd mm3,mm3 ; mm3=(***A***B)
punpcklwd mm4,mm1 ; mm4=(***C***D)
punpckhwd mm1,mm1 ; mm1=(***E***F)
psrad mm6,(DWORD_BIT-BYTE_BIT) ; mm6=(89)
psrad mm3,(DWORD_BIT-BYTE_BIT) ; mm3=(AB)
pi2fd mm6,mm6
pi2fd mm3,mm3
psrad mm4,(DWORD_BIT-BYTE_BIT) ; mm4=(CD)
psrad mm1,(DWORD_BIT-BYTE_BIT) ; mm1=(EF)
pi2fd mm4,mm4
pi2fd mm1,mm1
movq MMWORD [MMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], mm6
movq MMWORD [MMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], mm3
movq MMWORD [MMBLOCK(1,2,edi,SIZEOF_FAST_FLOAT)], mm4
movq MMWORD [MMBLOCK(1,3,edi,SIZEOF_FAST_FLOAT)], mm1
add esi, byte 2*SIZEOF_JSAMPROW
add edi, byte 2*DCTSIZE*SIZEOF_FAST_FLOAT
dec ecx
jnz near .convloop
femms ; empty MMX/3DNow! state
pop edi
pop esi
; pop edx ; need not be preserved
; pop ecx ; need not be preserved
pop ebx
pop ebp
ret
; --------------------------------------------------------------------------
;
; Quantize/descale the coefficients, and store into coef_block
;
; GLOBAL(void)
; jsimd_quantize_float_3dnow (JCOEFPTR coef_block, FAST_FLOAT * divisors,
; FAST_FLOAT * workspace);
;
%define coef_block ebp+8 ; JCOEFPTR coef_block
%define divisors ebp+12 ; FAST_FLOAT * divisors
%define workspace ebp+16 ; FAST_FLOAT * workspace
align 16
global EXTN(jsimd_quantize_float_3dnow)
EXTN(jsimd_quantize_float_3dnow):
push ebp
mov ebp,esp
; push ebx ; unused
; push ecx ; unused
; push edx ; need not be preserved
push esi
push edi
mov eax, 0x4B400000 ; (float)0x00C00000 (rndint_magic)
movd mm7,eax
punpckldq mm7,mm7 ; mm7={12582912.0F 12582912.0F}
mov esi, POINTER [workspace]
mov edx, POINTER [divisors]
mov edi, JCOEFPTR [coef_block]
mov eax, DCTSIZE2/16
alignx 16,7
.quantloop:
movq mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_FAST_FLOAT)]
movq mm1, MMWORD [MMBLOCK(0,1,esi,SIZEOF_FAST_FLOAT)]
pfmul mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)]
pfmul mm1, MMWORD [MMBLOCK(0,1,edx,SIZEOF_FAST_FLOAT)]
movq mm2, MMWORD [MMBLOCK(0,2,esi,SIZEOF_FAST_FLOAT)]
movq mm3, MMWORD [MMBLOCK(0,3,esi,SIZEOF_FAST_FLOAT)]
pfmul mm2, MMWORD [MMBLOCK(0,2,edx,SIZEOF_FAST_FLOAT)]
pfmul mm3, MMWORD [MMBLOCK(0,3,edx,SIZEOF_FAST_FLOAT)]
pfadd mm0,mm7 ; mm0=(00 ** 01 **)
pfadd mm1,mm7 ; mm1=(02 ** 03 **)
pfadd mm2,mm7 ; mm0=(04 ** 05 **)
pfadd mm3,mm7 ; mm1=(06 ** 07 **)
movq mm4,mm0
punpcklwd mm0,mm1 ; mm0=(00 02 ** **)
punpckhwd mm4,mm1 ; mm4=(01 03 ** **)
movq mm5,mm2
punpcklwd mm2,mm3 ; mm2=(04 06 ** **)
punpckhwd mm5,mm3 ; mm5=(05 07 ** **)
punpcklwd mm0,mm4 ; mm0=(00 01 02 03)
punpcklwd mm2,mm5 ; mm2=(04 05 06 07)
movq mm6, MMWORD [MMBLOCK(1,0,esi,SIZEOF_FAST_FLOAT)]
movq mm1, MMWORD [MMBLOCK(1,1,esi,SIZEOF_FAST_FLOAT)]
pfmul mm6, MMWORD [MMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)]
pfmul mm1, MMWORD [MMBLOCK(1,1,edx,SIZEOF_FAST_FLOAT)]
movq mm3, MMWORD [MMBLOCK(1,2,esi,SIZEOF_FAST_FLOAT)]
movq mm4, MMWORD [MMBLOCK(1,3,esi,SIZEOF_FAST_FLOAT)]
pfmul mm3, MMWORD [MMBLOCK(1,2,edx,SIZEOF_FAST_FLOAT)]
pfmul mm4, MMWORD [MMBLOCK(1,3,edx,SIZEOF_FAST_FLOAT)]
pfadd mm6,mm7 ; mm0=(10 ** 11 **)
pfadd mm1,mm7 ; mm4=(12 ** 13 **)
pfadd mm3,mm7 ; mm0=(14 ** 15 **)
pfadd mm4,mm7 ; mm4=(16 ** 17 **)
movq mm5,mm6
punpcklwd mm6,mm1 ; mm6=(10 12 ** **)
punpckhwd mm5,mm1 ; mm5=(11 13 ** **)
movq mm1,mm3
punpcklwd mm3,mm4 ; mm3=(14 16 ** **)
punpckhwd mm1,mm4 ; mm1=(15 17 ** **)
punpcklwd mm6,mm5 ; mm6=(10 11 12 13)
punpcklwd mm3,mm1 ; mm3=(14 15 16 17)
movq MMWORD [MMBLOCK(0,0,edi,SIZEOF_JCOEF)], mm0
movq MMWORD [MMBLOCK(0,1,edi,SIZEOF_JCOEF)], mm2
movq MMWORD [MMBLOCK(1,0,edi,SIZEOF_JCOEF)], mm6
movq MMWORD [MMBLOCK(1,1,edi,SIZEOF_JCOEF)], mm3
add esi, byte 16*SIZEOF_FAST_FLOAT
add edx, byte 16*SIZEOF_FAST_FLOAT
add edi, byte 16*SIZEOF_JCOEF
dec eax
jnz near .quantloop
femms ; empty MMX/3DNow! state
pop edi
pop esi
; pop edx ; need not be preserved
; pop ecx ; unused
; pop ebx ; unused
pop ebp
ret
; For some reason, the OS X linker does not honor the request to align the
; segment unless we do this.
align 16

274
jpeg/simd/jcqntmmx.asm Normal file
Просмотреть файл

@ -0,0 +1,274 @@
;
; jcqntmmx.asm - sample data conversion and quantization (MMX)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
;
; Based on
; x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
; This file should be assembled with NASM (Netwide Assembler),
; can *not* be assembled with Microsoft's MASM or any compatible
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
;
; [TAB8]
%include "jsimdext.inc"
%include "jdct.inc"
; --------------------------------------------------------------------------
SECTION SEG_TEXT
BITS 32
;
; Load data into workspace, applying unsigned->signed conversion
;
; GLOBAL(void)
; jsimd_convsamp_mmx (JSAMPARRAY sample_data, JDIMENSION start_col,
; DCTELEM * workspace);
;
%define sample_data ebp+8 ; JSAMPARRAY sample_data
%define start_col ebp+12 ; JDIMENSION start_col
%define workspace ebp+16 ; DCTELEM * workspace
align 16
global EXTN(jsimd_convsamp_mmx)
EXTN(jsimd_convsamp_mmx):
push ebp
mov ebp,esp
push ebx
; push ecx ; need not be preserved
; push edx ; need not be preserved
push esi
push edi
pxor mm6,mm6 ; mm6=(all 0's)
pcmpeqw mm7,mm7
psllw mm7,7 ; mm7={0xFF80 0xFF80 0xFF80 0xFF80}
mov esi, JSAMPARRAY [sample_data] ; (JSAMPROW *)
mov eax, JDIMENSION [start_col]
mov edi, POINTER [workspace] ; (DCTELEM *)
mov ecx, DCTSIZE/4
alignx 16,7
.convloop:
mov ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; (JSAMPLE *)
mov edx, JSAMPROW [esi+1*SIZEOF_JSAMPROW] ; (JSAMPLE *)
movq mm0, MMWORD [ebx+eax*SIZEOF_JSAMPLE] ; mm0=(01234567)
movq mm1, MMWORD [edx+eax*SIZEOF_JSAMPLE] ; mm1=(89ABCDEF)
mov ebx, JSAMPROW [esi+2*SIZEOF_JSAMPROW] ; (JSAMPLE *)
mov edx, JSAMPROW [esi+3*SIZEOF_JSAMPROW] ; (JSAMPLE *)
movq mm2, MMWORD [ebx+eax*SIZEOF_JSAMPLE] ; mm2=(GHIJKLMN)
movq mm3, MMWORD [edx+eax*SIZEOF_JSAMPLE] ; mm3=(OPQRSTUV)
movq mm4,mm0
punpcklbw mm0,mm6 ; mm0=(0123)
punpckhbw mm4,mm6 ; mm4=(4567)
movq mm5,mm1
punpcklbw mm1,mm6 ; mm1=(89AB)
punpckhbw mm5,mm6 ; mm5=(CDEF)
paddw mm0,mm7
paddw mm4,mm7
paddw mm1,mm7
paddw mm5,mm7
movq MMWORD [MMBLOCK(0,0,edi,SIZEOF_DCTELEM)], mm0
movq MMWORD [MMBLOCK(0,1,edi,SIZEOF_DCTELEM)], mm4
movq MMWORD [MMBLOCK(1,0,edi,SIZEOF_DCTELEM)], mm1
movq MMWORD [MMBLOCK(1,1,edi,SIZEOF_DCTELEM)], mm5
movq mm0,mm2
punpcklbw mm2,mm6 ; mm2=(GHIJ)
punpckhbw mm0,mm6 ; mm0=(KLMN)
movq mm4,mm3
punpcklbw mm3,mm6 ; mm3=(OPQR)
punpckhbw mm4,mm6 ; mm4=(STUV)
paddw mm2,mm7
paddw mm0,mm7
paddw mm3,mm7
paddw mm4,mm7
movq MMWORD [MMBLOCK(2,0,edi,SIZEOF_DCTELEM)], mm2
movq MMWORD [MMBLOCK(2,1,edi,SIZEOF_DCTELEM)], mm0
movq MMWORD [MMBLOCK(3,0,edi,SIZEOF_DCTELEM)], mm3
movq MMWORD [MMBLOCK(3,1,edi,SIZEOF_DCTELEM)], mm4
add esi, byte 4*SIZEOF_JSAMPROW
add edi, byte 4*DCTSIZE*SIZEOF_DCTELEM
dec ecx
jnz short .convloop
emms ; empty MMX state
pop edi
pop esi
; pop edx ; need not be preserved
; pop ecx ; need not be preserved
pop ebx
pop ebp
ret
; --------------------------------------------------------------------------
;
; Quantize/descale the coefficients, and store into coef_block
;
; This implementation is based on an algorithm described in
; "How to optimize for the Pentium family of microprocessors"
; (http://www.agner.org/assem/).
;
; GLOBAL(void)
; jsimd_quantize_mmx (JCOEFPTR coef_block, DCTELEM * divisors,
; DCTELEM * workspace);
;
%define RECIPROCAL(m,n,b) MMBLOCK(DCTSIZE*0+(m),(n),(b),SIZEOF_DCTELEM)
%define CORRECTION(m,n,b) MMBLOCK(DCTSIZE*1+(m),(n),(b),SIZEOF_DCTELEM)
%define SCALE(m,n,b) MMBLOCK(DCTSIZE*2+(m),(n),(b),SIZEOF_DCTELEM)
%define SHIFT(m,n,b) MMBLOCK(DCTSIZE*3+(m),(n),(b),SIZEOF_DCTELEM)
%define coef_block ebp+8 ; JCOEFPTR coef_block
%define divisors ebp+12 ; DCTELEM * divisors
%define workspace ebp+16 ; DCTELEM * workspace
align 16
global EXTN(jsimd_quantize_mmx)
EXTN(jsimd_quantize_mmx):
push ebp
mov ebp,esp
; push ebx ; unused
; push ecx ; unused
; push edx ; need not be preserved
push esi
push edi
mov esi, POINTER [workspace]
mov edx, POINTER [divisors]
mov edi, JCOEFPTR [coef_block]
mov ah, 2
alignx 16,7
.quantloop1:
mov al, DCTSIZE2/8/2
alignx 16,7
.quantloop2:
movq mm2, MMWORD [MMBLOCK(0,0,esi,SIZEOF_DCTELEM)]
movq mm3, MMWORD [MMBLOCK(0,1,esi,SIZEOF_DCTELEM)]
movq mm0,mm2
movq mm1,mm3
psraw mm2,(WORD_BIT-1) ; -1 if value < 0, 0 otherwise
psraw mm3,(WORD_BIT-1)
pxor mm0,mm2 ; val = -val
pxor mm1,mm3
psubw mm0,mm2
psubw mm1,mm3
;
; MMX is an annoyingly crappy instruction set. It has two
; misfeatures that are causing problems here:
;
; - All multiplications are signed.
;
; - The second operand for the shifts is not treated as packed.
;
;
; We work around the first problem by implementing this algorithm:
;
; unsigned long unsigned_multiply(unsigned short x, unsigned short y)
; {
; enum { SHORT_BIT = 16 };
; signed short sx = (signed short) x;
; signed short sy = (signed short) y;
; signed long sz;
;
; sz = (long) sx * (long) sy; /* signed multiply */
;
; if (sx < 0) sz += (long) sy << SHORT_BIT;
; if (sy < 0) sz += (long) sx << SHORT_BIT;
;
; return (unsigned long) sz;
; }
;
; (note that a negative sx adds _sy_ and vice versa)
;
; For the second problem, we replace the shift by a multiplication.
; Unfortunately that means we have to deal with the signed issue again.
;
paddw mm0, MMWORD [CORRECTION(0,0,edx)] ; correction + roundfactor
paddw mm1, MMWORD [CORRECTION(0,1,edx)]
movq mm4,mm0 ; store current value for later
movq mm5,mm1
pmulhw mm0, MMWORD [RECIPROCAL(0,0,edx)] ; reciprocal
pmulhw mm1, MMWORD [RECIPROCAL(0,1,edx)]
paddw mm0,mm4 ; reciprocal is always negative (MSB=1),
paddw mm1,mm5 ; so we always need to add the initial value
; (input value is never negative as we
; inverted it at the start of this routine)
; here it gets a bit tricky as both scale
; and mm0/mm1 can be negative
movq mm6, MMWORD [SCALE(0,0,edx)] ; scale
movq mm7, MMWORD [SCALE(0,1,edx)]
movq mm4,mm0
movq mm5,mm1
pmulhw mm0,mm6
pmulhw mm1,mm7
psraw mm6,(WORD_BIT-1) ; determine if scale is negative
psraw mm7,(WORD_BIT-1)
pand mm6,mm4 ; and add input if it is
pand mm7,mm5
paddw mm0,mm6
paddw mm1,mm7
psraw mm4,(WORD_BIT-1) ; then check if negative input
psraw mm5,(WORD_BIT-1)
pand mm4, MMWORD [SCALE(0,0,edx)] ; and add scale if it is
pand mm5, MMWORD [SCALE(0,1,edx)]
paddw mm0,mm4
paddw mm1,mm5
pxor mm0,mm2 ; val = -val
pxor mm1,mm3
psubw mm0,mm2
psubw mm1,mm3
movq MMWORD [MMBLOCK(0,0,edi,SIZEOF_DCTELEM)], mm0
movq MMWORD [MMBLOCK(0,1,edi,SIZEOF_DCTELEM)], mm1
add esi, byte 8*SIZEOF_DCTELEM
add edx, byte 8*SIZEOF_DCTELEM
add edi, byte 8*SIZEOF_JCOEF
dec al
jnz near .quantloop2
dec ah
jnz near .quantloop1 ; to avoid branch misprediction
emms ; empty MMX state
pop edi
pop esi
; pop edx ; need not be preserved
; pop ecx ; unused
; pop ebx ; unused
pop ebp
ret
; For some reason, the OS X linker does not honor the request to align the
; segment unless we do this.
align 16

158
jpeg/simd/jcqnts2f-64.asm Normal file
Просмотреть файл

@ -0,0 +1,158 @@
;
; jcqnts2f-64.asm - sample data conversion and quantization (64-bit SSE & SSE2)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright 2009 D. R. Commander
;
; Based on
; x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
; This file should be assembled with NASM (Netwide Assembler),
; can *not* be assembled with Microsoft's MASM or any compatible
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
;
; [TAB8]
%include "jsimdext.inc"
%include "jdct.inc"
; --------------------------------------------------------------------------
SECTION SEG_TEXT
BITS 64
;
; Load data into workspace, applying unsigned->signed conversion
;
; GLOBAL(void)
; jsimd_convsamp_float_sse2 (JSAMPARRAY sample_data, JDIMENSION start_col,
; FAST_FLOAT * workspace);
;
; r10 = JSAMPARRAY sample_data
; r11 = JDIMENSION start_col
; r12 = FAST_FLOAT * workspace
align 16
global EXTN(jsimd_convsamp_float_sse2)
EXTN(jsimd_convsamp_float_sse2):
push rbp
mov rax,rsp
mov rbp,rsp
collect_args
push rbx
pcmpeqw xmm7,xmm7
psllw xmm7,7
packsswb xmm7,xmm7 ; xmm7 = PB_CENTERJSAMPLE (0x808080..)
mov rsi, r10
mov rax, r11
mov rdi, r12
mov rcx, DCTSIZE/2
.convloop:
mov rbx, JSAMPROW [rsi+0*SIZEOF_JSAMPROW] ; (JSAMPLE *)
mov rdx, JSAMPROW [rsi+1*SIZEOF_JSAMPROW] ; (JSAMPLE *)
movq xmm0, XMM_MMWORD [rbx+rax*SIZEOF_JSAMPLE]
movq xmm1, XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE]
psubb xmm0,xmm7 ; xmm0=(01234567)
psubb xmm1,xmm7 ; xmm1=(89ABCDEF)
punpcklbw xmm0,xmm0 ; xmm0=(*0*1*2*3*4*5*6*7)
punpcklbw xmm1,xmm1 ; xmm1=(*8*9*A*B*C*D*E*F)
punpcklwd xmm2,xmm0 ; xmm2=(***0***1***2***3)
punpckhwd xmm0,xmm0 ; xmm0=(***4***5***6***7)
punpcklwd xmm3,xmm1 ; xmm3=(***8***9***A***B)
punpckhwd xmm1,xmm1 ; xmm1=(***C***D***E***F)
psrad xmm2,(DWORD_BIT-BYTE_BIT) ; xmm2=(0123)
psrad xmm0,(DWORD_BIT-BYTE_BIT) ; xmm0=(4567)
cvtdq2ps xmm2,xmm2 ; xmm2=(0123)
cvtdq2ps xmm0,xmm0 ; xmm0=(4567)
psrad xmm3,(DWORD_BIT-BYTE_BIT) ; xmm3=(89AB)
psrad xmm1,(DWORD_BIT-BYTE_BIT) ; xmm1=(CDEF)
cvtdq2ps xmm3,xmm3 ; xmm3=(89AB)
cvtdq2ps xmm1,xmm1 ; xmm1=(CDEF)
movaps XMMWORD [XMMBLOCK(0,0,rdi,SIZEOF_FAST_FLOAT)], xmm2
movaps XMMWORD [XMMBLOCK(0,1,rdi,SIZEOF_FAST_FLOAT)], xmm0
movaps XMMWORD [XMMBLOCK(1,0,rdi,SIZEOF_FAST_FLOAT)], xmm3
movaps XMMWORD [XMMBLOCK(1,1,rdi,SIZEOF_FAST_FLOAT)], xmm1
add rsi, byte 2*SIZEOF_JSAMPROW
add rdi, byte 2*DCTSIZE*SIZEOF_FAST_FLOAT
dec rcx
jnz short .convloop
pop rbx
uncollect_args
pop rbp
ret
; --------------------------------------------------------------------------
;
; Quantize/descale the coefficients, and store into coef_block
;
; GLOBAL(void)
; jsimd_quantize_float_sse2 (JCOEFPTR coef_block, FAST_FLOAT * divisors,
; FAST_FLOAT * workspace);
;
; r10 = JCOEFPTR coef_block
; r11 = FAST_FLOAT * divisors
; r12 = FAST_FLOAT * workspace
align 16
global EXTN(jsimd_quantize_float_sse2)
EXTN(jsimd_quantize_float_sse2):
push rbp
mov rax,rsp
mov rbp,rsp
collect_args
mov rsi, r12
mov rdx, r11
mov rdi, r10
mov rax, DCTSIZE2/16
.quantloop:
movaps xmm0, XMMWORD [XMMBLOCK(0,0,rsi,SIZEOF_FAST_FLOAT)]
movaps xmm1, XMMWORD [XMMBLOCK(0,1,rsi,SIZEOF_FAST_FLOAT)]
mulps xmm0, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_FAST_FLOAT)]
mulps xmm1, XMMWORD [XMMBLOCK(0,1,rdx,SIZEOF_FAST_FLOAT)]
movaps xmm2, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_FAST_FLOAT)]
movaps xmm3, XMMWORD [XMMBLOCK(1,1,rsi,SIZEOF_FAST_FLOAT)]
mulps xmm2, XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_FAST_FLOAT)]
mulps xmm3, XMMWORD [XMMBLOCK(1,1,rdx,SIZEOF_FAST_FLOAT)]
cvtps2dq xmm0,xmm0
cvtps2dq xmm1,xmm1
cvtps2dq xmm2,xmm2
cvtps2dq xmm3,xmm3
packssdw xmm0,xmm1
packssdw xmm2,xmm3
movdqa XMMWORD [XMMBLOCK(0,0,rdi,SIZEOF_JCOEF)], xmm0
movdqa XMMWORD [XMMBLOCK(1,0,rdi,SIZEOF_JCOEF)], xmm2
add rsi, byte 16*SIZEOF_FAST_FLOAT
add rdx, byte 16*SIZEOF_FAST_FLOAT
add rdi, byte 16*SIZEOF_JCOEF
dec rax
jnz short .quantloop
uncollect_args
pop rbp
ret
; For some reason, the OS X linker does not honor the request to align the
; segment unless we do this.
align 16

171
jpeg/simd/jcqnts2f.asm Normal file
Просмотреть файл

@ -0,0 +1,171 @@
;
; jcqnts2f.asm - sample data conversion and quantization (SSE & SSE2)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
;
; Based on
; x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
; This file should be assembled with NASM (Netwide Assembler),
; can *not* be assembled with Microsoft's MASM or any compatible
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
;
; [TAB8]
%include "jsimdext.inc"
%include "jdct.inc"
; --------------------------------------------------------------------------
SECTION SEG_TEXT
BITS 32
;
; Load data into workspace, applying unsigned->signed conversion
;
; GLOBAL(void)
; jsimd_convsamp_float_sse2 (JSAMPARRAY sample_data, JDIMENSION start_col,
; FAST_FLOAT * workspace);
;
%define sample_data ebp+8 ; JSAMPARRAY sample_data
%define start_col ebp+12 ; JDIMENSION start_col
%define workspace ebp+16 ; FAST_FLOAT * workspace
align 16
global EXTN(jsimd_convsamp_float_sse2)
EXTN(jsimd_convsamp_float_sse2):
push ebp
mov ebp,esp
push ebx
; push ecx ; need not be preserved
; push edx ; need not be preserved
push esi
push edi
pcmpeqw xmm7,xmm7
psllw xmm7,7
packsswb xmm7,xmm7 ; xmm7 = PB_CENTERJSAMPLE (0x808080..)
mov esi, JSAMPARRAY [sample_data] ; (JSAMPROW *)
mov eax, JDIMENSION [start_col]
mov edi, POINTER [workspace] ; (DCTELEM *)
mov ecx, DCTSIZE/2
alignx 16,7
.convloop:
mov ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; (JSAMPLE *)
mov edx, JSAMPROW [esi+1*SIZEOF_JSAMPROW] ; (JSAMPLE *)
movq xmm0, XMM_MMWORD [ebx+eax*SIZEOF_JSAMPLE]
movq xmm1, XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE]
psubb xmm0,xmm7 ; xmm0=(01234567)
psubb xmm1,xmm7 ; xmm1=(89ABCDEF)
punpcklbw xmm0,xmm0 ; xmm0=(*0*1*2*3*4*5*6*7)
punpcklbw xmm1,xmm1 ; xmm1=(*8*9*A*B*C*D*E*F)
punpcklwd xmm2,xmm0 ; xmm2=(***0***1***2***3)
punpckhwd xmm0,xmm0 ; xmm0=(***4***5***6***7)
punpcklwd xmm3,xmm1 ; xmm3=(***8***9***A***B)
punpckhwd xmm1,xmm1 ; xmm1=(***C***D***E***F)
psrad xmm2,(DWORD_BIT-BYTE_BIT) ; xmm2=(0123)
psrad xmm0,(DWORD_BIT-BYTE_BIT) ; xmm0=(4567)
cvtdq2ps xmm2,xmm2 ; xmm2=(0123)
cvtdq2ps xmm0,xmm0 ; xmm0=(4567)
psrad xmm3,(DWORD_BIT-BYTE_BIT) ; xmm3=(89AB)
psrad xmm1,(DWORD_BIT-BYTE_BIT) ; xmm1=(CDEF)
cvtdq2ps xmm3,xmm3 ; xmm3=(89AB)
cvtdq2ps xmm1,xmm1 ; xmm1=(CDEF)
movaps XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], xmm2
movaps XMMWORD [XMMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], xmm0
movaps XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], xmm3
movaps XMMWORD [XMMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], xmm1
add esi, byte 2*SIZEOF_JSAMPROW
add edi, byte 2*DCTSIZE*SIZEOF_FAST_FLOAT
dec ecx
jnz short .convloop
pop edi
pop esi
; pop edx ; need not be preserved
; pop ecx ; need not be preserved
pop ebx
pop ebp
ret
; --------------------------------------------------------------------------
;
; Quantize/descale the coefficients, and store into coef_block
;
; GLOBAL(void)
; jsimd_quantize_float_sse2 (JCOEFPTR coef_block, FAST_FLOAT * divisors,
; FAST_FLOAT * workspace);
;
%define coef_block ebp+8 ; JCOEFPTR coef_block
%define divisors ebp+12 ; FAST_FLOAT * divisors
%define workspace ebp+16 ; FAST_FLOAT * workspace
align 16
global EXTN(jsimd_quantize_float_sse2)
EXTN(jsimd_quantize_float_sse2):
push ebp
mov ebp,esp
; push ebx ; unused
; push ecx ; unused
; push edx ; need not be preserved
push esi
push edi
mov esi, POINTER [workspace]
mov edx, POINTER [divisors]
mov edi, JCOEFPTR [coef_block]
mov eax, DCTSIZE2/16
alignx 16,7
.quantloop:
movaps xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_FAST_FLOAT)]
movaps xmm1, XMMWORD [XMMBLOCK(0,1,esi,SIZEOF_FAST_FLOAT)]
mulps xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)]
mulps xmm1, XMMWORD [XMMBLOCK(0,1,edx,SIZEOF_FAST_FLOAT)]
movaps xmm2, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_FAST_FLOAT)]
movaps xmm3, XMMWORD [XMMBLOCK(1,1,esi,SIZEOF_FAST_FLOAT)]
mulps xmm2, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)]
mulps xmm3, XMMWORD [XMMBLOCK(1,1,edx,SIZEOF_FAST_FLOAT)]
cvtps2dq xmm0,xmm0
cvtps2dq xmm1,xmm1
cvtps2dq xmm2,xmm2
cvtps2dq xmm3,xmm3
packssdw xmm0,xmm1
packssdw xmm2,xmm3
movdqa XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_JCOEF)], xmm0
movdqa XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_JCOEF)], xmm2
add esi, byte 16*SIZEOF_FAST_FLOAT
add edx, byte 16*SIZEOF_FAST_FLOAT
add edi, byte 16*SIZEOF_JCOEF
dec eax
jnz short .quantloop
pop edi
pop esi
; pop edx ; need not be preserved
; pop ecx ; unused
; pop ebx ; unused
pop ebp
ret
; For some reason, the OS X linker does not honor the request to align the
; segment unless we do this.
align 16

187
jpeg/simd/jcqnts2i-64.asm Normal file
Просмотреть файл

@ -0,0 +1,187 @@
;
; jcqnts2i-64.asm - sample data conversion and quantization (64-bit SSE2)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright 2009 D. R. Commander
;
; Based on
; x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
; This file should be assembled with NASM (Netwide Assembler),
; can *not* be assembled with Microsoft's MASM or any compatible
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
;
; [TAB8]
%include "jsimdext.inc"
%include "jdct.inc"
; --------------------------------------------------------------------------
SECTION SEG_TEXT
BITS 64
;
; Load data into workspace, applying unsigned->signed conversion
;
; GLOBAL(void)
; jsimd_convsamp_sse2 (JSAMPARRAY sample_data, JDIMENSION start_col,
; DCTELEM * workspace);
;
; r10 = JSAMPARRAY sample_data
; r11 = JDIMENSION start_col
; r12 = DCTELEM * workspace
align 16
global EXTN(jsimd_convsamp_sse2)
EXTN(jsimd_convsamp_sse2):
push rbp
mov rax,rsp
mov rbp,rsp
collect_args
push rbx
pxor xmm6,xmm6 ; xmm6=(all 0's)
pcmpeqw xmm7,xmm7
psllw xmm7,7 ; xmm7={0xFF80 0xFF80 0xFF80 0xFF80 ..}
mov rsi, r10
mov rax, r11
mov rdi, r12
mov rcx, DCTSIZE/4
.convloop:
mov rbx, JSAMPROW [rsi+0*SIZEOF_JSAMPROW] ; (JSAMPLE *)
mov rdx, JSAMPROW [rsi+1*SIZEOF_JSAMPROW] ; (JSAMPLE *)
movq xmm0, XMM_MMWORD [rbx+rax*SIZEOF_JSAMPLE] ; xmm0=(01234567)
movq xmm1, XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE] ; xmm1=(89ABCDEF)
mov rbx, JSAMPROW [rsi+2*SIZEOF_JSAMPROW] ; (JSAMPLE *)
mov rdx, JSAMPROW [rsi+3*SIZEOF_JSAMPROW] ; (JSAMPLE *)
movq xmm2, XMM_MMWORD [rbx+rax*SIZEOF_JSAMPLE] ; xmm2=(GHIJKLMN)
movq xmm3, XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE] ; xmm3=(OPQRSTUV)
punpcklbw xmm0,xmm6 ; xmm0=(01234567)
punpcklbw xmm1,xmm6 ; xmm1=(89ABCDEF)
paddw xmm0,xmm7
paddw xmm1,xmm7
punpcklbw xmm2,xmm6 ; xmm2=(GHIJKLMN)
punpcklbw xmm3,xmm6 ; xmm3=(OPQRSTUV)
paddw xmm2,xmm7
paddw xmm3,xmm7
movdqa XMMWORD [XMMBLOCK(0,0,rdi,SIZEOF_DCTELEM)], xmm0
movdqa XMMWORD [XMMBLOCK(1,0,rdi,SIZEOF_DCTELEM)], xmm1
movdqa XMMWORD [XMMBLOCK(2,0,rdi,SIZEOF_DCTELEM)], xmm2
movdqa XMMWORD [XMMBLOCK(3,0,rdi,SIZEOF_DCTELEM)], xmm3
add rsi, byte 4*SIZEOF_JSAMPROW
add rdi, byte 4*DCTSIZE*SIZEOF_DCTELEM
dec rcx
jnz short .convloop
pop rbx
uncollect_args
pop rbp
ret
; --------------------------------------------------------------------------
;
; Quantize/descale the coefficients, and store into coef_block
;
; This implementation is based on an algorithm described in
; "How to optimize for the Pentium family of microprocessors"
; (http://www.agner.org/assem/).
;
; GLOBAL(void)
; jsimd_quantize_sse2 (JCOEFPTR coef_block, DCTELEM * divisors,
; DCTELEM * workspace);
;
%define RECIPROCAL(m,n,b) XMMBLOCK(DCTSIZE*0+(m),(n),(b),SIZEOF_DCTELEM)
%define CORRECTION(m,n,b) XMMBLOCK(DCTSIZE*1+(m),(n),(b),SIZEOF_DCTELEM)
%define SCALE(m,n,b) XMMBLOCK(DCTSIZE*2+(m),(n),(b),SIZEOF_DCTELEM)
; r10 = JCOEFPTR coef_block
; r11 = DCTELEM * divisors
; r12 = DCTELEM * workspace
align 16
global EXTN(jsimd_quantize_sse2)
EXTN(jsimd_quantize_sse2):
push rbp
mov rax,rsp
mov rbp,rsp
collect_args
mov rsi, r12
mov rdx, r11
mov rdi, r10
mov rax, DCTSIZE2/32
.quantloop:
movdqa xmm4, XMMWORD [XMMBLOCK(0,0,rsi,SIZEOF_DCTELEM)]
movdqa xmm5, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_DCTELEM)]
movdqa xmm6, XMMWORD [XMMBLOCK(2,0,rsi,SIZEOF_DCTELEM)]
movdqa xmm7, XMMWORD [XMMBLOCK(3,0,rsi,SIZEOF_DCTELEM)]
movdqa xmm0,xmm4
movdqa xmm1,xmm5
movdqa xmm2,xmm6
movdqa xmm3,xmm7
psraw xmm4,(WORD_BIT-1)
psraw xmm5,(WORD_BIT-1)
psraw xmm6,(WORD_BIT-1)
psraw xmm7,(WORD_BIT-1)
pxor xmm0,xmm4
pxor xmm1,xmm5
pxor xmm2,xmm6
pxor xmm3,xmm7
psubw xmm0,xmm4 ; if (xmm0 < 0) xmm0 = -xmm0;
psubw xmm1,xmm5 ; if (xmm1 < 0) xmm1 = -xmm1;
psubw xmm2,xmm6 ; if (xmm2 < 0) xmm2 = -xmm2;
psubw xmm3,xmm7 ; if (xmm3 < 0) xmm3 = -xmm3;
paddw xmm0, XMMWORD [CORRECTION(0,0,rdx)] ; correction + roundfactor
paddw xmm1, XMMWORD [CORRECTION(1,0,rdx)]
paddw xmm2, XMMWORD [CORRECTION(2,0,rdx)]
paddw xmm3, XMMWORD [CORRECTION(3,0,rdx)]
pmulhuw xmm0, XMMWORD [RECIPROCAL(0,0,rdx)] ; reciprocal
pmulhuw xmm1, XMMWORD [RECIPROCAL(1,0,rdx)]
pmulhuw xmm2, XMMWORD [RECIPROCAL(2,0,rdx)]
pmulhuw xmm3, XMMWORD [RECIPROCAL(3,0,rdx)]
pmulhuw xmm0, XMMWORD [SCALE(0,0,rdx)] ; scale
pmulhuw xmm1, XMMWORD [SCALE(1,0,rdx)]
pmulhuw xmm2, XMMWORD [SCALE(2,0,rdx)]
pmulhuw xmm3, XMMWORD [SCALE(3,0,rdx)]
pxor xmm0,xmm4
pxor xmm1,xmm5
pxor xmm2,xmm6
pxor xmm3,xmm7
psubw xmm0,xmm4
psubw xmm1,xmm5
psubw xmm2,xmm6
psubw xmm3,xmm7
movdqa XMMWORD [XMMBLOCK(0,0,rdi,SIZEOF_DCTELEM)], xmm0
movdqa XMMWORD [XMMBLOCK(1,0,rdi,SIZEOF_DCTELEM)], xmm1
movdqa XMMWORD [XMMBLOCK(2,0,rdi,SIZEOF_DCTELEM)], xmm2
movdqa XMMWORD [XMMBLOCK(3,0,rdi,SIZEOF_DCTELEM)], xmm3
add rsi, byte 32*SIZEOF_DCTELEM
add rdx, byte 32*SIZEOF_DCTELEM
add rdi, byte 32*SIZEOF_JCOEF
dec rax
jnz near .quantloop
uncollect_args
pop rbp
ret
; For some reason, the OS X linker does not honor the request to align the
; segment unless we do this.
align 16

200
jpeg/simd/jcqnts2i.asm Normal file
Просмотреть файл

@ -0,0 +1,200 @@
;
; jcqnts2i.asm - sample data conversion and quantization (SSE2)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
;
; Based on
; x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
; This file should be assembled with NASM (Netwide Assembler),
; can *not* be assembled with Microsoft's MASM or any compatible
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
;
; [TAB8]
%include "jsimdext.inc"
%include "jdct.inc"
; --------------------------------------------------------------------------
SECTION SEG_TEXT
BITS 32
;
; Load data into workspace, applying unsigned->signed conversion
;
; GLOBAL(void)
; jsimd_convsamp_sse2 (JSAMPARRAY sample_data, JDIMENSION start_col,
; DCTELEM * workspace);
;
%define sample_data ebp+8 ; JSAMPARRAY sample_data
%define start_col ebp+12 ; JDIMENSION start_col
%define workspace ebp+16 ; DCTELEM * workspace
align 16
global EXTN(jsimd_convsamp_sse2)
EXTN(jsimd_convsamp_sse2):
push ebp
mov ebp,esp
push ebx
; push ecx ; need not be preserved
; push edx ; need not be preserved
push esi
push edi
pxor xmm6,xmm6 ; xmm6=(all 0's)
pcmpeqw xmm7,xmm7
psllw xmm7,7 ; xmm7={0xFF80 0xFF80 0xFF80 0xFF80 ..}
mov esi, JSAMPARRAY [sample_data] ; (JSAMPROW *)
mov eax, JDIMENSION [start_col]
mov edi, POINTER [workspace] ; (DCTELEM *)
mov ecx, DCTSIZE/4
alignx 16,7
.convloop:
mov ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; (JSAMPLE *)
mov edx, JSAMPROW [esi+1*SIZEOF_JSAMPROW] ; (JSAMPLE *)
movq xmm0, XMM_MMWORD [ebx+eax*SIZEOF_JSAMPLE] ; xmm0=(01234567)
movq xmm1, XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE] ; xmm1=(89ABCDEF)
mov ebx, JSAMPROW [esi+2*SIZEOF_JSAMPROW] ; (JSAMPLE *)
mov edx, JSAMPROW [esi+3*SIZEOF_JSAMPROW] ; (JSAMPLE *)
movq xmm2, XMM_MMWORD [ebx+eax*SIZEOF_JSAMPLE] ; xmm2=(GHIJKLMN)
movq xmm3, XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE] ; xmm3=(OPQRSTUV)
punpcklbw xmm0,xmm6 ; xmm0=(01234567)
punpcklbw xmm1,xmm6 ; xmm1=(89ABCDEF)
paddw xmm0,xmm7
paddw xmm1,xmm7
punpcklbw xmm2,xmm6 ; xmm2=(GHIJKLMN)
punpcklbw xmm3,xmm6 ; xmm3=(OPQRSTUV)
paddw xmm2,xmm7
paddw xmm3,xmm7
movdqa XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_DCTELEM)], xmm0
movdqa XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_DCTELEM)], xmm1
movdqa XMMWORD [XMMBLOCK(2,0,edi,SIZEOF_DCTELEM)], xmm2
movdqa XMMWORD [XMMBLOCK(3,0,edi,SIZEOF_DCTELEM)], xmm3
add esi, byte 4*SIZEOF_JSAMPROW
add edi, byte 4*DCTSIZE*SIZEOF_DCTELEM
dec ecx
jnz short .convloop
pop edi
pop esi
; pop edx ; need not be preserved
; pop ecx ; need not be preserved
pop ebx
pop ebp
ret
; --------------------------------------------------------------------------
;
; Quantize/descale the coefficients, and store into coef_block
;
; This implementation is based on an algorithm described in
; "How to optimize for the Pentium family of microprocessors"
; (http://www.agner.org/assem/).
;
; GLOBAL(void)
; jsimd_quantize_sse2 (JCOEFPTR coef_block, DCTELEM * divisors,
; DCTELEM * workspace);
;
%define RECIPROCAL(m,n,b) XMMBLOCK(DCTSIZE*0+(m),(n),(b),SIZEOF_DCTELEM)
%define CORRECTION(m,n,b) XMMBLOCK(DCTSIZE*1+(m),(n),(b),SIZEOF_DCTELEM)
%define SCALE(m,n,b) XMMBLOCK(DCTSIZE*2+(m),(n),(b),SIZEOF_DCTELEM)
%define coef_block ebp+8 ; JCOEFPTR coef_block
%define divisors ebp+12 ; DCTELEM * divisors
%define workspace ebp+16 ; DCTELEM * workspace
align 16
global EXTN(jsimd_quantize_sse2)
EXTN(jsimd_quantize_sse2):
push ebp
mov ebp,esp
; push ebx ; unused
; push ecx ; unused
; push edx ; need not be preserved
push esi
push edi
mov esi, POINTER [workspace]
mov edx, POINTER [divisors]
mov edi, JCOEFPTR [coef_block]
mov eax, DCTSIZE2/32
alignx 16,7
.quantloop:
movdqa xmm4, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_DCTELEM)]
movdqa xmm5, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_DCTELEM)]
movdqa xmm6, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_DCTELEM)]
movdqa xmm7, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_DCTELEM)]
movdqa xmm0,xmm4
movdqa xmm1,xmm5
movdqa xmm2,xmm6
movdqa xmm3,xmm7
psraw xmm4,(WORD_BIT-1)
psraw xmm5,(WORD_BIT-1)
psraw xmm6,(WORD_BIT-1)
psraw xmm7,(WORD_BIT-1)
pxor xmm0,xmm4
pxor xmm1,xmm5
pxor xmm2,xmm6
pxor xmm3,xmm7
psubw xmm0,xmm4 ; if (xmm0 < 0) xmm0 = -xmm0;
psubw xmm1,xmm5 ; if (xmm1 < 0) xmm1 = -xmm1;
psubw xmm2,xmm6 ; if (xmm2 < 0) xmm2 = -xmm2;
psubw xmm3,xmm7 ; if (xmm3 < 0) xmm3 = -xmm3;
paddw xmm0, XMMWORD [CORRECTION(0,0,edx)] ; correction + roundfactor
paddw xmm1, XMMWORD [CORRECTION(1,0,edx)]
paddw xmm2, XMMWORD [CORRECTION(2,0,edx)]
paddw xmm3, XMMWORD [CORRECTION(3,0,edx)]
pmulhuw xmm0, XMMWORD [RECIPROCAL(0,0,edx)] ; reciprocal
pmulhuw xmm1, XMMWORD [RECIPROCAL(1,0,edx)]
pmulhuw xmm2, XMMWORD [RECIPROCAL(2,0,edx)]
pmulhuw xmm3, XMMWORD [RECIPROCAL(3,0,edx)]
pmulhuw xmm0, XMMWORD [SCALE(0,0,edx)] ; scale
pmulhuw xmm1, XMMWORD [SCALE(1,0,edx)]
pmulhuw xmm2, XMMWORD [SCALE(2,0,edx)]
pmulhuw xmm3, XMMWORD [SCALE(3,0,edx)]
pxor xmm0,xmm4
pxor xmm1,xmm5
pxor xmm2,xmm6
pxor xmm3,xmm7
psubw xmm0,xmm4
psubw xmm1,xmm5
psubw xmm2,xmm6
psubw xmm3,xmm7
movdqa XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_DCTELEM)], xmm0
movdqa XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_DCTELEM)], xmm1
movdqa XMMWORD [XMMBLOCK(2,0,edi,SIZEOF_DCTELEM)], xmm2
movdqa XMMWORD [XMMBLOCK(3,0,edi,SIZEOF_DCTELEM)], xmm3
add esi, byte 32*SIZEOF_DCTELEM
add edx, byte 32*SIZEOF_DCTELEM
add edi, byte 32*SIZEOF_JCOEF
dec eax
jnz near .quantloop
pop edi
pop esi
; pop edx ; need not be preserved
; pop ecx ; unused
; pop ebx ; unused
pop ebp
ret
; For some reason, the OS X linker does not honor the request to align the
; segment unless we do this.
align 16

211
jpeg/simd/jcqntsse.asm Normal file
Просмотреть файл

@ -0,0 +1,211 @@
;
; jcqntsse.asm - sample data conversion and quantization (SSE & MMX)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
;
; Based on
; x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
; This file should be assembled with NASM (Netwide Assembler),
; can *not* be assembled with Microsoft's MASM or any compatible
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
;
; [TAB8]
%include "jsimdext.inc"
%include "jdct.inc"
; --------------------------------------------------------------------------
SECTION SEG_TEXT
BITS 32
;
; Load data into workspace, applying unsigned->signed conversion
;
; GLOBAL(void)
; jsimd_convsamp_float_sse (JSAMPARRAY sample_data, JDIMENSION start_col,
; FAST_FLOAT * workspace);
;
%define sample_data ebp+8 ; JSAMPARRAY sample_data
%define start_col ebp+12 ; JDIMENSION start_col
%define workspace ebp+16 ; FAST_FLOAT * workspace
align 16
global EXTN(jsimd_convsamp_float_sse)
EXTN(jsimd_convsamp_float_sse):
push ebp
mov ebp,esp
push ebx
; push ecx ; need not be preserved
; push edx ; need not be preserved
push esi
push edi
pcmpeqw mm7,mm7
psllw mm7,7
packsswb mm7,mm7 ; mm7 = PB_CENTERJSAMPLE (0x808080..)
mov esi, JSAMPARRAY [sample_data] ; (JSAMPROW *)
mov eax, JDIMENSION [start_col]
mov edi, POINTER [workspace] ; (DCTELEM *)
mov ecx, DCTSIZE/2
alignx 16,7
.convloop:
mov ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; (JSAMPLE *)
mov edx, JSAMPROW [esi+1*SIZEOF_JSAMPROW] ; (JSAMPLE *)
movq mm0, MMWORD [ebx+eax*SIZEOF_JSAMPLE]
movq mm1, MMWORD [edx+eax*SIZEOF_JSAMPLE]
psubb mm0,mm7 ; mm0=(01234567)
psubb mm1,mm7 ; mm1=(89ABCDEF)
punpcklbw mm2,mm0 ; mm2=(*0*1*2*3)
punpckhbw mm0,mm0 ; mm0=(*4*5*6*7)
punpcklbw mm3,mm1 ; mm3=(*8*9*A*B)
punpckhbw mm1,mm1 ; mm1=(*C*D*E*F)
punpcklwd mm4,mm2 ; mm4=(***0***1)
punpckhwd mm2,mm2 ; mm2=(***2***3)
punpcklwd mm5,mm0 ; mm5=(***4***5)
punpckhwd mm0,mm0 ; mm0=(***6***7)
psrad mm4,(DWORD_BIT-BYTE_BIT) ; mm4=(01)
psrad mm2,(DWORD_BIT-BYTE_BIT) ; mm2=(23)
cvtpi2ps xmm0,mm4 ; xmm0=(01**)
cvtpi2ps xmm1,mm2 ; xmm1=(23**)
psrad mm5,(DWORD_BIT-BYTE_BIT) ; mm5=(45)
psrad mm0,(DWORD_BIT-BYTE_BIT) ; mm0=(67)
cvtpi2ps xmm2,mm5 ; xmm2=(45**)
cvtpi2ps xmm3,mm0 ; xmm3=(67**)
punpcklwd mm6,mm3 ; mm6=(***8***9)
punpckhwd mm3,mm3 ; mm3=(***A***B)
punpcklwd mm4,mm1 ; mm4=(***C***D)
punpckhwd mm1,mm1 ; mm1=(***E***F)
psrad mm6,(DWORD_BIT-BYTE_BIT) ; mm6=(89)
psrad mm3,(DWORD_BIT-BYTE_BIT) ; mm3=(AB)
cvtpi2ps xmm4,mm6 ; xmm4=(89**)
cvtpi2ps xmm5,mm3 ; xmm5=(AB**)
psrad mm4,(DWORD_BIT-BYTE_BIT) ; mm4=(CD)
psrad mm1,(DWORD_BIT-BYTE_BIT) ; mm1=(EF)
cvtpi2ps xmm6,mm4 ; xmm6=(CD**)
cvtpi2ps xmm7,mm1 ; xmm7=(EF**)
movlhps xmm0,xmm1 ; xmm0=(0123)
movlhps xmm2,xmm3 ; xmm2=(4567)
movlhps xmm4,xmm5 ; xmm4=(89AB)
movlhps xmm6,xmm7 ; xmm6=(CDEF)
movaps XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], xmm0
movaps XMMWORD [XMMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], xmm2
movaps XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], xmm4
movaps XMMWORD [XMMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], xmm6
add esi, byte 2*SIZEOF_JSAMPROW
add edi, byte 2*DCTSIZE*SIZEOF_FAST_FLOAT
dec ecx
jnz near .convloop
emms ; empty MMX state
pop edi
pop esi
; pop edx ; need not be preserved
; pop ecx ; need not be preserved
pop ebx
pop ebp
ret
; --------------------------------------------------------------------------
;
; Quantize/descale the coefficients, and store into coef_block
;
; GLOBAL(void)
; jsimd_quantize_float_sse (JCOEFPTR coef_block, FAST_FLOAT * divisors,
; FAST_FLOAT * workspace);
;
%define coef_block ebp+8 ; JCOEFPTR coef_block
%define divisors ebp+12 ; FAST_FLOAT * divisors
%define workspace ebp+16 ; FAST_FLOAT * workspace
align 16
global EXTN(jsimd_quantize_float_sse)
EXTN(jsimd_quantize_float_sse):
push ebp
mov ebp,esp
; push ebx ; unused
; push ecx ; unused
; push edx ; need not be preserved
push esi
push edi
mov esi, POINTER [workspace]
mov edx, POINTER [divisors]
mov edi, JCOEFPTR [coef_block]
mov eax, DCTSIZE2/16
alignx 16,7
.quantloop:
movaps xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_FAST_FLOAT)]
movaps xmm1, XMMWORD [XMMBLOCK(0,1,esi,SIZEOF_FAST_FLOAT)]
mulps xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)]
mulps xmm1, XMMWORD [XMMBLOCK(0,1,edx,SIZEOF_FAST_FLOAT)]
movaps xmm2, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_FAST_FLOAT)]
movaps xmm3, XMMWORD [XMMBLOCK(1,1,esi,SIZEOF_FAST_FLOAT)]
mulps xmm2, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)]
mulps xmm3, XMMWORD [XMMBLOCK(1,1,edx,SIZEOF_FAST_FLOAT)]
movhlps xmm4,xmm0
movhlps xmm5,xmm1
cvtps2pi mm0,xmm0
cvtps2pi mm1,xmm1
cvtps2pi mm4,xmm4
cvtps2pi mm5,xmm5
movhlps xmm6,xmm2
movhlps xmm7,xmm3
cvtps2pi mm2,xmm2
cvtps2pi mm3,xmm3
cvtps2pi mm6,xmm6
cvtps2pi mm7,xmm7
packssdw mm0,mm4
packssdw mm1,mm5
packssdw mm2,mm6
packssdw mm3,mm7
movq MMWORD [MMBLOCK(0,0,edi,SIZEOF_JCOEF)], mm0
movq MMWORD [MMBLOCK(0,1,edi,SIZEOF_JCOEF)], mm1
movq MMWORD [MMBLOCK(1,0,edi,SIZEOF_JCOEF)], mm2
movq MMWORD [MMBLOCK(1,1,edi,SIZEOF_JCOEF)], mm3
add esi, byte 16*SIZEOF_FAST_FLOAT
add edx, byte 16*SIZEOF_FAST_FLOAT
add edi, byte 16*SIZEOF_JCOEF
dec eax
jnz short .quantloop
emms ; empty MMX state
pop edi
pop esi
; pop edx ; need not be preserved
; pop ecx ; unused
; pop ebx ; unused
pop ebp
ret
; For some reason, the OS X linker does not honor the request to align the
; segment unless we do this.
align 16

324
jpeg/simd/jcsammmx.asm Normal file
Просмотреть файл

@ -0,0 +1,324 @@
;
; jcsammmx.asm - downsampling (MMX)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
;
; Based on
; x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
; This file should be assembled with NASM (Netwide Assembler),
; can *not* be assembled with Microsoft's MASM or any compatible
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
;
; [TAB8]
%include "jsimdext.inc"
; --------------------------------------------------------------------------
SECTION SEG_TEXT
BITS 32
;
; Downsample pixel values of a single component.
; This version handles the common case of 2:1 horizontal and 1:1 vertical,
; without smoothing.
;
; GLOBAL(void)
; jsimd_h2v1_downsample_mmx (JDIMENSION image_width, int max_v_samp_factor,
; JDIMENSION v_samp_factor, JDIMENSION width_blocks,
; JSAMPARRAY input_data, JSAMPARRAY output_data);
;
%define img_width(b) (b)+8 ; JDIMENSION image_width
%define max_v_samp(b) (b)+12 ; int max_v_samp_factor
%define v_samp(b) (b)+16 ; JDIMENSION v_samp_factor
%define width_blks(b) (b)+20 ; JDIMENSION width_blocks
%define input_data(b) (b)+24 ; JSAMPARRAY input_data
%define output_data(b) (b)+28 ; JSAMPARRAY output_data
align 16
global EXTN(jsimd_h2v1_downsample_mmx)
EXTN(jsimd_h2v1_downsample_mmx):
push ebp
mov ebp,esp
; push ebx ; unused
; push ecx ; need not be preserved
; push edx ; need not be preserved
push esi
push edi
mov ecx, JDIMENSION [width_blks(ebp)]
shl ecx,3 ; imul ecx,DCTSIZE (ecx = output_cols)
jz near .return
mov edx, JDIMENSION [img_width(ebp)]
; -- expand_right_edge
push ecx
shl ecx,1 ; output_cols * 2
sub ecx,edx
jle short .expand_end
mov eax, INT [max_v_samp(ebp)]
test eax,eax
jle short .expand_end
cld
mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
alignx 16,7
.expandloop:
push eax
push ecx
mov edi, JSAMPROW [esi]
add edi,edx
mov al, JSAMPLE [edi-1]
rep stosb
pop ecx
pop eax
add esi, byte SIZEOF_JSAMPROW
dec eax
jg short .expandloop
.expand_end:
pop ecx ; output_cols
; -- h2v1_downsample
mov eax, JDIMENSION [v_samp(ebp)] ; rowctr
test eax,eax
jle near .return
mov edx, 0x00010000 ; bias pattern
movd mm7,edx
pcmpeqw mm6,mm6
punpckldq mm7,mm7 ; mm7={0, 1, 0, 1}
psrlw mm6,BYTE_BIT ; mm6={0xFF 0x00 0xFF 0x00 ..}
mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
mov edi, JSAMPARRAY [output_data(ebp)] ; output_data
alignx 16,7
.rowloop:
push ecx
push edi
push esi
mov esi, JSAMPROW [esi] ; inptr
mov edi, JSAMPROW [edi] ; outptr
alignx 16,7
.columnloop:
movq mm0, MMWORD [esi+0*SIZEOF_MMWORD]
movq mm1, MMWORD [esi+1*SIZEOF_MMWORD]
movq mm2,mm0
movq mm3,mm1
pand mm0,mm6
psrlw mm2,BYTE_BIT
pand mm1,mm6
psrlw mm3,BYTE_BIT
paddw mm0,mm2
paddw mm1,mm3
paddw mm0,mm7
paddw mm1,mm7
psrlw mm0,1
psrlw mm1,1
packuswb mm0,mm1
movq MMWORD [edi+0*SIZEOF_MMWORD], mm0
add esi, byte 2*SIZEOF_MMWORD ; inptr
add edi, byte 1*SIZEOF_MMWORD ; outptr
sub ecx, byte SIZEOF_MMWORD ; outcol
jnz short .columnloop
pop esi
pop edi
pop ecx
add esi, byte SIZEOF_JSAMPROW ; input_data
add edi, byte SIZEOF_JSAMPROW ; output_data
dec eax ; rowctr
jg short .rowloop
emms ; empty MMX state
.return:
pop edi
pop esi
; pop edx ; need not be preserved
; pop ecx ; need not be preserved
; pop ebx ; unused
pop ebp
ret
; --------------------------------------------------------------------------
;
; Downsample pixel values of a single component.
; This version handles the standard case of 2:1 horizontal and 2:1 vertical,
; without smoothing.
;
; GLOBAL(void)
; jsimd_h2v2_downsample_mmx (JDIMENSION image_width, int max_v_samp_factor,
; JDIMENSION v_samp_factor, JDIMENSION width_blocks,
; JSAMPARRAY input_data, JSAMPARRAY output_data);
;
%define img_width(b) (b)+8 ; JDIMENSION image_width
%define max_v_samp(b) (b)+12 ; int max_v_samp_factor
%define v_samp(b) (b)+16 ; JDIMENSION v_samp_factor
%define width_blks(b) (b)+20 ; JDIMENSION width_blocks
%define input_data(b) (b)+24 ; JSAMPARRAY input_data
%define output_data(b) (b)+28 ; JSAMPARRAY output_data
align 16
global EXTN(jsimd_h2v2_downsample_mmx)
EXTN(jsimd_h2v2_downsample_mmx):
push ebp
mov ebp,esp
; push ebx ; unused
; push ecx ; need not be preserved
; push edx ; need not be preserved
push esi
push edi
mov ecx, JDIMENSION [width_blks(ebp)]
shl ecx,3 ; imul ecx,DCTSIZE (ecx = output_cols)
jz near .return
mov edx, JDIMENSION [img_width(ebp)]
; -- expand_right_edge
push ecx
shl ecx,1 ; output_cols * 2
sub ecx,edx
jle short .expand_end
mov eax, INT [max_v_samp(ebp)]
test eax,eax
jle short .expand_end
cld
mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
alignx 16,7
.expandloop:
push eax
push ecx
mov edi, JSAMPROW [esi]
add edi,edx
mov al, JSAMPLE [edi-1]
rep stosb
pop ecx
pop eax
add esi, byte SIZEOF_JSAMPROW
dec eax
jg short .expandloop
.expand_end:
pop ecx ; output_cols
; -- h2v2_downsample
mov eax, JDIMENSION [v_samp(ebp)] ; rowctr
test eax,eax
jle near .return
mov edx, 0x00020001 ; bias pattern
movd mm7,edx
pcmpeqw mm6,mm6
punpckldq mm7,mm7 ; mm7={1, 2, 1, 2}
psrlw mm6,BYTE_BIT ; mm6={0xFF 0x00 0xFF 0x00 ..}
mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
mov edi, JSAMPARRAY [output_data(ebp)] ; output_data
alignx 16,7
.rowloop:
push ecx
push edi
push esi
mov edx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; inptr0
mov esi, JSAMPROW [esi+1*SIZEOF_JSAMPROW] ; inptr1
mov edi, JSAMPROW [edi] ; outptr
alignx 16,7
.columnloop:
movq mm0, MMWORD [edx+0*SIZEOF_MMWORD]
movq mm1, MMWORD [esi+0*SIZEOF_MMWORD]
movq mm2, MMWORD [edx+1*SIZEOF_MMWORD]
movq mm3, MMWORD [esi+1*SIZEOF_MMWORD]
movq mm4,mm0
movq mm5,mm1
pand mm0,mm6
psrlw mm4,BYTE_BIT
pand mm1,mm6
psrlw mm5,BYTE_BIT
paddw mm0,mm4
paddw mm1,mm5
movq mm4,mm2
movq mm5,mm3
pand mm2,mm6
psrlw mm4,BYTE_BIT
pand mm3,mm6
psrlw mm5,BYTE_BIT
paddw mm2,mm4
paddw mm3,mm5
paddw mm0,mm1
paddw mm2,mm3
paddw mm0,mm7
paddw mm2,mm7
psrlw mm0,2
psrlw mm2,2
packuswb mm0,mm2
movq MMWORD [edi+0*SIZEOF_MMWORD], mm0
add edx, byte 2*SIZEOF_MMWORD ; inptr0
add esi, byte 2*SIZEOF_MMWORD ; inptr1
add edi, byte 1*SIZEOF_MMWORD ; outptr
sub ecx, byte SIZEOF_MMWORD ; outcol
jnz near .columnloop
pop esi
pop edi
pop ecx
add esi, byte 2*SIZEOF_JSAMPROW ; input_data
add edi, byte 1*SIZEOF_JSAMPROW ; output_data
dec eax ; rowctr
jg near .rowloop
emms ; empty MMX state
.return:
pop edi
pop esi
; pop edx ; need not be preserved
; pop ecx ; need not be preserved
; pop ebx ; unused
pop ebp
ret
; For some reason, the OS X linker does not honor the request to align the
; segment unless we do this.
align 16

330
jpeg/simd/jcsamss2-64.asm Normal file
Просмотреть файл

@ -0,0 +1,330 @@
;
; jcsamss2-64.asm - downsampling (64-bit SSE2)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright 2009 D. R. Commander
;
; Based on
; x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
; This file should be assembled with NASM (Netwide Assembler),
; can *not* be assembled with Microsoft's MASM or any compatible
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
;
; [TAB8]
%include "jsimdext.inc"
; --------------------------------------------------------------------------
SECTION SEG_TEXT
BITS 64
;
; Downsample pixel values of a single component.
; This version handles the common case of 2:1 horizontal and 1:1 vertical,
; without smoothing.
;
; GLOBAL(void)
; jsimd_h2v1_downsample_sse2 (JDIMENSION image_width, int max_v_samp_factor,
; JDIMENSION v_samp_factor, JDIMENSION width_blocks,
; JSAMPARRAY input_data, JSAMPARRAY output_data);
;
; r10 = JDIMENSION image_width
; r11 = int max_v_samp_factor
; r12 = JDIMENSION v_samp_factor
; r13 = JDIMENSION width_blocks
; r14 = JSAMPARRAY input_data
; r15 = JSAMPARRAY output_data
align 16
global EXTN(jsimd_h2v1_downsample_sse2)
EXTN(jsimd_h2v1_downsample_sse2):
push rbp
mov rax,rsp
mov rbp,rsp
collect_args
mov rcx, r13
shl rcx,3 ; imul rcx,DCTSIZE (rcx = output_cols)
jz near .return
mov rdx, r10
; -- expand_right_edge
push rcx
shl rcx,1 ; output_cols * 2
sub rcx,rdx
jle short .expand_end
mov rax, r11
test rax,rax
jle short .expand_end
cld
mov rsi, r14 ; input_data
.expandloop:
push rax
push rcx
mov rdi, JSAMPROW [rsi]
add rdi,rdx
mov al, JSAMPLE [rdi-1]
rep stosb
pop rcx
pop rax
add rsi, byte SIZEOF_JSAMPROW
dec rax
jg short .expandloop
.expand_end:
pop rcx ; output_cols
; -- h2v1_downsample
mov rax, r12 ; rowctr
test eax,eax
jle near .return
mov rdx, 0x00010000 ; bias pattern
movd xmm7,edx
pcmpeqw xmm6,xmm6
pshufd xmm7,xmm7,0x00 ; xmm7={0, 1, 0, 1, 0, 1, 0, 1}
psrlw xmm6,BYTE_BIT ; xmm6={0xFF 0x00 0xFF 0x00 ..}
mov rsi, r14 ; input_data
mov rdi, r15 ; output_data
.rowloop:
push rcx
push rdi
push rsi
mov rsi, JSAMPROW [rsi] ; inptr
mov rdi, JSAMPROW [rdi] ; outptr
cmp rcx, byte SIZEOF_XMMWORD
jae short .columnloop
.columnloop_r8:
movdqa xmm0, XMMWORD [rsi+0*SIZEOF_XMMWORD]
pxor xmm1,xmm1
mov rcx, SIZEOF_XMMWORD
jmp short .downsample
.columnloop:
movdqa xmm0, XMMWORD [rsi+0*SIZEOF_XMMWORD]
movdqa xmm1, XMMWORD [rsi+1*SIZEOF_XMMWORD]
.downsample:
movdqa xmm2,xmm0
movdqa xmm3,xmm1
pand xmm0,xmm6
psrlw xmm2,BYTE_BIT
pand xmm1,xmm6
psrlw xmm3,BYTE_BIT
paddw xmm0,xmm2
paddw xmm1,xmm3
paddw xmm0,xmm7
paddw xmm1,xmm7
psrlw xmm0,1
psrlw xmm1,1
packuswb xmm0,xmm1
movdqa XMMWORD [rdi+0*SIZEOF_XMMWORD], xmm0
sub rcx, byte SIZEOF_XMMWORD ; outcol
add rsi, byte 2*SIZEOF_XMMWORD ; inptr
add rdi, byte 1*SIZEOF_XMMWORD ; outptr
cmp rcx, byte SIZEOF_XMMWORD
jae short .columnloop
test rcx,rcx
jnz short .columnloop_r8
pop rsi
pop rdi
pop rcx
add rsi, byte SIZEOF_JSAMPROW ; input_data
add rdi, byte SIZEOF_JSAMPROW ; output_data
dec rax ; rowctr
jg near .rowloop
.return:
uncollect_args
pop rbp
ret
; --------------------------------------------------------------------------
;
; Downsample pixel values of a single component.
; This version handles the standard case of 2:1 horizontal and 2:1 vertical,
; without smoothing.
;
; GLOBAL(void)
; jsimd_h2v2_downsample_sse2 (JDIMENSION image_width, int max_v_samp_factor,
; JDIMENSION v_samp_factor, JDIMENSION width_blocks,
; JSAMPARRAY input_data, JSAMPARRAY output_data);
;
; r10 = JDIMENSION image_width
; r11 = int max_v_samp_factor
; r12 = JDIMENSION v_samp_factor
; r13 = JDIMENSION width_blocks
; r14 = JSAMPARRAY input_data
; r15 = JSAMPARRAY output_data
align 16
global EXTN(jsimd_h2v2_downsample_sse2)
EXTN(jsimd_h2v2_downsample_sse2):
push rbp
mov rax,rsp
mov rbp,rsp
collect_args
mov rcx, r13
shl rcx,3 ; imul rcx,DCTSIZE (rcx = output_cols)
jz near .return
mov rdx, r10
; -- expand_right_edge
push rcx
shl rcx,1 ; output_cols * 2
sub rcx,rdx
jle short .expand_end
mov rax, r11
test rax,rax
jle short .expand_end
cld
mov rsi, r14 ; input_data
.expandloop:
push rax
push rcx
mov rdi, JSAMPROW [rsi]
add rdi,rdx
mov al, JSAMPLE [rdi-1]
rep stosb
pop rcx
pop rax
add rsi, byte SIZEOF_JSAMPROW
dec rax
jg short .expandloop
.expand_end:
pop rcx ; output_cols
; -- h2v2_downsample
mov rax, r12 ; rowctr
test rax,rax
jle near .return
mov rdx, 0x00020001 ; bias pattern
movd xmm7,edx
pcmpeqw xmm6,xmm6
pshufd xmm7,xmm7,0x00 ; xmm7={1, 2, 1, 2, 1, 2, 1, 2}
psrlw xmm6,BYTE_BIT ; xmm6={0xFF 0x00 0xFF 0x00 ..}
mov rsi, r14 ; input_data
mov rdi, r15 ; output_data
.rowloop:
push rcx
push rdi
push rsi
mov rdx, JSAMPROW [rsi+0*SIZEOF_JSAMPROW] ; inptr0
mov rsi, JSAMPROW [rsi+1*SIZEOF_JSAMPROW] ; inptr1
mov rdi, JSAMPROW [rdi] ; outptr
cmp rcx, byte SIZEOF_XMMWORD
jae short .columnloop
.columnloop_r8:
movdqa xmm0, XMMWORD [rdx+0*SIZEOF_XMMWORD]
movdqa xmm1, XMMWORD [rsi+0*SIZEOF_XMMWORD]
pxor xmm2,xmm2
pxor xmm3,xmm3
mov rcx, SIZEOF_XMMWORD
jmp short .downsample
.columnloop:
movdqa xmm0, XMMWORD [rdx+0*SIZEOF_XMMWORD]
movdqa xmm1, XMMWORD [rsi+0*SIZEOF_XMMWORD]
movdqa xmm2, XMMWORD [rdx+1*SIZEOF_XMMWORD]
movdqa xmm3, XMMWORD [rsi+1*SIZEOF_XMMWORD]
.downsample:
movdqa xmm4,xmm0
movdqa xmm5,xmm1
pand xmm0,xmm6
psrlw xmm4,BYTE_BIT
pand xmm1,xmm6
psrlw xmm5,BYTE_BIT
paddw xmm0,xmm4
paddw xmm1,xmm5
movdqa xmm4,xmm2
movdqa xmm5,xmm3
pand xmm2,xmm6
psrlw xmm4,BYTE_BIT
pand xmm3,xmm6
psrlw xmm5,BYTE_BIT
paddw xmm2,xmm4
paddw xmm3,xmm5
paddw xmm0,xmm1
paddw xmm2,xmm3
paddw xmm0,xmm7
paddw xmm2,xmm7
psrlw xmm0,2
psrlw xmm2,2
packuswb xmm0,xmm2
movdqa XMMWORD [rdi+0*SIZEOF_XMMWORD], xmm0
sub rcx, byte SIZEOF_XMMWORD ; outcol
add rdx, byte 2*SIZEOF_XMMWORD ; inptr0
add rsi, byte 2*SIZEOF_XMMWORD ; inptr1
add rdi, byte 1*SIZEOF_XMMWORD ; outptr
cmp rcx, byte SIZEOF_XMMWORD
jae near .columnloop
test rcx,rcx
jnz near .columnloop_r8
pop rsi
pop rdi
pop rcx
add rsi, byte 2*SIZEOF_JSAMPROW ; input_data
add rdi, byte 1*SIZEOF_JSAMPROW ; output_data
dec rax ; rowctr
jg near .rowloop
.return:
uncollect_args
pop rbp
ret
; For some reason, the OS X linker does not honor the request to align the
; segment unless we do this.
align 16

351
jpeg/simd/jcsamss2.asm Normal file
Просмотреть файл

@ -0,0 +1,351 @@
;
; jcsamss2.asm - downsampling (SSE2)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
;
; Based on
; x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
; This file should be assembled with NASM (Netwide Assembler),
; can *not* be assembled with Microsoft's MASM or any compatible
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
;
; [TAB8]
%include "jsimdext.inc"
; --------------------------------------------------------------------------
SECTION SEG_TEXT
BITS 32
;
; Downsample pixel values of a single component.
; This version handles the common case of 2:1 horizontal and 1:1 vertical,
; without smoothing.
;
; GLOBAL(void)
; jsimd_h2v1_downsample_sse2 (JDIMENSION image_width, int max_v_samp_factor,
; JDIMENSION v_samp_factor, JDIMENSION width_blocks,
; JSAMPARRAY input_data, JSAMPARRAY output_data);
;
%define img_width(b) (b)+8 ; JDIMENSION image_width
%define max_v_samp(b) (b)+12 ; int max_v_samp_factor
%define v_samp(b) (b)+16 ; JDIMENSION v_samp_factor
%define width_blks(b) (b)+20 ; JDIMENSION width_blocks
%define input_data(b) (b)+24 ; JSAMPARRAY input_data
%define output_data(b) (b)+28 ; JSAMPARRAY output_data
align 16
global EXTN(jsimd_h2v1_downsample_sse2)
EXTN(jsimd_h2v1_downsample_sse2):
push ebp
mov ebp,esp
; push ebx ; unused
; push ecx ; need not be preserved
; push edx ; need not be preserved
push esi
push edi
mov ecx, JDIMENSION [width_blks(ebp)]
shl ecx,3 ; imul ecx,DCTSIZE (ecx = output_cols)
jz near .return
mov edx, JDIMENSION [img_width(ebp)]
; -- expand_right_edge
push ecx
shl ecx,1 ; output_cols * 2
sub ecx,edx
jle short .expand_end
mov eax, INT [max_v_samp(ebp)]
test eax,eax
jle short .expand_end
cld
mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
alignx 16,7
.expandloop:
push eax
push ecx
mov edi, JSAMPROW [esi]
add edi,edx
mov al, JSAMPLE [edi-1]
rep stosb
pop ecx
pop eax
add esi, byte SIZEOF_JSAMPROW
dec eax
jg short .expandloop
.expand_end:
pop ecx ; output_cols
; -- h2v1_downsample
mov eax, JDIMENSION [v_samp(ebp)] ; rowctr
test eax,eax
jle near .return
mov edx, 0x00010000 ; bias pattern
movd xmm7,edx
pcmpeqw xmm6,xmm6
pshufd xmm7,xmm7,0x00 ; xmm7={0, 1, 0, 1, 0, 1, 0, 1}
psrlw xmm6,BYTE_BIT ; xmm6={0xFF 0x00 0xFF 0x00 ..}
mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
mov edi, JSAMPARRAY [output_data(ebp)] ; output_data
alignx 16,7
.rowloop:
push ecx
push edi
push esi
mov esi, JSAMPROW [esi] ; inptr
mov edi, JSAMPROW [edi] ; outptr
cmp ecx, byte SIZEOF_XMMWORD
jae short .columnloop
alignx 16,7
.columnloop_r8:
movdqa xmm0, XMMWORD [esi+0*SIZEOF_XMMWORD]
pxor xmm1,xmm1
mov ecx, SIZEOF_XMMWORD
jmp short .downsample
alignx 16,7
.columnloop:
movdqa xmm0, XMMWORD [esi+0*SIZEOF_XMMWORD]
movdqa xmm1, XMMWORD [esi+1*SIZEOF_XMMWORD]
.downsample:
movdqa xmm2,xmm0
movdqa xmm3,xmm1
pand xmm0,xmm6
psrlw xmm2,BYTE_BIT
pand xmm1,xmm6
psrlw xmm3,BYTE_BIT
paddw xmm0,xmm2
paddw xmm1,xmm3
paddw xmm0,xmm7
paddw xmm1,xmm7
psrlw xmm0,1
psrlw xmm1,1
packuswb xmm0,xmm1
movdqa XMMWORD [edi+0*SIZEOF_XMMWORD], xmm0
sub ecx, byte SIZEOF_XMMWORD ; outcol
add esi, byte 2*SIZEOF_XMMWORD ; inptr
add edi, byte 1*SIZEOF_XMMWORD ; outptr
cmp ecx, byte SIZEOF_XMMWORD
jae short .columnloop
test ecx,ecx
jnz short .columnloop_r8
pop esi
pop edi
pop ecx
add esi, byte SIZEOF_JSAMPROW ; input_data
add edi, byte SIZEOF_JSAMPROW ; output_data
dec eax ; rowctr
jg near .rowloop
.return:
pop edi
pop esi
; pop edx ; need not be preserved
; pop ecx ; need not be preserved
; pop ebx ; unused
pop ebp
ret
; --------------------------------------------------------------------------
;
; Downsample pixel values of a single component.
; This version handles the standard case of 2:1 horizontal and 2:1 vertical,
; without smoothing.
;
; GLOBAL(void)
; jsimd_h2v2_downsample_sse2 (JDIMENSION image_width, int max_v_samp_factor,
; JDIMENSION v_samp_factor, JDIMENSION width_blocks,
; JSAMPARRAY input_data, JSAMPARRAY output_data);
;
%define img_width(b) (b)+8 ; JDIMENSION image_width
%define max_v_samp(b) (b)+12 ; int max_v_samp_factor
%define v_samp(b) (b)+16 ; JDIMENSION v_samp_factor
%define width_blks(b) (b)+20 ; JDIMENSION width_blocks
%define input_data(b) (b)+24 ; JSAMPARRAY input_data
%define output_data(b) (b)+28 ; JSAMPARRAY output_data
align 16
global EXTN(jsimd_h2v2_downsample_sse2)
EXTN(jsimd_h2v2_downsample_sse2):
push ebp
mov ebp,esp
; push ebx ; unused
; push ecx ; need not be preserved
; push edx ; need not be preserved
push esi
push edi
mov ecx, JDIMENSION [width_blks(ebp)]
shl ecx,3 ; imul ecx,DCTSIZE (ecx = output_cols)
jz near .return
mov edx, JDIMENSION [img_width(ebp)]
; -- expand_right_edge
push ecx
shl ecx,1 ; output_cols * 2
sub ecx,edx
jle short .expand_end
mov eax, INT [max_v_samp(ebp)]
test eax,eax
jle short .expand_end
cld
mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
alignx 16,7
.expandloop:
push eax
push ecx
mov edi, JSAMPROW [esi]
add edi,edx
mov al, JSAMPLE [edi-1]
rep stosb
pop ecx
pop eax
add esi, byte SIZEOF_JSAMPROW
dec eax
jg short .expandloop
.expand_end:
pop ecx ; output_cols
; -- h2v2_downsample
mov eax, JDIMENSION [v_samp(ebp)] ; rowctr
test eax,eax
jle near .return
mov edx, 0x00020001 ; bias pattern
movd xmm7,edx
pcmpeqw xmm6,xmm6
pshufd xmm7,xmm7,0x00 ; xmm7={1, 2, 1, 2, 1, 2, 1, 2}
psrlw xmm6,BYTE_BIT ; xmm6={0xFF 0x00 0xFF 0x00 ..}
mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
mov edi, JSAMPARRAY [output_data(ebp)] ; output_data
alignx 16,7
.rowloop:
push ecx
push edi
push esi
mov edx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; inptr0
mov esi, JSAMPROW [esi+1*SIZEOF_JSAMPROW] ; inptr1
mov edi, JSAMPROW [edi] ; outptr
cmp ecx, byte SIZEOF_XMMWORD
jae short .columnloop
alignx 16,7
.columnloop_r8:
movdqa xmm0, XMMWORD [edx+0*SIZEOF_XMMWORD]
movdqa xmm1, XMMWORD [esi+0*SIZEOF_XMMWORD]
pxor xmm2,xmm2
pxor xmm3,xmm3
mov ecx, SIZEOF_XMMWORD
jmp short .downsample
alignx 16,7
.columnloop:
movdqa xmm0, XMMWORD [edx+0*SIZEOF_XMMWORD]
movdqa xmm1, XMMWORD [esi+0*SIZEOF_XMMWORD]
movdqa xmm2, XMMWORD [edx+1*SIZEOF_XMMWORD]
movdqa xmm3, XMMWORD [esi+1*SIZEOF_XMMWORD]
.downsample:
movdqa xmm4,xmm0
movdqa xmm5,xmm1
pand xmm0,xmm6
psrlw xmm4,BYTE_BIT
pand xmm1,xmm6
psrlw xmm5,BYTE_BIT
paddw xmm0,xmm4
paddw xmm1,xmm5
movdqa xmm4,xmm2
movdqa xmm5,xmm3
pand xmm2,xmm6
psrlw xmm4,BYTE_BIT
pand xmm3,xmm6
psrlw xmm5,BYTE_BIT
paddw xmm2,xmm4
paddw xmm3,xmm5
paddw xmm0,xmm1
paddw xmm2,xmm3
paddw xmm0,xmm7
paddw xmm2,xmm7
psrlw xmm0,2
psrlw xmm2,2
packuswb xmm0,xmm2
movdqa XMMWORD [edi+0*SIZEOF_XMMWORD], xmm0
sub ecx, byte SIZEOF_XMMWORD ; outcol
add edx, byte 2*SIZEOF_XMMWORD ; inptr0
add esi, byte 2*SIZEOF_XMMWORD ; inptr1
add edi, byte 1*SIZEOF_XMMWORD ; outptr
cmp ecx, byte SIZEOF_XMMWORD
jae near .columnloop
test ecx,ecx
jnz near .columnloop_r8
pop esi
pop edi
pop ecx
add esi, byte 2*SIZEOF_JSAMPROW ; input_data
add edi, byte 1*SIZEOF_JSAMPROW ; output_data
dec eax ; rowctr
jg near .rowloop
.return:
pop edi
pop esi
; pop edx ; need not be preserved
; pop ecx ; need not be preserved
; pop ebx ; unused
pop ebp
ret
; For some reason, the OS X linker does not honor the request to align the
; segment unless we do this.
align 16

407
jpeg/simd/jdclrmmx.asm Normal file
Просмотреть файл

@ -0,0 +1,407 @@
;
; jdclrmmx.asm - colorspace conversion (MMX)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
;
; Based on
; x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
; This file should be assembled with NASM (Netwide Assembler),
; can *not* be assembled with Microsoft's MASM or any compatible
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
;
; [TAB8]
%include "jcolsamp.inc"
; --------------------------------------------------------------------------
SECTION SEG_TEXT
BITS 32
;
; Convert some rows of samples to the output colorspace.
;
; GLOBAL(void)
; jsimd_ycc_rgb_convert_mmx (JDIMENSION out_width,
; JSAMPIMAGE input_buf, JDIMENSION input_row,
; JSAMPARRAY output_buf, int num_rows)
;
%define out_width(b) (b)+8 ; JDIMENSION out_width
%define input_buf(b) (b)+12 ; JSAMPIMAGE input_buf
%define input_row(b) (b)+16 ; JDIMENSION input_row
%define output_buf(b) (b)+20 ; JSAMPARRAY output_buf
%define num_rows(b) (b)+24 ; int num_rows
%define original_ebp ebp+0
%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_MMWORD ; mmword wk[WK_NUM]
%define WK_NUM 2
%define gotptr wk(0)-SIZEOF_POINTER ; void * gotptr
align 16
global EXTN(jsimd_ycc_rgb_convert_mmx)
EXTN(jsimd_ycc_rgb_convert_mmx):
push ebp
mov eax,esp ; eax = original ebp
sub esp, byte 4
and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits
mov [esp],eax
mov ebp,esp ; ebp = aligned ebp
lea esp, [wk(0)]
pushpic eax ; make a room for GOT address
push ebx
; push ecx ; need not be preserved
; push edx ; need not be preserved
push esi
push edi
get_GOT ebx ; get GOT address
movpic POINTER [gotptr], ebx ; save GOT address
mov ecx, JDIMENSION [out_width(eax)] ; num_cols
test ecx,ecx
jz near .return
push ecx
mov edi, JSAMPIMAGE [input_buf(eax)]
mov ecx, JDIMENSION [input_row(eax)]
mov esi, JSAMPARRAY [edi+0*SIZEOF_JSAMPARRAY]
mov ebx, JSAMPARRAY [edi+1*SIZEOF_JSAMPARRAY]
mov edx, JSAMPARRAY [edi+2*SIZEOF_JSAMPARRAY]
lea esi, [esi+ecx*SIZEOF_JSAMPROW]
lea ebx, [ebx+ecx*SIZEOF_JSAMPROW]
lea edx, [edx+ecx*SIZEOF_JSAMPROW]
pop ecx
mov edi, JSAMPARRAY [output_buf(eax)]
mov eax, INT [num_rows(eax)]
test eax,eax
jle near .return
alignx 16,7
.rowloop:
push eax
push edi
push edx
push ebx
push esi
push ecx ; col
mov esi, JSAMPROW [esi] ; inptr0
mov ebx, JSAMPROW [ebx] ; inptr1
mov edx, JSAMPROW [edx] ; inptr2
mov edi, JSAMPROW [edi] ; outptr
movpic eax, POINTER [gotptr] ; load GOT address (eax)
alignx 16,7
.columnloop:
movq mm5, MMWORD [ebx] ; mm5=Cb(01234567)
movq mm1, MMWORD [edx] ; mm1=Cr(01234567)
pcmpeqw mm4,mm4
pcmpeqw mm7,mm7
psrlw mm4,BYTE_BIT
psllw mm7,7 ; mm7={0xFF80 0xFF80 0xFF80 0xFF80}
movq mm0,mm4 ; mm0=mm4={0xFF 0x00 0xFF 0x00 ..}
pand mm4,mm5 ; mm4=Cb(0246)=CbE
psrlw mm5,BYTE_BIT ; mm5=Cb(1357)=CbO
pand mm0,mm1 ; mm0=Cr(0246)=CrE
psrlw mm1,BYTE_BIT ; mm1=Cr(1357)=CrO
paddw mm4,mm7
paddw mm5,mm7
paddw mm0,mm7
paddw mm1,mm7
; (Original)
; R = Y + 1.40200 * Cr
; G = Y - 0.34414 * Cb - 0.71414 * Cr
; B = Y + 1.77200 * Cb
;
; (This implementation)
; R = Y + 0.40200 * Cr + Cr
; G = Y - 0.34414 * Cb + 0.28586 * Cr - Cr
; B = Y - 0.22800 * Cb + Cb + Cb
movq mm2,mm4 ; mm2=CbE
movq mm3,mm5 ; mm3=CbO
paddw mm4,mm4 ; mm4=2*CbE
paddw mm5,mm5 ; mm5=2*CbO
movq mm6,mm0 ; mm6=CrE
movq mm7,mm1 ; mm7=CrO
paddw mm0,mm0 ; mm0=2*CrE
paddw mm1,mm1 ; mm1=2*CrO
pmulhw mm4,[GOTOFF(eax,PW_MF0228)] ; mm4=(2*CbE * -FIX(0.22800))
pmulhw mm5,[GOTOFF(eax,PW_MF0228)] ; mm5=(2*CbO * -FIX(0.22800))
pmulhw mm0,[GOTOFF(eax,PW_F0402)] ; mm0=(2*CrE * FIX(0.40200))
pmulhw mm1,[GOTOFF(eax,PW_F0402)] ; mm1=(2*CrO * FIX(0.40200))
paddw mm4,[GOTOFF(eax,PW_ONE)]
paddw mm5,[GOTOFF(eax,PW_ONE)]
psraw mm4,1 ; mm4=(CbE * -FIX(0.22800))
psraw mm5,1 ; mm5=(CbO * -FIX(0.22800))
paddw mm0,[GOTOFF(eax,PW_ONE)]
paddw mm1,[GOTOFF(eax,PW_ONE)]
psraw mm0,1 ; mm0=(CrE * FIX(0.40200))
psraw mm1,1 ; mm1=(CrO * FIX(0.40200))
paddw mm4,mm2
paddw mm5,mm3
paddw mm4,mm2 ; mm4=(CbE * FIX(1.77200))=(B-Y)E
paddw mm5,mm3 ; mm5=(CbO * FIX(1.77200))=(B-Y)O
paddw mm0,mm6 ; mm0=(CrE * FIX(1.40200))=(R-Y)E
paddw mm1,mm7 ; mm1=(CrO * FIX(1.40200))=(R-Y)O
movq MMWORD [wk(0)], mm4 ; wk(0)=(B-Y)E
movq MMWORD [wk(1)], mm5 ; wk(1)=(B-Y)O
movq mm4,mm2
movq mm5,mm3
punpcklwd mm2,mm6
punpckhwd mm4,mm6
pmaddwd mm2,[GOTOFF(eax,PW_MF0344_F0285)]
pmaddwd mm4,[GOTOFF(eax,PW_MF0344_F0285)]
punpcklwd mm3,mm7
punpckhwd mm5,mm7
pmaddwd mm3,[GOTOFF(eax,PW_MF0344_F0285)]
pmaddwd mm5,[GOTOFF(eax,PW_MF0344_F0285)]
paddd mm2,[GOTOFF(eax,PD_ONEHALF)]
paddd mm4,[GOTOFF(eax,PD_ONEHALF)]
psrad mm2,SCALEBITS
psrad mm4,SCALEBITS
paddd mm3,[GOTOFF(eax,PD_ONEHALF)]
paddd mm5,[GOTOFF(eax,PD_ONEHALF)]
psrad mm3,SCALEBITS
psrad mm5,SCALEBITS
packssdw mm2,mm4 ; mm2=CbE*-FIX(0.344)+CrE*FIX(0.285)
packssdw mm3,mm5 ; mm3=CbO*-FIX(0.344)+CrO*FIX(0.285)
psubw mm2,mm6 ; mm2=CbE*-FIX(0.344)+CrE*-FIX(0.714)=(G-Y)E
psubw mm3,mm7 ; mm3=CbO*-FIX(0.344)+CrO*-FIX(0.714)=(G-Y)O
movq mm5, MMWORD [esi] ; mm5=Y(01234567)
pcmpeqw mm4,mm4
psrlw mm4,BYTE_BIT ; mm4={0xFF 0x00 0xFF 0x00 ..}
pand mm4,mm5 ; mm4=Y(0246)=YE
psrlw mm5,BYTE_BIT ; mm5=Y(1357)=YO
paddw mm0,mm4 ; mm0=((R-Y)E+YE)=RE=(R0 R2 R4 R6)
paddw mm1,mm5 ; mm1=((R-Y)O+YO)=RO=(R1 R3 R5 R7)
packuswb mm0,mm0 ; mm0=(R0 R2 R4 R6 ** ** ** **)
packuswb mm1,mm1 ; mm1=(R1 R3 R5 R7 ** ** ** **)
paddw mm2,mm4 ; mm2=((G-Y)E+YE)=GE=(G0 G2 G4 G6)
paddw mm3,mm5 ; mm3=((G-Y)O+YO)=GO=(G1 G3 G5 G7)
packuswb mm2,mm2 ; mm2=(G0 G2 G4 G6 ** ** ** **)
packuswb mm3,mm3 ; mm3=(G1 G3 G5 G7 ** ** ** **)
paddw mm4, MMWORD [wk(0)] ; mm4=(YE+(B-Y)E)=BE=(B0 B2 B4 B6)
paddw mm5, MMWORD [wk(1)] ; mm5=(YO+(B-Y)O)=BO=(B1 B3 B5 B7)
packuswb mm4,mm4 ; mm4=(B0 B2 B4 B6 ** ** ** **)
packuswb mm5,mm5 ; mm5=(B1 B3 B5 B7 ** ** ** **)
%if RGB_PIXELSIZE == 3 ; ---------------
; mmA=(00 02 04 06 ** ** ** **), mmB=(01 03 05 07 ** ** ** **)
; mmC=(10 12 14 16 ** ** ** **), mmD=(11 13 15 17 ** ** ** **)
; mmE=(20 22 24 26 ** ** ** **), mmF=(21 23 25 27 ** ** ** **)
; mmG=(** ** ** ** ** ** ** **), mmH=(** ** ** ** ** ** ** **)
punpcklbw mmA,mmC ; mmA=(00 10 02 12 04 14 06 16)
punpcklbw mmE,mmB ; mmE=(20 01 22 03 24 05 26 07)
punpcklbw mmD,mmF ; mmD=(11 21 13 23 15 25 17 27)
movq mmG,mmA
movq mmH,mmA
punpcklwd mmA,mmE ; mmA=(00 10 20 01 02 12 22 03)
punpckhwd mmG,mmE ; mmG=(04 14 24 05 06 16 26 07)
psrlq mmH,2*BYTE_BIT ; mmH=(02 12 04 14 06 16 -- --)
psrlq mmE,2*BYTE_BIT ; mmE=(22 03 24 05 26 07 -- --)
movq mmC,mmD
movq mmB,mmD
punpcklwd mmD,mmH ; mmD=(11 21 02 12 13 23 04 14)
punpckhwd mmC,mmH ; mmC=(15 25 06 16 17 27 -- --)
psrlq mmB,2*BYTE_BIT ; mmB=(13 23 15 25 17 27 -- --)
movq mmF,mmE
punpcklwd mmE,mmB ; mmE=(22 03 13 23 24 05 15 25)
punpckhwd mmF,mmB ; mmF=(26 07 17 27 -- -- -- --)
punpckldq mmA,mmD ; mmA=(00 10 20 01 11 21 02 12)
punpckldq mmE,mmG ; mmE=(22 03 13 23 04 14 24 05)
punpckldq mmC,mmF ; mmC=(15 25 06 16 26 07 17 27)
cmp ecx, byte SIZEOF_MMWORD
jb short .column_st16
movq MMWORD [edi+0*SIZEOF_MMWORD], mmA
movq MMWORD [edi+1*SIZEOF_MMWORD], mmE
movq MMWORD [edi+2*SIZEOF_MMWORD], mmC
sub ecx, byte SIZEOF_MMWORD
jz short .nextrow
add esi, byte SIZEOF_MMWORD ; inptr0
add ebx, byte SIZEOF_MMWORD ; inptr1
add edx, byte SIZEOF_MMWORD ; inptr2
add edi, byte RGB_PIXELSIZE*SIZEOF_MMWORD ; outptr
jmp near .columnloop
alignx 16,7
.column_st16:
lea ecx, [ecx+ecx*2] ; imul ecx, RGB_PIXELSIZE
cmp ecx, byte 2*SIZEOF_MMWORD
jb short .column_st8
movq MMWORD [edi+0*SIZEOF_MMWORD], mmA
movq MMWORD [edi+1*SIZEOF_MMWORD], mmE
movq mmA,mmC
sub ecx, byte 2*SIZEOF_MMWORD
add edi, byte 2*SIZEOF_MMWORD
jmp short .column_st4
.column_st8:
cmp ecx, byte SIZEOF_MMWORD
jb short .column_st4
movq MMWORD [edi+0*SIZEOF_MMWORD], mmA
movq mmA,mmE
sub ecx, byte SIZEOF_MMWORD
add edi, byte SIZEOF_MMWORD
.column_st4:
movd eax,mmA
cmp ecx, byte SIZEOF_DWORD
jb short .column_st2
mov DWORD [edi+0*SIZEOF_DWORD], eax
psrlq mmA,DWORD_BIT
movd eax,mmA
sub ecx, byte SIZEOF_DWORD
add edi, byte SIZEOF_DWORD
.column_st2:
cmp ecx, byte SIZEOF_WORD
jb short .column_st1
mov WORD [edi+0*SIZEOF_WORD], ax
shr eax,WORD_BIT
sub ecx, byte SIZEOF_WORD
add edi, byte SIZEOF_WORD
.column_st1:
cmp ecx, byte SIZEOF_BYTE
jb short .nextrow
mov BYTE [edi+0*SIZEOF_BYTE], al
%else ; RGB_PIXELSIZE == 4 ; -----------
%ifdef RGBX_FILLER_0XFF
pcmpeqb mm6,mm6 ; mm6=(X0 X2 X4 X6 ** ** ** **)
pcmpeqb mm7,mm7 ; mm7=(X1 X3 X5 X7 ** ** ** **)
%else
pxor mm6,mm6 ; mm6=(X0 X2 X4 X6 ** ** ** **)
pxor mm7,mm7 ; mm7=(X1 X3 X5 X7 ** ** ** **)
%endif
; mmA=(00 02 04 06 ** ** ** **), mmB=(01 03 05 07 ** ** ** **)
; mmC=(10 12 14 16 ** ** ** **), mmD=(11 13 15 17 ** ** ** **)
; mmE=(20 22 24 26 ** ** ** **), mmF=(21 23 25 27 ** ** ** **)
; mmG=(30 32 34 36 ** ** ** **), mmH=(31 33 35 37 ** ** ** **)
punpcklbw mmA,mmC ; mmA=(00 10 02 12 04 14 06 16)
punpcklbw mmE,mmG ; mmE=(20 30 22 32 24 34 26 36)
punpcklbw mmB,mmD ; mmB=(01 11 03 13 05 15 07 17)
punpcklbw mmF,mmH ; mmF=(21 31 23 33 25 35 27 37)
movq mmC,mmA
punpcklwd mmA,mmE ; mmA=(00 10 20 30 02 12 22 32)
punpckhwd mmC,mmE ; mmC=(04 14 24 34 06 16 26 36)
movq mmG,mmB
punpcklwd mmB,mmF ; mmB=(01 11 21 31 03 13 23 33)
punpckhwd mmG,mmF ; mmG=(05 15 25 35 07 17 27 37)
movq mmD,mmA
punpckldq mmA,mmB ; mmA=(00 10 20 30 01 11 21 31)
punpckhdq mmD,mmB ; mmD=(02 12 22 32 03 13 23 33)
movq mmH,mmC
punpckldq mmC,mmG ; mmC=(04 14 24 34 05 15 25 35)
punpckhdq mmH,mmG ; mmH=(06 16 26 36 07 17 27 37)
cmp ecx, byte SIZEOF_MMWORD
jb short .column_st16
movq MMWORD [edi+0*SIZEOF_MMWORD], mmA
movq MMWORD [edi+1*SIZEOF_MMWORD], mmD
movq MMWORD [edi+2*SIZEOF_MMWORD], mmC
movq MMWORD [edi+3*SIZEOF_MMWORD], mmH
sub ecx, byte SIZEOF_MMWORD
jz short .nextrow
add esi, byte SIZEOF_MMWORD ; inptr0
add ebx, byte SIZEOF_MMWORD ; inptr1
add edx, byte SIZEOF_MMWORD ; inptr2
add edi, byte RGB_PIXELSIZE*SIZEOF_MMWORD ; outptr
jmp near .columnloop
alignx 16,7
.column_st16:
cmp ecx, byte SIZEOF_MMWORD/2
jb short .column_st8
movq MMWORD [edi+0*SIZEOF_MMWORD], mmA
movq MMWORD [edi+1*SIZEOF_MMWORD], mmD
movq mmA,mmC
movq mmD,mmH
sub ecx, byte SIZEOF_MMWORD/2
add edi, byte 2*SIZEOF_MMWORD
.column_st8:
cmp ecx, byte SIZEOF_MMWORD/4
jb short .column_st4
movq MMWORD [edi+0*SIZEOF_MMWORD], mmA
movq mmA,mmD
sub ecx, byte SIZEOF_MMWORD/4
add edi, byte 1*SIZEOF_MMWORD
.column_st4:
cmp ecx, byte SIZEOF_MMWORD/8
jb short .nextrow
movd DWORD [edi+0*SIZEOF_DWORD], mmA
%endif ; RGB_PIXELSIZE ; ---------------
alignx 16,7
.nextrow:
pop ecx
pop esi
pop ebx
pop edx
pop edi
pop eax
add esi, byte SIZEOF_JSAMPROW
add ebx, byte SIZEOF_JSAMPROW
add edx, byte SIZEOF_JSAMPROW
add edi, byte SIZEOF_JSAMPROW ; output_buf
dec eax ; num_rows
jg near .rowloop
emms ; empty MMX state
.return:
pop edi
pop esi
; pop edx ; need not be preserved
; pop ecx ; need not be preserved
pop ebx
mov esp,ebp ; esp <- aligned ebp
pop esp ; esp <- original ebp
pop ebp
ret
; For some reason, the OS X linker does not honor the request to align the
; segment unless we do this.
align 16

487
jpeg/simd/jdclrss2-64.asm Normal file
Просмотреть файл

@ -0,0 +1,487 @@
;
; jdclrss2-64.asm - colorspace conversion (64-bit SSE2)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright 2009 D. R. Commander
;
; Based on
; x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
; This file should be assembled with NASM (Netwide Assembler),
; can *not* be assembled with Microsoft's MASM or any compatible
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
;
; [TAB8]
%include "jcolsamp.inc"
; --------------------------------------------------------------------------
SECTION SEG_TEXT
BITS 64
;
; Convert some rows of samples to the output colorspace.
;
; GLOBAL(void)
; jsimd_ycc_rgb_convert_sse2 (JDIMENSION out_width,
; JSAMPIMAGE input_buf, JDIMENSION input_row,
; JSAMPARRAY output_buf, int num_rows)
;
; r10 = JDIMENSION out_width
; r11 = JSAMPIMAGE input_buf
; r12 = JDIMENSION input_row
; r13 = JSAMPARRAY output_buf
; r14 = int num_rows
%define wk(i) rbp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
%define WK_NUM 2
align 16
global EXTN(jsimd_ycc_rgb_convert_sse2)
EXTN(jsimd_ycc_rgb_convert_sse2):
push rbp
mov rax,rsp ; rax = original rbp
sub rsp, byte 4
and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits
mov [rsp],rax
mov rbp,rsp ; rbp = aligned rbp
lea rsp, [wk(0)]
collect_args
push rbx
mov rcx, r10 ; num_cols
test rcx,rcx
jz near .return
push rcx
mov rdi, r11
mov rcx, r12
mov rsi, JSAMPARRAY [rdi+0*SIZEOF_JSAMPARRAY]
mov rbx, JSAMPARRAY [rdi+1*SIZEOF_JSAMPARRAY]
mov rdx, JSAMPARRAY [rdi+2*SIZEOF_JSAMPARRAY]
lea rsi, [rsi+rcx*SIZEOF_JSAMPROW]
lea rbx, [rbx+rcx*SIZEOF_JSAMPROW]
lea rdx, [rdx+rcx*SIZEOF_JSAMPROW]
pop rcx
mov rdi, r13
mov eax, r14d
test rax,rax
jle near .return
.rowloop:
push rax
push rdi
push rdx
push rbx
push rsi
push rcx ; col
mov rsi, JSAMPROW [rsi] ; inptr0
mov rbx, JSAMPROW [rbx] ; inptr1
mov rdx, JSAMPROW [rdx] ; inptr2
mov rdi, JSAMPROW [rdi] ; outptr
.columnloop:
movdqa xmm5, XMMWORD [rbx] ; xmm5=Cb(0123456789ABCDEF)
movdqa xmm1, XMMWORD [rdx] ; xmm1=Cr(0123456789ABCDEF)
pcmpeqw xmm4,xmm4
pcmpeqw xmm7,xmm7
psrlw xmm4,BYTE_BIT
psllw xmm7,7 ; xmm7={0xFF80 0xFF80 0xFF80 0xFF80 ..}
movdqa xmm0,xmm4 ; xmm0=xmm4={0xFF 0x00 0xFF 0x00 ..}
pand xmm4,xmm5 ; xmm4=Cb(02468ACE)=CbE
psrlw xmm5,BYTE_BIT ; xmm5=Cb(13579BDF)=CbO
pand xmm0,xmm1 ; xmm0=Cr(02468ACE)=CrE
psrlw xmm1,BYTE_BIT ; xmm1=Cr(13579BDF)=CrO
paddw xmm4,xmm7
paddw xmm5,xmm7
paddw xmm0,xmm7
paddw xmm1,xmm7
; (Original)
; R = Y + 1.40200 * Cr
; G = Y - 0.34414 * Cb - 0.71414 * Cr
; B = Y + 1.77200 * Cb
;
; (This implementation)
; R = Y + 0.40200 * Cr + Cr
; G = Y - 0.34414 * Cb + 0.28586 * Cr - Cr
; B = Y - 0.22800 * Cb + Cb + Cb
movdqa xmm2,xmm4 ; xmm2=CbE
movdqa xmm3,xmm5 ; xmm3=CbO
paddw xmm4,xmm4 ; xmm4=2*CbE
paddw xmm5,xmm5 ; xmm5=2*CbO
movdqa xmm6,xmm0 ; xmm6=CrE
movdqa xmm7,xmm1 ; xmm7=CrO
paddw xmm0,xmm0 ; xmm0=2*CrE
paddw xmm1,xmm1 ; xmm1=2*CrO
pmulhw xmm4,[rel PW_MF0228] ; xmm4=(2*CbE * -FIX(0.22800))
pmulhw xmm5,[rel PW_MF0228] ; xmm5=(2*CbO * -FIX(0.22800))
pmulhw xmm0,[rel PW_F0402] ; xmm0=(2*CrE * FIX(0.40200))
pmulhw xmm1,[rel PW_F0402] ; xmm1=(2*CrO * FIX(0.40200))
paddw xmm4,[rel PW_ONE]
paddw xmm5,[rel PW_ONE]
psraw xmm4,1 ; xmm4=(CbE * -FIX(0.22800))
psraw xmm5,1 ; xmm5=(CbO * -FIX(0.22800))
paddw xmm0,[rel PW_ONE]
paddw xmm1,[rel PW_ONE]
psraw xmm0,1 ; xmm0=(CrE * FIX(0.40200))
psraw xmm1,1 ; xmm1=(CrO * FIX(0.40200))
paddw xmm4,xmm2
paddw xmm5,xmm3
paddw xmm4,xmm2 ; xmm4=(CbE * FIX(1.77200))=(B-Y)E
paddw xmm5,xmm3 ; xmm5=(CbO * FIX(1.77200))=(B-Y)O
paddw xmm0,xmm6 ; xmm0=(CrE * FIX(1.40200))=(R-Y)E
paddw xmm1,xmm7 ; xmm1=(CrO * FIX(1.40200))=(R-Y)O
movdqa XMMWORD [wk(0)], xmm4 ; wk(0)=(B-Y)E
movdqa XMMWORD [wk(1)], xmm5 ; wk(1)=(B-Y)O
movdqa xmm4,xmm2
movdqa xmm5,xmm3
punpcklwd xmm2,xmm6
punpckhwd xmm4,xmm6
pmaddwd xmm2,[rel PW_MF0344_F0285]
pmaddwd xmm4,[rel PW_MF0344_F0285]
punpcklwd xmm3,xmm7
punpckhwd xmm5,xmm7
pmaddwd xmm3,[rel PW_MF0344_F0285]
pmaddwd xmm5,[rel PW_MF0344_F0285]
paddd xmm2,[rel PD_ONEHALF]
paddd xmm4,[rel PD_ONEHALF]
psrad xmm2,SCALEBITS
psrad xmm4,SCALEBITS
paddd xmm3,[rel PD_ONEHALF]
paddd xmm5,[rel PD_ONEHALF]
psrad xmm3,SCALEBITS
psrad xmm5,SCALEBITS
packssdw xmm2,xmm4 ; xmm2=CbE*-FIX(0.344)+CrE*FIX(0.285)
packssdw xmm3,xmm5 ; xmm3=CbO*-FIX(0.344)+CrO*FIX(0.285)
psubw xmm2,xmm6 ; xmm2=CbE*-FIX(0.344)+CrE*-FIX(0.714)=(G-Y)E
psubw xmm3,xmm7 ; xmm3=CbO*-FIX(0.344)+CrO*-FIX(0.714)=(G-Y)O
movdqa xmm5, XMMWORD [rsi] ; xmm5=Y(0123456789ABCDEF)
pcmpeqw xmm4,xmm4
psrlw xmm4,BYTE_BIT ; xmm4={0xFF 0x00 0xFF 0x00 ..}
pand xmm4,xmm5 ; xmm4=Y(02468ACE)=YE
psrlw xmm5,BYTE_BIT ; xmm5=Y(13579BDF)=YO
paddw xmm0,xmm4 ; xmm0=((R-Y)E+YE)=RE=R(02468ACE)
paddw xmm1,xmm5 ; xmm1=((R-Y)O+YO)=RO=R(13579BDF)
packuswb xmm0,xmm0 ; xmm0=R(02468ACE********)
packuswb xmm1,xmm1 ; xmm1=R(13579BDF********)
paddw xmm2,xmm4 ; xmm2=((G-Y)E+YE)=GE=G(02468ACE)
paddw xmm3,xmm5 ; xmm3=((G-Y)O+YO)=GO=G(13579BDF)
packuswb xmm2,xmm2 ; xmm2=G(02468ACE********)
packuswb xmm3,xmm3 ; xmm3=G(13579BDF********)
paddw xmm4, XMMWORD [wk(0)] ; xmm4=(YE+(B-Y)E)=BE=B(02468ACE)
paddw xmm5, XMMWORD [wk(1)] ; xmm5=(YO+(B-Y)O)=BO=B(13579BDF)
packuswb xmm4,xmm4 ; xmm4=B(02468ACE********)
packuswb xmm5,xmm5 ; xmm5=B(13579BDF********)
%if RGB_PIXELSIZE == 3 ; ---------------
; xmmA=(00 02 04 06 08 0A 0C 0E **), xmmB=(01 03 05 07 09 0B 0D 0F **)
; xmmC=(10 12 14 16 18 1A 1C 1E **), xmmD=(11 13 15 17 19 1B 1D 1F **)
; xmmE=(20 22 24 26 28 2A 2C 2E **), xmmF=(21 23 25 27 29 2B 2D 2F **)
; xmmG=(** ** ** ** ** ** ** ** **), xmmH=(** ** ** ** ** ** ** ** **)
punpcklbw xmmA,xmmC ; xmmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E)
punpcklbw xmmE,xmmB ; xmmE=(20 01 22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F)
punpcklbw xmmD,xmmF ; xmmD=(11 21 13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F)
movdqa xmmG,xmmA
movdqa xmmH,xmmA
punpcklwd xmmA,xmmE ; xmmA=(00 10 20 01 02 12 22 03 04 14 24 05 06 16 26 07)
punpckhwd xmmG,xmmE ; xmmG=(08 18 28 09 0A 1A 2A 0B 0C 1C 2C 0D 0E 1E 2E 0F)
psrldq xmmH,2 ; xmmH=(02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E -- --)
psrldq xmmE,2 ; xmmE=(22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F -- --)
movdqa xmmC,xmmD
movdqa xmmB,xmmD
punpcklwd xmmD,xmmH ; xmmD=(11 21 02 12 13 23 04 14 15 25 06 16 17 27 08 18)
punpckhwd xmmC,xmmH ; xmmC=(19 29 0A 1A 1B 2B 0C 1C 1D 2D 0E 1E 1F 2F -- --)
psrldq xmmB,2 ; xmmB=(13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F -- --)
movdqa xmmF,xmmE
punpcklwd xmmE,xmmB ; xmmE=(22 03 13 23 24 05 15 25 26 07 17 27 28 09 19 29)
punpckhwd xmmF,xmmB ; xmmF=(2A 0B 1B 2B 2C 0D 1D 2D 2E 0F 1F 2F -- -- -- --)
pshufd xmmH,xmmA,0x4E; xmmH=(04 14 24 05 06 16 26 07 00 10 20 01 02 12 22 03)
movdqa xmmB,xmmE
punpckldq xmmA,xmmD ; xmmA=(00 10 20 01 11 21 02 12 02 12 22 03 13 23 04 14)
punpckldq xmmE,xmmH ; xmmE=(22 03 13 23 04 14 24 05 24 05 15 25 06 16 26 07)
punpckhdq xmmD,xmmB ; xmmD=(15 25 06 16 26 07 17 27 17 27 08 18 28 09 19 29)
pshufd xmmH,xmmG,0x4E; xmmH=(0C 1C 2C 0D 0E 1E 2E 0F 08 18 28 09 0A 1A 2A 0B)
movdqa xmmB,xmmF
punpckldq xmmG,xmmC ; xmmG=(08 18 28 09 19 29 0A 1A 0A 1A 2A 0B 1B 2B 0C 1C)
punpckldq xmmF,xmmH ; xmmF=(2A 0B 1B 2B 0C 1C 2C 0D 2C 0D 1D 2D 0E 1E 2E 0F)
punpckhdq xmmC,xmmB ; xmmC=(1D 2D 0E 1E 2E 0F 1F 2F 1F 2F -- -- -- -- -- --)
punpcklqdq xmmA,xmmE ; xmmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05)
punpcklqdq xmmD,xmmG ; xmmD=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A)
punpcklqdq xmmF,xmmC ; xmmF=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F)
cmp rcx, byte SIZEOF_XMMWORD
jb short .column_st32
test rdi, SIZEOF_XMMWORD-1
jnz short .out1
; --(aligned)-------------------
movntdq XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
movntdq XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD
movntdq XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmF
add rdi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr
jmp short .out0
.out1: ; --(unaligned)-----------------
pcmpeqb xmmH,xmmH ; xmmH=(all 1's)
maskmovdqu xmmA,xmmH ; movntdqu XMMWORD [rdi], xmmA
add rdi, byte SIZEOF_XMMWORD ; outptr
maskmovdqu xmmD,xmmH ; movntdqu XMMWORD [rdi], xmmD
add rdi, byte SIZEOF_XMMWORD ; outptr
maskmovdqu xmmF,xmmH ; movntdqu XMMWORD [rdi], xmmF
add rdi, byte SIZEOF_XMMWORD ; outptr
.out0:
sub rcx, byte SIZEOF_XMMWORD
jz near .nextrow
add rsi, byte SIZEOF_XMMWORD ; inptr0
add rbx, byte SIZEOF_XMMWORD ; inptr1
add rdx, byte SIZEOF_XMMWORD ; inptr2
jmp near .columnloop
.column_st32:
pcmpeqb xmmH,xmmH ; xmmH=(all 1's)
lea rcx, [rcx+rcx*2] ; imul ecx, RGB_PIXELSIZE
cmp rcx, byte 2*SIZEOF_XMMWORD
jb short .column_st16
maskmovdqu xmmA,xmmH ; movntdqu XMMWORD [rdi], xmmA
add rdi, byte SIZEOF_XMMWORD ; outptr
maskmovdqu xmmD,xmmH ; movntdqu XMMWORD [rdi], xmmD
add rdi, byte SIZEOF_XMMWORD ; outptr
movdqa xmmA,xmmF
sub rcx, byte 2*SIZEOF_XMMWORD
jmp short .column_st15
.column_st16:
cmp rcx, byte SIZEOF_XMMWORD
jb short .column_st15
maskmovdqu xmmA,xmmH ; movntdqu XMMWORD [rdi], xmmA
add rdi, byte SIZEOF_XMMWORD ; outptr
movdqa xmmA,xmmD
sub rcx, byte SIZEOF_XMMWORD
.column_st15:
mov rax,rcx
xor rcx, byte 0x0F
shl rcx, 2
movd xmmB,ecx
psrlq xmmH,4
pcmpeqb xmmE,xmmE
psrlq xmmH,xmmB
psrlq xmmE,xmmB
punpcklbw xmmE,xmmH
; ----------------
mov rcx,rdi
and rcx, byte SIZEOF_XMMWORD-1
jz short .adj0
add rax,rcx
cmp rax, byte SIZEOF_XMMWORD
ja short .adj0
and rdi, byte (-SIZEOF_XMMWORD) ; align to 16-byte boundary
shl rcx, 3 ; pslldq xmmA,ecx & pslldq xmmE,rcx
movdqa xmmG,xmmA
movdqa xmmC,xmmE
pslldq xmmA, SIZEOF_XMMWORD/2
pslldq xmmE, SIZEOF_XMMWORD/2
movd xmmD,ecx
sub rcx, byte (SIZEOF_XMMWORD/2)*BYTE_BIT
jb short .adj1
movd xmmF,ecx
psllq xmmA,xmmF
psllq xmmE,xmmF
jmp short .adj0
.adj1: neg ecx
movd xmmF,ecx
psrlq xmmA,xmmF
psrlq xmmE,xmmF
psllq xmmG,xmmD
psllq xmmC,xmmD
por xmmA,xmmG
por xmmE,xmmC
.adj0: ; ----------------
maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [rdi], xmmA
%else ; RGB_PIXELSIZE == 4 ; -----------
%ifdef RGBX_FILLER_0XFF
pcmpeqb xmm6,xmm6 ; xmm6=XE=X(02468ACE********)
pcmpeqb xmm7,xmm7 ; xmm7=XO=X(13579BDF********)
%else
pxor xmm6,xmm6 ; xmm6=XE=X(02468ACE********)
pxor xmm7,xmm7 ; xmm7=XO=X(13579BDF********)
%endif
; xmmA=(00 02 04 06 08 0A 0C 0E **), xmmB=(01 03 05 07 09 0B 0D 0F **)
; xmmC=(10 12 14 16 18 1A 1C 1E **), xmmD=(11 13 15 17 19 1B 1D 1F **)
; xmmE=(20 22 24 26 28 2A 2C 2E **), xmmF=(21 23 25 27 29 2B 2D 2F **)
; xmmG=(30 32 34 36 38 3A 3C 3E **), xmmH=(31 33 35 37 39 3B 3D 3F **)
punpcklbw xmmA,xmmC ; xmmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E)
punpcklbw xmmE,xmmG ; xmmE=(20 30 22 32 24 34 26 36 28 38 2A 3A 2C 3C 2E 3E)
punpcklbw xmmB,xmmD ; xmmB=(01 11 03 13 05 15 07 17 09 19 0B 1B 0D 1D 0F 1F)
punpcklbw xmmF,xmmH ; xmmF=(21 31 23 33 25 35 27 37 29 39 2B 3B 2D 3D 2F 3F)
movdqa xmmC,xmmA
punpcklwd xmmA,xmmE ; xmmA=(00 10 20 30 02 12 22 32 04 14 24 34 06 16 26 36)
punpckhwd xmmC,xmmE ; xmmC=(08 18 28 38 0A 1A 2A 3A 0C 1C 2C 3C 0E 1E 2E 3E)
movdqa xmmG,xmmB
punpcklwd xmmB,xmmF ; xmmB=(01 11 21 31 03 13 23 33 05 15 25 35 07 17 27 37)
punpckhwd xmmG,xmmF ; xmmG=(09 19 29 39 0B 1B 2B 3B 0D 1D 2D 3D 0F 1F 2F 3F)
movdqa xmmD,xmmA
punpckldq xmmA,xmmB ; xmmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33)
punpckhdq xmmD,xmmB ; xmmD=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37)
movdqa xmmH,xmmC
punpckldq xmmC,xmmG ; xmmC=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B)
punpckhdq xmmH,xmmG ; xmmH=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F)
cmp rcx, byte SIZEOF_XMMWORD
jb short .column_st32
test rdi, SIZEOF_XMMWORD-1
jnz short .out1
; --(aligned)-------------------
movntdq XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
movntdq XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD
movntdq XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmC
movntdq XMMWORD [rdi+3*SIZEOF_XMMWORD], xmmH
add rdi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr
jmp short .out0
.out1: ; --(unaligned)-----------------
pcmpeqb xmmE,xmmE ; xmmE=(all 1's)
maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [rdi], xmmA
add rdi, byte SIZEOF_XMMWORD ; outptr
maskmovdqu xmmD,xmmE ; movntdqu XMMWORD [rdi], xmmD
add rdi, byte SIZEOF_XMMWORD ; outptr
maskmovdqu xmmC,xmmE ; movntdqu XMMWORD [rdi], xmmC
add rdi, byte SIZEOF_XMMWORD ; outptr
maskmovdqu xmmH,xmmE ; movntdqu XMMWORD [rdi], xmmH
add rdi, byte SIZEOF_XMMWORD ; outptr
.out0:
sub rcx, byte SIZEOF_XMMWORD
jz near .nextrow
add rsi, byte SIZEOF_XMMWORD ; inptr0
add rbx, byte SIZEOF_XMMWORD ; inptr1
add rdx, byte SIZEOF_XMMWORD ; inptr2
jmp near .columnloop
.column_st32:
pcmpeqb xmmE,xmmE ; xmmE=(all 1's)
cmp rcx, byte SIZEOF_XMMWORD/2
jb short .column_st16
maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [rdi], xmmA
add rdi, byte SIZEOF_XMMWORD ; outptr
maskmovdqu xmmD,xmmE ; movntdqu XMMWORD [rdi], xmmD
add rdi, byte SIZEOF_XMMWORD ; outptr
movdqa xmmA,xmmC
movdqa xmmD,xmmH
sub rcx, byte SIZEOF_XMMWORD/2
.column_st16:
cmp rcx, byte SIZEOF_XMMWORD/4
jb short .column_st15
maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [rdi], xmmA
add rdi, byte SIZEOF_XMMWORD ; outptr
movdqa xmmA,xmmD
sub rcx, byte SIZEOF_XMMWORD/4
.column_st15:
cmp rcx, byte SIZEOF_XMMWORD/16
jb near .nextrow
mov rax,rcx
xor rcx, byte 0x03
inc rcx
shl rcx, 4
movd xmmF,ecx
psrlq xmmE,xmmF
punpcklbw xmmE,xmmE
; ----------------
mov rcx,rdi
and rcx, byte SIZEOF_XMMWORD-1
jz short .adj0
lea rax, [rcx+rax*4] ; RGB_PIXELSIZE
cmp rax, byte SIZEOF_XMMWORD
ja short .adj0
and rdi, byte (-SIZEOF_XMMWORD) ; align to 16-byte boundary
shl rcx, 3 ; pslldq xmmA,ecx & pslldq xmmE,ecx
movdqa xmmB,xmmA
movdqa xmmG,xmmE
pslldq xmmA, SIZEOF_XMMWORD/2
pslldq xmmE, SIZEOF_XMMWORD/2
movd xmmC,ecx
sub rcx, byte (SIZEOF_XMMWORD/2)*BYTE_BIT
jb short .adj1
movd xmmH,ecx
psllq xmmA,xmmH
psllq xmmE,xmmH
jmp short .adj0
.adj1: neg rcx
movd xmmH,ecx
psrlq xmmA,xmmH
psrlq xmmE,xmmH
psllq xmmB,xmmC
psllq xmmG,xmmC
por xmmA,xmmB
por xmmE,xmmG
.adj0: ; ----------------
maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [rdi], xmmA
%endif ; RGB_PIXELSIZE ; ---------------
.nextrow:
pop rcx
pop rsi
pop rbx
pop rdx
pop rdi
pop rax
add rsi, byte SIZEOF_JSAMPROW
add rbx, byte SIZEOF_JSAMPROW
add rdx, byte SIZEOF_JSAMPROW
add rdi, byte SIZEOF_JSAMPROW ; output_buf
dec rax ; num_rows
jg near .rowloop
sfence ; flush the write buffer
.return:
pop rbx
uncollect_args
mov rsp,rbp ; rsp <- aligned rbp
pop rsp ; rsp <- original rbp
pop rbp
ret
; For some reason, the OS X linker does not honor the request to align the
; segment unless we do this.
align 16

505
jpeg/simd/jdclrss2.asm Normal file
Просмотреть файл

@ -0,0 +1,505 @@
;
; jdclrss2.asm - colorspace conversion (SSE2)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
;
; Based on
; x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
; This file should be assembled with NASM (Netwide Assembler),
; can *not* be assembled with Microsoft's MASM or any compatible
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
;
; [TAB8]
%include "jcolsamp.inc"
; --------------------------------------------------------------------------
SECTION SEG_TEXT
BITS 32
;
; Convert some rows of samples to the output colorspace.
;
; GLOBAL(void)
; jsimd_ycc_rgb_convert_sse2 (JDIMENSION out_width,
; JSAMPIMAGE input_buf, JDIMENSION input_row,
; JSAMPARRAY output_buf, int num_rows)
;
%define out_width(b) (b)+8 ; JDIMENSION out_width
%define input_buf(b) (b)+12 ; JSAMPIMAGE input_buf
%define input_row(b) (b)+16 ; JDIMENSION input_row
%define output_buf(b) (b)+20 ; JSAMPARRAY output_buf
%define num_rows(b) (b)+24 ; int num_rows
%define original_ebp ebp+0
%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
%define WK_NUM 2
%define gotptr wk(0)-SIZEOF_POINTER ; void * gotptr
align 16
global EXTN(jsimd_ycc_rgb_convert_sse2)
EXTN(jsimd_ycc_rgb_convert_sse2):
push ebp
mov eax,esp ; eax = original ebp
sub esp, byte 4
and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits
mov [esp],eax
mov ebp,esp ; ebp = aligned ebp
lea esp, [wk(0)]
pushpic eax ; make a room for GOT address
push ebx
; push ecx ; need not be preserved
; push edx ; need not be preserved
push esi
push edi
get_GOT ebx ; get GOT address
movpic POINTER [gotptr], ebx ; save GOT address
mov ecx, JDIMENSION [out_width(eax)] ; num_cols
test ecx,ecx
jz near .return
push ecx
mov edi, JSAMPIMAGE [input_buf(eax)]
mov ecx, JDIMENSION [input_row(eax)]
mov esi, JSAMPARRAY [edi+0*SIZEOF_JSAMPARRAY]
mov ebx, JSAMPARRAY [edi+1*SIZEOF_JSAMPARRAY]
mov edx, JSAMPARRAY [edi+2*SIZEOF_JSAMPARRAY]
lea esi, [esi+ecx*SIZEOF_JSAMPROW]
lea ebx, [ebx+ecx*SIZEOF_JSAMPROW]
lea edx, [edx+ecx*SIZEOF_JSAMPROW]
pop ecx
mov edi, JSAMPARRAY [output_buf(eax)]
mov eax, INT [num_rows(eax)]
test eax,eax
jle near .return
alignx 16,7
.rowloop:
push eax
push edi
push edx
push ebx
push esi
push ecx ; col
mov esi, JSAMPROW [esi] ; inptr0
mov ebx, JSAMPROW [ebx] ; inptr1
mov edx, JSAMPROW [edx] ; inptr2
mov edi, JSAMPROW [edi] ; outptr
movpic eax, POINTER [gotptr] ; load GOT address (eax)
alignx 16,7
.columnloop:
movdqa xmm5, XMMWORD [ebx] ; xmm5=Cb(0123456789ABCDEF)
movdqa xmm1, XMMWORD [edx] ; xmm1=Cr(0123456789ABCDEF)
pcmpeqw xmm4,xmm4
pcmpeqw xmm7,xmm7
psrlw xmm4,BYTE_BIT
psllw xmm7,7 ; xmm7={0xFF80 0xFF80 0xFF80 0xFF80 ..}
movdqa xmm0,xmm4 ; xmm0=xmm4={0xFF 0x00 0xFF 0x00 ..}
pand xmm4,xmm5 ; xmm4=Cb(02468ACE)=CbE
psrlw xmm5,BYTE_BIT ; xmm5=Cb(13579BDF)=CbO
pand xmm0,xmm1 ; xmm0=Cr(02468ACE)=CrE
psrlw xmm1,BYTE_BIT ; xmm1=Cr(13579BDF)=CrO
paddw xmm4,xmm7
paddw xmm5,xmm7
paddw xmm0,xmm7
paddw xmm1,xmm7
; (Original)
; R = Y + 1.40200 * Cr
; G = Y - 0.34414 * Cb - 0.71414 * Cr
; B = Y + 1.77200 * Cb
;
; (This implementation)
; R = Y + 0.40200 * Cr + Cr
; G = Y - 0.34414 * Cb + 0.28586 * Cr - Cr
; B = Y - 0.22800 * Cb + Cb + Cb
movdqa xmm2,xmm4 ; xmm2=CbE
movdqa xmm3,xmm5 ; xmm3=CbO
paddw xmm4,xmm4 ; xmm4=2*CbE
paddw xmm5,xmm5 ; xmm5=2*CbO
movdqa xmm6,xmm0 ; xmm6=CrE
movdqa xmm7,xmm1 ; xmm7=CrO
paddw xmm0,xmm0 ; xmm0=2*CrE
paddw xmm1,xmm1 ; xmm1=2*CrO
pmulhw xmm4,[GOTOFF(eax,PW_MF0228)] ; xmm4=(2*CbE * -FIX(0.22800))
pmulhw xmm5,[GOTOFF(eax,PW_MF0228)] ; xmm5=(2*CbO * -FIX(0.22800))
pmulhw xmm0,[GOTOFF(eax,PW_F0402)] ; xmm0=(2*CrE * FIX(0.40200))
pmulhw xmm1,[GOTOFF(eax,PW_F0402)] ; xmm1=(2*CrO * FIX(0.40200))
paddw xmm4,[GOTOFF(eax,PW_ONE)]
paddw xmm5,[GOTOFF(eax,PW_ONE)]
psraw xmm4,1 ; xmm4=(CbE * -FIX(0.22800))
psraw xmm5,1 ; xmm5=(CbO * -FIX(0.22800))
paddw xmm0,[GOTOFF(eax,PW_ONE)]
paddw xmm1,[GOTOFF(eax,PW_ONE)]
psraw xmm0,1 ; xmm0=(CrE * FIX(0.40200))
psraw xmm1,1 ; xmm1=(CrO * FIX(0.40200))
paddw xmm4,xmm2
paddw xmm5,xmm3
paddw xmm4,xmm2 ; xmm4=(CbE * FIX(1.77200))=(B-Y)E
paddw xmm5,xmm3 ; xmm5=(CbO * FIX(1.77200))=(B-Y)O
paddw xmm0,xmm6 ; xmm0=(CrE * FIX(1.40200))=(R-Y)E
paddw xmm1,xmm7 ; xmm1=(CrO * FIX(1.40200))=(R-Y)O
movdqa XMMWORD [wk(0)], xmm4 ; wk(0)=(B-Y)E
movdqa XMMWORD [wk(1)], xmm5 ; wk(1)=(B-Y)O
movdqa xmm4,xmm2
movdqa xmm5,xmm3
punpcklwd xmm2,xmm6
punpckhwd xmm4,xmm6
pmaddwd xmm2,[GOTOFF(eax,PW_MF0344_F0285)]
pmaddwd xmm4,[GOTOFF(eax,PW_MF0344_F0285)]
punpcklwd xmm3,xmm7
punpckhwd xmm5,xmm7
pmaddwd xmm3,[GOTOFF(eax,PW_MF0344_F0285)]
pmaddwd xmm5,[GOTOFF(eax,PW_MF0344_F0285)]
paddd xmm2,[GOTOFF(eax,PD_ONEHALF)]
paddd xmm4,[GOTOFF(eax,PD_ONEHALF)]
psrad xmm2,SCALEBITS
psrad xmm4,SCALEBITS
paddd xmm3,[GOTOFF(eax,PD_ONEHALF)]
paddd xmm5,[GOTOFF(eax,PD_ONEHALF)]
psrad xmm3,SCALEBITS
psrad xmm5,SCALEBITS
packssdw xmm2,xmm4 ; xmm2=CbE*-FIX(0.344)+CrE*FIX(0.285)
packssdw xmm3,xmm5 ; xmm3=CbO*-FIX(0.344)+CrO*FIX(0.285)
psubw xmm2,xmm6 ; xmm2=CbE*-FIX(0.344)+CrE*-FIX(0.714)=(G-Y)E
psubw xmm3,xmm7 ; xmm3=CbO*-FIX(0.344)+CrO*-FIX(0.714)=(G-Y)O
movdqa xmm5, XMMWORD [esi] ; xmm5=Y(0123456789ABCDEF)
pcmpeqw xmm4,xmm4
psrlw xmm4,BYTE_BIT ; xmm4={0xFF 0x00 0xFF 0x00 ..}
pand xmm4,xmm5 ; xmm4=Y(02468ACE)=YE
psrlw xmm5,BYTE_BIT ; xmm5=Y(13579BDF)=YO
paddw xmm0,xmm4 ; xmm0=((R-Y)E+YE)=RE=R(02468ACE)
paddw xmm1,xmm5 ; xmm1=((R-Y)O+YO)=RO=R(13579BDF)
packuswb xmm0,xmm0 ; xmm0=R(02468ACE********)
packuswb xmm1,xmm1 ; xmm1=R(13579BDF********)
paddw xmm2,xmm4 ; xmm2=((G-Y)E+YE)=GE=G(02468ACE)
paddw xmm3,xmm5 ; xmm3=((G-Y)O+YO)=GO=G(13579BDF)
packuswb xmm2,xmm2 ; xmm2=G(02468ACE********)
packuswb xmm3,xmm3 ; xmm3=G(13579BDF********)
paddw xmm4, XMMWORD [wk(0)] ; xmm4=(YE+(B-Y)E)=BE=B(02468ACE)
paddw xmm5, XMMWORD [wk(1)] ; xmm5=(YO+(B-Y)O)=BO=B(13579BDF)
packuswb xmm4,xmm4 ; xmm4=B(02468ACE********)
packuswb xmm5,xmm5 ; xmm5=B(13579BDF********)
%if RGB_PIXELSIZE == 3 ; ---------------
; xmmA=(00 02 04 06 08 0A 0C 0E **), xmmB=(01 03 05 07 09 0B 0D 0F **)
; xmmC=(10 12 14 16 18 1A 1C 1E **), xmmD=(11 13 15 17 19 1B 1D 1F **)
; xmmE=(20 22 24 26 28 2A 2C 2E **), xmmF=(21 23 25 27 29 2B 2D 2F **)
; xmmG=(** ** ** ** ** ** ** ** **), xmmH=(** ** ** ** ** ** ** ** **)
punpcklbw xmmA,xmmC ; xmmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E)
punpcklbw xmmE,xmmB ; xmmE=(20 01 22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F)
punpcklbw xmmD,xmmF ; xmmD=(11 21 13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F)
movdqa xmmG,xmmA
movdqa xmmH,xmmA
punpcklwd xmmA,xmmE ; xmmA=(00 10 20 01 02 12 22 03 04 14 24 05 06 16 26 07)
punpckhwd xmmG,xmmE ; xmmG=(08 18 28 09 0A 1A 2A 0B 0C 1C 2C 0D 0E 1E 2E 0F)
psrldq xmmH,2 ; xmmH=(02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E -- --)
psrldq xmmE,2 ; xmmE=(22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F -- --)
movdqa xmmC,xmmD
movdqa xmmB,xmmD
punpcklwd xmmD,xmmH ; xmmD=(11 21 02 12 13 23 04 14 15 25 06 16 17 27 08 18)
punpckhwd xmmC,xmmH ; xmmC=(19 29 0A 1A 1B 2B 0C 1C 1D 2D 0E 1E 1F 2F -- --)
psrldq xmmB,2 ; xmmB=(13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F -- --)
movdqa xmmF,xmmE
punpcklwd xmmE,xmmB ; xmmE=(22 03 13 23 24 05 15 25 26 07 17 27 28 09 19 29)
punpckhwd xmmF,xmmB ; xmmF=(2A 0B 1B 2B 2C 0D 1D 2D 2E 0F 1F 2F -- -- -- --)
pshufd xmmH,xmmA,0x4E; xmmH=(04 14 24 05 06 16 26 07 00 10 20 01 02 12 22 03)
movdqa xmmB,xmmE
punpckldq xmmA,xmmD ; xmmA=(00 10 20 01 11 21 02 12 02 12 22 03 13 23 04 14)
punpckldq xmmE,xmmH ; xmmE=(22 03 13 23 04 14 24 05 24 05 15 25 06 16 26 07)
punpckhdq xmmD,xmmB ; xmmD=(15 25 06 16 26 07 17 27 17 27 08 18 28 09 19 29)
pshufd xmmH,xmmG,0x4E; xmmH=(0C 1C 2C 0D 0E 1E 2E 0F 08 18 28 09 0A 1A 2A 0B)
movdqa xmmB,xmmF
punpckldq xmmG,xmmC ; xmmG=(08 18 28 09 19 29 0A 1A 0A 1A 2A 0B 1B 2B 0C 1C)
punpckldq xmmF,xmmH ; xmmF=(2A 0B 1B 2B 0C 1C 2C 0D 2C 0D 1D 2D 0E 1E 2E 0F)
punpckhdq xmmC,xmmB ; xmmC=(1D 2D 0E 1E 2E 0F 1F 2F 1F 2F -- -- -- -- -- --)
punpcklqdq xmmA,xmmE ; xmmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05)
punpcklqdq xmmD,xmmG ; xmmD=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A)
punpcklqdq xmmF,xmmC ; xmmF=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F)
cmp ecx, byte SIZEOF_XMMWORD
jb short .column_st32
test edi, SIZEOF_XMMWORD-1
jnz short .out1
; --(aligned)-------------------
movntdq XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
movntdq XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD
movntdq XMMWORD [edi+2*SIZEOF_XMMWORD], xmmF
add edi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr
jmp short .out0
.out1: ; --(unaligned)-----------------
pcmpeqb xmmH,xmmH ; xmmH=(all 1's)
maskmovdqu xmmA,xmmH ; movntdqu XMMWORD [edi], xmmA
add edi, byte SIZEOF_XMMWORD ; outptr
maskmovdqu xmmD,xmmH ; movntdqu XMMWORD [edi], xmmD
add edi, byte SIZEOF_XMMWORD ; outptr
maskmovdqu xmmF,xmmH ; movntdqu XMMWORD [edi], xmmF
add edi, byte SIZEOF_XMMWORD ; outptr
.out0:
sub ecx, byte SIZEOF_XMMWORD
jz near .nextrow
add esi, byte SIZEOF_XMMWORD ; inptr0
add ebx, byte SIZEOF_XMMWORD ; inptr1
add edx, byte SIZEOF_XMMWORD ; inptr2
jmp near .columnloop
alignx 16,7
.column_st32:
pcmpeqb xmmH,xmmH ; xmmH=(all 1's)
lea ecx, [ecx+ecx*2] ; imul ecx, RGB_PIXELSIZE
cmp ecx, byte 2*SIZEOF_XMMWORD
jb short .column_st16
maskmovdqu xmmA,xmmH ; movntdqu XMMWORD [edi], xmmA
add edi, byte SIZEOF_XMMWORD ; outptr
maskmovdqu xmmD,xmmH ; movntdqu XMMWORD [edi], xmmD
add edi, byte SIZEOF_XMMWORD ; outptr
movdqa xmmA,xmmF
sub ecx, byte 2*SIZEOF_XMMWORD
jmp short .column_st15
.column_st16:
cmp ecx, byte SIZEOF_XMMWORD
jb short .column_st15
maskmovdqu xmmA,xmmH ; movntdqu XMMWORD [edi], xmmA
add edi, byte SIZEOF_XMMWORD ; outptr
movdqa xmmA,xmmD
sub ecx, byte SIZEOF_XMMWORD
.column_st15:
mov eax,ecx
xor ecx, byte 0x0F
shl ecx, 2
movd xmmB,ecx
psrlq xmmH,4
pcmpeqb xmmE,xmmE
psrlq xmmH,xmmB
psrlq xmmE,xmmB
punpcklbw xmmE,xmmH
; ----------------
mov ecx,edi
and ecx, byte SIZEOF_XMMWORD-1
jz short .adj0
add eax,ecx
cmp eax, byte SIZEOF_XMMWORD
ja short .adj0
and edi, byte (-SIZEOF_XMMWORD) ; align to 16-byte boundary
shl ecx, 3 ; pslldq xmmA,ecx & pslldq xmmE,ecx
movdqa xmmG,xmmA
movdqa xmmC,xmmE
pslldq xmmA, SIZEOF_XMMWORD/2
pslldq xmmE, SIZEOF_XMMWORD/2
movd xmmD,ecx
sub ecx, byte (SIZEOF_XMMWORD/2)*BYTE_BIT
jb short .adj1
movd xmmF,ecx
psllq xmmA,xmmF
psllq xmmE,xmmF
jmp short .adj0
.adj1: neg ecx
movd xmmF,ecx
psrlq xmmA,xmmF
psrlq xmmE,xmmF
psllq xmmG,xmmD
psllq xmmC,xmmD
por xmmA,xmmG
por xmmE,xmmC
.adj0: ; ----------------
maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA
%else ; RGB_PIXELSIZE == 4 ; -----------
%ifdef RGBX_FILLER_0XFF
pcmpeqb xmm6,xmm6 ; xmm6=XE=X(02468ACE********)
pcmpeqb xmm7,xmm7 ; xmm7=XO=X(13579BDF********)
%else
pxor xmm6,xmm6 ; xmm6=XE=X(02468ACE********)
pxor xmm7,xmm7 ; xmm7=XO=X(13579BDF********)
%endif
; xmmA=(00 02 04 06 08 0A 0C 0E **), xmmB=(01 03 05 07 09 0B 0D 0F **)
; xmmC=(10 12 14 16 18 1A 1C 1E **), xmmD=(11 13 15 17 19 1B 1D 1F **)
; xmmE=(20 22 24 26 28 2A 2C 2E **), xmmF=(21 23 25 27 29 2B 2D 2F **)
; xmmG=(30 32 34 36 38 3A 3C 3E **), xmmH=(31 33 35 37 39 3B 3D 3F **)
punpcklbw xmmA,xmmC ; xmmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E)
punpcklbw xmmE,xmmG ; xmmE=(20 30 22 32 24 34 26 36 28 38 2A 3A 2C 3C 2E 3E)
punpcklbw xmmB,xmmD ; xmmB=(01 11 03 13 05 15 07 17 09 19 0B 1B 0D 1D 0F 1F)
punpcklbw xmmF,xmmH ; xmmF=(21 31 23 33 25 35 27 37 29 39 2B 3B 2D 3D 2F 3F)
movdqa xmmC,xmmA
punpcklwd xmmA,xmmE ; xmmA=(00 10 20 30 02 12 22 32 04 14 24 34 06 16 26 36)
punpckhwd xmmC,xmmE ; xmmC=(08 18 28 38 0A 1A 2A 3A 0C 1C 2C 3C 0E 1E 2E 3E)
movdqa xmmG,xmmB
punpcklwd xmmB,xmmF ; xmmB=(01 11 21 31 03 13 23 33 05 15 25 35 07 17 27 37)
punpckhwd xmmG,xmmF ; xmmG=(09 19 29 39 0B 1B 2B 3B 0D 1D 2D 3D 0F 1F 2F 3F)
movdqa xmmD,xmmA
punpckldq xmmA,xmmB ; xmmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33)
punpckhdq xmmD,xmmB ; xmmD=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37)
movdqa xmmH,xmmC
punpckldq xmmC,xmmG ; xmmC=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B)
punpckhdq xmmH,xmmG ; xmmH=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F)
cmp ecx, byte SIZEOF_XMMWORD
jb short .column_st32
test edi, SIZEOF_XMMWORD-1
jnz short .out1
; --(aligned)-------------------
movntdq XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
movntdq XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD
movntdq XMMWORD [edi+2*SIZEOF_XMMWORD], xmmC
movntdq XMMWORD [edi+3*SIZEOF_XMMWORD], xmmH
add edi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr
jmp short .out0
.out1: ; --(unaligned)-----------------
pcmpeqb xmmE,xmmE ; xmmE=(all 1's)
maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA
add edi, byte SIZEOF_XMMWORD ; outptr
maskmovdqu xmmD,xmmE ; movntdqu XMMWORD [edi], xmmD
add edi, byte SIZEOF_XMMWORD ; outptr
maskmovdqu xmmC,xmmE ; movntdqu XMMWORD [edi], xmmC
add edi, byte SIZEOF_XMMWORD ; outptr
maskmovdqu xmmH,xmmE ; movntdqu XMMWORD [edi], xmmH
add edi, byte SIZEOF_XMMWORD ; outptr
.out0:
sub ecx, byte SIZEOF_XMMWORD
jz near .nextrow
add esi, byte SIZEOF_XMMWORD ; inptr0
add ebx, byte SIZEOF_XMMWORD ; inptr1
add edx, byte SIZEOF_XMMWORD ; inptr2
jmp near .columnloop
alignx 16,7
.column_st32:
pcmpeqb xmmE,xmmE ; xmmE=(all 1's)
cmp ecx, byte SIZEOF_XMMWORD/2
jb short .column_st16
maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA
add edi, byte SIZEOF_XMMWORD ; outptr
maskmovdqu xmmD,xmmE ; movntdqu XMMWORD [edi], xmmD
add edi, byte SIZEOF_XMMWORD ; outptr
movdqa xmmA,xmmC
movdqa xmmD,xmmH
sub ecx, byte SIZEOF_XMMWORD/2
.column_st16:
cmp ecx, byte SIZEOF_XMMWORD/4
jb short .column_st15
maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA
add edi, byte SIZEOF_XMMWORD ; outptr
movdqa xmmA,xmmD
sub ecx, byte SIZEOF_XMMWORD/4
.column_st15:
cmp ecx, byte SIZEOF_XMMWORD/16
jb short .nextrow
mov eax,ecx
xor ecx, byte 0x03
inc ecx
shl ecx, 4
movd xmmF,ecx
psrlq xmmE,xmmF
punpcklbw xmmE,xmmE
; ----------------
mov ecx,edi
and ecx, byte SIZEOF_XMMWORD-1
jz short .adj0
lea eax, [ecx+eax*4] ; RGB_PIXELSIZE
cmp eax, byte SIZEOF_XMMWORD
ja short .adj0
and edi, byte (-SIZEOF_XMMWORD) ; align to 16-byte boundary
shl ecx, 3 ; pslldq xmmA,ecx & pslldq xmmE,ecx
movdqa xmmB,xmmA
movdqa xmmG,xmmE
pslldq xmmA, SIZEOF_XMMWORD/2
pslldq xmmE, SIZEOF_XMMWORD/2
movd xmmC,ecx
sub ecx, byte (SIZEOF_XMMWORD/2)*BYTE_BIT
jb short .adj1
movd xmmH,ecx
psllq xmmA,xmmH
psllq xmmE,xmmH
jmp short .adj0
.adj1: neg ecx
movd xmmH,ecx
psrlq xmmA,xmmH
psrlq xmmE,xmmH
psllq xmmB,xmmC
psllq xmmG,xmmC
por xmmA,xmmB
por xmmE,xmmG
.adj0: ; ----------------
maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA
%endif ; RGB_PIXELSIZE ; ---------------
alignx 16,7
.nextrow:
pop ecx
pop esi
pop ebx
pop edx
pop edi
pop eax
add esi, byte SIZEOF_JSAMPROW
add ebx, byte SIZEOF_JSAMPROW
add edx, byte SIZEOF_JSAMPROW
add edi, byte SIZEOF_JSAMPROW ; output_buf
dec eax ; num_rows
jg near .rowloop
sfence ; flush the write buffer
.return:
pop edi
pop esi
; pop edx ; need not be preserved
; pop ecx ; need not be preserved
pop ebx
mov esp,ebp ; esp <- aligned ebp
pop esp ; esp <- original ebp
pop ebp
ret
; For some reason, the OS X linker does not honor the request to align the
; segment unless we do this.
align 16

117
jpeg/simd/jdcolmmx.asm Normal file
Просмотреть файл

@ -0,0 +1,117 @@
;
; jdcolmmx.asm - colorspace conversion (MMX)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright 2009 D. R. Commander
;
; Based on
; x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
; This file should be assembled with NASM (Netwide Assembler),
; can *not* be assembled with Microsoft's MASM or any compatible
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
;
; [TAB8]
%include "jsimdext.inc"
; --------------------------------------------------------------------------
%define SCALEBITS 16
F_0_344 equ 22554 ; FIX(0.34414)
F_0_714 equ 46802 ; FIX(0.71414)
F_1_402 equ 91881 ; FIX(1.40200)
F_1_772 equ 116130 ; FIX(1.77200)
F_0_402 equ (F_1_402 - 65536) ; FIX(1.40200) - FIX(1)
F_0_285 equ ( 65536 - F_0_714) ; FIX(1) - FIX(0.71414)
F_0_228 equ (131072 - F_1_772) ; FIX(2) - FIX(1.77200)
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 16
global EXTN(jconst_ycc_rgb_convert_mmx)
EXTN(jconst_ycc_rgb_convert_mmx):
PW_F0402 times 4 dw F_0_402
PW_MF0228 times 4 dw -F_0_228
PW_MF0344_F0285 times 2 dw -F_0_344, F_0_285
PW_ONE times 4 dw 1
PD_ONEHALF times 2 dd 1 << (SCALEBITS-1)
alignz 16
; --------------------------------------------------------------------------
%include "jdclrmmx.asm"
%undef RGB_RED
%undef RGB_GREEN
%undef RGB_BLUE
%undef RGB_PIXELSIZE
%define RGB_RED 0
%define RGB_GREEN 1
%define RGB_BLUE 2
%define RGB_PIXELSIZE 3
%define jsimd_ycc_rgb_convert_mmx jsimd_ycc_extrgb_convert_mmx
%include "jdclrmmx.asm"
%undef RGB_RED
%undef RGB_GREEN
%undef RGB_BLUE
%undef RGB_PIXELSIZE
%define RGB_RED 0
%define RGB_GREEN 1
%define RGB_BLUE 2
%define RGB_PIXELSIZE 4
%define jsimd_ycc_rgb_convert_mmx jsimd_ycc_extrgbx_convert_mmx
%include "jdclrmmx.asm"
%undef RGB_RED
%undef RGB_GREEN
%undef RGB_BLUE
%undef RGB_PIXELSIZE
%define RGB_RED 2
%define RGB_GREEN 1
%define RGB_BLUE 0
%define RGB_PIXELSIZE 3
%define jsimd_ycc_rgb_convert_mmx jsimd_ycc_extbgr_convert_mmx
%include "jdclrmmx.asm"
%undef RGB_RED
%undef RGB_GREEN
%undef RGB_BLUE
%undef RGB_PIXELSIZE
%define RGB_RED 2
%define RGB_GREEN 1
%define RGB_BLUE 0
%define RGB_PIXELSIZE 4
%define jsimd_ycc_rgb_convert_mmx jsimd_ycc_extbgrx_convert_mmx
%include "jdclrmmx.asm"
%undef RGB_RED
%undef RGB_GREEN
%undef RGB_BLUE
%undef RGB_PIXELSIZE
%define RGB_RED 3
%define RGB_GREEN 2
%define RGB_BLUE 1
%define RGB_PIXELSIZE 4
%define jsimd_ycc_rgb_convert_mmx jsimd_ycc_extxbgr_convert_mmx
%include "jdclrmmx.asm"
%undef RGB_RED
%undef RGB_GREEN
%undef RGB_BLUE
%undef RGB_PIXELSIZE
%define RGB_RED 1
%define RGB_GREEN 2
%define RGB_BLUE 3
%define RGB_PIXELSIZE 4
%define jsimd_ycc_rgb_convert_mmx jsimd_ycc_extxrgb_convert_mmx
%include "jdclrmmx.asm"

117
jpeg/simd/jdcolss2-64.asm Normal file
Просмотреть файл

@ -0,0 +1,117 @@
;
; jdcolss2-64.asm - colorspace conversion (64-bit SSE2)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright 2009 D. R. Commander
;
; Based on
; x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
; This file should be assembled with NASM (Netwide Assembler),
; can *not* be assembled with Microsoft's MASM or any compatible
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
;
; [TAB8]
%include "jsimdext.inc"
; --------------------------------------------------------------------------
%define SCALEBITS 16
F_0_344 equ 22554 ; FIX(0.34414)
F_0_714 equ 46802 ; FIX(0.71414)
F_1_402 equ 91881 ; FIX(1.40200)
F_1_772 equ 116130 ; FIX(1.77200)
F_0_402 equ (F_1_402 - 65536) ; FIX(1.40200) - FIX(1)
F_0_285 equ ( 65536 - F_0_714) ; FIX(1) - FIX(0.71414)
F_0_228 equ (131072 - F_1_772) ; FIX(2) - FIX(1.77200)
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 16
global EXTN(jconst_ycc_rgb_convert_sse2)
EXTN(jconst_ycc_rgb_convert_sse2):
PW_F0402 times 8 dw F_0_402
PW_MF0228 times 8 dw -F_0_228
PW_MF0344_F0285 times 4 dw -F_0_344, F_0_285
PW_ONE times 8 dw 1
PD_ONEHALF times 4 dd 1 << (SCALEBITS-1)
alignz 16
; --------------------------------------------------------------------------
%include "jdclrss2-64.asm"
%undef RGB_RED
%undef RGB_GREEN
%undef RGB_BLUE
%undef RGB_PIXELSIZE
%define RGB_RED 0
%define RGB_GREEN 1
%define RGB_BLUE 2
%define RGB_PIXELSIZE 3
%define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extrgb_convert_sse2
%include "jdclrss2-64.asm"
%undef RGB_RED
%undef RGB_GREEN
%undef RGB_BLUE
%undef RGB_PIXELSIZE
%define RGB_RED 0
%define RGB_GREEN 1
%define RGB_BLUE 2
%define RGB_PIXELSIZE 4
%define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extrgbx_convert_sse2
%include "jdclrss2-64.asm"
%undef RGB_RED
%undef RGB_GREEN
%undef RGB_BLUE
%undef RGB_PIXELSIZE
%define RGB_RED 2
%define RGB_GREEN 1
%define RGB_BLUE 0
%define RGB_PIXELSIZE 3
%define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extbgr_convert_sse2
%include "jdclrss2-64.asm"
%undef RGB_RED
%undef RGB_GREEN
%undef RGB_BLUE
%undef RGB_PIXELSIZE
%define RGB_RED 2
%define RGB_GREEN 1
%define RGB_BLUE 0
%define RGB_PIXELSIZE 4
%define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extbgrx_convert_sse2
%include "jdclrss2-64.asm"
%undef RGB_RED
%undef RGB_GREEN
%undef RGB_BLUE
%undef RGB_PIXELSIZE
%define RGB_RED 3
%define RGB_GREEN 2
%define RGB_BLUE 1
%define RGB_PIXELSIZE 4
%define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extxbgr_convert_sse2
%include "jdclrss2-64.asm"
%undef RGB_RED
%undef RGB_GREEN
%undef RGB_BLUE
%undef RGB_PIXELSIZE
%define RGB_RED 1
%define RGB_GREEN 2
%define RGB_BLUE 3
%define RGB_PIXELSIZE 4
%define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extxrgb_convert_sse2
%include "jdclrss2-64.asm"

Некоторые файлы не были показаны из-за слишком большого количества измененных файлов Показать больше