From f93749e6f2843ffb549186a316d23ea3a7f7a1e3 Mon Sep 17 00:00:00 2001
From: Nigel Tao <nigeltao@golang.org>
Date: Tue, 25 Mar 2014 13:29:48 +1100
Subject: [PATCH] go.image/vp8, go.image/webp: new packages, copied from
 code.google.com/p/vp8-go

This is a straight copy, followed by import path updates.

LGTM=r
R=r, mirtchovski, oleku.konko
CC=golang-codereviews
https://golang.org/cl/79320043
---
 vp8/decode.go      | 356 +++++++++++++++++++++++++++++
 vp8/idct.go        |  98 ++++++++
 vp8/partition.go   | 127 +++++++++++
 vp8/pred.go        | 201 ++++++++++++++++
 vp8/predfunc.go    | 553 +++++++++++++++++++++++++++++++++++++++++++++
 vp8/quant.go       |  95 ++++++++
 vp8/reconstruct.go | 435 +++++++++++++++++++++++++++++++++++
 vp8/token.go       | 381 +++++++++++++++++++++++++++++++
 webp/decode.go     |  75 ++++++
 9 files changed, 2321 insertions(+)
 create mode 100644 vp8/decode.go
 create mode 100644 vp8/idct.go
 create mode 100644 vp8/partition.go
 create mode 100644 vp8/pred.go
 create mode 100644 vp8/predfunc.go
 create mode 100644 vp8/quant.go
 create mode 100644 vp8/reconstruct.go
 create mode 100644 vp8/token.go
 create mode 100644 webp/decode.go

diff --git a/vp8/decode.go b/vp8/decode.go
new file mode 100644
index 0000000..9cb2a71
--- /dev/null
+++ b/vp8/decode.go
@@ -0,0 +1,356 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package vp8 implements a vp8 image and video decoder.
+//
+// The VP8 specification is at:
+// http://datatracker.ietf.org/doc/rfc6386/
+package vp8
+
+// This file implements the top-level decoding algorithm.
+
+import (
+	"errors"
+	"image"
+	"io"
+)
+
+// limitReader wraps an io.Reader to read at most n bytes from it.
+type limitReader struct {
+	r io.Reader // r is the underlying reader.
+	n int       // n is how many more bytes may be read from r.
+}
+
+// ReadFull reads exactly len(p) bytes into p. If len(p) exceeds the remaining limit, it returns io.ErrUnexpectedEOF without reading.
+func (r *limitReader) ReadFull(p []byte) error {
+	if len(p) > r.n {
+		return io.ErrUnexpectedEOF
+	}
+	n, err := io.ReadFull(r.r, p)
+	r.n -= n // Charge the bytes actually read against the limit, even on error.
+	return err
+}
+
+// FrameHeader is a frame header, as specified in section 9.1.
+type FrameHeader struct {
+	KeyFrame          bool   // Set when bit 0 of the first header byte is 0.
+	VersionNumber     uint8  // 3-bit value.
+	ShowFrame         bool   // 1-bit flag.
+	FirstPartitionLen uint32 // 19-bit length, in bytes, of the first partition.
+	Width             int    // 14-bit value; only read for key frames.
+	Height            int    // 14-bit value; only read for key frames.
+	XScale            uint8  // 2-bit value; only read for key frames.
+	YScale            uint8  // 2-bit value; only read for key frames.
+}
+
+const (
+	nSegment     = 4 // Length of the per-segment arrays (quantizer, filterStrength, ...).
+	nSegmentProb = 3 // Length of segmentHeader.prob.
+)
+
+// segmentHeader holds segment-related header information.
+type segmentHeader struct {
+	useSegment     bool                // Read first; if false, no other segment fields are parsed.
+	updateMap      bool                // Whether prob is re-read; forced to false if !useSegment.
+	relativeDelta  bool                // If set, filterStrength is added to the frame-level filter level.
+	quantizer      [nSegment]int8      // Per-segment optional 7-bit signed values.
+	filterStrength [nSegment]int8      // Per-segment optional 6-bit signed values.
+	prob           [nSegmentProb]uint8 // Reset to 0xff on key frames; 0xff when an entry is not coded.
+}
+
+const (
+	nRefLFDelta  = 4 // Length of filterHeader.refLFDelta.
+	nModeLFDelta = 4 // Length of filterHeader.modeLFDelta.
+)
+
+// filterHeader holds filter-related header information.
+type filterHeader struct {
+	simple          bool               // 1-bit flag, read first.
+	level           int8               // 6-bit unsigned value.
+	sharpness       uint8              // 3-bit unsigned value.
+	useLFDelta      bool               // 1-bit flag; gates reading the two delta arrays.
+	refLFDelta      [nRefLFDelta]int8  // Optional 6-bit signed values.
+	modeLFDelta     [nModeLFDelta]int8 // Optional 6-bit signed values.
+	perSegmentLevel [nSegment]int8     // Derived from level and segmentHeader.filterStrength.
+}
+
+// mb is the per-macroblock decode state. A decoder maintains mbw+1 of these
+// as it is decoding macroblocks left-to-right and top-to-bottom: mbw for the
+// macroblocks in the row above, and one for the macroblock to the left.
+type mb struct {
+	// pred is the predictor mode for the 4 bottom or right 4x4 luma regions.
+	pred [4]uint8
+	// nzMask is a mask of 8 bits: 4 for the bottom or right 4x4 luma regions,
+	// and 2 + 2 for the bottom or right 4x4 chroma regions. A 1 bit indicates
+	// that the corresponding region has non-zero coefficients.
+	nzMask uint8
+	// nzY16 is a 0/1 value that is 1 if the macroblock used Y16 prediction and
+	// had non-zero coefficients.
+	nzY16 uint8
+}
+
+// Decoder decodes VP8 bitstreams into frames. Decoding one frame consists of
+// calling Init, DecodeFrameHeader and then DecodeFrame in that order.
+// A Decoder can be re-used to decode multiple frames.
+type Decoder struct {
+	// r is the input bitstream.
+	r limitReader
+	// scratch is a scratch buffer.
+	scratch [8]byte
+	// img is the YCbCr image to decode into.
+	img *image.YCbCr
+	// mbw and mbh are the number of 16x16 macroblocks wide and high the image is.
+	mbw, mbh int
+	// frameHeader is the frame header. When decoding multiple frames,
+	// frames that aren't key frames will inherit the Width, Height,
+	// XScale and YScale of the most recent key frame.
+	frameHeader FrameHeader
+	// Other headers.
+	segmentHeader segmentHeader
+	filterHeader  filterHeader
+	// The image data is divided into a number of independent partitions.
+	// There is 1 "first partition" and between 1 and 8 "other partitions"
+	// for coefficient data.
+	fp  partition
+	op  [8]partition
+	nOP int
+	// Quantization factors.
+	quant [nSegment]quant
+	// DCT/WHT coefficient decoding probabilities.
+	tokenProb   [nPlane][nBand][nContext][nProb]uint8
+	useSkipProb bool
+	skipProb    uint8
+
+	// The eight fields below relate to the current macroblock being decoded.
+	//
+	// Segment-based adjustments.
+	segment int
+	// Per-macroblock state for the macroblock immediately left of and those
+	// macroblocks immediately above the current macroblock.
+	leftMB mb
+	upMB   []mb
+	// Bitmasks for which 4x4 regions of coeff contain non-zero coefficients.
+	nzDCMask, nzACMask uint32
+	// Predictor modes.
+	usePredY16 bool
+	predY16    uint8
+	predC8     uint8
+	predY4     [4][4]uint8
+
+	// The two fields below form a workspace for reconstructing a macroblock.
+	// Their specific sizes are documented in reconstruct.go.
+	coeff [1*16*16 + 2*8*8 + 1*4*4]int16
+	ybr   [1 + 16 + 1 + 8][32]uint8
+}
+
+// NewDecoder returns a new, zero-valued Decoder. Call Init on it before decoding a frame.
+func NewDecoder() *Decoder {
+	return &Decoder{}
+}
+
+// Init initializes the decoder to read at most n bytes from r. Only the input reader is replaced; all other decoder state is kept.
+func (d *Decoder) Init(r io.Reader, n int) {
+	d.r = limitReader{r, n}
+}
+
+// DecodeFrameHeader decodes the frame header: 3 bytes for every frame, plus 7 more bytes for key frames.
+func (d *Decoder) DecodeFrameHeader() (fh FrameHeader, err error) {
+	// All frame headers are at least 3 bytes long.
+	b := d.scratch[:3]
+	if err = d.r.ReadFull(b); err != nil {
+		return
+	}
+	d.frameHeader.KeyFrame = (b[0] & 1) == 0 // A clear low bit marks a key frame.
+	d.frameHeader.VersionNumber = (b[0] >> 1) & 7
+	d.frameHeader.ShowFrame = (b[0]>>4)&1 == 1
+	d.frameHeader.FirstPartitionLen = uint32(b[0])>>5 | uint32(b[1])<<3 | uint32(b[2])<<11 // 19 bits in total.
+	if !d.frameHeader.KeyFrame {
+		return d.frameHeader, nil // Width, Height, XScale and YScale keep their previous values.
+	}
+	// Frame headers for key frames are an additional 7 bytes long.
+	b = d.scratch[:7]
+	if err = d.r.ReadFull(b); err != nil {
+		return
+	}
+	// Check the magic sync code.
+	if b[0] != 0x9d || b[1] != 0x01 || b[2] != 0x2a {
+		err = errors.New("vp8: invalid format")
+		return
+	}
+	d.frameHeader.Width = int(b[4]&0x3f)<<8 | int(b[3]) // Width and Height are 14 bits each; the 2 bits above them are the scales.
+	d.frameHeader.Height = int(b[6]&0x3f)<<8 | int(b[5])
+	d.frameHeader.XScale = b[4] >> 6
+	d.frameHeader.YScale = b[6] >> 6
+	d.mbw = (d.frameHeader.Width + 0x0f) >> 4 // Round up to whole 16x16 macroblocks.
+	d.mbh = (d.frameHeader.Height + 0x0f) >> 4
+	d.segmentHeader = segmentHeader{ // Key frames reset the segment header, the token probabilities and the segment.
+		prob: [3]uint8{0xff, 0xff, 0xff},
+	}
+	d.tokenProb = defaultTokenProb
+	d.segment = 0
+	return d.frameHeader, nil
+}
+
+// ensureImg ensures that d.img is large enough to hold the decoded frame. It also (re)allocates d.upMB whenever it allocates a new image.
+func (d *Decoder) ensureImg() {
+	if d.img != nil {
+		p0, p1 := d.img.Rect.Min, d.img.Rect.Max
+		if p0.X == 0 && p0.Y == 0 && p1.X >= 16*d.mbw && p1.Y >= 16*d.mbh { // NOTE(review): Rect is the cropped Width x Height set below, not the macroblock-padded size; confirm this is the intended reuse test.
+			return
+		}
+	}
+	m := image.NewYCbCr(image.Rect(0, 0, 16*d.mbw, 16*d.mbh), image.YCbCrSubsampleRatio420) // Allocate whole macroblocks, then crop to the frame size.
+	d.img = m.SubImage(image.Rect(0, 0, d.frameHeader.Width, d.frameHeader.Height)).(*image.YCbCr)
+	d.upMB = make([]mb, d.mbw) // One entry per macroblock column.
+}
+
+// parseSegmentHeader parses the segment header, as specified in section 9.3. It reads from the first partition, d.fp.
+func (d *Decoder) parseSegmentHeader() {
+	d.segmentHeader.useSegment = d.fp.readBit(uniformProb)
+	if !d.segmentHeader.useSegment {
+		d.segmentHeader.updateMap = false // The remaining fields keep their previous values.
+		return
+	}
+	d.segmentHeader.updateMap = d.fp.readBit(uniformProb)
+	if d.fp.readBit(uniformProb) {
+		d.segmentHeader.relativeDelta = !d.fp.readBit(uniformProb) // A set bit means the values are not relative.
+		for i := range d.segmentHeader.quantizer {
+			d.segmentHeader.quantizer[i] = int8(d.fp.readOptionalInt(uniformProb, 7))
+		}
+		for i := range d.segmentHeader.filterStrength {
+			d.segmentHeader.filterStrength[i] = int8(d.fp.readOptionalInt(uniformProb, 6))
+		}
+	}
+	if !d.segmentHeader.updateMap { // prob is left unchanged.
+		return
+	}
+	for i := range d.segmentHeader.prob {
+		if d.fp.readBit(uniformProb) {
+			d.segmentHeader.prob[i] = uint8(d.fp.readUint(uniformProb, 8))
+		} else {
+			d.segmentHeader.prob[i] = 0xff // Not coded: fall back to 0xff.
+		}
+	}
+}
+
+// parseFilterHeader parses the filter header, as specified in section 9.4, and derives perSegmentLevel from it and the segment header.
+func (d *Decoder) parseFilterHeader() {
+	d.filterHeader.simple = d.fp.readBit(uniformProb)
+	d.filterHeader.level = int8(d.fp.readUint(uniformProb, 6))
+	d.filterHeader.sharpness = uint8(d.fp.readUint(uniformProb, 3))
+	d.filterHeader.useLFDelta = d.fp.readBit(uniformProb)
+	if d.filterHeader.useLFDelta && d.fp.readBit(uniformProb) { // The second bit is only read when useLFDelta is set.
+		for i := range d.filterHeader.refLFDelta {
+			d.filterHeader.refLFDelta[i] = int8(d.fp.readOptionalInt(uniformProb, 6))
+		}
+		for i := range d.filterHeader.modeLFDelta {
+			d.filterHeader.modeLFDelta[i] = int8(d.fp.readOptionalInt(uniformProb, 6))
+		}
+	}
+	if d.filterHeader.level == 0 { // perSegmentLevel is left unchanged.
+		return
+	}
+	if d.segmentHeader.useSegment {
+		for i := range d.filterHeader.perSegmentLevel {
+			strength := d.segmentHeader.filterStrength[i]
+			if d.segmentHeader.relativeDelta {
+				strength += d.filterHeader.level // Relative strengths are offsets from the frame level.
+			}
+			d.filterHeader.perSegmentLevel[i] = strength
+		}
+	} else {
+		d.filterHeader.perSegmentLevel[0] = d.filterHeader.level // Only entry 0 is set when segments are unused.
+	}
+}
+
+// parseOtherPartitions parses the other partitions, as specified in section 9.5. It returns io.ErrUnexpectedEOF if the data is too short.
+func (d *Decoder) parseOtherPartitions() error {
+	buf := make([]byte, d.r.n) // Read everything that remains within the limit.
+	if err := d.r.ReadFull(buf); err != nil {
+		return err
+	}
+	d.nOP = 1 << d.fp.readUint(uniformProb, 2)
+	n := 3 * (d.nOP - 1) // d.nOP is 1, 2, 4 or 8; every partition but the last has an explicit 3-byte length.
+	if n > len(buf) {
+		return io.ErrUnexpectedEOF
+	}
+	partLen, buf := buf[:n], buf[n:]
+	for i := 0; i < d.nOP-1; i++ {
+		m := int(partLen[3*i+0]) | int(partLen[3*i+1])<<8 | int(partLen[3*i+2])<<16 // 24-bit little-endian length.
+		if m > len(buf) {
+			return io.ErrUnexpectedEOF
+		}
+		d.op[i].init(buf[:m])
+		buf = buf[m:]
+	}
+	d.op[d.nOP-1].init(buf) // The last partition takes whatever is left.
+	return nil
+}
+
+// parseOtherHeaders parses header information other than the frame header. It reads the first partition and then all other partitions from d.r.
+func (d *Decoder) parseOtherHeaders() error {
+	// Initialize and parse the first partition.
+	firstPartition := make([]byte, d.frameHeader.FirstPartitionLen)
+	if err := d.r.ReadFull(firstPartition); err != nil {
+		return err
+	}
+	d.fp.init(firstPartition)
+	if d.frameHeader.KeyFrame {
+		// Read and ignore the color space and pixel clamp values. They are
+		// specified in section 9.2, but are unimplemented.
+		d.fp.readBit(uniformProb)
+		d.fp.readBit(uniformProb)
+	}
+	d.parseSegmentHeader()
+	d.parseFilterHeader()
+	if err := d.parseOtherPartitions(); err != nil {
+		return err
+	}
+	d.parseQuant()
+	if !d.frameHeader.KeyFrame {
+		// Golden and AltRef frames are specified in section 9.7.
+		// TODO(nigeltao): implement. Note that they are only used for video, not still images.
+		return errors.New("vp8: Golden / AltRef frames are not implemented")
+	}
+	// Read and ignore the refreshLastFrameBuffer bit, specified in section 9.8.
+	// It applies only to video, and not still images.
+	d.fp.readBit(uniformProb)
+	d.parseTokenProb()
+	d.useSkipProb = d.fp.readBit(uniformProb)
+	if d.useSkipProb {
+		d.skipProb = uint8(d.fp.readUint(uniformProb, 8))
+	}
+	if d.fp.unexpectedEOF { // readBit records, rather than returns, a read past the end of the partition.
+		return io.ErrUnexpectedEOF
+	}
+	return nil
+}
+
+// DecodeFrame decodes the frame and returns it as a YCbCr image.
+// The image's contents are valid up until the next call to Decoder.Init.
+func (d *Decoder) DecodeFrame() (*image.YCbCr, error) {
+	d.ensureImg()
+	if err := d.parseOtherHeaders(); err != nil {
+		return nil, err
+	}
+	for mbx := 0; mbx < d.mbw; mbx++ { // Clear the state for the row above before the first row.
+		d.upMB[mbx] = mb{}
+	}
+	for mby := 0; mby < d.mbh; mby++ {
+		d.leftMB = mb{} // Each row starts with cleared left-neighbor state.
+		for mbx := 0; mbx < d.mbw; mbx++ {
+			d.reconstruct(mbx, mby)
+		}
+		// TODO(nigeltao): filter, as specified in chapter 15.
+	}
+	if d.fp.unexpectedEOF { // Truncated partitions are only reported after all macroblocks have been processed.
+		return nil, io.ErrUnexpectedEOF
+	}
+	for i := 0; i < d.nOP; i++ {
+		if d.op[i].unexpectedEOF {
+			return nil, io.ErrUnexpectedEOF
+		}
+	}
+	return d.img, nil
+}
diff --git a/vp8/idct.go b/vp8/idct.go
new file mode 100644
index 0000000..929af2c
--- /dev/null
+++ b/vp8/idct.go
@@ -0,0 +1,98 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package vp8
+
+// This file implements the inverse Discrete Cosine Transform and the inverse
+// Walsh Hadamard Transform (WHT), as specified in sections 14.3 and 14.4.
+
+func clip8(i int32) uint8 { // clip8 clamps i to the range [0, 255].
+	if i < 0 {
+		return 0
+	}
+	if i > 255 {
+		return 255
+	}
+	return uint8(i)
+}
+
+func (z *Decoder) inverseDCT4(y, x, coeffBase int) { // inverseDCT4 inverse-transforms the 16 coefficients at z.coeff[coeffBase:] and adds the result, clipped, to the 4x4 region of z.ybr at row y, column x.
+	const (
+		c1 = 85627 // 65536 * cos(pi/8) * sqrt(2).
+		c2 = 35468 // 65536 * sin(pi/8) * sqrt(2).
+	)
+	var m [4][4]int32
+	for i := 0; i < 4; i++ { // First pass: combine the coefficients 0, 4, 8 and 12 places after coeffBase, which advances by one per iteration.
+		a := int32(z.coeff[coeffBase+0]) + int32(z.coeff[coeffBase+8])
+		b := int32(z.coeff[coeffBase+0]) - int32(z.coeff[coeffBase+8])
+		c := (int32(z.coeff[coeffBase+4])*c2)>>16 - (int32(z.coeff[coeffBase+12])*c1)>>16
+		d := (int32(z.coeff[coeffBase+4])*c1)>>16 + (int32(z.coeff[coeffBase+12])*c2)>>16
+		m[i][0] = a + d
+		m[i][1] = b + c
+		m[i][2] = b - c
+		m[i][3] = a - d
+		coeffBase++
+	}
+	for j := 0; j < 4; j++ { // Second pass: combine m[0..3][j] into output row j; the +4 rounds the final >>3.
+		dc := m[0][j] + 4
+		a := dc + m[2][j]
+		b := dc - m[2][j]
+		c := (m[1][j]*c2)>>16 - (m[3][j]*c1)>>16
+		d := (m[1][j]*c1)>>16 + (m[3][j]*c2)>>16
+		z.ybr[y+j][x+0] = clip8(int32(z.ybr[y+j][x+0]) + (a+d)>>3)
+		z.ybr[y+j][x+1] = clip8(int32(z.ybr[y+j][x+1]) + (b+c)>>3)
+		z.ybr[y+j][x+2] = clip8(int32(z.ybr[y+j][x+2]) + (b-c)>>3)
+		z.ybr[y+j][x+3] = clip8(int32(z.ybr[y+j][x+3]) + (a-d)>>3)
+	}
+}
+
+func (z *Decoder) inverseDCT4DCOnly(y, x, coeffBase int) { // inverseDCT4DCOnly uses only z.coeff[coeffBase]: it adds the same rounded value to all 16 pixels of the 4x4 region at row y, column x.
+	dc := (int32(z.coeff[coeffBase+0]) + 4) >> 3
+	for j := 0; j < 4; j++ {
+		for i := 0; i < 4; i++ {
+			z.ybr[y+j][x+i] = clip8(int32(z.ybr[y+j][x+i]) + dc)
+		}
+	}
+}
+
+func (z *Decoder) inverseDCT8(y, x, coeffBase int) { // inverseDCT8 applies inverseDCT4 to the four 4x4 quadrants of an 8x8 region, using 16 consecutive coefficients per quadrant.
+	z.inverseDCT4(y+0, x+0, coeffBase+0*16)
+	z.inverseDCT4(y+0, x+4, coeffBase+1*16)
+	z.inverseDCT4(y+4, x+0, coeffBase+2*16)
+	z.inverseDCT4(y+4, x+4, coeffBase+3*16)
+}
+
+func (z *Decoder) inverseDCT8DCOnly(y, x, coeffBase int) { // inverseDCT8DCOnly applies inverseDCT4DCOnly to the four 4x4 quadrants of an 8x8 region, in the same order as inverseDCT8.
+	z.inverseDCT4DCOnly(y+0, x+0, coeffBase+0*16)
+	z.inverseDCT4DCOnly(y+0, x+4, coeffBase+1*16)
+	z.inverseDCT4DCOnly(y+4, x+0, coeffBase+2*16)
+	z.inverseDCT4DCOnly(y+4, x+4, coeffBase+3*16)
+}
+
+func (d *Decoder) inverseWHT16() { // inverseWHT16 transforms the 16 coefficients at d.coeff[384:400] and stores the 16 results at d.coeff[0], d.coeff[16], ..., d.coeff[240].
+	var m [16]int32
+	for i := 0; i < 4; i++ { // 384 = 1*16*16 + 2*8*8, so the inputs are the final 4*4 entries of d.coeff.
+		a0 := int32(d.coeff[384+0+i]) + int32(d.coeff[384+12+i])
+		a1 := int32(d.coeff[384+4+i]) + int32(d.coeff[384+8+i])
+		a2 := int32(d.coeff[384+4+i]) - int32(d.coeff[384+8+i])
+		a3 := int32(d.coeff[384+0+i]) - int32(d.coeff[384+12+i])
+		m[0+i] = a0 + a1
+		m[8+i] = a0 - a1
+		m[4+i] = a3 + a2
+		m[12+i] = a3 - a2
+	}
+	out := 0
+	for i := 0; i < 4; i++ { // Second pass over m[4*i : 4*i+4]; the +3 is folded in before the final >>3.
+		dc := m[0+i*4] + 3
+		a0 := dc + m[3+i*4]
+		a1 := m[1+i*4] + m[2+i*4]
+		a2 := m[1+i*4] - m[2+i*4]
+		a3 := dc - m[3+i*4]
+		d.coeff[out+0] = int16((a0 + a1) >> 3)
+		d.coeff[out+16] = int16((a3 + a2) >> 3)
+		d.coeff[out+32] = int16((a0 - a1) >> 3)
+		d.coeff[out+48] = int16((a3 - a2) >> 3)
+		out += 64
+	}
+}
diff --git a/vp8/partition.go b/vp8/partition.go
new file mode 100644
index 0000000..a69bd65
--- /dev/null
+++ b/vp8/partition.go
@@ -0,0 +1,127 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package vp8
+
+// Each VP8 frame consists of between 2 and 9 bitstream partitions.
+// Each partition is byte-aligned and is independently arithmetic-encoded.
+//
+// This file implements decoding a partition's bitstream, as specified in
+// chapter 7. The implementation follows libwebp's approach instead of the
+// specification's reference C implementation. For example, we use a look-up
+// table instead of a for loop to recalibrate the encoded range.
+
+var (
+	lutShift = [127]uint8{ // lutShift[r] is how far readBit shifts bits left when rangeM1 == r (r < 127).
+		7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
+		3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+		2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+		2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+	}
+	lutRangeM1 = [127]uint8{ // lutRangeM1[r] is the value readBit replaces rangeM1 with when rangeM1 == r (r < 127).
+		127,
+		127, 191,
+		127, 159, 191, 223,
+		127, 143, 159, 175, 191, 207, 223, 239,
+		127, 135, 143, 151, 159, 167, 175, 183, 191, 199, 207, 215, 223, 231, 239, 247,
+		127, 131, 135, 139, 143, 147, 151, 155, 159, 163, 167, 171, 175, 179, 183, 187,
+		191, 195, 199, 203, 207, 211, 215, 219, 223, 227, 231, 235, 239, 243, 247, 251,
+		127, 129, 131, 133, 135, 137, 139, 141, 143, 145, 147, 149, 151, 153, 155, 157,
+		159, 161, 163, 165, 167, 169, 171, 173, 175, 177, 179, 181, 183, 185, 187, 189,
+		191, 193, 195, 197, 199, 201, 203, 205, 207, 209, 211, 213, 215, 217, 219, 221,
+		223, 225, 227, 229, 231, 233, 235, 237, 239, 241, 243, 245, 247, 249, 251, 253,
+	}
+)
+
+// uniformProb represents a 50% probability that the next bit is 0. It is the prob passed to readBit and friends when parsing the headers in decode.go.
+const uniformProb = 128
+
+// partition holds arithmetic-coded bits.
+type partition struct {
+	// buf is the input bytes.
+	buf []byte
+	// r is how many of buf's bytes have been consumed.
+	r int
+	// rangeM1 is range minus 1, where range is in the arithmetic coding sense,
+	// not the Go language sense.
+	rangeM1 uint32
+	// bits and nBits hold those bits shifted out of buf but not yet consumed.
+	bits  uint32
+	nBits uint8
+	// unexpectedEOF tells whether we tried to read past buf. Only init clears it.
+	unexpectedEOF bool
+}
+
+// init initializes the partition to decode buf from its start, discarding any previous state.
+func (p *partition) init(buf []byte) {
+	p.buf = buf
+	p.r = 0
+	p.rangeM1 = 254 // The initial range is 255.
+	p.bits = 0
+	p.nBits = 0
+	p.unexpectedEOF = false
+}
+
+// readBit returns the next bit. A larger prob makes a false (0) result more likely. Past the end of buf, it sets unexpectedEOF and returns false.
+func (p *partition) readBit(prob uint8) bool {
+	if p.nBits < 8 { // Refill with one more byte of input.
+		if p.r >= len(p.buf) {
+			p.unexpectedEOF = true
+			return false
+		}
+		p.bits |= uint32(p.buf[p.r]) << (8 - p.nBits)
+		p.r++
+		p.nBits += 8
+	}
+	split := (p.rangeM1*uint32(prob))>>8 + 1
+	bit := p.bits >= split<<8 // The bit is 1 when bits lies at or above the scaled split point.
+	if bit {
+		p.rangeM1 -= split
+		p.bits -= split << 8
+	} else {
+		p.rangeM1 = split - 1
+	}
+	if p.rangeM1 < 127 { // Renormalize via the look-up tables so that rangeM1 is at least 127 again.
+		shift := lutShift[p.rangeM1]
+		p.rangeM1 = uint32(lutRangeM1[p.rangeM1])
+		p.bits <<= shift
+		p.nBits -= shift
+	}
+	return bit
+}
+
+// readUint returns the next n-bit unsigned integer, read most significant bit first with every bit decoded using prob.
+func (p *partition) readUint(prob, n uint8) uint32 {
+	var u uint32
+	for n > 0 {
+		n--
+		if p.readBit(prob) {
+			u |= 1 << n
+		}
+	}
+	return u
+}
+
+// readInt returns the next n-bit signed integer: an n-bit magnitude followed by one sign bit, where a set sign bit negates the magnitude.
+func (p *partition) readInt(prob, n uint8) int32 {
+	u := p.readUint(prob, n)
+	b := p.readBit(prob)
+	if b {
+		return -int32(u)
+	}
+	return int32(u)
+}
+
+// readOptionalInt returns the next n-bit signed integer in an encoding
+// where the likely result is zero: a clear leading bit means 0, with nothing more read.
+func (p *partition) readOptionalInt(prob, n uint8) int32 {
+	if !p.readBit(prob) {
+		return 0
+	}
+	return p.readInt(prob, n)
+}
diff --git a/vp8/pred.go b/vp8/pred.go
new file mode 100644
index 0000000..58c2689
--- /dev/null
+++ b/vp8/pred.go
@@ -0,0 +1,201 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package vp8
+
+// This file implements parsing the predictor modes, as specified in chapter
+// 11.
+
+func (d *Decoder) parsePredModeY16(mbx int) { // parsePredModeY16 reads a 16x16 luma predictor mode from d.fp into d.predY16, using fixed probabilities.
+	var p uint8
+	if !d.fp.readBit(156) {
+		if !d.fp.readBit(163) {
+			p = predDC
+		} else {
+			p = predVE
+		}
+	} else if !d.fp.readBit(128) {
+		p = predHE
+	} else {
+		p = predTM
+	}
+	for i := 0; i < 4; i++ { // Record the mode in all four pred entries of the above (column mbx) and left state.
+		d.upMB[mbx].pred[i] = p
+		d.leftMB.pred[i] = p
+	}
+	d.predY16 = p
+}
+
+func (d *Decoder) parsePredModeC8() { // parsePredModeC8 reads an 8x8 predictor mode (DC, VE, HE or TM) from d.fp into d.predC8, using fixed probabilities.
+	if !d.fp.readBit(142) {
+		d.predC8 = predDC
+	} else if !d.fp.readBit(114) {
+		d.predC8 = predVE
+	} else if !d.fp.readBit(183) {
+		d.predC8 = predHE
+	} else {
+		d.predC8 = predTM
+	}
+}
+
+func (d *Decoder) parsePredModeY4(mbx int) { // parsePredModeY4 reads the sixteen 4x4 luma predictor modes from d.fp into d.predY4[j][i], row j by row j.
+	for j := 0; j < 4; j++ {
+		p := d.leftMB.pred[j] // p starts as the mode left of row j and then tracks the mode just decoded.
+		for i := 0; i < 4; i++ {
+			prob := &predProb[d.upMB[mbx].pred[i]][p] // The probabilities depend on the modes above and to the left.
+			if !d.fp.readBit(prob[0]) {
+				p = predDC
+			} else if !d.fp.readBit(prob[1]) {
+				p = predTM
+			} else if !d.fp.readBit(prob[2]) {
+				p = predVE
+			} else if !d.fp.readBit(prob[3]) {
+				if !d.fp.readBit(prob[4]) {
+					p = predHE
+				} else if !d.fp.readBit(prob[5]) {
+					p = predRD
+				} else {
+					p = predVR
+				}
+			} else if !d.fp.readBit(prob[6]) {
+				p = predLD
+			} else if !d.fp.readBit(prob[7]) {
+				p = predVL
+			} else if !d.fp.readBit(prob[8]) {
+				p = predHD
+			} else {
+				p = predHU
+			}
+			d.predY4[j][i] = p
+			d.upMB[mbx].pred[i] = p // Column i's "above" mode for the next row is this mode.
+		}
+		d.leftMB.pred[j] = p // The last mode of the row is what the next macroblock sees on its left.
+	}
+}
+
+// predProb are the probabilities to decode a 4x4 region's predictor mode given
+// the predictor modes of the regions above and left of it, indexed in that
+// order: [above][left]. These values are specified in section 11.5.
+var predProb = [nPred][nPred][9]uint8{
+	{
+		{231, 120, 48, 89, 115, 113, 120, 152, 112},
+		{152, 179, 64, 126, 170, 118, 46, 70, 95},
+		{175, 69, 143, 80, 85, 82, 72, 155, 103},
+		{56, 58, 10, 171, 218, 189, 17, 13, 152},
+		{114, 26, 17, 163, 44, 195, 21, 10, 173},
+		{121, 24, 80, 195, 26, 62, 44, 64, 85},
+		{144, 71, 10, 38, 171, 213, 144, 34, 26},
+		{170, 46, 55, 19, 136, 160, 33, 206, 71},
+		{63, 20, 8, 114, 114, 208, 12, 9, 226},
+		{81, 40, 11, 96, 182, 84, 29, 16, 36},
+	},
+	{
+		{134, 183, 89, 137, 98, 101, 106, 165, 148},
+		{72, 187, 100, 130, 157, 111, 32, 75, 80},
+		{66, 102, 167, 99, 74, 62, 40, 234, 128},
+		{41, 53, 9, 178, 241, 141, 26, 8, 107},
+		{74, 43, 26, 146, 73, 166, 49, 23, 157},
+		{65, 38, 105, 160, 51, 52, 31, 115, 128},
+		{104, 79, 12, 27, 217, 255, 87, 17, 7},
+		{87, 68, 71, 44, 114, 51, 15, 186, 23},
+		{47, 41, 14, 110, 182, 183, 21, 17, 194},
+		{66, 45, 25, 102, 197, 189, 23, 18, 22},
+	},
+	{
+		{88, 88, 147, 150, 42, 46, 45, 196, 205},
+		{43, 97, 183, 117, 85, 38, 35, 179, 61},
+		{39, 53, 200, 87, 26, 21, 43, 232, 171},
+		{56, 34, 51, 104, 114, 102, 29, 93, 77},
+		{39, 28, 85, 171, 58, 165, 90, 98, 64},
+		{34, 22, 116, 206, 23, 34, 43, 166, 73},
+		{107, 54, 32, 26, 51, 1, 81, 43, 31},
+		{68, 25, 106, 22, 64, 171, 36, 225, 114},
+		{34, 19, 21, 102, 132, 188, 16, 76, 124},
+		{62, 18, 78, 95, 85, 57, 50, 48, 51},
+	},
+	{
+		{193, 101, 35, 159, 215, 111, 89, 46, 111},
+		{60, 148, 31, 172, 219, 228, 21, 18, 111},
+		{112, 113, 77, 85, 179, 255, 38, 120, 114},
+		{40, 42, 1, 196, 245, 209, 10, 25, 109},
+		{88, 43, 29, 140, 166, 213, 37, 43, 154},
+		{61, 63, 30, 155, 67, 45, 68, 1, 209},
+		{100, 80, 8, 43, 154, 1, 51, 26, 71},
+		{142, 78, 78, 16, 255, 128, 34, 197, 171},
+		{41, 40, 5, 102, 211, 183, 4, 1, 221},
+		{51, 50, 17, 168, 209, 192, 23, 25, 82},
+	},
+	{
+		{138, 31, 36, 171, 27, 166, 38, 44, 229},
+		{67, 87, 58, 169, 82, 115, 26, 59, 179},
+		{63, 59, 90, 180, 59, 166, 93, 73, 154},
+		{40, 40, 21, 116, 143, 209, 34, 39, 175},
+		{47, 15, 16, 183, 34, 223, 49, 45, 183},
+		{46, 17, 33, 183, 6, 98, 15, 32, 183},
+		{57, 46, 22, 24, 128, 1, 54, 17, 37},
+		{65, 32, 73, 115, 28, 128, 23, 128, 205},
+		{40, 3, 9, 115, 51, 192, 18, 6, 223},
+		{87, 37, 9, 115, 59, 77, 64, 21, 47},
+	},
+	{
+		{104, 55, 44, 218, 9, 54, 53, 130, 226},
+		{64, 90, 70, 205, 40, 41, 23, 26, 57},
+		{54, 57, 112, 184, 5, 41, 38, 166, 213},
+		{30, 34, 26, 133, 152, 116, 10, 32, 134},
+		{39, 19, 53, 221, 26, 114, 32, 73, 255},
+		{31, 9, 65, 234, 2, 15, 1, 118, 73},
+		{75, 32, 12, 51, 192, 255, 160, 43, 51},
+		{88, 31, 35, 67, 102, 85, 55, 186, 85},
+		{56, 21, 23, 111, 59, 205, 45, 37, 192},
+		{55, 38, 70, 124, 73, 102, 1, 34, 98},
+	},
+	{
+		{125, 98, 42, 88, 104, 85, 117, 175, 82},
+		{95, 84, 53, 89, 128, 100, 113, 101, 45},
+		{75, 79, 123, 47, 51, 128, 81, 171, 1},
+		{57, 17, 5, 71, 102, 57, 53, 41, 49},
+		{38, 33, 13, 121, 57, 73, 26, 1, 85},
+		{41, 10, 67, 138, 77, 110, 90, 47, 114},
+		{115, 21, 2, 10, 102, 255, 166, 23, 6},
+		{101, 29, 16, 10, 85, 128, 101, 196, 26},
+		{57, 18, 10, 102, 102, 213, 34, 20, 43},
+		{117, 20, 15, 36, 163, 128, 68, 1, 26},
+	},
+	{
+		{102, 61, 71, 37, 34, 53, 31, 243, 192},
+		{69, 60, 71, 38, 73, 119, 28, 222, 37},
+		{68, 45, 128, 34, 1, 47, 11, 245, 171},
+		{62, 17, 19, 70, 146, 85, 55, 62, 70},
+		{37, 43, 37, 154, 100, 163, 85, 160, 1},
+		{63, 9, 92, 136, 28, 64, 32, 201, 85},
+		{75, 15, 9, 9, 64, 255, 184, 119, 16},
+		{86, 6, 28, 5, 64, 255, 25, 248, 1},
+		{56, 8, 17, 132, 137, 255, 55, 116, 128},
+		{58, 15, 20, 82, 135, 57, 26, 121, 40},
+	},
+	{
+		{164, 50, 31, 137, 154, 133, 25, 35, 218},
+		{51, 103, 44, 131, 131, 123, 31, 6, 158},
+		{86, 40, 64, 135, 148, 224, 45, 183, 128},
+		{22, 26, 17, 131, 240, 154, 14, 1, 209},
+		{45, 16, 21, 91, 64, 222, 7, 1, 197},
+		{56, 21, 39, 155, 60, 138, 23, 102, 213},
+		{83, 12, 13, 54, 192, 255, 68, 47, 28},
+		{85, 26, 85, 85, 128, 128, 32, 146, 171},
+		{18, 11, 7, 63, 144, 171, 4, 4, 246},
+		{35, 27, 10, 146, 174, 171, 12, 26, 128},
+	},
+	{
+		{190, 80, 35, 99, 180, 80, 126, 54, 45},
+		{85, 126, 47, 87, 176, 51, 41, 20, 32},
+		{101, 75, 128, 139, 118, 146, 116, 128, 85},
+		{56, 41, 15, 176, 236, 85, 37, 9, 62},
+		{71, 30, 17, 119, 118, 255, 17, 18, 138},
+		{101, 38, 60, 138, 55, 70, 43, 26, 142},
+		{146, 36, 19, 30, 171, 255, 97, 27, 20},
+		{138, 45, 61, 62, 219, 1, 81, 188, 64},
+		{32, 41, 20, 117, 151, 142, 20, 21, 163},
+		{112, 19, 12, 61, 195, 128, 48, 4, 24},
+	},
+}
diff --git a/vp8/predfunc.go b/vp8/predfunc.go
new file mode 100644
index 0000000..f899958
--- /dev/null
+++ b/vp8/predfunc.go
@@ -0,0 +1,553 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package vp8
+
+// This file implements the prediction functions, as specified in chapter 12.
+//
+// For each macroblock (of 1x16x16 luma and 2x8x8 chroma coefficients), the
+// luma values are either predicted as one large 16x16 region or 16 separate
+// 4x4 regions. The chroma values are always predicted as one 8x8 region.
+//
+// For 4x4 regions, the target block's predicted values (Xs) are a function of
+// its previously-decoded top and left border values, as well as a number of
+// pixels from the top-right:
+//
+//	a b c d e f g h
+//	p X X X X
+//	q X X X X
+//	r X X X X
+//	s X X X X
+//
+// The predictor modes are:
+//	- DC: all Xs = (b + c + d + e + p + q + r + s + 4) / 8.
+//	- TM: the first X = (b + p - a), the second X = (c + p - a), and so on.
+//	- VE: each X = the weighted average of its column's top value and that
+//	      value's neighbors, i.e. averages of abc, bcd, cde or def.
+//	- HE: similar to VE except rows instead of columns, and the final row is
+//	      an average of r, s and s.
+//	- RD, VR, LD, VL, HD, HU: these diagonal modes ("Right Down", "Vertical
+//	      Right", etc) are more complicated and are described in section 12.3.
+// All Xs are clipped to the range [0, 255].
+//
+// For 8x8 and 16x16 regions, the target block's predicted values are a
+// function of the top and left border values without the top-right overhang,
+// i.e. without the 8x8 or 16x16 equivalent of f, g and h. Furthermore:
+//	- There are no diagonal predictor modes, only DC, TM, VE and HE.
+//	- The DC mode has variants for macroblocks in the top row and/or left
+//	  column, i.e. for macroblocks with mby == 0 || mbx == 0.
+//	- The VE and HE modes take only the column top or row left values; they do
+//	  not smooth that top/left value with its neighbors.
+
+// nPred is the number of predictor modes, not including the Top/Left versions
+// of the DC predictor mode.
+const nPred = 10
+
+const (
+	predDC = iota // predDC through predHE have 4x4, 8x8 and 16x16 prediction functions.
+	predTM
+	predVE
+	predHE
+	predRD // predRD through predHU are the diagonal modes, which only have 4x4 prediction functions.
+	predVR
+	predLD
+	predVL
+	predHD
+	predHU
+	predDCTop // The last three are the DC variants for the top row and/or left column; they are not counted in nPred.
+	predDCLeft
+	predDCTopLeft
+)
+
+func checkTopLeftPred(mbx, mby int, p uint8) uint8 { // checkTopLeftPred replaces predDC by its variant for macroblocks with mbx == 0 and/or mby == 0; any other p is returned unchanged.
+	if p != predDC {
+		return p
+	}
+	if mbx == 0 {
+		if mby == 0 {
+			return predDCTopLeft
+		}
+		return predDCLeft
+	}
+	if mby == 0 {
+		return predDCTop
+	}
+	return predDC
+}
+
+var predFunc4 = [...]func(*Decoder, int, int){ // Entries follow the order of the pred* constants; nil marks a mode with no 4x4 function.
+	predFunc4DC,
+	predFunc4TM,
+	predFunc4VE,
+	predFunc4HE,
+	predFunc4RD,
+	predFunc4VR,
+	predFunc4LD,
+	predFunc4VL,
+	predFunc4HD,
+	predFunc4HU,
+	nil,
+	nil,
+	nil,
+}
+
+var predFunc8 = [...]func(*Decoder, int, int){ // Entries follow the order of the pred* constants; nil marks a mode with no 8x8 function.
+	predFunc8DC,
+	predFunc8TM,
+	predFunc8VE,
+	predFunc8HE,
+	nil,
+	nil,
+	nil,
+	nil,
+	nil,
+	nil,
+	predFunc8DCTop,
+	predFunc8DCLeft,
+	predFunc8DCTopLeft,
+}
+
+var predFunc16 = [...]func(*Decoder, int, int){ // Entries follow the order of the pred* constants; nil marks a mode with no 16x16 function.
+	predFunc16DC,
+	predFunc16TM,
+	predFunc16VE,
+	predFunc16HE,
+	nil,
+	nil,
+	nil,
+	nil,
+	nil,
+	nil,
+	predFunc16DCTop,
+	predFunc16DCLeft,
+	predFunc16DCTopLeft,
+}
+
+func predFunc4DC(z *Decoder, y, x int) { // predFunc4DC fills the 4x4 region of z.ybr at row y, column x with the rounded average of the 4 pixels above it and the 4 pixels left of it.
+	sum := uint32(4) // Start at 4 so that the division by 8 rounds to nearest.
+	for i := 0; i < 4; i++ {
+		sum += uint32(z.ybr[y-1][x+i])
+	}
+	for j := 0; j < 4; j++ {
+		sum += uint32(z.ybr[y+j][x-1])
+	}
+	avg := uint8(sum / 8)
+	for j := 0; j < 4; j++ {
+		for i := 0; i < 4; i++ {
+			z.ybr[y+j][x+i] = avg
+		}
+	}
+}
+
+func predFunc4TM(z *Decoder, y, x int) { // predFunc4TM sets each pixel of the 4x4 region at row y, column x to (pixel above its column) + (pixel left of its row) - (top-left corner pixel), clipped to [0, 255].
+	delta0 := -int32(z.ybr[y-1][x-1])
+	for j := 0; j < 4; j++ {
+		delta1 := delta0 + int32(z.ybr[y+j][x-1])
+		for i := 0; i < 4; i++ {
+			delta2 := delta1 + int32(z.ybr[y-1][x+i])
+			z.ybr[y+j][x+i] = uint8(clip(delta2, 0, 255))
+		}
+	}
+}
+
+func predFunc4VE(z *Decoder, y, x int) { // predFunc4VE fills each column of the 4x4 region at row y, column x with a 1-2-1 weighted average of the pixel above that column and its two horizontal neighbors.
+	a := int32(z.ybr[y-1][x-1]) // a through f are the row above, from column x-1 to column x+4.
+	b := int32(z.ybr[y-1][x+0])
+	c := int32(z.ybr[y-1][x+1])
+	d := int32(z.ybr[y-1][x+2])
+	e := int32(z.ybr[y-1][x+3])
+	f := int32(z.ybr[y-1][x+4])
+	abc := uint8((a + 2*b + c + 2) / 4)
+	bcd := uint8((b + 2*c + d + 2) / 4)
+	cde := uint8((c + 2*d + e + 2) / 4)
+	def := uint8((d + 2*e + f + 2) / 4)
+	for j := 0; j < 4; j++ {
+		z.ybr[y+j][x+0] = abc
+		z.ybr[y+j][x+1] = bcd
+		z.ybr[y+j][x+2] = cde
+		z.ybr[y+j][x+3] = def
+	}
+}
+
+func predFunc4HE(z *Decoder, y, x int) { // predFunc4HE fills each row of the 4x4 region at row y, column x with a 1-2-1 weighted average of the pixel left of that row and its two vertical neighbors.
+	s := int32(z.ybr[y+3][x-1]) // s, r, q, p and a are the column to the left, from row y+3 up to row y-1.
+	r := int32(z.ybr[y+2][x-1])
+	q := int32(z.ybr[y+1][x-1])
+	p := int32(z.ybr[y+0][x-1])
+	a := int32(z.ybr[y-1][x-1])
+	ssr := uint8((s + 2*s + r + 2) / 4) // There is no pixel below s, so s is counted again in its place.
+	srq := uint8((s + 2*r + q + 2) / 4)
+	rqp := uint8((r + 2*q + p + 2) / 4)
+	apq := uint8((a + 2*p + q + 2) / 4)
+	for i := 0; i < 4; i++ {
+		z.ybr[y+0][x+i] = apq
+		z.ybr[y+1][x+i] = rqp
+		z.ybr[y+2][x+i] = srq
+		z.ybr[y+3][x+i] = ssr
+	}
+}
+
+func predFunc4RD(z *Decoder, y, x int) { // predFunc4RD fills the 4x4 region at row y, column x so that each down-right diagonal holds one 1-2-1 weighted average of the left and top border pixels.
+	s := int32(z.ybr[y+3][x-1]) // s, r, q and p are the left border, from row y+3 up to row y.
+	r := int32(z.ybr[y+2][x-1])
+	q := int32(z.ybr[y+1][x-1])
+	p := int32(z.ybr[y+0][x-1])
+	a := int32(z.ybr[y-1][x-1]) // a is the top-left corner; b through e are the row above, columns x to x+3.
+	b := int32(z.ybr[y-1][x+0])
+	c := int32(z.ybr[y-1][x+1])
+	d := int32(z.ybr[y-1][x+2])
+	e := int32(z.ybr[y-1][x+3])
+	srq := uint8((s + 2*r + q + 2) / 4)
+	rqp := uint8((r + 2*q + p + 2) / 4)
+	qpa := uint8((q + 2*p + a + 2) / 4)
+	pab := uint8((p + 2*a + b + 2) / 4)
+	abc := uint8((a + 2*b + c + 2) / 4)
+	bcd := uint8((b + 2*c + d + 2) / 4)
+	cde := uint8((c + 2*d + e + 2) / 4)
+	z.ybr[y+0][x+0] = pab
+	z.ybr[y+0][x+1] = abc
+	z.ybr[y+0][x+2] = bcd
+	z.ybr[y+0][x+3] = cde
+	z.ybr[y+1][x+0] = qpa
+	z.ybr[y+1][x+1] = pab
+	z.ybr[y+1][x+2] = abc
+	z.ybr[y+1][x+3] = bcd
+	z.ybr[y+2][x+0] = rqp
+	z.ybr[y+2][x+1] = qpa
+	z.ybr[y+2][x+2] = pab
+	z.ybr[y+2][x+3] = abc
+	z.ybr[y+3][x+0] = srq
+	z.ybr[y+3][x+1] = rqp
+	z.ybr[y+3][x+2] = qpa
+	z.ybr[y+3][x+3] = pab
+}
+
+func predFunc4VR(z *Decoder, y, x int) { // predFunc4VR fills the 4x4 region at row y, column x from 2-tap and 1-2-1 weighted averages of the left and top border pixels, in the fixed pattern assigned below.
+	r := int32(z.ybr[y+2][x-1]) // r, q and p are the left border, from row y+2 up to row y.
+	q := int32(z.ybr[y+1][x-1])
+	p := int32(z.ybr[y+0][x-1])
+	a := int32(z.ybr[y-1][x-1]) // a is the top-left corner; b through e are the row above, columns x to x+3.
+	b := int32(z.ybr[y-1][x+0])
+	c := int32(z.ybr[y-1][x+1])
+	d := int32(z.ybr[y-1][x+2])
+	e := int32(z.ybr[y-1][x+3])
+	ab := uint8((a + b + 1) / 2)
+	bc := uint8((b + c + 1) / 2)
+	cd := uint8((c + d + 1) / 2)
+	de := uint8((d + e + 1) / 2)
+	rqp := uint8((r + 2*q + p + 2) / 4)
+	qpa := uint8((q + 2*p + a + 2) / 4)
+	pab := uint8((p + 2*a + b + 2) / 4)
+	abc := uint8((a + 2*b + c + 2) / 4)
+	bcd := uint8((b + 2*c + d + 2) / 4)
+	cde := uint8((c + 2*d + e + 2) / 4)
+	z.ybr[y+0][x+0] = ab
+	z.ybr[y+0][x+1] = bc
+	z.ybr[y+0][x+2] = cd
+	z.ybr[y+0][x+3] = de
+	z.ybr[y+1][x+0] = pab
+	z.ybr[y+1][x+1] = abc
+	z.ybr[y+1][x+2] = bcd
+	z.ybr[y+1][x+3] = cde
+	z.ybr[y+2][x+0] = qpa
+	z.ybr[y+2][x+1] = ab
+	z.ybr[y+2][x+2] = bc
+	z.ybr[y+2][x+3] = cd
+	z.ybr[y+3][x+0] = rqp
+	z.ybr[y+3][x+1] = pab
+	z.ybr[y+3][x+2] = abc
+	z.ybr[y+3][x+3] = bcd
+}
+
+// predFunc4LD predicts the 4x4 block at (y, x) from the eight pixels a..h of
+// the row above, starting directly above the block's left column. Each
+// output pixel is a (1, 2, 1)/4 weighted mean of three consecutive pixels of
+// that row, and the result is constant along each down-left diagonal:
+//
+//	abc bcd cde def
+//	bcd cde def efg
+//	cde def efg fgh
+//	def efg fgh ghh
+//
+// The last tap, ghh, counts h twice; no pixel beyond column x+7 is
+// read.
+func predFunc4LD(z *Decoder, y, x int) {
+	// above holds a..h followed by a second copy of h, so that one formula
+	// serves all seven taps.
+	var above [9]int32
+	for k := 0; k < 8; k++ {
+		above[k] = int32(z.ybr[y-1][x+k])
+	}
+	above[8] = above[7]
+	// taps[k] is centred on above[k+1].
+	var taps [7]uint8
+	for k := range taps {
+		taps[k] = uint8((above[k] + 2*above[k+1] + above[k+2] + 2) / 4)
+	}
+	// Moving one pixel right or one pixel down advances one step along
+	// taps.
+	for j := 0; j < 4; j++ {
+		for i := 0; i < 4; i++ {
+			z.ybr[y+j][x+i] = taps[i+j]
+		}
+	}
+}
+
+// predFunc4VL predicts the 4x4 block at (y, x) from the eight pixels a..h of
+// the row above, starting directly above the block's left column:
+//
+//	ab  bc  cd  de
+//	abc bcd cde def
+//	bc  cd  de  efg
+//	bcd cde def fgh
+//
+// Two letters denote the rounded mean of two adjacent pixels; three letters
+// denote a (1, 2, 1)/4 weighted mean of three consecutive pixels.
+func predFunc4VL(z *Decoder, y, x int) {
+	// above holds a..h.
+	var above [8]int32
+	for k := range above {
+		above[k] = int32(z.ybr[y-1][x+k])
+	}
+	// two[k] is centred between above[k] and above[k+1]; three[k] is centred
+	// on above[k+1].
+	var two [4]uint8   // ab, bc, cd, de
+	var three [6]uint8 // abc, bcd, cde, def, efg, fgh
+	for k := range two {
+		two[k] = uint8((above[k] + above[k+1] + 1) / 2)
+	}
+	for k := range three {
+		three[k] = uint8((above[k] + 2*above[k+1] + above[k+2] + 2) / 4)
+	}
+	// Rows 0 and 1 are straight runs of the two tables.
+	copy(z.ybr[y+0][x:x+4], two[:])
+	copy(z.ybr[y+1][x:x+4], three[:4])
+	// Rows 2 and 3 are rows 0 and 1 shifted left by one pixel; the vacated
+	// right-hand pixel comes from the far end of the three-tap table.
+	copy(z.ybr[y+2][x:x+3], two[1:])
+	z.ybr[y+2][x+3] = three[4]
+	copy(z.ybr[y+3][x:x+3], three[1:4])
+	z.ybr[y+3][x+3] = three[5]
+}
+
+// predFunc4HD predicts the 4x4 block at (y, x) from its left column, its
+// above-left corner and the three pixels above its first three columns:
+//
+//	pa pab abc bcd
+//	qp qpa pa  pab
+//	rq rqp qp  qpa
+//	sr srq rq  rqp
+//
+// s, r, q, p run up the left column, a is the corner and b, c, d run along
+// the row above. Two letters denote the rounded mean of two adjacent border
+// pixels; three letters denote a (1, 2, 1)/4 weighted mean of three
+// consecutive border pixels.
+func predFunc4HD(z *Decoder, y, x int) {
+	// edge holds s, r, q, p, a, b, c, d in that order.
+	var edge [8]int32
+	for k := 0; k < 5; k++ {
+		edge[k] = int32(z.ybr[y+3-k][x-1])
+	}
+	for k := 0; k < 3; k++ {
+		edge[5+k] = int32(z.ybr[y-1][x+k])
+	}
+	// seq is sr, srq, rq, rqp, qp, qpa, pa, pab, abc, bcd. Each row of the
+	// block is a four-entry window of seq, starting two entries earlier for
+	// each row further down.
+	var seq [10]uint8
+	for k := 0; k < 4; k++ {
+		seq[2*k] = uint8((edge[k] + edge[k+1] + 1) / 2)
+		seq[2*k+1] = uint8((edge[k] + 2*edge[k+1] + edge[k+2] + 2) / 4)
+	}
+	seq[8] = uint8((edge[4] + 2*edge[5] + edge[6] + 2) / 4)
+	seq[9] = uint8((edge[5] + 2*edge[6] + edge[7] + 2) / 4)
+	// Row 0 starts at seq[6], row 3 at seq[0].
+	for j := 0; j < 4; j++ {
+		copy(z.ybr[y+j][x:x+4], seq[6-2*j:])
+	}
+}
+
+// predFunc4HU predicts the 4x4 block at (y, x) from the four pixels p, q, r,
+// s that run down the column to its left:
+//
+//	pq pqr qr qrs
+//	qr qrs rs rss
+//	rs rss s  s
+//	s  s   s  s
+//
+// Two letters are a rounded 2-pixel mean, three a (1, 2, 1)/4 weighted mean.
+func predFunc4HU(z *Decoder, y, x int) {
+	// left holds p, q, r, s followed by a second copy of s.
+	var left [5]int32
+	for k := 0; k < 4; k++ {
+		left[k] = int32(z.ybr[y+k][x-1])
+	}
+	left[4] = left[3]
+	// Row j is the window seq[2*j:2*j+4] of pq, pqr, qr, qrs, rs, rss, s, s, s, s.
+	var seq [10]uint8
+	for k := 0; k < 3; k++ {
+		seq[2*k] = uint8((left[k] + left[k+1] + 1) / 2)
+		seq[2*k+1] = uint8((left[k] + 2*left[k+1] + left[k+2] + 2) / 4)
+	}
+	for k := 6; k < len(seq); k++ {
+		seq[k] = uint8(left[3])
+	}
+	for j := 0; j < 4; j++ {
+		copy(z.ybr[y+j][x:x+4], seq[2*j:])
+	}
+}
+
+// predFunc8DC fills the 8x8 block at (y, x) with the rounded mean of the 8
+// pixels above it and the 8 pixels to its left.
+func predFunc8DC(z *Decoder, y, x int) {
+	total := uint32(8) // Half the divisor, for rounding to nearest.
+	for k := 0; k < 8; k++ {
+		total += uint32(z.ybr[y-1][x+k]) + uint32(z.ybr[y+k][x-1])
+	}
+	dc := uint8(total >> 4)
+	for j := 0; j < 8; j++ {
+		row := z.ybr[y+j][x : x+8]
+		for i := range row {
+			row[i] = dc
+		}
+	}
+}
+
+// predFunc8TM sets each pixel to left + above - corner, clipped to [0, 255].
+func predFunc8TM(z *Decoder, y, x int) {
+	above := z.ybr[y-1][x : x+8]
+	for j := 0; j < 8; j++ {
+		base := int32(z.ybr[y+j][x-1]) - int32(z.ybr[y-1][x-1])
+		for i, a := range above {
+			z.ybr[y+j][x+i] = uint8(clip(base+int32(a), 0, 255))
+		}
+	}
+}
+
+// predFunc8VE copies the 8 pixels above the block into each of its 8 rows.
+func predFunc8VE(z *Decoder, y, x int) {
+	above := z.ybr[y-1][x : x+8]
+	for j := 0; j < 8; j++ {
+		copy(z.ybr[y+j][x:x+8], above)
+	}
+}
+
+// predFunc8HE fills each row of the 8x8 block with the pixel to its left.
+func predFunc8HE(z *Decoder, y, x int) {
+	for n := 0; n < 64; n++ {
+		j, i := y+n/8, x+n%8
+		z.ybr[j][i] = z.ybr[j][x-1]
+	}
+}
+
+// predFunc8DCTop fills the 8x8 block at (y, x) with the rounded mean of the
+// 8 pixels to its left; the row above the block is not read.
+func predFunc8DCTop(z *Decoder, y, x int) {
+	total := uint32(4) // Half the divisor, for rounding to nearest.
+	for k := 0; k < 8; k++ {
+		total += uint32(z.ybr[y+k][x-1])
+	}
+	dc := uint8(total >> 3)
+	for n := 0; n < 64; n++ {
+		z.ybr[y+n/8][x+n%8] = dc
+	}
+}
+
+// predFunc8DCLeft fills the 8x8 block at (y, x) with the rounded mean of the
+// 8 pixels above it; the column left of the block is not read.
+func predFunc8DCLeft(z *Decoder, y, x int) {
+	total := uint32(4) // Half the divisor, for rounding to nearest.
+	for _, v := range z.ybr[y-1][x : x+8] {
+		total += uint32(v)
+	}
+	dc := uint8(total >> 3)
+	for n := 0; n < 64; n++ {
+		z.ybr[y+n/8][x+n%8] = dc
+	}
+}
+
+// predFunc8DCTopLeft fills the 8x8 block at (y, x) with the constant 0x80.
+// It reads no neighboring pixels.
+func predFunc8DCTopLeft(z *Decoder, y, x int) {
+	for n := 0; n < 64; n++ {
+		z.ybr[y+n/8][x+n%8] = 0x80
+	}
+}
+
+// predFunc16DC fills the 16x16 block at (y, x) with the rounded mean of the
+// 16 pixels above it and the 16 pixels to its left.
+func predFunc16DC(z *Decoder, y, x int) {
+	total := uint32(16) // Half the divisor, for rounding to nearest.
+	for k := 0; k < 16; k++ {
+		total += uint32(z.ybr[y-1][x+k]) + uint32(z.ybr[y+k][x-1])
+	}
+	dc := uint8(total >> 5)
+	for j := 0; j < 16; j++ {
+		row := z.ybr[y+j][x : x+16]
+		for i := range row {
+			row[i] = dc
+		}
+	}
+}
+
+// predFunc16TM sets each pixel to left + above - corner, clipped to [0, 255].
+func predFunc16TM(z *Decoder, y, x int) {
+	above := z.ybr[y-1][x : x+16]
+	for j := 0; j < 16; j++ {
+		base := int32(z.ybr[y+j][x-1]) - int32(z.ybr[y-1][x-1])
+		for i, a := range above {
+			z.ybr[y+j][x+i] = uint8(clip(base+int32(a), 0, 255))
+		}
+	}
+}
+
+// predFunc16VE copies the 16 pixels above the block into each of its 16 rows.
+func predFunc16VE(z *Decoder, y, x int) {
+	above := z.ybr[y-1][x : x+16]
+	for j := 0; j < 16; j++ {
+		copy(z.ybr[y+j][x:x+16], above)
+	}
+}
+
+// predFunc16HE fills each row of the 16x16 block with the pixel to its left.
+func predFunc16HE(z *Decoder, y, x int) {
+	for n := 0; n < 256; n++ {
+		j, i := y+n/16, x+n%16
+		z.ybr[j][i] = z.ybr[j][x-1]
+	}
+}
+
+// predFunc16DCTop fills the 16x16 block at (y, x) with the rounded mean of
+// the 16 pixels to its left; the row above the block is not read.
+func predFunc16DCTop(z *Decoder, y, x int) {
+	total := uint32(8) // Half the divisor, for rounding to nearest.
+	for k := 0; k < 16; k++ {
+		total += uint32(z.ybr[y+k][x-1])
+	}
+	dc := uint8(total >> 4)
+	for n := 0; n < 256; n++ {
+		z.ybr[y+n/16][x+n%16] = dc
+	}
+}
+
+// predFunc16DCLeft fills the 16x16 block at (y, x) with the rounded mean of
+// the 16 pixels above it; the column left of the block is not read.
+func predFunc16DCLeft(z *Decoder, y, x int) {
+	total := uint32(8) // Half the divisor, for rounding to nearest.
+	for _, v := range z.ybr[y-1][x : x+16] {
+		total += uint32(v)
+	}
+	dc := uint8(total >> 4)
+	for n := 0; n < 256; n++ {
+		z.ybr[y+n/16][x+n%16] = dc
+	}
+}
+
+// predFunc16DCTopLeft fills the 16x16 block at (y, x) with the constant 0x80.
+// It reads no neighboring pixels.
+func predFunc16DCTopLeft(z *Decoder, y, x int) {
+	for n := 0; n < 256; n++ {
+		z.ybr[y+n/16][x+n%16] = 0x80
+	}
+}
diff --git a/vp8/quant.go b/vp8/quant.go
new file mode 100644
index 0000000..8bf12a0
--- /dev/null
+++ b/vp8/quant.go
@@ -0,0 +1,95 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package vp8
+
+// This file implements parsing the quantization factors.
+
+// quant holds DC/AC dequantization factor pairs: index 0 is DC, index 1 is AC.
+type quant struct {
+	y1 [2]uint16 // passed to parseResiduals4 for the luma (Y1) regions
+	y2 [2]uint16 // passed to parseResiduals4 for the luma WHT (Y2) region
+	uv [2]uint16 // passed to parseResiduals4 for the chroma regions
+}
+
+// clip returns x limited to the inclusive range [min, max].
+func clip(x, min, max int32) int32 {
+	switch {
+	case x < min:
+		return min
+	case x > max:
+		return max
+	}
+	return x
+}
+
+// parseQuant parses the quantization factors, as specified in section 9.6.
+func (d *Decoder) parseQuant() {
+	base := int32(d.fp.readUint(uniformProb, 7))
+	// No delta is read for the Y1 AC factor; it uses the unadjusted index.
+	y1DC := d.fp.readOptionalInt(uniformProb, 4)
+	y2DC := d.fp.readOptionalInt(uniformProb, 4)
+	y2AC := d.fp.readOptionalInt(uniformProb, 4)
+	uvDC := d.fp.readOptionalInt(uniformProb, 4)
+	uvAC := d.fp.readOptionalInt(uniformProb, 4)
+	for i := 0; i < nSegment; i++ {
+		idx := base
+		if d.segmentHeader.useSegment {
+			idx = int32(d.segmentHeader.quantizer[i])
+			if d.segmentHeader.relativeDelta {
+				idx += base
+			}
+		}
+		dst := &d.quant[i]
+		dst.y1[0] = dequantTableDC[clip(idx+y1DC, 0, 127)]
+		dst.y1[1] = dequantTableAC[clip(idx, 0, 127)]
+		dst.y2[0] = dequantTableDC[clip(idx+y2DC, 0, 127)] * 2
+		dst.y2[1] = dequantTableAC[clip(idx+y2AC, 0, 127)] * 155 / 100
+		if dst.y2[1] < 8 {
+			dst.y2[1] = 8
+		}
+		dst.uv[0] = dequantTableDC[clip(idx+uvDC, 0, 127)]
+		dst.uv[1] = dequantTableAC[clip(idx+uvAC, 0, 127)]
+	}
+}
+
+// The dequantization tables are specified in section 14.1. parseQuant indexes them with a quantizer index clipped to [0, 127].
+var (
+	dequantTableDC = [128]uint16{
+		4, 5, 6, 7, 8, 9, 10, 10,
+		11, 12, 13, 14, 15, 16, 17, 17,
+		18, 19, 20, 20, 21, 21, 22, 22,
+		23, 23, 24, 25, 25, 26, 27, 28,
+		29, 30, 31, 32, 33, 34, 35, 36,
+		37, 37, 38, 39, 40, 41, 42, 43,
+		44, 45, 46, 46, 47, 48, 49, 50,
+		51, 52, 53, 54, 55, 56, 57, 58,
+		59, 60, 61, 62, 63, 64, 65, 66,
+		67, 68, 69, 70, 71, 72, 73, 74,
+		75, 76, 76, 77, 78, 79, 80, 81,
+		82, 83, 84, 85, 86, 87, 88, 89,
+		91, 93, 95, 96, 98, 100, 101, 102,
+		104, 106, 108, 110, 112, 114, 116, 118,
+		122, 124, 126, 128, 130, 132, 134, 136,
+		138, 140, 143, 145, 148, 151, 154, 157,
+	}
+	dequantTableAC = [128]uint16{
+		4, 5, 6, 7, 8, 9, 10, 11,
+		12, 13, 14, 15, 16, 17, 18, 19,
+		20, 21, 22, 23, 24, 25, 26, 27,
+		28, 29, 30, 31, 32, 33, 34, 35,
+		36, 37, 38, 39, 40, 41, 42, 43,
+		44, 45, 46, 47, 48, 49, 50, 51,
+		52, 53, 54, 55, 56, 57, 58, 60,
+		62, 64, 66, 68, 70, 72, 74, 76,
+		78, 80, 82, 84, 86, 88, 90, 92,
+		94, 96, 98, 100, 102, 104, 106, 108,
+		110, 112, 114, 116, 119, 122, 125, 128,
+		131, 134, 137, 140, 143, 146, 149, 152,
+		155, 158, 161, 164, 167, 170, 173, 177,
+		181, 185, 189, 193, 197, 201, 205, 209,
+		213, 217, 221, 225, 229, 234, 239, 245,
+		249, 254, 259, 264, 269, 274, 279, 284,
+	}
+)
diff --git a/vp8/reconstruct.go b/vp8/reconstruct.go
new file mode 100644
index 0000000..525e442
--- /dev/null
+++ b/vp8/reconstruct.go
@@ -0,0 +1,435 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package vp8
+
+// This file implements decoding DCT/WHT residual coefficients and
+// reconstructing YCbCr data equal to predicted values plus residuals.
+//
+// There are 1*16*16 + 2*8*8 + 1*4*4 coefficients per macroblock:
+//	- 1*16*16 luma DCT coefficients,
+//	- 2*8*8 chroma DCT coefficients, and
+//	- 1*4*4 luma WHT coefficients.
+// Coefficients are read in lots of 16, and the later coefficients in each lot
+// are often zero.
+//
+// The YCbCr data consists of 1*16*16 luma values and 2*8*8 chroma values,
+// plus previously decoded values along the top and left borders. The combined
+// values are laid out as a [1+16+1+8][32]uint8 so that vertically adjacent
+// samples are 32 bytes apart. In detail, the layout is:
+//
+//	0 1 2 3 4 5 6 7  8 9 0 1 2 3 4 5  6 7 8 9 0 1 2 3  4 5 6 7 8 9 0 1
+//	. . . . . . . a  b b b b b b b b  b b b b b b b b  c c c c . . . .	0
+//	. . . . . . . d  Y Y Y Y Y Y Y Y  Y Y Y Y Y Y Y Y  . . . . . . . .	1
+//	. . . . . . . d  Y Y Y Y Y Y Y Y  Y Y Y Y Y Y Y Y  . . . . . . . .	2
+//	. . . . . . . d  Y Y Y Y Y Y Y Y  Y Y Y Y Y Y Y Y  . . . . . . . .	3
+//	. . . . . . . d  Y Y Y Y Y Y Y Y  Y Y Y Y Y Y Y Y  c c c c . . . .	4
+//	. . . . . . . d  Y Y Y Y Y Y Y Y  Y Y Y Y Y Y Y Y  . . . . . . . .	5
+//	. . . . . . . d  Y Y Y Y Y Y Y Y  Y Y Y Y Y Y Y Y  . . . . . . . .	6
+//	. . . . . . . d  Y Y Y Y Y Y Y Y  Y Y Y Y Y Y Y Y  . . . . . . . .	7
+//	. . . . . . . d  Y Y Y Y Y Y Y Y  Y Y Y Y Y Y Y Y  c c c c . . . .	8
+//	. . . . . . . d  Y Y Y Y Y Y Y Y  Y Y Y Y Y Y Y Y  . . . . . . . .	9
+//	. . . . . . . d  Y Y Y Y Y Y Y Y  Y Y Y Y Y Y Y Y  . . . . . . . .	10
+//	. . . . . . . d  Y Y Y Y Y Y Y Y  Y Y Y Y Y Y Y Y  . . . . . . . .	11
+//	. . . . . . . d  Y Y Y Y Y Y Y Y  Y Y Y Y Y Y Y Y  c c c c . . . .	12
+//	. . . . . . . d  Y Y Y Y Y Y Y Y  Y Y Y Y Y Y Y Y  . . . . . . . .	13
+//	. . . . . . . d  Y Y Y Y Y Y Y Y  Y Y Y Y Y Y Y Y  . . . . . . . .	14
+//	. . . . . . . d  Y Y Y Y Y Y Y Y  Y Y Y Y Y Y Y Y  . . . . . . . .	15
+//	. . . . . . . d  Y Y Y Y Y Y Y Y  Y Y Y Y Y Y Y Y  . . . . . . . .	16
+//	. . . . . . . e  f f f f f f f f  . . . . . . . g  h h h h h h h h	17
+//	. . . . . . . i  B B B B B B B B  . . . . . . . j  R R R R R R R R	18
+//	. . . . . . . i  B B B B B B B B  . . . . . . . j  R R R R R R R R	19
+//	. . . . . . . i  B B B B B B B B  . . . . . . . j  R R R R R R R R	20
+//	. . . . . . . i  B B B B B B B B  . . . . . . . j  R R R R R R R R	21
+//	. . . . . . . i  B B B B B B B B  . . . . . . . j  R R R R R R R R	22
+//	. . . . . . . i  B B B B B B B B  . . . . . . . j  R R R R R R R R	23
+//	. . . . . . . i  B B B B B B B B  . . . . . . . j  R R R R R R R R	24
+//	. . . . . . . i  B B B B B B B B  . . . . . . . j  R R R R R R R R	25
+//
+// Y, B and R are the reconstructed luma (Y) and chroma (B, R) values.
+// The Y values are predicted (either as one 16x16 region or 16 4x4 regions)
+// based on the row above's Y values (some combination of {abc} or {dYc}) and
+// the column left's Y values (either {ad} or {bY}). Similarly, B and R values
+// are predicted on the row above and column left of their respective 8x8
+// region: {efi} for B, {ghj} for R.
+//
+// For uppermost macroblocks (i.e. those with mby == 0), the {abcefgh} values
+// are initialized to 0x7f. Otherwise, they are copied from the bottom row of
+// the macroblock above. The {c} values are then duplicated from row 0 to rows
+// 4, 8 and 12 of the ybr workspace.
+// Similarly, for leftmost macroblocks (i.e. those with mbx == 0), the {adeigj}
+// values are initialized to 0x81. Otherwise, they are copied from the right
+// column of the macroblock to the left.
+// For the top-left macroblock (with mby == 0 && mbx == 0), {aeg} is 0x7f.
+//
+// When moving from one macroblock to the next horizontally, the {adeigj}
+// values can simply be copied from the workspace to itself, shifted by 8 or
+// 16 columns. When moving from one macroblock to the next vertically,
+// filtering can occur and hence the row values have to be copied from the
+// post-filtered image instead of the pre-filtered workspace.
+
+const (
+	bCoeffBase   = 1*16*16 + 0*8*8 // index in d.coeff of the first B (Cb) coefficient
+	rCoeffBase   = 1*16*16 + 1*8*8 // index in d.coeff of the first R (Cr) coefficient
+	whtCoeffBase = 1*16*16 + 2*8*8 // index in d.coeff of the first luma WHT coefficient
+)
+
+const (
+	ybrYX = 8  // column of the first Y sample in the ybr workspace
+	ybrYY = 1  // row of the first Y sample in the ybr workspace
+	ybrBX = 8  // column of the first B sample in the ybr workspace
+	ybrBY = 18 // row of the first B sample in the ybr workspace
+	ybrRX = 24 // column of the first R sample in the ybr workspace
+	ybrRY = 18 // row of the first R sample in the ybr workspace
+)
+
+// prepareYBR prepares the {abcdefghij} elements of ybr.
+func (d *Decoder) prepareYBR(mbx, mby int) {
+	// Left borders: column 7 of every row, and column 23 of the chroma rows.
+	if mbx != 0 {
+		// Reuse the right-most column of each region of the workspace.
+		for y := 0; y < 17; y++ {
+			d.ybr[y][7] = d.ybr[y][23]
+		}
+		for y := 17; y < 26; y++ {
+			d.ybr[y][7], d.ybr[y][23] = d.ybr[y][15], d.ybr[y][31]
+		}
+	} else {
+		for y := 0; y < 26; y++ {
+			d.ybr[y][7] = 0x81
+			if y >= 17 {
+				d.ybr[y][23] = 0x81
+			}
+		}
+	}
+	// Top borders: row 0 for luma and row 17 for chroma. These are written
+	// last, so the corner pixels of both rows end up 0x7f when mby == 0.
+	if mby == 0 {
+		for x := 7; x < 32; x++ {
+			if x < 28 {
+				d.ybr[0][x] = 0x7f
+			}
+			if x < 16 || x >= 23 {
+				d.ybr[17][x] = 0x7f
+			}
+		}
+	} else {
+		// Copy from the last row of the macroblock above, as stored in d.img.
+		yOff := (16*mby-1)*d.img.YStride + 16*mbx
+		cOff := (8*mby-1)*d.img.CStride + 8*mbx
+		for i := 0; i < 16; i++ {
+			d.ybr[0][8+i] = d.img.Y[yOff+i]
+		}
+		for i := 0; i < 8; i++ {
+			d.ybr[17][8+i] = d.img.Cb[cOff+i]
+			d.ybr[17][24+i] = d.img.Cr[cOff+i]
+		}
+		// The four pixels above and to the right come from the next
+		// macroblock's columns, or repeat the last pixel in the final column.
+		for i := 16; i < 20; i++ {
+			src := yOff + i
+			if mbx == d.mbw-1 {
+				src = yOff + 15
+			}
+			d.ybr[0][8+i] = d.img.Y[src]
+		}
+	}
+	// Duplicate the above-right pixels next to rows 4, 8 and 12.
+	for y := 4; y < 16; y += 4 {
+		copy(d.ybr[y][24:28], d.ybr[0][24:28])
+	}
+}
+
+// btou returns 1 if b is true and 0 otherwise.
+func btou(b bool) uint8 {
+	if !b {
+		return 0
+	}
+	return 1
+}
+
+// pack ORs together x[i]<<i for the four elements of x, then shifts left by shift.
+func pack(x [4]uint8, shift int) uint32 {
+	lo, hi := uint32(x[0])|uint32(x[1])<<1, uint32(x[2])|uint32(x[3])<<1
+	return (lo | hi<<2) << uint(shift)
+}
+
+// unpack maps a four-bit value to its four bits, least significant bit first; it undoes pack with a zero shift.
+var unpack = [16][4]uint8{
+	{0, 0, 0, 0},
+	{1, 0, 0, 0},
+	{0, 1, 0, 0},
+	{1, 1, 0, 0},
+	{0, 0, 1, 0},
+	{1, 0, 1, 0},
+	{0, 1, 1, 0},
+	{1, 1, 1, 0},
+	{0, 0, 0, 1},
+	{1, 0, 0, 1},
+	{0, 1, 0, 1},
+	{1, 1, 0, 1},
+	{0, 0, 1, 1},
+	{1, 0, 1, 1},
+	{0, 1, 1, 1},
+	{1, 1, 1, 1},
+}
+
+var (
+	// bands maps a coefficient position (0 to 16) to its band, per section 13.3.
+	bands = [17]uint8{0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7, 0}
+	// Category probabilities are specified in section 13.2. Each row is
+	// zero-terminated. Decoding categories 1 and 2 are done inline.
+	cat3456 = [4][12]uint8{
+		{173, 148, 140, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+		{176, 155, 140, 135, 0, 0, 0, 0, 0, 0, 0, 0},
+		{180, 157, 141, 134, 130, 0, 0, 0, 0, 0, 0, 0},
+		{254, 254, 243, 230, 196, 177, 153, 140, 133, 130, 129, 0},
+	}
+	// zigzag[n] is the raster index of the n'th coefficient. The zigzag order is:
+	//	0  1  5  6
+	//	2  4  7 12
+	//	3  8 11 13
+	//	9 10 14 15
+	zigzag = [16]uint8{0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15}
+)
+
+// parseResiduals4 parses a 4x4 region of residual coefficients, as specified
+// in section 13.3. It returns 0 if the very first bit read says the region
+// holds no coefficients to parse, and 1 otherwise.
+// r is the partition to read bits from.
+// plane and context describe which token probability table to use. context is
+// either 0, 1 or 2: callers pass the sum of the 0/1 values previously returned
+// for the neighbor to the left and the neighbor above.
+// quant are the DC/AC quantization factors.
+// skipFirstCoeff is whether the DC coefficient has already been parsed.
+// coeffBase is the base index of d.coeff to write to.
+func (d *Decoder) parseResiduals4(r *partition, plane int, context uint8, quant [2]uint16, skipFirstCoeff bool, coeffBase int) uint8 {
+	prob, n := &d.tokenProb[plane], 0
+	if skipFirstCoeff {
+		n = 1 // Start after position 0 (DC).
+	}
+	p := prob[bands[n]][context]
+	if !r.readBit(p[0]) {
+		return 0 // The region ends before any coefficient is written.
+	}
+	for n != 16 {
+		n++
+		if !r.readBit(p[1]) {
+			p = prob[bands[n]][0]
+			continue // Nothing is written to d.coeff for this position.
+		}
+		var v uint32
+		if !r.readBit(p[2]) {
+			v = 1
+			p = prob[bands[n]][1]
+		} else {
+			if !r.readBit(p[3]) {
+				if !r.readBit(p[4]) {
+					v = 2
+				} else {
+					v = 3 + r.readUint(p[5], 1)
+				}
+			} else if !r.readBit(p[6]) {
+				if !r.readBit(p[7]) {
+					// Category 1.
+					v = 5 + r.readUint(159, 1)
+				} else {
+					// Category 2.
+					v = 7 + 2*r.readUint(165, 1) + r.readUint(145, 1)
+				}
+			} else {
+				// Categories 3, 4, 5 or 6.
+				b1 := r.readUint(p[8], 1)
+				b0 := r.readUint(p[9+b1], 1)
+				cat := 2*b1 + b0
+				tab := &cat3456[cat]
+				v = 0
+				for i := 0; tab[i] != 0; i++ {
+					v *= 2
+					v += r.readUint(tab[i], 1)
+				}
+				v += 3 + (8 << cat)
+			}
+			p = prob[bands[n]][2]
+		}
+		z := zigzag[n-1]
+		c := int32(v) * int32(quant[btou(z > 0)]) // quant[0] scales raster index 0 (DC), quant[1] all others.
+		if r.readBit(uniformProb) {
+			c = -c
+		}
+		d.coeff[coeffBase+int(z)] = int16(c)
+		if n == 16 || !r.readBit(p[0]) {
+			return 1
+		}
+	}
+	return 1
+}
+
+// parseResiduals parses the residual coefficients of the macroblock at (mbx, mby) into d.coeff and updates the non-zero masks.
+func (d *Decoder) parseResiduals(mbx, mby int) {
+	partition := &d.op[mby&(d.nOP-1)] // NOTE(review): the mask presumably relies on nOP being a power of two; confirm.
+	plane := planeY1SansY2
+	quant := &d.quant[d.segment]
+
+	// Parse the DC coefficient of each 4x4 luma region.
+	if d.usePredY16 {
+		nz := d.parseResiduals4(partition, planeY2, d.leftMB.nzY16+d.upMB[mbx].nzY16, quant.y2, false, whtCoeffBase)
+		d.leftMB.nzY16 = nz
+		d.upMB[mbx].nzY16 = nz
+		d.inverseWHT16()
+		plane = planeY1WithY2
+	}
+
+	var (
+		nzDC, nzAC         [4]uint8
+		nzDCMask, nzACMask uint32
+		coeffBase          int
+	)
+
+	// Parse the luma coefficients.
+	lnz := unpack[d.leftMB.nzMask&0x0f] // Low nibble: luma flags, one per row of 4x4 regions.
+	unz := unpack[d.upMB[mbx].nzMask&0x0f]
+	for y := 0; y < 4; y++ {
+		nz := lnz[y]
+		for x := 0; x < 4; x++ {
+			nz = d.parseResiduals4(partition, plane, nz+unz[x], quant.y1, d.usePredY16, coeffBase)
+			unz[x] = nz
+			nzAC[x] = nz
+			nzDC[x] = btou(d.coeff[coeffBase] != 0)
+			coeffBase += 16
+		}
+		lnz[y] = nz
+		nzDCMask |= pack(nzDC, y*4) // Bits 4*y to 4*y+3 describe luma row y.
+		nzACMask |= pack(nzAC, y*4)
+	}
+	lnzMask := pack(lnz, 0)
+	unzMask := pack(unz, 0)
+
+	// Parse the chroma coefficients.
+	lnz = unpack[d.leftMB.nzMask>>4] // High nibble: chroma flags, two for the first plane then two for the second.
+	unz = unpack[d.upMB[mbx].nzMask>>4]
+	for c := 0; c < 4; c += 2 {
+		for y := 0; y < 2; y++ {
+			nz := lnz[y+c]
+			for x := 0; x < 2; x++ {
+				nz = d.parseResiduals4(partition, planeUV, nz+unz[x+c], quant.uv, false, coeffBase)
+				unz[x+c] = nz
+				nzAC[y*2+x] = nz
+				nzDC[y*2+x] = btou(d.coeff[coeffBase] != 0)
+				coeffBase += 16
+			}
+			lnz[y+c] = nz
+		}
+		nzDCMask |= pack(nzDC, 16+c*2) // Bits 16-19 for c == 0, bits 20-23 for c == 2.
+		nzACMask |= pack(nzAC, 16+c*2)
+	}
+	lnzMask |= pack(lnz, 4)
+	unzMask |= pack(unz, 4)
+
+	// Save decoder state.
+	d.leftMB.nzMask = uint8(lnzMask)
+	d.upMB[mbx].nzMask = uint8(unzMask)
+	d.nzDCMask = nzDCMask
+	d.nzACMask = nzACMask
+}
+
+// reconstructMacroblock applies the predictor functions and adds the inverse-
+// DCT transformed residuals to recover the YCbCr data.
+// mbx and mby are passed only to checkTopLeftPred.
+func (d *Decoder) reconstructMacroblock(mbx, mby int) {
+	// Luma. A 16x16 predictor is applied to the whole region up front. With
+	// 4x4 predictors, each region is predicted and has its residual added
+	// before the next region is predicted.
+	whole := d.usePredY16
+	if whole {
+		predFunc16[checkTopLeftPred(mbx, mby, d.predY16)](d, ybrYY, ybrYX)
+	}
+	for j := 0; j < 4; j++ {
+		for i := 0; i < 4; i++ {
+			n, y, x := 4*j+i, 4*j+ybrYY, 4*i+ybrYX
+			if !whole {
+				predFunc4[d.predY4[j][i]](d, y, x)
+			}
+			// Bit n of each mask describes luma region n. nzACMask selects
+			// the full transform; failing that, nzDCMask selects the DC-only
+			// transform; with neither bit set the prediction is kept as is.
+			bit := uint32(1) << uint(n)
+			switch {
+			case d.nzACMask&bit != 0:
+				d.inverseDCT4(y, x, 16*n)
+			case d.nzDCMask&bit != 0:
+				d.inverseDCT4DCOnly(y, x, 16*n)
+			}
+		}
+	}
+	// Chroma. Both planes use the same predictor. Bits 16-19 of each mask
+	// describe the B plane and bits 20-23 the R plane; any set bit in a
+	// plane's group selects the transform for the whole 8x8 region.
+	pred := predFunc8[checkTopLeftPred(mbx, mby, d.predC8)]
+	pred(d, ybrBY, ybrBX)
+	switch {
+	case d.nzACMask&0x0f0000 != 0:
+		d.inverseDCT8(ybrBY, ybrBX, bCoeffBase)
+	case d.nzDCMask&0x0f0000 != 0:
+		d.inverseDCT8DCOnly(ybrBY, ybrBX, bCoeffBase)
+	}
+	// The R plane reuses the same predictor function.
+	pred(d, ybrRY, ybrRX)
+	switch {
+	case d.nzACMask&0xf00000 != 0:
+		d.inverseDCT8(ybrRY, ybrRX, rCoeffBase)
+	case d.nzDCMask&0xf00000 != 0:
+		d.inverseDCT8DCOnly(ybrRY, ybrRX, rCoeffBase)
+	}
+}
+
+// reconstruct reconstructs one macroblock.
+func (d *Decoder) reconstruct(mbx, mby int) {
+	if d.segmentHeader.updateMap {
+		// The first bit picks the pair {0, 1} or {2, 3}; the second picks
+		// within the pair.
+		probIdx, base := 1, 0
+		if d.fp.readBit(d.segmentHeader.prob[0]) {
+			probIdx, base = 2, 2
+		}
+		d.segment = int(d.fp.readUint(d.segmentHeader.prob[probIdx], 1)) + base
+	}
+	skip := d.useSkipProb && d.fp.readBit(d.skipProb)
+	// Prepare the workspace.
+	for i := range d.coeff {
+		d.coeff[i] = 0
+	}
+	d.prepareYBR(mbx, mby)
+	// Parse the predictor modes.
+	if d.usePredY16 = d.fp.readBit(145); d.usePredY16 {
+		d.parsePredModeY16(mbx)
+	} else {
+		d.parsePredModeY4(mbx)
+	}
+	d.parsePredModeC8()
+	// Parse the residuals, or reset the non-zero bookkeeping if skipped.
+	if skip {
+		if d.usePredY16 {
+			d.leftMB.nzY16 = 0
+			d.upMB[mbx].nzY16 = 0
+		}
+		d.leftMB.nzMask = 0
+		d.upMB[mbx].nzMask = 0
+		d.nzDCMask = 0
+		d.nzACMask = 0
+	} else {
+		d.parseResiduals(mbx, mby)
+	}
+	// Reconstruct the YCbCr data and copy it to the image.
+	d.reconstructMacroblock(mbx, mby)
+	for row := 0; row < 16; row++ {
+		off := (16*mby+row)*d.img.YStride + 16*mbx
+		copy(d.img.Y[off:off+16], d.ybr[ybrYY+row][ybrYX:ybrYX+16])
+	}
+	for row := 0; row < 8; row++ {
+		off := (8*mby+row)*d.img.CStride + 8*mbx
+		copy(d.img.Cb[off:off+8], d.ybr[ybrBY+row][ybrBX:ybrBX+8])
+		copy(d.img.Cr[off:off+8], d.ybr[ybrRY+row][ybrRX:ybrRX+8])
+	}
+}
diff --git a/vp8/token.go b/vp8/token.go
new file mode 100644
index 0000000..da99cf0
--- /dev/null
+++ b/vp8/token.go
@@ -0,0 +1,381 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package vp8
+
+// This file contains token probabilities for decoding DCT/WHT coefficients, as
+// specified in chapter 13.
+
+func (d *Decoder) parseTokenProb() {
+	for i, plane := range tokenProbUpdateProb {
+		for j, band := range plane {
+			for k, ctx := range band {
+				for l, updateProb := range ctx {
+					if d.fp.readBit(updateProb) { // A set flag is followed by the new 8-bit value.
+						d.tokenProb[i][j][k][l] = uint8(d.fp.readUint(uniformProb, 8))
+					}
+				}
+			}
+		}
+	}
+}
+
+// The plane enumeration is specified in section 13.3.
+const (
+	planeY1WithY2 = iota // luma regions when usePredY16 is set
+	planeY2              // the luma WHT region, parsed only when usePredY16 is set
+	planeUV              // chroma regions
+	planeY1SansY2        // luma regions when usePredY16 is not set
+	nPlane               // number of planes; first dimension of the probability tables
+)
+
+const (
+	nBand    = 8  // second dimension of the token probability tables; bands holds values 0 to 7
+	nContext = 3  // third dimension of the token probability tables; contexts are 0, 1 or 2
+	nProb    = 11 // fourth dimension; parseResiduals4 reads entries 0 to 10
+)
+
+// tokenProbUpdateProb holds the probability passed to readBit for each update flag read by parseTokenProb (section 13.4).
+var tokenProbUpdateProb = [nPlane][nBand][nContext][nProb]uint8{
+	{
+		{
+			{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+			{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+			{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+		},
+		{
+			{176, 246, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+			{223, 241, 252, 255, 255, 255, 255, 255, 255, 255, 255},
+			{249, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255},
+		},
+		{
+			{255, 244, 252, 255, 255, 255, 255, 255, 255, 255, 255},
+			{234, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+			{253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+		},
+		{
+			{255, 246, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+			{239, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+			{254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+		},
+		{
+			{255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+			{251, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+			{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+		},
+		{
+			{255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+			{251, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+			{254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+		},
+		{
+			{255, 254, 253, 255, 254, 255, 255, 255, 255, 255, 255},
+			{250, 255, 254, 255, 254, 255, 255, 255, 255, 255, 255},
+			{254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+		},
+		{
+			{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+			{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+			{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+		},
+	},
+	{
+		{
+			{217, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+			{225, 252, 241, 253, 255, 255, 254, 255, 255, 255, 255},
+			{234, 250, 241, 250, 253, 255, 253, 254, 255, 255, 255},
+		},
+		{
+			{255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+			{223, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+			{238, 253, 254, 254, 255, 255, 255, 255, 255, 255, 255},
+		},
+		{
+			{255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+			{249, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+			{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+		},
+		{
+			{255, 253, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+			{247, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+			{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+		},
+		{
+			{255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+			{252, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+			{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+		},
+		{
+			{255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+			{253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+			{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+		},
+		{
+			{255, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255},
+			{250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+			{254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+		},
+		{
+			{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+			{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+			{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+		},
+	},
+	{
+		{
+			{186, 251, 250, 255, 255, 255, 255, 255, 255, 255, 255},
+			{234, 251, 244, 254, 255, 255, 255, 255, 255, 255, 255},
+			{251, 251, 243, 253, 254, 255, 254, 255, 255, 255, 255},
+		},
+		{
+			{255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+			{236, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+			{251, 253, 253, 254, 254, 255, 255, 255, 255, 255, 255},
+		},
+		{
+			{255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+			{254, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+			{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+		},
+		{
+			{255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+			{254, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+			{254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+		},
+		{
+			{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+			{254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+			{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+		},
+		{
+			{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+			{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+			{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+		},
+		{
+			{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+			{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+			{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+		},
+		{
+			{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+			{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+			{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+		},
+	},
+	{
+		{
+			{248, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+			{250, 254, 252, 254, 255, 255, 255, 255, 255, 255, 255},
+			{248, 254, 249, 253, 255, 255, 255, 255, 255, 255, 255},
+		},
+		{
+			{255, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255},
+			{246, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255},
+			{252, 254, 251, 254, 254, 255, 255, 255, 255, 255, 255},
+		},
+		{
+			{255, 254, 252, 255, 255, 255, 255, 255, 255, 255, 255},
+			{248, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255},
+			{253, 255, 254, 254, 255, 255, 255, 255, 255, 255, 255},
+		},
+		{
+			{255, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+			{245, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+			{253, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+		},
+		{
+			{255, 251, 253, 255, 255, 255, 255, 255, 255, 255, 255},
+			{252, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+			{255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+		},
+		{
+			{255, 252, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+			{249, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+			{255, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255},
+		},
+		{
+			{255, 255, 253, 255, 255, 255, 255, 255, 255, 255, 255},
+			{250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+			{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+		},
+		{
+			{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+			{254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+			{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255},
+		},
+	},
+}
+
+// defaultTokenProb holds the default token probabilities of section 13.5, indexed as [plane][band][context][prob].
+var defaultTokenProb = [nPlane][nBand][nContext][nProb]uint8{
+	{
+		{
+			{128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
+			{128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
+			{128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
+		},
+		{
+			{253, 136, 254, 255, 228, 219, 128, 128, 128, 128, 128},
+			{189, 129, 242, 255, 227, 213, 255, 219, 128, 128, 128},
+			{106, 126, 227, 252, 214, 209, 255, 255, 128, 128, 128},
+		},
+		{
+			{1, 98, 248, 255, 236, 226, 255, 255, 128, 128, 128},
+			{181, 133, 238, 254, 221, 234, 255, 154, 128, 128, 128},
+			{78, 134, 202, 247, 198, 180, 255, 219, 128, 128, 128},
+		},
+		{
+			{1, 185, 249, 255, 243, 255, 128, 128, 128, 128, 128},
+			{184, 150, 247, 255, 236, 224, 128, 128, 128, 128, 128},
+			{77, 110, 216, 255, 236, 230, 128, 128, 128, 128, 128},
+		},
+		{
+			{1, 101, 251, 255, 241, 255, 128, 128, 128, 128, 128},
+			{170, 139, 241, 252, 236, 209, 255, 255, 128, 128, 128},
+			{37, 116, 196, 243, 228, 255, 255, 255, 128, 128, 128},
+		},
+		{
+			{1, 204, 254, 255, 245, 255, 128, 128, 128, 128, 128},
+			{207, 160, 250, 255, 238, 128, 128, 128, 128, 128, 128},
+			{102, 103, 231, 255, 211, 171, 128, 128, 128, 128, 128},
+		},
+		{
+			{1, 152, 252, 255, 240, 255, 128, 128, 128, 128, 128},
+			{177, 135, 243, 255, 234, 225, 128, 128, 128, 128, 128},
+			{80, 129, 211, 255, 194, 224, 128, 128, 128, 128, 128},
+		},
+		{
+			{1, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128},
+			{246, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128},
+			{255, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
+		},
+	},
+	{
+		{
+			{198, 35, 237, 223, 193, 187, 162, 160, 145, 155, 62},
+			{131, 45, 198, 221, 172, 176, 220, 157, 252, 221, 1},
+			{68, 47, 146, 208, 149, 167, 221, 162, 255, 223, 128},
+		},
+		{
+			{1, 149, 241, 255, 221, 224, 255, 255, 128, 128, 128},
+			{184, 141, 234, 253, 222, 220, 255, 199, 128, 128, 128},
+			{81, 99, 181, 242, 176, 190, 249, 202, 255, 255, 128},
+		},
+		{
+			{1, 129, 232, 253, 214, 197, 242, 196, 255, 255, 128},
+			{99, 121, 210, 250, 201, 198, 255, 202, 128, 128, 128},
+			{23, 91, 163, 242, 170, 187, 247, 210, 255, 255, 128},
+		},
+		{
+			{1, 200, 246, 255, 234, 255, 128, 128, 128, 128, 128},
+			{109, 178, 241, 255, 231, 245, 255, 255, 128, 128, 128},
+			{44, 130, 201, 253, 205, 192, 255, 255, 128, 128, 128},
+		},
+		{
+			{1, 132, 239, 251, 219, 209, 255, 165, 128, 128, 128},
+			{94, 136, 225, 251, 218, 190, 255, 255, 128, 128, 128},
+			{22, 100, 174, 245, 186, 161, 255, 199, 128, 128, 128},
+		},
+		{
+			{1, 182, 249, 255, 232, 235, 128, 128, 128, 128, 128},
+			{124, 143, 241, 255, 227, 234, 128, 128, 128, 128, 128},
+			{35, 77, 181, 251, 193, 211, 255, 205, 128, 128, 128},
+		},
+		{
+			{1, 157, 247, 255, 236, 231, 255, 255, 128, 128, 128},
+			{121, 141, 235, 255, 225, 227, 255, 255, 128, 128, 128},
+			{45, 99, 188, 251, 195, 217, 255, 224, 128, 128, 128},
+		},
+		{
+			{1, 1, 251, 255, 213, 255, 128, 128, 128, 128, 128},
+			{203, 1, 248, 255, 255, 128, 128, 128, 128, 128, 128},
+			{137, 1, 177, 255, 224, 255, 128, 128, 128, 128, 128},
+		},
+	},
+	{
+		{
+			{253, 9, 248, 251, 207, 208, 255, 192, 128, 128, 128},
+			{175, 13, 224, 243, 193, 185, 249, 198, 255, 255, 128},
+			{73, 17, 171, 221, 161, 179, 236, 167, 255, 234, 128},
+		},
+		{
+			{1, 95, 247, 253, 212, 183, 255, 255, 128, 128, 128},
+			{239, 90, 244, 250, 211, 209, 255, 255, 128, 128, 128},
+			{155, 77, 195, 248, 188, 195, 255, 255, 128, 128, 128},
+		},
+		{
+			{1, 24, 239, 251, 218, 219, 255, 205, 128, 128, 128},
+			{201, 51, 219, 255, 196, 186, 128, 128, 128, 128, 128},
+			{69, 46, 190, 239, 201, 218, 255, 228, 128, 128, 128},
+		},
+		{
+			{1, 191, 251, 255, 255, 128, 128, 128, 128, 128, 128},
+			{223, 165, 249, 255, 213, 255, 128, 128, 128, 128, 128},
+			{141, 124, 248, 255, 255, 128, 128, 128, 128, 128, 128},
+		},
+		{
+			{1, 16, 248, 255, 255, 128, 128, 128, 128, 128, 128},
+			{190, 36, 230, 255, 236, 255, 128, 128, 128, 128, 128},
+			{149, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128},
+		},
+		{
+			{1, 226, 255, 128, 128, 128, 128, 128, 128, 128, 128},
+			{247, 192, 255, 128, 128, 128, 128, 128, 128, 128, 128},
+			{240, 128, 255, 128, 128, 128, 128, 128, 128, 128, 128},
+		},
+		{
+			{1, 134, 252, 255, 255, 128, 128, 128, 128, 128, 128},
+			{213, 62, 250, 255, 255, 128, 128, 128, 128, 128, 128},
+			{55, 93, 255, 128, 128, 128, 128, 128, 128, 128, 128},
+		},
+		{
+			{128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
+			{128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
+			{128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128},
+		},
+	},
+	{
+		{
+			{202, 24, 213, 235, 186, 191, 220, 160, 240, 175, 255},
+			{126, 38, 182, 232, 169, 184, 228, 174, 255, 187, 128},
+			{61, 46, 138, 219, 151, 178, 240, 170, 255, 216, 128},
+		},
+		{
+			{1, 112, 230, 250, 199, 191, 247, 159, 255, 255, 128},
+			{166, 109, 228, 252, 211, 215, 255, 174, 128, 128, 128},
+			{39, 77, 162, 232, 172, 180, 245, 178, 255, 255, 128},
+		},
+		{
+			{1, 52, 220, 246, 198, 199, 249, 220, 255, 255, 128},
+			{124, 74, 191, 243, 183, 193, 250, 221, 255, 255, 128},
+			{24, 71, 130, 219, 154, 170, 243, 182, 255, 255, 128},
+		},
+		{
+			{1, 182, 225, 249, 219, 240, 255, 224, 128, 128, 128},
+			{149, 150, 226, 252, 216, 205, 255, 171, 128, 128, 128},
+			{28, 108, 170, 242, 183, 194, 254, 223, 255, 255, 128},
+		},
+		{
+			{1, 81, 230, 252, 204, 203, 255, 192, 128, 128, 128},
+			{123, 102, 209, 247, 188, 196, 255, 233, 128, 128, 128},
+			{20, 95, 153, 243, 164, 173, 255, 203, 128, 128, 128},
+		},
+		{
+			{1, 222, 248, 255, 216, 213, 128, 128, 128, 128, 128},
+			{168, 175, 246, 252, 235, 205, 255, 255, 128, 128, 128},
+			{47, 116, 215, 255, 211, 212, 255, 255, 128, 128, 128},
+		},
+		{
+			{1, 121, 236, 253, 212, 214, 255, 255, 128, 128, 128},
+			{141, 84, 213, 252, 201, 202, 255, 219, 128, 128, 128},
+			{42, 80, 160, 240, 162, 185, 255, 205, 128, 128, 128},
+		},
+		{
+			{1, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128},
+			{244, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128},
+			{238, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128},
+		},
+	},
+}
diff --git a/webp/decode.go b/webp/decode.go
new file mode 100644
index 0000000..e58cb53
--- /dev/null
+++ b/webp/decode.go
@@ -0,0 +1,75 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package webp implements a decoder for WEBP images.
+//
+// The WEBP container is specified at https://developers.google.com/speed/webp/docs/riff_container
+// and the VP8 bitstream it carries at http://datatracker.ietf.org/doc/rfc6386/
+package webp
+
+import (
+	"errors"
+	"image"
+	"image/color"
+	"io"
+
+	"code.google.com/p/go.image/vp8"
+)
+
+// decode reads the 20-byte RIFF header from r, validates it, and returns a
+// VP8 decoder initialized over the chunk payload along with the decoded frame
+// header. On any failure it returns a nil decoder and a zero FrameHeader.
+func decode(r io.Reader) (d *vp8.Decoder, fh vp8.FrameHeader, err error) {
+	var hdr [20]byte
+	if _, err = io.ReadFull(r, hdr[:]); err != nil {
+		return nil, vp8.FrameHeader{}, err
+	}
+	// u32 assembles the little-endian 32-bit value stored at hdr[i:i+4].
+	u32 := func(i int) uint32 {
+		return uint32(hdr[i]) | uint32(hdr[i+1])<<8 | uint32(hdr[i+2])<<16 | uint32(hdr[i+3])<<24
+	}
+	riffLen, dataLen := u32(4), u32(16)
+	switch {
+	case string(hdr[:4]) != "RIFF", string(hdr[8:16]) != "WEBPVP8 ":
+		return nil, vp8.FrameHeader{}, errors.New("webp: invalid format")
+	case riffLen < dataLen+12, dataLen >= 1<<31:
+		// riffLen has to cover the 12 header bytes after it plus the payload,
+		// and dataLen is converted to int below, so it must stay under 1<<31.
+		return nil, vp8.FrameHeader{}, errors.New("webp: invalid format")
+	}
+	dec := vp8.NewDecoder()
+	dec.Init(r, int(dataLen))
+	if fh, err = dec.DecodeFrameHeader(); err != nil {
+		return nil, vp8.FrameHeader{}, err
+	}
+	return dec, fh, nil
+}
+
+// Decode parses the WEBP header in r via decode, then decodes and returns the frame.
+func Decode(r io.Reader) (image.Image, error) {
+	dec, _, err := decode(r)
+	if err != nil {
+		return nil, err
+	}
+	return dec.DecodeFrame()
+}
+
+// DecodeConfig returns the color model and dimensions of a WEBP image without
+// decoding the entire image.
+func DecodeConfig(r io.Reader) (image.Config, error) {
+	_, fh, err := decode(r)
+	if err != nil {
+		return image.Config{}, err
+	}
+	c := image.Config{
+		ColorModel: color.YCbCrModel,
+		Width:      fh.Width,
+		Height:     fh.Height,
+	}
+	return c, nil
+}
+
+func init() { // registers this package's decoder with package image under the name "webp"
+	image.RegisterFormat("webp", "RIFF????WEBPVP8 ", Decode, DecodeConfig) // each '?' in the magic string is a one-byte wildcard (here, the RIFF length field)
+}