Mirror of https://github.com/microsoft/spark.git

For size compression, compress non-zero values into non-zero values.

Parent: cd16eab0db
Commit: f24bfd2dd1
core/src/main/scala/spark/MapOutputTracker.scala

@@ -2,6 +2,10 @@ package spark
 
 import java.io._
 import java.util.concurrent.ConcurrentHashMap
+import java.util.zip.{GZIPInputStream, GZIPOutputStream}
+
+import scala.collection.mutable.HashMap
+import scala.collection.mutable.HashSet
 
 import akka.actor._
 import akka.dispatch._

@@ -11,12 +15,9 @@ import akka.util.Duration
 import akka.util.Timeout
 import akka.util.duration._
 
-import scala.collection.mutable.HashMap
-import scala.collection.mutable.HashSet
-
-import scheduler.MapStatus
+import spark.scheduler.MapStatus
 import spark.storage.BlockManagerId
-import java.util.zip.{GZIPInputStream, GZIPOutputStream}
+
 
 private[spark] sealed trait MapOutputTrackerMessage
 private[spark] case class GetMapOutputStatuses(shuffleId: Int, requester: String)

@@ -254,8 +255,10 @@ private[spark] object MapOutputTracker {
    * sizes up to 35 GB with at most 10% error.
    */
   def compressSize(size: Long): Byte = {
-    if (size <= 1L) {
+    if (size == 0) {
       0
+    } else if (size <= 1L) {
+      1
     } else {
       math.min(255, math.ceil(math.log(size) / math.log(LOG_BASE)).toInt).toByte
     }

@@ -266,7 +269,7 @@ private[spark] object MapOutputTracker {
    */
   def decompressSize(compressedSize: Byte): Long = {
     if (compressedSize == 0) {
-      1
+      0
     } else {
       math.pow(LOG_BASE, (compressedSize & 0xFF)).toLong
     }
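Before this commit, compressSize mapped both 0 and 1 to byte 0, and decompressSize(0) returned 1, so an empty map output was indistinguishable from a 1-byte one. After it, 0 round-trips to 0 and sizes in (0, 1] encode to 1. Pulled out of diff context, the scheme reads more easily as a standalone snippet. The following is a minimal, runnable sketch: the two method bodies match the diff above, while the enclosing object SizeCodec, the demo main, and the LOG_BASE value of 1.1 (Spark's actual constant, also implied by the test expectations below) are illustrative assumptions, not part of the commit.

object SizeCodec {
  private val LOG_BASE = 1.1 // assumed; mirrors MapOutputTracker.LOG_BASE

  // 0 stays 0 (reserved for genuinely empty outputs); anything in (0, 1]
  // becomes 1; larger sizes become ceil(log_1.1(size)), capped at 255 so
  // the result fits in one unsigned byte.
  def compressSize(size: Long): Byte = {
    if (size == 0) {
      0
    } else if (size <= 1L) {
      1
    } else {
      math.min(255, math.ceil(math.log(size) / math.log(LOG_BASE)).toInt).toByte
    }
  }

  // 0 decodes back to 0; any other byte n (read as unsigned) decodes to
  // 1.1^n, overshooting the true size by at most ~10%.
  def decompressSize(compressedSize: Byte): Long = {
    if (compressedSize == 0) 0L
    else math.pow(LOG_BASE, compressedSize & 0xFF).toLong
  }

  def main(args: Array[String]): Unit = {
    for (size <- Seq(0L, 1L, 2L, 1000000L)) {
      val c = compressSize(size)
      println(s"$size -> byte ${c & 0xFF} -> ${decompressSize(c)}")
    }
    // Prints: 0 -> byte 0 -> 0, 1 -> byte 1 -> 1, 2 -> byte 8 -> 2,
    // and 1000000 -> byte 145 -> ~1.0e6.
  }
}

Reserving byte 0 for empty outputs costs one exponent slot but lets callers tell an empty block apart from a tiny one from the compressed size alone.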
core/src/test/scala/spark/MapOutputTrackerSuite.scala

@@ -5,7 +5,7 @@ import org.scalatest.FunSuite
 class MapOutputTrackerSuite extends FunSuite {
   test("compressSize") {
     assert(MapOutputTracker.compressSize(0L) === 0)
-    assert(MapOutputTracker.compressSize(1L) === 0)
+    assert(MapOutputTracker.compressSize(1L) === 1)
     assert(MapOutputTracker.compressSize(2L) === 8)
     assert(MapOutputTracker.compressSize(10L) === 25)
     assert((MapOutputTracker.compressSize(1000000L) & 0xFF) === 145)

@@ -15,7 +15,7 @@ class MapOutputTrackerSuite extends FunSuite {
   }
 
   test("decompressSize") {
-    assert(MapOutputTracker.decompressSize(0) === 1)
+    assert(MapOutputTracker.decompressSize(0) === 0)
     for (size <- Seq(2L, 10L, 100L, 50000L, 1000000L, 1000000000L)) {
       val size2 = MapOutputTracker.decompressSize(MapOutputTracker.compressSize(size))
       assert(size2 >= 0.99 * size && size2 <= 1.11 * size,
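The updated assertions pin the new fixed points (compress 0 -> 0 and 1 -> 1, decompress 0 -> 0) and keep the round-trip bound; its 1.11 upper factor leaves room for the ~10% encoding error plus truncation in toLong. As a back-of-the-envelope check of the "sizes up to 35 GB with at most 10% error" comment in the hunk above, a sketch, again assuming LOG_BASE = 1.1:

object SizeCodecBounds {
  def main(args: Array[String]): Unit = {
    val LOG_BASE = 1.1 // assumed, as above
    // Largest encodable size: the top exponent, 255, in base 1.1.
    val maxSize = math.pow(LOG_BASE, 255)
    println(f"max size ≈ $maxSize%.2e bytes") // ≈ 3.59e10 bytes ≈ 35 GB
    // Worst-case error: a size just above 1.1^(n-1) is rounded up to
    // 1.1^n, so the decoded value overshoots by at most a factor of 1.1.
    println(f"max overestimate ≈ ${(LOG_BASE - 1) * 100}%.0f%%")
  }
}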