Mirror of https://github.com/microsoft/spark.git
Merge pull request #112 from cengle/master
Changed HadoopRDD to get key and value containers from the RecordReader instead of through reflection
Commit a5e2b6a6bd
@@ -60,18 +60,6 @@ class HadoopRDD[K, V](
       .asInstanceOf[InputFormat[K, V]]
   }
 
-  /**
-   * Helper method for creating a Hadoop Writable, because the commonly used NullWritable class has
-   * no constructor.
-   */
-  def createWritable[T](clazz: Class[T]): T = {
-    if (clazz == classOf[NullWritable]) {
-      NullWritable.get().asInstanceOf[T]
-    } else {
-      clazz.newInstance()
-    }
-  }
-
   override def splits = splits_
 
   override def compute(theSplit: Split) = new Iterator[(K, V)] {
@@ -82,8 +70,8 @@ class HadoopRDD[K, V](
     val fmt = createInputFormat(conf)
     reader = fmt.getRecordReader(split.inputSplit.value, conf, Reporter.NULL)
 
-    val key: K = createWritable(keyClass)
-    val value: V = createWritable(valueClass)
+    val key: K = reader.createKey()
+    val value: V = reader.createValue()
     var gotNext = false
     var finished = false
 
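For context, the sketch below (not part of the commit) contrasts the two approaches against Hadoop's old "mapred" API, which is what HadoopRDD uses here. The object name, the main method, and the use of TextInputFormat on a path from args(0) are illustrative choices, not code from this repository. The point of the change: plain reflection via clazz.newInstance() fails for NullWritable, whose constructor is private, so the removed helper needed a special case; RecordReader.createKey() and createValue() let the input format hand back correctly typed containers with no reflection at all.

// Standalone sketch (hypothetical example, not part of the commit).
import org.apache.hadoop.fs.Path
import org.apache.hadoop.io.{LongWritable, NullWritable, Text}
import org.apache.hadoop.mapred.{FileInputFormat, JobConf, Reporter, TextInputFormat}

object RecordReaderSketch {
  // The removed helper: reflection needs a special case because
  // NullWritable's constructor is private; it is only reachable
  // through the NullWritable.get() singleton.
  def createWritable[T](clazz: Class[T]): T = {
    if (clazz == classOf[NullWritable]) {
      NullWritable.get().asInstanceOf[T]
    } else {
      clazz.newInstance()
    }
  }

  def main(args: Array[String]): Unit = {
    // Read one split of a text file whose path is given as args(0).
    val conf = new JobConf()
    FileInputFormat.setInputPaths(conf, new Path(args(0)))
    val fmt = new TextInputFormat()
    fmt.configure(conf)
    val split = fmt.getSplits(conf, 1)(0)
    val reader = fmt.getRecordReader(split, conf, Reporter.NULL)

    // After the change: the reader supplies reusable, correctly typed
    // key/value containers directly, with no reflection and no
    // NullWritable special case.
    val key: LongWritable = reader.createKey()
    val value: Text = reader.createValue()
    while (reader.next(key, value)) {
      println(s"${key.get}: $value")
    }
    reader.close()
  }
}

A side benefit of the RecordReader route is that the input format, not the caller, decides how key and value instances are constructed, so formats whose types have non-trivial construction keep working without HadoopRDD knowing about them.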