Add hBase example

2013-05-09 18:33:38 -05:00 · 2013-05-09 18:33:38 -05:00 · ee6f6aa6cd
--- a/examples/src/main/scala/spark/examples/HBaseTest.scala
+++ b/examples/src/main/scala/spark/examples/HBaseTest.scala
@ -0,0 +1,35 @@
+package spark.examples
+
+import spark._
+import spark.rdd.NewHadoopRDD
+import org.apache.hadoop.hbase.{HBaseConfiguration, HTableDescriptor}
+import org.apache.hadoop.hbase.client.HBaseAdmin
+import org.apache.hadoop.hbase.mapreduce.TableInputFormat
+
+object HBaseTest {
+  def main(args: Array[String]) {
+    val sc = new SparkContext(args(0), "HBaseTest",
+      System.getenv("SPARK_HOME"), Seq(System.getenv("SPARK_EXAMPLES_JAR")))
+
+    val conf = HBaseConfiguration.create()
+
+    // Other options for configuring scan behavior are available. More information available at 
+    // http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/mapreduce/TableInputFormat.html
+    conf.set(TableInputFormat.INPUT_TABLE, args(1))
+
+    // Initialize hBase table if necessary
+    val admin = new HBaseAdmin(conf)
+    if(!admin.isTableAvailable(args(1))) {
+      val tableDesc = new HTableDescriptor(args(1))
+      admin.createTable(tableDesc)
+    }
+
+    val hBaseRDD = sc.newAPIHadoopRDD(conf, classOf[TableInputFormat], 
+      classOf[org.apache.hadoop.hbase.io.ImmutableBytesWritable],
+      classOf[org.apache.hadoop.hbase.client.Result])
+
+    hBaseRDD.count()
+
+    System.exit(0)
+  }
+}
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@ -200,7 +200,11 @@ object SparkBuild extends Build {

  def examplesSettings = sharedSettings ++ Seq(
    name := "spark-examples",
-    libraryDependencies ++= Seq("com.twitter" % "algebird-core_2.9.2" % "0.1.11")
+    resolvers ++= Seq("Apache HBase" at "https://repository.apache.org/content/repositories/releases"),
+    libraryDependencies ++= Seq(
+      "com.twitter" % "algebird-core_2.9.2" % "0.1.11",
+      "org.apache.hbase" % "hbase" % "0.94.6" excludeAll(excludeNetty)
+    )
  )

  def bagelSettings = sharedSettings ++ Seq(name := "spark-bagel")