Mirror of https://github.com/microsoft/spark.git
Renamed ML package to MLlib and added it to classpath
This commit is contained in:
Parent: 399bd65ef5
Commit: 43b24635ee
@@ -15,6 +15,7 @@ set CORE_DIR=%FWDIR%core
 set REPL_DIR=%FWDIR%repl
 set EXAMPLES_DIR=%FWDIR%examples
 set BAGEL_DIR=%FWDIR%bagel
+set MLLIB_DIR=%FWDIR%mllib
 set STREAMING_DIR=%FWDIR%streaming
 set PYSPARK_DIR=%FWDIR%python

@@ -29,6 +30,7 @@ set CLASSPATH=%CLASSPATH%;%FWDIR%lib_managed\bundles\*
 set CLASSPATH=%CLASSPATH%;%FWDIR%repl\lib\*
 set CLASSPATH=%CLASSPATH%;%FWDIR%python\lib\*
 set CLASSPATH=%CLASSPATH%;%BAGEL_DIR%\target\scala-%SCALA_VERSION%\classes
+set CLASSPATH=%CLASSPATH%;%MLLIB_DIR%\target\scala-%SCALA_VERSION%\classes

 rem Add hadoop conf dir - else FileSystem.*, etc fail
 rem Note, this assumes that there is either a HADOOP_CONF_DIR or YARN_CONF_DIR which hosts

@@ -18,6 +18,7 @@ REPL_DIR="$FWDIR/repl"
 REPL_BIN_DIR="$FWDIR/repl-bin"
 EXAMPLES_DIR="$FWDIR/examples"
 BAGEL_DIR="$FWDIR/bagel"
+MLLIB_DIR="$FWDIR/mllib"
 STREAMING_DIR="$FWDIR/streaming"
 PYSPARK_DIR="$FWDIR/python"

@@ -49,6 +50,7 @@ if [ -e $REPL_BIN_DIR/target ]; then
     CLASSPATH+=":$EXAMPLES_JAR"
 fi
 CLASSPATH="$CLASSPATH:$BAGEL_DIR/target/scala-$SCALA_VERSION/classes"
+CLASSPATH="$CLASSPATH:$MLLIB_DIR/target/scala-$SCALA_VERSION/classes"
 for jar in `find $PYSPARK_DIR/lib -name '*jar'`; do
   CLASSPATH="$CLASSPATH:$jar"
 done

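Both scripts append the compiled mllib classes directory to the runtime classpath. As a quick sanity check (a hypothetical snippet, not part of this commit), a class from the renamed package should resolve once the scripts are updated:

// Hypothetical check, not part of the commit: fails with
// ClassNotFoundException if mllib/target/scala-<version>/classes
// is missing from the classpath.
object MLlibClasspathCheck {
  def main(args: Array[String]) {
    val clazz = Class.forName("spark.mllib.clustering.KMeans")
    println("Found " + clazz.getName)
  }
}
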
@@ -1,4 +1,4 @@
-package spark.ml.clustering
+package spark.mllib.clustering

 import scala.collection.mutable.ArrayBuffer
 import scala.util.Random
@@ -6,7 +6,7 @@ import scala.util.Random
 import spark.{SparkContext, RDD}
 import spark.SparkContext._
 import spark.Logging
-import spark.ml.util.MLUtils
+import spark.mllib.util.MLUtils

 import org.jblas.DoubleMatrix

@@ -270,7 +270,8 @@ object KMeans {
   /**
    * Return the index of the closest point in `centers` to `point`, as well as its distance.
    */
-  private[ml] def findClosest(centers: Array[Array[Double]], point: Array[Double]): (Int, Double) =
+  private[mllib] def findClosest(centers: Array[Array[Double]], point: Array[Double])
+    : (Int, Double) =
   {
     var bestDistance = Double.PositiveInfinity
     var bestIndex = 0
@@ -287,7 +288,7 @@ object KMeans {
   /**
    * Return the K-means cost of a given point against the given cluster centers.
    */
-  private[ml] def pointCost(centers: Array[Array[Double]], point: Array[Double]): Double = {
+  private[mllib] def pointCost(centers: Array[Array[Double]], point: Array[Double]): Double = {
     var bestDistance = Double.PositiveInfinity
     for (i <- 0 until centers.length) {
       val distance = MLUtils.squaredDistance(point, centers(i))
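Both helpers do a linear scan over `centers`, comparing squared Euclidean distances computed by MLUtils.squaredDistance (now in spark.mllib.util). A minimal sketch of that distance, assuming the conventional sum-of-squared-differences definition:

// Sketch only, assuming the conventional definition; the real
// implementation lives in spark.mllib.util.MLUtils.
def squaredDistance(v1: Array[Double], v2: Array[Double]): Double = {
  var sum = 0.0
  var i = 0
  while (i < v1.length) {
    val d = v1(i) - v2(i)
    sum += d * d
    i += 1
  }
  sum
}
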
@@ -1,8 +1,8 @@
-package spark.ml.clustering
+package spark.mllib.clustering

 import spark.RDD
 import spark.SparkContext._
-import spark.ml.util.MLUtils
+import spark.mllib.util.MLUtils


 /**
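For user code, only the package prefix changes; for example:

// Before this commit:
//   import spark.ml.clustering.KMeans
// After:
import spark.mllib.clustering.KMeans
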
@@ -1,4 +1,4 @@
-package spark.ml.clustering
+package spark.mllib.clustering

 import scala.util.Random

@@ -8,7 +8,7 @@ import org.jblas.{DoubleMatrix, SimpleBlas}
  * A utility object to run K-means locally. This is private to the ML package because it's used
  * in the initialization of KMeans but not meant to be publicly exposed.
  */
-private[ml] object LocalKMeans {
+private[mllib] object LocalKMeans {
   /**
    * Run K-means++ on the weighted point set `points`. This first does the K-means++
    * initialization procedure and then runs rounds of Lloyd's algorithm.
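The K-means++ procedure referenced here picks each new center with probability proportional to its squared distance from the centers chosen so far, then refines with Lloyd iterations. A rough self-contained sketch of the seeding step (illustrative only; the actual LocalKMeans also handles per-point weights):

import scala.util.Random

object KMeansPlusPlusSketch {
  // Plain squared Euclidean distance, for the sketch only.
  private def squaredDistance(a: Array[Double], b: Array[Double]): Double = {
    var sum = 0.0
    var i = 0
    while (i < a.length) { val d = a(i) - b(i); sum += d * d; i += 1 }
    sum
  }

  // Choose k initial centers: the first uniformly at random, each later one
  // sampled with probability proportional to its current distance-squared cost.
  def init(points: Array[Array[Double]], k: Int, rand: Random): Array[Array[Double]] = {
    val centers = scala.collection.mutable.ArrayBuffer(points(rand.nextInt(points.length)))
    while (centers.length < k) {
      val costs = points.map(p => centers.map(c => squaredDistance(p, c)).min)
      var r = rand.nextDouble() * costs.sum
      var i = 0
      while (i < points.length - 1 && r >= costs(i)) { r -= costs(i); i += 1 }
      centers += points(i)
    }
    centers.toArray
  }
}
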
@@ -1,4 +1,4 @@
-package spark.ml.optimization
+package spark.mllib.optimization

 import org.jblas.DoubleMatrix

@@ -1,4 +1,4 @@
-package spark.ml.optimization
+package spark.mllib.optimization

 import spark.{Logging, RDD, SparkContext}
 import spark.SparkContext._
@@ -1,4 +1,4 @@
-package spark.ml.optimization
+package spark.mllib.optimization

 import org.jblas.DoubleMatrix

@@ -1,4 +1,4 @@
-package spark.ml.recommendation
+package spark.mllib.recommendation

 import scala.collection.mutable.{ArrayBuffer, BitSet}
 import scala.util.Random
@@ -1,4 +1,4 @@
-package spark.ml.recommendation
+package spark.mllib.recommendation

 import spark.RDD
 import spark.SparkContext._
@@ -1,8 +1,8 @@
-package spark.ml.regression
+package spark.mllib.regression

 import spark.{Logging, RDD, SparkContext}
-import spark.ml.optimization._
-import spark.ml.util.MLUtils
+import spark.mllib.optimization._
+import spark.mllib.util.MLUtils

 import org.jblas.DoubleMatrix

@@ -1,11 +1,11 @@
-package spark.ml.regression
+package spark.mllib.regression

 import scala.util.Random

 import org.jblas.DoubleMatrix

 import spark.{RDD, SparkContext}
-import spark.ml.util.MLUtils
+import spark.mllib.util.MLUtils

 object LogisticRegressionGenerator {

@@ -1,4 +1,4 @@
-package spark.ml.regression
+package spark.mllib.regression

 import spark.RDD

@@ -1,8 +1,8 @@
-package spark.ml.regression
+package spark.mllib.regression

 import spark.{Logging, RDD, SparkContext}
 import spark.SparkContext._
-import spark.ml.util.MLUtils
+import spark.mllib.util.MLUtils

 import org.jblas.DoubleMatrix
 import org.jblas.Solve
@@ -1,11 +1,11 @@
-package spark.ml.regression
+package spark.mllib.regression

 import scala.util.Random

 import org.jblas.DoubleMatrix

 import spark.{RDD, SparkContext}
-import spark.ml.util.MLUtils
+import spark.mllib.util.MLUtils


 object RidgeRegressionGenerator {
@@ -1,4 +1,4 @@
-package spark.ml.util
+package spark.mllib.util

 import spark.{RDD, SparkContext}
 import spark.SparkContext._
@@ -1,4 +1,4 @@
-package spark.ml.clustering
+package spark.mllib.clustering

 import scala.util.Random

@@ -1,4 +1,4 @@
-package spark.ml.recommendation
+package spark.mllib.recommendation

 import scala.util.Random

@@ -1,4 +1,4 @@
-package spark.ml.regression
+package spark.mllib.regression

 import scala.util.Random

@@ -1,4 +1,4 @@
-package spark.ml.regression
+package spark.mllib.regression

 import scala.util.Random

@@ -25,7 +25,7 @@ object SparkBuild extends Build {
   //val HADOOP_MAJOR_VERSION = "2"
   //val HADOOP_YARN = true

-  lazy val root = Project("root", file("."), settings = rootSettings) aggregate(core, repl, examples, bagel, streaming, ml)
+  lazy val root = Project("root", file("."), settings = rootSettings) aggregate(core, repl, examples, bagel, streaming, mllib)

   lazy val core = Project("core", file("core"), settings = coreSettings)
@@ -37,7 +37,7 @@ object SparkBuild extends Build {

   lazy val streaming = Project("streaming", file("streaming"), settings = streamingSettings) dependsOn (core)

-  lazy val ml = Project("ml", file("ml"), settings = mlSettings) dependsOn (core)
+  lazy val mllib = Project("mllib", file("mllib"), settings = mllibSettings) dependsOn (core)

   // A configuration to set an alternative publishLocalConfiguration
   lazy val MavenCompile = config("m2r") extend(Compile)
@@ -221,8 +221,8 @@ object SparkBuild extends Build {

   def bagelSettings = sharedSettings ++ Seq(name := "spark-bagel")

-  def mlSettings = sharedSettings ++ Seq(
-    name := "spark-ml",
+  def mllibSettings = sharedSettings ++ Seq(
+    name := "spark-mllib",
     libraryDependencies ++= Seq(
       "org.jblas" % "jblas" % "1.2.3"
     )
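With the project and its settings renamed, other modules in this build would reference the new identifier; for instance (a hypothetical module, not part of the commit):

// Hypothetical downstream sbt project in the same build file; "my-app"
// and its directory are illustrative only.
lazy val myApp = Project("my-app", file("my-app"), settings = sharedSettings) dependsOn (mllib)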