Renamed ML package to MLlib and added it to classpath

2013-07-05 11:38:53 -07:00 · 2013-07-05 11:38:53 -07:00 · 43b24635ee
--- a/bin/compute-classpath.cmd
+++ b/bin/compute-classpath.cmd
@ -15,6 +15,7 @@ set CORE_DIR=%FWDIR%core
 set REPL_DIR=%FWDIR%repl
 set EXAMPLES_DIR=%FWDIR%examples
 set BAGEL_DIR=%FWDIR%bagel
 set MLLIB_DIR=%FWDIR%mllib
 set STREAMING_DIR=%FWDIR%streaming
 set PYSPARK_DIR=%FWDIR%python
@ -29,6 +30,7 @@ set CLASSPATH=%CLASSPATH%;%FWDIR%lib_managed\bundles\*
 set CLASSPATH=%CLASSPATH%;%FWDIR%repl\lib\*
 set CLASSPATH=%CLASSPATH%;%FWDIR%python\lib\*
 set CLASSPATH=%CLASSPATH%;%BAGEL_DIR%\target\scala-%SCALA_VERSION%\classes
 set CLASSPATH=%CLASSPATH%;%MLLIB_DIR%\target\scala-%SCALA_VERSION%\classes
 rem Add hadoop conf dir - else FileSystem.*, etc fail
 rem Note, this assumes that there is either a HADOOP_CONF_DIR or YARN_CONF_DIR which hosts
--- a/bin/compute-classpath.sh
+++ b/bin/compute-classpath.sh
@ -18,6 +18,7 @@ REPL_DIR="$FWDIR/repl"
 REPL_BIN_DIR="$FWDIR/repl-bin"
 EXAMPLES_DIR="$FWDIR/examples"
 BAGEL_DIR="$FWDIR/bagel"
 MLLIB_DIR="$FWDIR/mllib"
 STREAMING_DIR="$FWDIR/streaming"
 PYSPARK_DIR="$FWDIR/python"
@ -49,6 +50,7 @@ if [ -e $REPL_BIN_DIR/target ]; then
  CLASSPATH+=":$EXAMPLES_JAR"
 fi
 CLASSPATH="$CLASSPATH:$BAGEL_DIR/target/scala-$SCALA_VERSION/classes"
 CLASSPATH="$CLASSPATH:$MLLIB_DIR/target/scala-$SCALA_VERSION/classes"
 for jar in `find $PYSPARK_DIR/lib -name '*jar'`; do
  CLASSPATH="$CLASSPATH:$jar"
 done
--- a/mllib/data/als/test.data
+++ b/mllib/data/als/test.data
--- a/mllib/data/lr-data/random.data
+++ b/mllib/data/lr-data/random.data
--- a/mllib/data/ridge-data/lpsa.data
+++ b/mllib/data/ridge-data/lpsa.data
--- a/mllib/src/main/scala/spark/ml/clustering/KMeans.scala
+++ b/mllib/src/main/scala/spark/ml/clustering/KMeans.scala
@ -1,4 +1,4 @@
-package spark.ml.clustering
+package spark.mllib.clustering
 import scala.collection.mutable.ArrayBuffer
 import scala.util.Random
@ -6,7 +6,7 @@ import scala.util.Random
 import spark.{SparkContext, RDD}
 import spark.SparkContext._
 import spark.Logging
-import spark.ml.util.MLUtils
+import spark.mllib.util.MLUtils
 import org.jblas.DoubleMatrix
@ -270,7 +270,8 @@ object KMeans {
  /**
   * Return the index of the closest point in `centers` to `point`, as well as its distance.
   */
-  private[ml] def findClosest(centers: Array[Array[Double]], point: Array[Double]): (Int, Double) =
+  private[mllib] def findClosest(centers: Array[Array[Double]], point: Array[Double])
    : (Int, Double) =
  {
    var bestDistance = Double.PositiveInfinity
    var bestIndex = 0
@ -287,7 +288,7 @@ object KMeans {
  /**
   * Return the K-means cost of a given point against the given cluster centers.
   */
-  private[ml] def pointCost(centers: Array[Array[Double]], point: Array[Double]): Double = {
+  private[mllib] def pointCost(centers: Array[Array[Double]], point: Array[Double]): Double = {
    var bestDistance = Double.PositiveInfinity
    for (i <- 0 until centers.length) {
      val distance = MLUtils.squaredDistance(point, centers(i))
--- a/mllib/src/main/scala/spark/ml/clustering/KMeansModel.scala
+++ b/mllib/src/main/scala/spark/ml/clustering/KMeansModel.scala
@ -1,8 +1,8 @@
-package spark.ml.clustering
+package spark.mllib.clustering
 import spark.RDD
 import spark.SparkContext._
-import spark.ml.util.MLUtils
+import spark.mllib.util.MLUtils
 /**
--- a/mllib/src/main/scala/spark/ml/clustering/LocalKMeans.scala
+++ b/mllib/src/main/scala/spark/ml/clustering/LocalKMeans.scala
@ -1,4 +1,4 @@
-package spark.ml.clustering
+package spark.mllib.clustering
 import scala.util.Random
@ -8,7 +8,7 @@ import org.jblas.{DoubleMatrix, SimpleBlas}
 * A utility object to run K-means locally. This is private to the ML package because it's used
 * in the initialization of KMeans but not meant to be publicly exposed.
 */
-private[ml] object LocalKMeans {
+private[mllib] object LocalKMeans {
  /**
   * Run K-means++ on the weighted point set `points`. This first does the K-means++
   * initialization procedure and then rounds of Lloyd's algorithm.
--- a/mllib/src/main/scala/spark/ml/optimization/Gradient.scala
+++ b/mllib/src/main/scala/spark/ml/optimization/Gradient.scala
@ -1,4 +1,4 @@
-package spark.ml.optimization
+package spark.mllib.optimization
 import org.jblas.DoubleMatrix
--- a/mllib/src/main/scala/spark/ml/optimization/GradientDescent.scala
+++ b/mllib/src/main/scala/spark/ml/optimization/GradientDescent.scala
@ -1,4 +1,4 @@
-package spark.ml.optimization
+package spark.mllib.optimization
 import spark.{Logging, RDD, SparkContext}
 import spark.SparkContext._
--- a/mllib/src/main/scala/spark/ml/optimization/Updater.scala
+++ b/mllib/src/main/scala/spark/ml/optimization/Updater.scala
@ -1,4 +1,4 @@
-package spark.ml.optimization
+package spark.mllib.optimization
 import org.jblas.DoubleMatrix
--- a/mllib/src/main/scala/spark/ml/recommendation/ALS.scala
+++ b/mllib/src/main/scala/spark/ml/recommendation/ALS.scala
@ -1,4 +1,4 @@
-package spark.ml.recommendation
+package spark.mllib.recommendation
 import scala.collection.mutable.{ArrayBuffer, BitSet}
 import scala.util.Random
--- a/mllib/src/main/scala/spark/ml/recommendation/MatrixFactorizationModel.scala
+++ b/mllib/src/main/scala/spark/ml/recommendation/MatrixFactorizationModel.scala
@ -1,4 +1,4 @@
-package spark.ml.recommendation
+package spark.mllib.recommendation
 import spark.RDD
 import spark.SparkContext._
--- a/mllib/src/main/scala/spark/ml/regression/LogisticRegression.scala
+++ b/mllib/src/main/scala/spark/ml/regression/LogisticRegression.scala
@ -1,8 +1,8 @@
-package spark.ml.regression
+package spark.mllib.regression
 import spark.{Logging, RDD, SparkContext}
-import spark.ml.optimization._
+import spark.mllib.optimization._
-import spark.ml.util.MLUtils
+import spark.mllib.util.MLUtils
 import org.jblas.DoubleMatrix
--- a/mllib/src/main/scala/spark/ml/regression/LogisticRegressionGenerator.scala
+++ b/mllib/src/main/scala/spark/ml/regression/LogisticRegressionGenerator.scala
@ -1,11 +1,11 @@
-package spark.ml.regression
+package spark.mllib.regression
 import scala.util.Random
 import org.jblas.DoubleMatrix
 import spark.{RDD, SparkContext}
-import spark.ml.util.MLUtils
+import spark.mllib.util.MLUtils
 object LogisticRegressionGenerator {
--- a/mllib/src/main/scala/spark/ml/regression/Regression.scala
+++ b/mllib/src/main/scala/spark/ml/regression/Regression.scala
@ -1,4 +1,4 @@
-package spark.ml.regression
+package spark.mllib.regression
 import spark.RDD
--- a/mllib/src/main/scala/spark/ml/regression/RidgeRegression.scala
+++ b/mllib/src/main/scala/spark/ml/regression/RidgeRegression.scala
@ -1,8 +1,8 @@
-package spark.ml.regression
+package spark.mllib.regression
 import spark.{Logging, RDD, SparkContext}
 import spark.SparkContext._
-import spark.ml.util.MLUtils
+import spark.mllib.util.MLUtils
 import org.jblas.DoubleMatrix
 import org.jblas.Solve
--- a/mllib/src/main/scala/spark/ml/regression/RidgeRegressionGenerator.scala
+++ b/mllib/src/main/scala/spark/ml/regression/RidgeRegressionGenerator.scala
@ -1,11 +1,11 @@
-package spark.ml.regression
+package spark.mllib.regression
 import scala.util.Random
 import org.jblas.DoubleMatrix
 import spark.{RDD, SparkContext}
-import spark.ml.util.MLUtils
+import spark.mllib.util.MLUtils
 object RidgeRegressionGenerator {
--- a/mllib/src/main/scala/spark/ml/util/MLUtils.scala
+++ b/mllib/src/main/scala/spark/ml/util/MLUtils.scala
@ -1,4 +1,4 @@
-package spark.ml.util
+package spark.mllib.util
 import spark.{RDD, SparkContext}
 import spark.SparkContext._
--- a/mllib/src/test/resources/log4j.properties
+++ b/mllib/src/test/resources/log4j.properties
--- a/mllib/src/test/scala/spark/ml/clustering/KMeansSuite.scala
+++ b/mllib/src/test/scala/spark/ml/clustering/KMeansSuite.scala
@ -1,4 +1,4 @@
-package spark.ml.clustering
+package spark.mllib.clustering
 import scala.util.Random
--- a/mllib/src/test/scala/spark/ml/recommendation/ALSSuite.scala
+++ b/mllib/src/test/scala/spark/ml/recommendation/ALSSuite.scala
@ -1,4 +1,4 @@
-package spark.ml.recommendation
+package spark.mllib.recommendation
 import scala.util.Random
--- a/mllib/src/test/scala/spark/ml/regression/LogisticRegressionSuite.scala
+++ b/mllib/src/test/scala/spark/ml/regression/LogisticRegressionSuite.scala
@ -1,4 +1,4 @@
-package spark.ml.regression
+package spark.mllib.regression
 import scala.util.Random
--- a/mllib/src/test/scala/spark/ml/regression/RidgeRegressionSuite.scala
+++ b/mllib/src/test/scala/spark/ml/regression/RidgeRegressionSuite.scala
@ -1,4 +1,4 @@
-package spark.ml.regression
+package spark.mllib.regression
 import scala.util.Random
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@ -25,7 +25,7 @@ object SparkBuild extends Build {
  //val HADOOP_MAJOR_VERSION = "2"
  //val HADOOP_YARN = true
-  lazy val root = Project("root", file("."), settings = rootSettings) aggregate(core, repl, examples, bagel, streaming, ml)
+  lazy val root = Project("root", file("."), settings = rootSettings) aggregate(core, repl, examples, bagel, streaming, mllib)
  lazy val core = Project("core", file("core"), settings = coreSettings)
@ -37,7 +37,7 @@ object SparkBuild extends Build {
  lazy val streaming = Project("streaming", file("streaming"), settings = streamingSettings) dependsOn (core)
-  lazy val ml = Project("ml", file("ml"), settings = mlSettings) dependsOn (core)
+  lazy val mllib = Project("mllib", file("mllib"), settings = mllibSettings) dependsOn (core)
  // A configuration to set an alternative publishLocalConfiguration
  lazy val MavenCompile = config("m2r") extend(Compile)
@ -221,8 +221,8 @@ object SparkBuild extends Build {
  def bagelSettings = sharedSettings ++ Seq(name := "spark-bagel")
-  def mlSettings = sharedSettings ++ Seq(
+  def mllibSettings = sharedSettings ++ Seq(
-    name := "spark-ml",
+    name := "spark-mllib",
    libraryDependencies ++= Seq(
      "org.jblas" % "jblas" % "1.2.3"
    )
`@ -1,4 +1,4 @@`
	`package spark.ml.optimization`	`package spark.mllib.optimization`

	`import org.jblas.DoubleMatrix`	`import org.jblas.DoubleMatrix`
`@ -1,4 +1,4 @@`
	`package spark.ml.regression`	`package spark.mllib.regression`

	`import spark.RDD`	`import spark.RDD`
`@ -1,4 +1,4 @@`
	`package spark.ml.clustering`	`package spark.mllib.clustering`

	`import scala.util.Random`	`import scala.util.Random`
`@ -1,4 +1,4 @@`
	`package spark.ml.recommendation`	`package spark.mllib.recommendation`

	`import scala.util.Random`	`import scala.util.Random`