Renamed ML package to MLlib and added it to classpath

This commit is contained in:
Matei Zaharia 2013-07-05 11:38:53 -07:00
Родитель 399bd65ef5
Коммит 43b24635ee
25 изменённых файлов: 37 добавлений и 32 удалений

Просмотреть файл

@ -15,6 +15,7 @@ set CORE_DIR=%FWDIR%core
set REPL_DIR=%FWDIR%repl set REPL_DIR=%FWDIR%repl
set EXAMPLES_DIR=%FWDIR%examples set EXAMPLES_DIR=%FWDIR%examples
set BAGEL_DIR=%FWDIR%bagel set BAGEL_DIR=%FWDIR%bagel
set MLLIB_DIR=%FWDIR%mllib
set STREAMING_DIR=%FWDIR%streaming set STREAMING_DIR=%FWDIR%streaming
set PYSPARK_DIR=%FWDIR%python set PYSPARK_DIR=%FWDIR%python
@ -29,6 +30,7 @@ set CLASSPATH=%CLASSPATH%;%FWDIR%lib_managed\bundles\*
set CLASSPATH=%CLASSPATH%;%FWDIR%repl\lib\* set CLASSPATH=%CLASSPATH%;%FWDIR%repl\lib\*
set CLASSPATH=%CLASSPATH%;%FWDIR%python\lib\* set CLASSPATH=%CLASSPATH%;%FWDIR%python\lib\*
set CLASSPATH=%CLASSPATH%;%BAGEL_DIR%\target\scala-%SCALA_VERSION%\classes set CLASSPATH=%CLASSPATH%;%BAGEL_DIR%\target\scala-%SCALA_VERSION%\classes
set CLASSPATH=%CLASSPATH%;%MLLIB_DIR%\target\scala-%SCALA_VERSION%\classes
rem Add hadoop conf dir - else FileSystem.*, etc fail rem Add hadoop conf dir - else FileSystem.*, etc fail
rem Note, this assumes that there is either a HADOOP_CONF_DIR or YARN_CONF_DIR which hosts rem Note, this assumes that there is either a HADOOP_CONF_DIR or YARN_CONF_DIR which hosts

Просмотреть файл

@ -18,6 +18,7 @@ REPL_DIR="$FWDIR/repl"
REPL_BIN_DIR="$FWDIR/repl-bin" REPL_BIN_DIR="$FWDIR/repl-bin"
EXAMPLES_DIR="$FWDIR/examples" EXAMPLES_DIR="$FWDIR/examples"
BAGEL_DIR="$FWDIR/bagel" BAGEL_DIR="$FWDIR/bagel"
MLLIB_DIR="$FWDIR/mllib"
STREAMING_DIR="$FWDIR/streaming" STREAMING_DIR="$FWDIR/streaming"
PYSPARK_DIR="$FWDIR/python" PYSPARK_DIR="$FWDIR/python"
@ -49,6 +50,7 @@ if [ -e $REPL_BIN_DIR/target ]; then
CLASSPATH+=":$EXAMPLES_JAR" CLASSPATH+=":$EXAMPLES_JAR"
fi fi
CLASSPATH="$CLASSPATH:$BAGEL_DIR/target/scala-$SCALA_VERSION/classes" CLASSPATH="$CLASSPATH:$BAGEL_DIR/target/scala-$SCALA_VERSION/classes"
CLASSPATH="$CLASSPATH:$MLLIB_DIR/target/scala-$SCALA_VERSION/classes"
for jar in `find $PYSPARK_DIR/lib -name '*jar'`; do for jar in `find $PYSPARK_DIR/lib -name '*jar'`; do
CLASSPATH="$CLASSPATH:$jar" CLASSPATH="$CLASSPATH:$jar"
done done

Просмотреть файл

Просмотреть файл

Просмотреть файл

Просмотреть файл

@ -1,4 +1,4 @@
package spark.ml.clustering package spark.mllib.clustering
import scala.collection.mutable.ArrayBuffer import scala.collection.mutable.ArrayBuffer
import scala.util.Random import scala.util.Random
@ -6,7 +6,7 @@ import scala.util.Random
import spark.{SparkContext, RDD} import spark.{SparkContext, RDD}
import spark.SparkContext._ import spark.SparkContext._
import spark.Logging import spark.Logging
import spark.ml.util.MLUtils import spark.mllib.util.MLUtils
import org.jblas.DoubleMatrix import org.jblas.DoubleMatrix
@ -270,7 +270,8 @@ object KMeans {
/** /**
* Return the index of the closest point in `centers` to `point`, as well as its distance. * Return the index of the closest point in `centers` to `point`, as well as its distance.
*/ */
private[ml] def findClosest(centers: Array[Array[Double]], point: Array[Double]): (Int, Double) = private[mllib] def findClosest(centers: Array[Array[Double]], point: Array[Double])
: (Int, Double) =
{ {
var bestDistance = Double.PositiveInfinity var bestDistance = Double.PositiveInfinity
var bestIndex = 0 var bestIndex = 0
@ -287,7 +288,7 @@ object KMeans {
/** /**
* Return the K-means cost of a given point against the given cluster centers. * Return the K-means cost of a given point against the given cluster centers.
*/ */
private[ml] def pointCost(centers: Array[Array[Double]], point: Array[Double]): Double = { private[mllib] def pointCost(centers: Array[Array[Double]], point: Array[Double]): Double = {
var bestDistance = Double.PositiveInfinity var bestDistance = Double.PositiveInfinity
for (i <- 0 until centers.length) { for (i <- 0 until centers.length) {
val distance = MLUtils.squaredDistance(point, centers(i)) val distance = MLUtils.squaredDistance(point, centers(i))

Просмотреть файл

@ -1,8 +1,8 @@
package spark.ml.clustering package spark.mllib.clustering
import spark.RDD import spark.RDD
import spark.SparkContext._ import spark.SparkContext._
import spark.ml.util.MLUtils import spark.mllib.util.MLUtils
/** /**

Просмотреть файл

@ -1,4 +1,4 @@
package spark.ml.clustering package spark.mllib.clustering
import scala.util.Random import scala.util.Random
@ -8,7 +8,7 @@ import org.jblas.{DoubleMatrix, SimpleBlas}
* A utility object to run K-means locally. This is private to the ML package because it's used * A utility object to run K-means locally. This is private to the ML package because it's used
* in the initialization of KMeans but not meant to be publicly exposed. * in the initialization of KMeans but not meant to be publicly exposed.
*/ */
private[ml] object LocalKMeans { private[mllib] object LocalKMeans {
/** /**
* Run K-means++ on the weighted point set `points`. This first does the K-means++ * Run K-means++ on the weighted point set `points`. This first does the K-means++
* initialization procedure and then rounds of Lloyd's algorithm. * initialization procedure and then rounds of Lloyd's algorithm.

Просмотреть файл

@ -1,4 +1,4 @@
package spark.ml.optimization package spark.mllib.optimization
import org.jblas.DoubleMatrix import org.jblas.DoubleMatrix

Просмотреть файл

@ -1,4 +1,4 @@
package spark.ml.optimization package spark.mllib.optimization
import spark.{Logging, RDD, SparkContext} import spark.{Logging, RDD, SparkContext}
import spark.SparkContext._ import spark.SparkContext._

Просмотреть файл

@ -1,4 +1,4 @@
package spark.ml.optimization package spark.mllib.optimization
import org.jblas.DoubleMatrix import org.jblas.DoubleMatrix

Просмотреть файл

@ -1,4 +1,4 @@
package spark.ml.recommendation package spark.mllib.recommendation
import scala.collection.mutable.{ArrayBuffer, BitSet} import scala.collection.mutable.{ArrayBuffer, BitSet}
import scala.util.Random import scala.util.Random

Просмотреть файл

@ -1,4 +1,4 @@
package spark.ml.recommendation package spark.mllib.recommendation
import spark.RDD import spark.RDD
import spark.SparkContext._ import spark.SparkContext._

Просмотреть файл

@ -1,8 +1,8 @@
package spark.ml.regression package spark.mllib.regression
import spark.{Logging, RDD, SparkContext} import spark.{Logging, RDD, SparkContext}
import spark.ml.optimization._ import spark.mllib.optimization._
import spark.ml.util.MLUtils import spark.mllib.util.MLUtils
import org.jblas.DoubleMatrix import org.jblas.DoubleMatrix

Просмотреть файл

@ -1,11 +1,11 @@
package spark.ml.regression package spark.mllib.regression
import scala.util.Random import scala.util.Random
import org.jblas.DoubleMatrix import org.jblas.DoubleMatrix
import spark.{RDD, SparkContext} import spark.{RDD, SparkContext}
import spark.ml.util.MLUtils import spark.mllib.util.MLUtils
object LogisticRegressionGenerator { object LogisticRegressionGenerator {

Просмотреть файл

@ -1,4 +1,4 @@
package spark.ml.regression package spark.mllib.regression
import spark.RDD import spark.RDD

Просмотреть файл

@ -1,8 +1,8 @@
package spark.ml.regression package spark.mllib.regression
import spark.{Logging, RDD, SparkContext} import spark.{Logging, RDD, SparkContext}
import spark.SparkContext._ import spark.SparkContext._
import spark.ml.util.MLUtils import spark.mllib.util.MLUtils
import org.jblas.DoubleMatrix import org.jblas.DoubleMatrix
import org.jblas.Solve import org.jblas.Solve

Просмотреть файл

@ -1,11 +1,11 @@
package spark.ml.regression package spark.mllib.regression
import scala.util.Random import scala.util.Random
import org.jblas.DoubleMatrix import org.jblas.DoubleMatrix
import spark.{RDD, SparkContext} import spark.{RDD, SparkContext}
import spark.ml.util.MLUtils import spark.mllib.util.MLUtils
object RidgeRegressionGenerator { object RidgeRegressionGenerator {

Просмотреть файл

@ -1,4 +1,4 @@
package spark.ml.util package spark.mllib.util
import spark.{RDD, SparkContext} import spark.{RDD, SparkContext}
import spark.SparkContext._ import spark.SparkContext._

Просмотреть файл

@ -1,4 +1,4 @@
package spark.ml.clustering package spark.mllib.clustering
import scala.util.Random import scala.util.Random

Просмотреть файл

@ -1,4 +1,4 @@
package spark.ml.recommendation package spark.mllib.recommendation
import scala.util.Random import scala.util.Random

Просмотреть файл

@ -1,4 +1,4 @@
package spark.ml.regression package spark.mllib.regression
import scala.util.Random import scala.util.Random

Просмотреть файл

@ -1,4 +1,4 @@
package spark.ml.regression package spark.mllib.regression
import scala.util.Random import scala.util.Random

Просмотреть файл

@ -25,7 +25,7 @@ object SparkBuild extends Build {
//val HADOOP_MAJOR_VERSION = "2" //val HADOOP_MAJOR_VERSION = "2"
//val HADOOP_YARN = true //val HADOOP_YARN = true
lazy val root = Project("root", file("."), settings = rootSettings) aggregate(core, repl, examples, bagel, streaming, ml) lazy val root = Project("root", file("."), settings = rootSettings) aggregate(core, repl, examples, bagel, streaming, mllib)
lazy val core = Project("core", file("core"), settings = coreSettings) lazy val core = Project("core", file("core"), settings = coreSettings)
@ -37,7 +37,7 @@ object SparkBuild extends Build {
lazy val streaming = Project("streaming", file("streaming"), settings = streamingSettings) dependsOn (core) lazy val streaming = Project("streaming", file("streaming"), settings = streamingSettings) dependsOn (core)
lazy val ml = Project("ml", file("ml"), settings = mlSettings) dependsOn (core) lazy val mllib = Project("mllib", file("mllib"), settings = mllibSettings) dependsOn (core)
// A configuration to set an alternative publishLocalConfiguration // A configuration to set an alternative publishLocalConfiguration
lazy val MavenCompile = config("m2r") extend(Compile) lazy val MavenCompile = config("m2r") extend(Compile)
@ -221,8 +221,8 @@ object SparkBuild extends Build {
def bagelSettings = sharedSettings ++ Seq(name := "spark-bagel") def bagelSettings = sharedSettings ++ Seq(name := "spark-bagel")
def mlSettings = sharedSettings ++ Seq( def mllibSettings = sharedSettings ++ Seq(
name := "spark-ml", name := "spark-mllib",
libraryDependencies ++= Seq( libraryDependencies ++= Seq(
"org.jblas" % "jblas" % "1.2.3" "org.jblas" % "jblas" % "1.2.3"
) )