зеркало из https://github.com/microsoft/spark.git
Renamed ML package to MLlib and added it to classpath
This commit is contained in:
Родитель
399bd65ef5
Коммит
43b24635ee
|
@ -15,6 +15,7 @@ set CORE_DIR=%FWDIR%core
|
||||||
set REPL_DIR=%FWDIR%repl
|
set REPL_DIR=%FWDIR%repl
|
||||||
set EXAMPLES_DIR=%FWDIR%examples
|
set EXAMPLES_DIR=%FWDIR%examples
|
||||||
set BAGEL_DIR=%FWDIR%bagel
|
set BAGEL_DIR=%FWDIR%bagel
|
||||||
|
set MLLIB_DIR=%FWDIR%mllib
|
||||||
set STREAMING_DIR=%FWDIR%streaming
|
set STREAMING_DIR=%FWDIR%streaming
|
||||||
set PYSPARK_DIR=%FWDIR%python
|
set PYSPARK_DIR=%FWDIR%python
|
||||||
|
|
||||||
|
@ -29,6 +30,7 @@ set CLASSPATH=%CLASSPATH%;%FWDIR%lib_managed\bundles\*
|
||||||
set CLASSPATH=%CLASSPATH%;%FWDIR%repl\lib\*
|
set CLASSPATH=%CLASSPATH%;%FWDIR%repl\lib\*
|
||||||
set CLASSPATH=%CLASSPATH%;%FWDIR%python\lib\*
|
set CLASSPATH=%CLASSPATH%;%FWDIR%python\lib\*
|
||||||
set CLASSPATH=%CLASSPATH%;%BAGEL_DIR%\target\scala-%SCALA_VERSION%\classes
|
set CLASSPATH=%CLASSPATH%;%BAGEL_DIR%\target\scala-%SCALA_VERSION%\classes
|
||||||
|
set CLASSPATH=%CLASSPATH%;%MLLIB_DIR%\target\scala-%SCALA_VERSION%\classes
|
||||||
|
|
||||||
rem Add hadoop conf dir - else FileSystem.*, etc fail
|
rem Add hadoop conf dir - else FileSystem.*, etc fail
|
||||||
rem Note, this assumes that there is either a HADOOP_CONF_DIR or YARN_CONF_DIR which hosts
|
rem Note, this assumes that there is either a HADOOP_CONF_DIR or YARN_CONF_DIR which hosts
|
||||||
|
|
|
@ -18,6 +18,7 @@ REPL_DIR="$FWDIR/repl"
|
||||||
REPL_BIN_DIR="$FWDIR/repl-bin"
|
REPL_BIN_DIR="$FWDIR/repl-bin"
|
||||||
EXAMPLES_DIR="$FWDIR/examples"
|
EXAMPLES_DIR="$FWDIR/examples"
|
||||||
BAGEL_DIR="$FWDIR/bagel"
|
BAGEL_DIR="$FWDIR/bagel"
|
||||||
|
MLLIB_DIR="$FWDIR/mllib"
|
||||||
STREAMING_DIR="$FWDIR/streaming"
|
STREAMING_DIR="$FWDIR/streaming"
|
||||||
PYSPARK_DIR="$FWDIR/python"
|
PYSPARK_DIR="$FWDIR/python"
|
||||||
|
|
||||||
|
@ -49,6 +50,7 @@ if [ -e $REPL_BIN_DIR/target ]; then
|
||||||
CLASSPATH+=":$EXAMPLES_JAR"
|
CLASSPATH+=":$EXAMPLES_JAR"
|
||||||
fi
|
fi
|
||||||
CLASSPATH="$CLASSPATH:$BAGEL_DIR/target/scala-$SCALA_VERSION/classes"
|
CLASSPATH="$CLASSPATH:$BAGEL_DIR/target/scala-$SCALA_VERSION/classes"
|
||||||
|
CLASSPATH="$CLASSPATH:$MLLIB_DIR/target/scala-$SCALA_VERSION/classes"
|
||||||
for jar in `find $PYSPARK_DIR/lib -name '*jar'`; do
|
for jar in `find $PYSPARK_DIR/lib -name '*jar'`; do
|
||||||
CLASSPATH="$CLASSPATH:$jar"
|
CLASSPATH="$CLASSPATH:$jar"
|
||||||
done
|
done
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
package spark.ml.clustering
|
package spark.mllib.clustering
|
||||||
|
|
||||||
import scala.collection.mutable.ArrayBuffer
|
import scala.collection.mutable.ArrayBuffer
|
||||||
import scala.util.Random
|
import scala.util.Random
|
||||||
|
@ -6,7 +6,7 @@ import scala.util.Random
|
||||||
import spark.{SparkContext, RDD}
|
import spark.{SparkContext, RDD}
|
||||||
import spark.SparkContext._
|
import spark.SparkContext._
|
||||||
import spark.Logging
|
import spark.Logging
|
||||||
import spark.ml.util.MLUtils
|
import spark.mllib.util.MLUtils
|
||||||
|
|
||||||
import org.jblas.DoubleMatrix
|
import org.jblas.DoubleMatrix
|
||||||
|
|
||||||
|
@ -270,7 +270,8 @@ object KMeans {
|
||||||
/**
|
/**
|
||||||
* Return the index of the closest point in `centers` to `point`, as well as its distance.
|
* Return the index of the closest point in `centers` to `point`, as well as its distance.
|
||||||
*/
|
*/
|
||||||
private[ml] def findClosest(centers: Array[Array[Double]], point: Array[Double]): (Int, Double) =
|
private[mllib] def findClosest(centers: Array[Array[Double]], point: Array[Double])
|
||||||
|
: (Int, Double) =
|
||||||
{
|
{
|
||||||
var bestDistance = Double.PositiveInfinity
|
var bestDistance = Double.PositiveInfinity
|
||||||
var bestIndex = 0
|
var bestIndex = 0
|
||||||
|
@ -287,7 +288,7 @@ object KMeans {
|
||||||
/**
|
/**
|
||||||
* Return the K-means cost of a given point against the given cluster centers.
|
* Return the K-means cost of a given point against the given cluster centers.
|
||||||
*/
|
*/
|
||||||
private[ml] def pointCost(centers: Array[Array[Double]], point: Array[Double]): Double = {
|
private[mllib] def pointCost(centers: Array[Array[Double]], point: Array[Double]): Double = {
|
||||||
var bestDistance = Double.PositiveInfinity
|
var bestDistance = Double.PositiveInfinity
|
||||||
for (i <- 0 until centers.length) {
|
for (i <- 0 until centers.length) {
|
||||||
val distance = MLUtils.squaredDistance(point, centers(i))
|
val distance = MLUtils.squaredDistance(point, centers(i))
|
|
@ -1,8 +1,8 @@
|
||||||
package spark.ml.clustering
|
package spark.mllib.clustering
|
||||||
|
|
||||||
import spark.RDD
|
import spark.RDD
|
||||||
import spark.SparkContext._
|
import spark.SparkContext._
|
||||||
import spark.ml.util.MLUtils
|
import spark.mllib.util.MLUtils
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
|
@ -1,4 +1,4 @@
|
||||||
package spark.ml.clustering
|
package spark.mllib.clustering
|
||||||
|
|
||||||
import scala.util.Random
|
import scala.util.Random
|
||||||
|
|
||||||
|
@ -8,7 +8,7 @@ import org.jblas.{DoubleMatrix, SimpleBlas}
|
||||||
* A utility object to run K-means locally. This is private to the ML package because it's used
|
* A utility object to run K-means locally. This is private to the ML package because it's used
|
||||||
* in the initialization of KMeans but not meant to be publicly exposed.
|
* in the initialization of KMeans but not meant to be publicly exposed.
|
||||||
*/
|
*/
|
||||||
private[ml] object LocalKMeans {
|
private[mllib] object LocalKMeans {
|
||||||
/**
|
/**
|
||||||
* Run K-means++ on the weighted point set `points`. This first does the K-means++
|
* Run K-means++ on the weighted point set `points`. This first does the K-means++
|
||||||
* initialization procedure and then rounds of Lloyd's algorithm.
|
* initialization procedure and then rounds of Lloyd's algorithm.
|
|
@ -1,4 +1,4 @@
|
||||||
package spark.ml.optimization
|
package spark.mllib.optimization
|
||||||
|
|
||||||
import org.jblas.DoubleMatrix
|
import org.jblas.DoubleMatrix
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
package spark.ml.optimization
|
package spark.mllib.optimization
|
||||||
|
|
||||||
import spark.{Logging, RDD, SparkContext}
|
import spark.{Logging, RDD, SparkContext}
|
||||||
import spark.SparkContext._
|
import spark.SparkContext._
|
|
@ -1,4 +1,4 @@
|
||||||
package spark.ml.optimization
|
package spark.mllib.optimization
|
||||||
|
|
||||||
import org.jblas.DoubleMatrix
|
import org.jblas.DoubleMatrix
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
package spark.ml.recommendation
|
package spark.mllib.recommendation
|
||||||
|
|
||||||
import scala.collection.mutable.{ArrayBuffer, BitSet}
|
import scala.collection.mutable.{ArrayBuffer, BitSet}
|
||||||
import scala.util.Random
|
import scala.util.Random
|
|
@ -1,4 +1,4 @@
|
||||||
package spark.ml.recommendation
|
package spark.mllib.recommendation
|
||||||
|
|
||||||
import spark.RDD
|
import spark.RDD
|
||||||
import spark.SparkContext._
|
import spark.SparkContext._
|
|
@ -1,8 +1,8 @@
|
||||||
package spark.ml.regression
|
package spark.mllib.regression
|
||||||
|
|
||||||
import spark.{Logging, RDD, SparkContext}
|
import spark.{Logging, RDD, SparkContext}
|
||||||
import spark.ml.optimization._
|
import spark.mllib.optimization._
|
||||||
import spark.ml.util.MLUtils
|
import spark.mllib.util.MLUtils
|
||||||
|
|
||||||
import org.jblas.DoubleMatrix
|
import org.jblas.DoubleMatrix
|
||||||
|
|
|
@ -1,11 +1,11 @@
|
||||||
package spark.ml.regression
|
package spark.mllib.regression
|
||||||
|
|
||||||
import scala.util.Random
|
import scala.util.Random
|
||||||
|
|
||||||
import org.jblas.DoubleMatrix
|
import org.jblas.DoubleMatrix
|
||||||
|
|
||||||
import spark.{RDD, SparkContext}
|
import spark.{RDD, SparkContext}
|
||||||
import spark.ml.util.MLUtils
|
import spark.mllib.util.MLUtils
|
||||||
|
|
||||||
object LogisticRegressionGenerator {
|
object LogisticRegressionGenerator {
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
package spark.ml.regression
|
package spark.mllib.regression
|
||||||
|
|
||||||
import spark.RDD
|
import spark.RDD
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
package spark.ml.regression
|
package spark.mllib.regression
|
||||||
|
|
||||||
import spark.{Logging, RDD, SparkContext}
|
import spark.{Logging, RDD, SparkContext}
|
||||||
import spark.SparkContext._
|
import spark.SparkContext._
|
||||||
import spark.ml.util.MLUtils
|
import spark.mllib.util.MLUtils
|
||||||
|
|
||||||
import org.jblas.DoubleMatrix
|
import org.jblas.DoubleMatrix
|
||||||
import org.jblas.Solve
|
import org.jblas.Solve
|
|
@ -1,11 +1,11 @@
|
||||||
package spark.ml.regression
|
package spark.mllib.regression
|
||||||
|
|
||||||
import scala.util.Random
|
import scala.util.Random
|
||||||
|
|
||||||
import org.jblas.DoubleMatrix
|
import org.jblas.DoubleMatrix
|
||||||
|
|
||||||
import spark.{RDD, SparkContext}
|
import spark.{RDD, SparkContext}
|
||||||
import spark.ml.util.MLUtils
|
import spark.mllib.util.MLUtils
|
||||||
|
|
||||||
|
|
||||||
object RidgeRegressionGenerator {
|
object RidgeRegressionGenerator {
|
|
@ -1,4 +1,4 @@
|
||||||
package spark.ml.util
|
package spark.mllib.util
|
||||||
|
|
||||||
import spark.{RDD, SparkContext}
|
import spark.{RDD, SparkContext}
|
||||||
import spark.SparkContext._
|
import spark.SparkContext._
|
|
@ -1,4 +1,4 @@
|
||||||
package spark.ml.clustering
|
package spark.mllib.clustering
|
||||||
|
|
||||||
import scala.util.Random
|
import scala.util.Random
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
package spark.ml.recommendation
|
package spark.mllib.recommendation
|
||||||
|
|
||||||
import scala.util.Random
|
import scala.util.Random
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
package spark.ml.regression
|
package spark.mllib.regression
|
||||||
|
|
||||||
import scala.util.Random
|
import scala.util.Random
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
package spark.ml.regression
|
package spark.mllib.regression
|
||||||
|
|
||||||
import scala.util.Random
|
import scala.util.Random
|
||||||
|
|
|
@ -25,7 +25,7 @@ object SparkBuild extends Build {
|
||||||
//val HADOOP_MAJOR_VERSION = "2"
|
//val HADOOP_MAJOR_VERSION = "2"
|
||||||
//val HADOOP_YARN = true
|
//val HADOOP_YARN = true
|
||||||
|
|
||||||
lazy val root = Project("root", file("."), settings = rootSettings) aggregate(core, repl, examples, bagel, streaming, ml)
|
lazy val root = Project("root", file("."), settings = rootSettings) aggregate(core, repl, examples, bagel, streaming, mllib)
|
||||||
|
|
||||||
lazy val core = Project("core", file("core"), settings = coreSettings)
|
lazy val core = Project("core", file("core"), settings = coreSettings)
|
||||||
|
|
||||||
|
@ -37,7 +37,7 @@ object SparkBuild extends Build {
|
||||||
|
|
||||||
lazy val streaming = Project("streaming", file("streaming"), settings = streamingSettings) dependsOn (core)
|
lazy val streaming = Project("streaming", file("streaming"), settings = streamingSettings) dependsOn (core)
|
||||||
|
|
||||||
lazy val ml = Project("ml", file("ml"), settings = mlSettings) dependsOn (core)
|
lazy val mllib = Project("mllib", file("mllib"), settings = mllibSettings) dependsOn (core)
|
||||||
|
|
||||||
// A configuration to set an alternative publishLocalConfiguration
|
// A configuration to set an alternative publishLocalConfiguration
|
||||||
lazy val MavenCompile = config("m2r") extend(Compile)
|
lazy val MavenCompile = config("m2r") extend(Compile)
|
||||||
|
@ -221,8 +221,8 @@ object SparkBuild extends Build {
|
||||||
|
|
||||||
def bagelSettings = sharedSettings ++ Seq(name := "spark-bagel")
|
def bagelSettings = sharedSettings ++ Seq(name := "spark-bagel")
|
||||||
|
|
||||||
def mlSettings = sharedSettings ++ Seq(
|
def mllibSettings = sharedSettings ++ Seq(
|
||||||
name := "spark-ml",
|
name := "spark-mllib",
|
||||||
libraryDependencies ++= Seq(
|
libraryDependencies ++= Seq(
|
||||||
"org.jblas" % "jblas" % "1.2.3"
|
"org.jblas" % "jblas" % "1.2.3"
|
||||||
)
|
)
|
||||||
|
|
Загрузка…
Ссылка в новой задаче