Mirror of https://github.com/microsoft/spark.git
Renamed ML package to MLlib and added it to classpath
This commit is contained in:
Parent: 399bd65ef5
Commit: 43b24635ee
@@ -15,6 +15,7 @@ set CORE_DIR=%FWDIR%core
 set REPL_DIR=%FWDIR%repl
 set EXAMPLES_DIR=%FWDIR%examples
 set BAGEL_DIR=%FWDIR%bagel
+set MLLIB_DIR=%FWDIR%mllib
 set STREAMING_DIR=%FWDIR%streaming
 set PYSPARK_DIR=%FWDIR%python

@@ -29,6 +30,7 @@ set CLASSPATH=%CLASSPATH%;%FWDIR%lib_managed\bundles\*
 set CLASSPATH=%CLASSPATH%;%FWDIR%repl\lib\*
 set CLASSPATH=%CLASSPATH%;%FWDIR%python\lib\*
 set CLASSPATH=%CLASSPATH%;%BAGEL_DIR%\target\scala-%SCALA_VERSION%\classes
+set CLASSPATH=%CLASSPATH%;%MLLIB_DIR%\target\scala-%SCALA_VERSION%\classes

 rem Add hadoop conf dir - else FileSystem.*, etc fail
 rem Note, this assumes that there is either a HADOOP_CONF_DIR or YARN_CONF_DIR which hosts

@@ -18,6 +18,7 @@ REPL_DIR="$FWDIR/repl"
 REPL_BIN_DIR="$FWDIR/repl-bin"
 EXAMPLES_DIR="$FWDIR/examples"
 BAGEL_DIR="$FWDIR/bagel"
+MLLIB_DIR="$FWDIR/mllib"
 STREAMING_DIR="$FWDIR/streaming"
 PYSPARK_DIR="$FWDIR/python"

@@ -49,6 +50,7 @@ if [ -e $REPL_BIN_DIR/target ]; then
     CLASSPATH+=":$EXAMPLES_JAR"
 fi
 CLASSPATH="$CLASSPATH:$BAGEL_DIR/target/scala-$SCALA_VERSION/classes"
+CLASSPATH="$CLASSPATH:$MLLIB_DIR/target/scala-$SCALA_VERSION/classes"
 for jar in `find $PYSPARK_DIR/lib -name '*jar'`; do
   CLASSPATH="$CLASSPATH:$jar"
 done

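Both scripts append the compiled mllib classes directory to the runtime classpath. As a quick sanity check (a hypothetical snippet, not part of this commit), a class from the renamed package should resolve once the scripts are updated:

// Hypothetical check, not part of the commit: fails with
// ClassNotFoundException if mllib/target/scala-<version>/classes
// is missing from the classpath.
object MLlibClasspathCheck {
  def main(args: Array[String]) {
    val clazz = Class.forName("spark.mllib.clustering.KMeans")
    println("Found " + clazz.getName)
  }
}
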
@@ -1,4 +1,4 @@
-package spark.ml.clustering
+package spark.mllib.clustering

 import scala.collection.mutable.ArrayBuffer
 import scala.util.Random
@@ -6,7 +6,7 @@ import scala.util.Random
 import spark.{SparkContext, RDD}
 import spark.SparkContext._
 import spark.Logging
-import spark.ml.util.MLUtils
+import spark.mllib.util.MLUtils

 import org.jblas.DoubleMatrix

@@ -270,7 +270,8 @@ object KMeans {
   /**
    * Return the index of the closest point in `centers` to `point`, as well as its distance.
    */
-  private[ml] def findClosest(centers: Array[Array[Double]], point: Array[Double]): (Int, Double) =
+  private[mllib] def findClosest(centers: Array[Array[Double]], point: Array[Double])
+    : (Int, Double) =
   {
     var bestDistance = Double.PositiveInfinity
     var bestIndex = 0
@@ -287,7 +288,7 @@ object KMeans {
   /**
    * Return the K-means cost of a given point against the given cluster centers.
    */
-  private[ml] def pointCost(centers: Array[Array[Double]], point: Array[Double]): Double = {
+  private[mllib] def pointCost(centers: Array[Array[Double]], point: Array[Double]): Double = {
     var bestDistance = Double.PositiveInfinity
     for (i <- 0 until centers.length) {
       val distance = MLUtils.squaredDistance(point, centers(i))
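Both helpers do a linear scan over `centers`, comparing squared Euclidean distances computed by MLUtils.squaredDistance (now in spark.mllib.util). A minimal sketch of that distance, assuming the conventional sum-of-squared-differences definition:

// Sketch only, assuming the conventional definition; the real
// implementation lives in spark.mllib.util.MLUtils.
def squaredDistance(v1: Array[Double], v2: Array[Double]): Double = {
  var sum = 0.0
  var i = 0
  while (i < v1.length) {
    val d = v1(i) - v2(i)
    sum += d * d
    i += 1
  }
  sum
}
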
@@ -1,8 +1,8 @@
-package spark.ml.clustering
+package spark.mllib.clustering

 import spark.RDD
 import spark.SparkContext._
-import spark.ml.util.MLUtils
+import spark.mllib.util.MLUtils


 /**
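For user code, only the package prefix changes; for example:

// Before this commit:
//   import spark.ml.clustering.KMeans
// After:
import spark.mllib.clustering.KMeans
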
@@ -1,4 +1,4 @@
-package spark.ml.clustering
+package spark.mllib.clustering

 import scala.util.Random

@@ -8,7 +8,7 @@ import org.jblas.{DoubleMatrix, SimpleBlas}
  * A utility object to run K-means locally. This is private to the ML package because it's used
  * in the initialization of KMeans but not meant to be publicly exposed.
  */
-private[ml] object LocalKMeans {
+private[mllib] object LocalKMeans {
   /**
    * Run K-means++ on the weighted point set `points`. This first does the K-means++
    * initialization procedure and then runs rounds of Lloyd's algorithm.
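The K-means++ procedure referenced here picks each new center with probability proportional to its squared distance from the centers chosen so far, then refines with Lloyd iterations. A rough self-contained sketch of the seeding step (illustrative only; the actual LocalKMeans also handles per-point weights):

import scala.util.Random

object KMeansPlusPlusSketch {
  // Plain squared Euclidean distance, for the sketch only.
  private def squaredDistance(a: Array[Double], b: Array[Double]): Double = {
    var sum = 0.0
    var i = 0
    while (i < a.length) { val d = a(i) - b(i); sum += d * d; i += 1 }
    sum
  }

  // Choose k initial centers: the first uniformly at random, each later one
  // sampled with probability proportional to its current distance-squared cost.
  def init(points: Array[Array[Double]], k: Int, rand: Random): Array[Array[Double]] = {
    val centers = scala.collection.mutable.ArrayBuffer(points(rand.nextInt(points.length)))
    while (centers.length < k) {
      val costs = points.map(p => centers.map(c => squaredDistance(p, c)).min)
      var r = rand.nextDouble() * costs.sum
      var i = 0
      while (i < points.length - 1 && r >= costs(i)) { r -= costs(i); i += 1 }
      centers += points(i)
    }
    centers.toArray
  }
}
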
@@ -1,4 +1,4 @@
-package spark.ml.optimization
+package spark.mllib.optimization

 import org.jblas.DoubleMatrix

@@ -1,4 +1,4 @@
-package spark.ml.optimization
+package spark.mllib.optimization

 import spark.{Logging, RDD, SparkContext}
 import spark.SparkContext._
@@ -1,4 +1,4 @@
-package spark.ml.optimization
+package spark.mllib.optimization

 import org.jblas.DoubleMatrix

@@ -1,4 +1,4 @@
-package spark.ml.recommendation
+package spark.mllib.recommendation

 import scala.collection.mutable.{ArrayBuffer, BitSet}
 import scala.util.Random
@@ -1,4 +1,4 @@
-package spark.ml.recommendation
+package spark.mllib.recommendation

 import spark.RDD
 import spark.SparkContext._
@@ -1,8 +1,8 @@
-package spark.ml.regression
+package spark.mllib.regression

 import spark.{Logging, RDD, SparkContext}
-import spark.ml.optimization._
-import spark.ml.util.MLUtils
+import spark.mllib.optimization._
+import spark.mllib.util.MLUtils

 import org.jblas.DoubleMatrix

@@ -1,11 +1,11 @@
-package spark.ml.regression
+package spark.mllib.regression

 import scala.util.Random

 import org.jblas.DoubleMatrix

 import spark.{RDD, SparkContext}
-import spark.ml.util.MLUtils
+import spark.mllib.util.MLUtils

 object LogisticRegressionGenerator {

@@ -1,4 +1,4 @@
-package spark.ml.regression
+package spark.mllib.regression

 import spark.RDD

@@ -1,8 +1,8 @@
-package spark.ml.regression
+package spark.mllib.regression

 import spark.{Logging, RDD, SparkContext}
 import spark.SparkContext._
-import spark.ml.util.MLUtils
+import spark.mllib.util.MLUtils

 import org.jblas.DoubleMatrix
 import org.jblas.Solve
@@ -1,11 +1,11 @@
-package spark.ml.regression
+package spark.mllib.regression

 import scala.util.Random

 import org.jblas.DoubleMatrix

 import spark.{RDD, SparkContext}
-import spark.ml.util.MLUtils
+import spark.mllib.util.MLUtils


 object RidgeRegressionGenerator {
@@ -1,4 +1,4 @@
-package spark.ml.util
+package spark.mllib.util

 import spark.{RDD, SparkContext}
 import spark.SparkContext._
@@ -1,4 +1,4 @@
-package spark.ml.clustering
+package spark.mllib.clustering

 import scala.util.Random

@@ -1,4 +1,4 @@
-package spark.ml.recommendation
+package spark.mllib.recommendation

 import scala.util.Random

@@ -1,4 +1,4 @@
-package spark.ml.regression
+package spark.mllib.regression

 import scala.util.Random

@@ -1,4 +1,4 @@
-package spark.ml.regression
+package spark.mllib.regression

 import scala.util.Random

@@ -25,7 +25,7 @@ object SparkBuild extends Build {
   //val HADOOP_MAJOR_VERSION = "2"
   //val HADOOP_YARN = true

-  lazy val root = Project("root", file("."), settings = rootSettings) aggregate(core, repl, examples, bagel, streaming, ml)
+  lazy val root = Project("root", file("."), settings = rootSettings) aggregate(core, repl, examples, bagel, streaming, mllib)

   lazy val core = Project("core", file("core"), settings = coreSettings)
@@ -37,7 +37,7 @@ object SparkBuild extends Build {

   lazy val streaming = Project("streaming", file("streaming"), settings = streamingSettings) dependsOn (core)

-  lazy val ml = Project("ml", file("ml"), settings = mlSettings) dependsOn (core)
+  lazy val mllib = Project("mllib", file("mllib"), settings = mllibSettings) dependsOn (core)

   // A configuration to set an alternative publishLocalConfiguration
   lazy val MavenCompile = config("m2r") extend(Compile)
@@ -221,8 +221,8 @@ object SparkBuild extends Build {

   def bagelSettings = sharedSettings ++ Seq(name := "spark-bagel")

-  def mlSettings = sharedSettings ++ Seq(
-    name := "spark-ml",
+  def mllibSettings = sharedSettings ++ Seq(
+    name := "spark-mllib",
     libraryDependencies ++= Seq(
       "org.jblas" % "jblas" % "1.2.3"
     )
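With the project and its settings renamed, other modules in this build would reference the new identifier; for instance (a hypothetical module, not part of the commit):

// Hypothetical downstream sbt project in the same build file; "my-app"
// and its directory are illustrative only.
lazy val myApp = Project("my-app", file("my-app"), settings = sharedSettings) dependsOn (mllib)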