Mirror of https://github.com/microsoft/spark.git
Merge remote-tracking branch 'origin/pr/662'
Conflicts: bin/compute-classpath.sh
Commit cd28d9c147
bin/compute-classpath.sh

@@ -24,46 +24,71 @@ PYSPARK_DIR="$FWDIR/python"
# Build up classpath
CLASSPATH="$SPARK_CLASSPATH"
CLASSPATH="$CLASSPATH:$FWDIR/conf"
CLASSPATH="$CLASSPATH:$CORE_DIR/target/scala-$SCALA_VERSION/classes"
if [ -n "$SPARK_TESTING" ] ; then
CLASSPATH="$CLASSPATH:$CORE_DIR/target/scala-$SCALA_VERSION/test-classes"
CLASSPATH="$CLASSPATH:$STREAMING_DIR/target/scala-$SCALA_VERSION/test-classes"
fi
CLASSPATH="$CLASSPATH:$CORE_DIR/src/main/resources"
CLASSPATH="$CLASSPATH:$REPL_DIR/target/scala-$SCALA_VERSION/classes"
CLASSPATH="$CLASSPATH:$EXAMPLES_DIR/target/scala-$SCALA_VERSION/classes"
CLASSPATH="$CLASSPATH:$STREAMING_DIR/target/scala-$SCALA_VERSION/classes"
CLASSPATH="$CLASSPATH:$STREAMING_DIR/lib/org/apache/kafka/kafka/0.7.2-spark/*" # <-- our in-project Kafka Jar
if [ -e "$FWDIR/lib_managed" ]; then
CLASSPATH="$CLASSPATH:$FWDIR/lib_managed/jars/*"
CLASSPATH="$CLASSPATH:$FWDIR/lib_managed/bundles/*"
fi
CLASSPATH="$CLASSPATH:$REPL_DIR/lib/*"
# Add the shaded JAR for Maven builds
if [ -e $REPL_BIN_DIR/target ]; then
for jar in `find "$REPL_BIN_DIR/target" -name 'spark-repl-*-shaded-hadoop*.jar'`; do

function dev_classpath {
CLASSPATH="$CLASSPATH:$FWDIR/conf"
CLASSPATH="$CLASSPATH:$CORE_DIR/target/scala-$SCALA_VERSION/classes"
if [ -n "$SPARK_TESTING" ] ; then
CLASSPATH="$CLASSPATH:$CORE_DIR/target/scala-$SCALA_VERSION/test-classes"
CLASSPATH="$CLASSPATH:$STREAMING_DIR/target/scala-$SCALA_VERSION/test-classes"
fi
CLASSPATH="$CLASSPATH:$CORE_DIR/src/main/resources"
CLASSPATH="$CLASSPATH:$REPL_DIR/target/scala-$SCALA_VERSION/classes"
CLASSPATH="$CLASSPATH:$EXAMPLES_DIR/target/scala-$SCALA_VERSION/classes"
CLASSPATH="$CLASSPATH:$STREAMING_DIR/target/scala-$SCALA_VERSION/classes"
CLASSPATH="$CLASSPATH:$STREAMING_DIR/lib/org/apache/kafka/kafka/0.7.2-spark/*" # <-- our in-project Kafka Jar
if [ -e "$FWDIR/lib_managed" ]; then
CLASSPATH="$CLASSPATH:$FWDIR/lib_managed/jars/*"
CLASSPATH="$CLASSPATH:$FWDIR/lib_managed/bundles/*"
fi
CLASSPATH="$CLASSPATH:$REPL_DIR/lib/*"
# Add the shaded JAR for Maven builds
if [ -e $REPL_BIN_DIR/target ]; then
for jar in `find "$REPL_BIN_DIR/target" -name 'spark-repl-*-shaded-hadoop*.jar'`; do
CLASSPATH="$CLASSPATH:$jar"
done
# The shaded JAR doesn't contain examples, so include those separately
EXAMPLES_JAR=`ls "$EXAMPLES_DIR/target/spark-examples"*[0-9T].jar`
CLASSPATH+=":$EXAMPLES_JAR"
fi
CLASSPATH="$CLASSPATH:$BAGEL_DIR/target/scala-$SCALA_VERSION/classes"
CLASSPATH="$CLASSPATH:$MLLIB_DIR/target/scala-$SCALA_VERSION/classes"
for jar in `find $PYSPARK_DIR/lib -name '*jar'`; do
CLASSPATH="$CLASSPATH:$jar"
done
# The shaded JAR doesn't contain examples, so include those separately
EXAMPLES_JAR=`ls "$EXAMPLES_DIR/target/spark-examples"*[0-9T].jar`
CLASSPATH+=":$EXAMPLES_JAR"
fi
CLASSPATH="$CLASSPATH:$BAGEL_DIR/target/scala-$SCALA_VERSION/classes"
CLASSPATH="$CLASSPATH:$MLLIB_DIR/target/scala-$SCALA_VERSION/classes"
for jar in `find $PYSPARK_DIR/lib -name '*jar'`; do
CLASSPATH="$CLASSPATH:$jar"
done

# Figure out the JAR file that our examples were packaged into. This includes a bit of a hack
# to avoid the -sources and -doc packages that are built by publish-local.
if [ -e "$EXAMPLES_DIR/target/scala-$SCALA_VERSION/spark-examples"*[0-9T].jar ]; then
# Use the JAR from the SBT build
export SPARK_EXAMPLES_JAR=`ls "$EXAMPLES_DIR/target/scala-$SCALA_VERSION/spark-examples"*[0-9T].jar`
fi
if [ -e "$EXAMPLES_DIR/target/spark-examples"*[0-9T].jar ]; then
# Use the JAR from the Maven build
export SPARK_EXAMPLES_JAR=`ls "$EXAMPLES_DIR/target/spark-examples"*[0-9T].jar`
# Figure out the JAR file that our examples were packaged into. This includes a bit of a hack
# to avoid the -sources and -doc packages that are built by publish-local.
if [ -e "$EXAMPLES_DIR/target/scala-$SCALA_VERSION/spark-examples"*[0-9T].jar ]; then
# Use the JAR from the SBT build
export SPARK_EXAMPLES_JAR=`ls "$EXAMPLES_DIR/target/scala-$SCALA_VERSION/spark-examples"*[0-9T].jar`
fi
if [ -e "$EXAMPLES_DIR/target/spark-examples"*[0-9T].jar ]; then
# Use the JAR from the Maven build
export SPARK_EXAMPLES_JAR=`ls "$EXAMPLES_DIR/target/spark-examples"*[0-9T].jar`
fi

# Add Scala standard library
if [ -z "$SCALA_LIBRARY_PATH" ]; then
if [ -z "$SCALA_HOME" ]; then
echo "SCALA_HOME is not set" >&2
exit 1
fi
SCALA_LIBRARY_PATH="$SCALA_HOME/lib"
fi
CLASSPATH="$CLASSPATH:$SCALA_LIBRARY_PATH/scala-library.jar"
CLASSPATH="$CLASSPATH:$SCALA_LIBRARY_PATH/scala-compiler.jar"
CLASSPATH="$CLASSPATH:$SCALA_LIBRARY_PATH/jline.jar"
}

function release_classpath {
CLASSPATH="$CLASSPATH:$FWDIR/jars/*"
}

if [ -f "$FWDIR/RELEASE" ]; then
release_classpath
else
dev_classpath
fi

# Add hadoop conf dir - else FileSystem.*, etc fail !

@@ -76,16 +101,4 @@ if [ "x" != "x$YARN_CONF_DIR" ]; then
CLASSPATH="$CLASSPATH:$YARN_CONF_DIR"
fi

# Add Scala standard library
if [ -z "$SCALA_LIBRARY_PATH" ]; then
if [ -z "$SCALA_HOME" ]; then
echo "SCALA_HOME is not set" >&2
exit 1
fi
SCALA_LIBRARY_PATH="$SCALA_HOME/lib"
fi
CLASSPATH="$CLASSPATH:$SCALA_LIBRARY_PATH/scala-library.jar"
CLASSPATH="$CLASSPATH:$SCALA_LIBRARY_PATH/scala-compiler.jar"
CLASSPATH="$CLASSPATH:$SCALA_LIBRARY_PATH/jline.jar"

echo "$CLASSPATH"
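A rough usage note, not part of the diff: the script ends by printing the assembled classpath on stdout, so a caller is expected to capture that output rather than source the file. A minimal sketch, mirroring what the updated run script further down does:

# Capture the computed classpath and export it before launching a JVM.
CLASSPATH=`./bin/compute-classpath.sh`
export CLASSPATH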
@@ -1,4 +1,7 @@
#!/usr/bin/env bash
#
# Usage: start-slave.sh <worker#> <master-spark-URL>
# where <master-spark-URL> is like "spark://localhost:7077"

bin=`dirname "$0"`
bin=`cd "$bin"; pwd`
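Going by the usage comment above, an invocation would look like the following, with the worker number and master URL taken from the comment's own example:

./bin/start-slave.sh 1 spark://localhost:7077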
@@ -0,0 +1,39 @@
#!/bin/bash
#
# Script to create a binary distribution for easy deploys of Spark.
# The distribution directory defaults to dist/ but can be overridden below.
# The distribution contains fat (assembly) jars that include the Scala library,
# so it is completely self contained.
# It does not contain source or *.class files.
#
# Recommended deploy/testing procedure (standalone mode):
# 1) Rsync / deploy the dist/ dir to one host
# 2) cd to deploy dir; ./bin/start-master.sh
# 3) Verify master is up by visiting web page, ie http://master-ip:8080. Note the spark:// URL.
# 4) ./bin/start-slave.sh 1 <<spark:// URL>>
# 5) MASTER="spark://my-master-ip:7077" ./spark-shell

# Figure out where the Spark framework is installed
FWDIR="$(cd `dirname $0`; pwd)"
DISTDIR="$FWDIR/dist"

# Get version from SBT
export TERM=dumb # Prevents color codes in SBT output
VERSION=$($FWDIR/sbt/sbt "show version" | tail -1 | cut -f 2)
echo "Making distribution for Spark $VERSION in $DISTDIR..."

# Build fat JAR
$FWDIR/sbt/sbt "repl/assembly"

# Make directories
rm -rf "$DISTDIR"
mkdir -p "$DISTDIR/jars"
echo "$VERSION" >$DISTDIR/RELEASE

# Copy jars
cp $FWDIR/repl/target/*.jar "$DISTDIR/jars/"

# Copy other things
cp -r "$FWDIR/bin" "$DISTDIR"
cp -r "$FWDIR/conf" "$DISTDIR"
cp "$FWDIR/run" "$FWDIR/spark-shell" "$DISTDIR"
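A rough command-level sketch of the recommended procedure from the header comment; the target host (user@host) and master-ip are hypothetical placeholders, not values from the diff:

# after running the distribution script above to produce dist/:
rsync -a dist/ user@host:spark-dist/                                            # 1) rsync the dist/ dir to one host
ssh user@host 'cd spark-dist && ./bin/start-master.sh'                          # 2) start the master
# 3) verify the master at http://master-ip:8080 and note the spark:// URL
ssh user@host 'cd spark-dist && ./bin/start-slave.sh 1 spark://master-ip:7077'  # 4) start a worker
MASTER="spark://master-ip:7077" ./spark-shell                                   # 5) connect a shell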
@@ -197,7 +197,7 @@ object SparkBuild extends Build {
def replSettings = sharedSettings ++ Seq(
name := "spark-repl",
libraryDependencies <+= scalaVersion("org.scala-lang" % "scala-compiler" % _)
)
) ++ assemblySettings ++ extraAssemblySettings

def examplesSettings = sharedSettings ++ Seq(
name := "spark-examples",
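The one-line change here appends assemblySettings and extraAssemblySettings to replSettings, which appears to be what provides the repl assembly task the distribution script above relies on. Presumably the fat JAR is then built with:

sbt/sbt "repl/assembly"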
run
@@ -1,7 +1,5 @@
#!/bin/bash

SCALA_VERSION=2.9.3

# Figure out where the Scala framework is installed
FWDIR="$(cd `dirname $0`; pwd)"

@@ -77,7 +75,7 @@ else
exit 1
fi
fi
if [ -z "$SCALA_LIBRARY_PATH" ]; then
if [[ ! -f "$FWDIR/RELEASE" && -z "$SCALA_LIBRARY_PATH" ]]; then
if [ -z "$SCALA_HOME" ]; then
echo "SCALA_HOME is not set" >&2
exit 1

@@ -104,43 +102,33 @@ fi
export JAVA_OPTS
# Attention: when changing the way the JAVA_OPTS are assembled, the change must be reflected in ExecutorRunner.scala!

CORE_DIR="$FWDIR/core"
EXAMPLES_DIR="$FWDIR/examples"
REPL_DIR="$FWDIR/repl"
if [ ! -f "$FWDIR/RELEASE" ]; then
CORE_DIR="$FWDIR/core"
EXAMPLES_DIR="$FWDIR/examples"
REPL_DIR="$FWDIR/repl"

# Exit if the user hasn't compiled Spark
if [ ! -e "$CORE_DIR/target" ]; then
echo "Failed to find Spark classes in $CORE_DIR/target" >&2
echo "You need to compile Spark before running this program" >&2
exit 1
fi
# Exit if the user hasn't compiled Spark
if [ ! -e "$CORE_DIR/target" ]; then
echo "Failed to find Spark classes in $CORE_DIR/target" >&2
echo "You need to compile Spark before running this program" >&2
exit 1
fi

if [[ "$@" = *repl* && ! -e "$REPL_DIR/target" ]]; then
echo "Failed to find Spark classes in $REPL_DIR/target" >&2
echo "You need to compile Spark repl module before running this program" >&2
exit 1
if [[ "$@" = *repl* && ! -e "$REPL_DIR/target" ]]; then
echo "Failed to find Spark classes in $REPL_DIR/target" >&2
echo "You need to compile Spark repl module before running this program" >&2
exit 1
fi
fi

# Compute classpath using external script
CLASSPATH=`$FWDIR/bin/compute-classpath.sh`
export CLASSPATH

# Figure out the JAR file that our examples were packaged into. This includes a bit of a hack
# to avoid the -sources and -doc packages that are built by publish-local.
if [ -e "$EXAMPLES_DIR/target/scala-$SCALA_VERSION/spark-examples"*[0-9T].jar ]; then
# Use the JAR from the SBT build
export SPARK_EXAMPLES_JAR=`ls "$EXAMPLES_DIR/target/scala-$SCALA_VERSION/spark-examples"*[0-9T].jar`
fi
if [ -e "$EXAMPLES_DIR/target/spark-examples"*[0-9T].jar ]; then
# Use the JAR from the Maven build
export SPARK_EXAMPLES_JAR=`ls "$EXAMPLES_DIR/target/spark-examples"*[0-9T].jar`
fi

if [ "$SPARK_LAUNCH_WITH_SCALA" == "1" ]; then
EXTRA_ARGS="" # Java options will be passed to scala as JAVA_OPTS
else
# The JVM doesn't read JAVA_OPTS by default so we need to pass it in
EXTRA_ARGS="$JAVA_OPTS"
fi

exec "$RUNNER" -cp "$CLASSPATH" $EXTRA_ARGS "$@"
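A small sketch of the two launch modes handled in the block above: with SPARK_LAUNCH_WITH_SCALA=1 the Java options are left to the scala launcher via the JAVA_OPTS environment variable, otherwise they are passed explicitly as EXTRA_ARGS. spark.repl.Main is the class spark-shell (below) launches through run; using it here is only illustrative.

# launch via scala; JAVA_OPTS is picked up from the environment
SPARK_LAUNCH_WITH_SCALA=1 ./run spark.repl.Main
# launch via java; JAVA_OPTS is put on the command line as EXTRA_ARGS
SPARK_LAUNCH_WITH_SCALA=0 ./run spark.repl.Main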
spark-shell
@@ -1,4 +1,65 @@
#!/bin/sh
#!/bin/bash --posix
#
# Shell script for starting the Spark Shell REPL
# Note that it will set MASTER to spark://${SPARK_MASTER_IP}:${SPARK_MASTER_PORT}
# if those two env vars are set in spark-env.sh but MASTER is not.
# Options:
# -c <cores> Set the number of cores for REPL to use
#
FWDIR="`dirname $0`"
export SPARK_LAUNCH_WITH_SCALA=1
exec $FWDIR/run spark.repl.Main "$@"

for o in "$@"; do
if [ "$1" = "-c" -o "$1" = "--cores" ]; then
shift
if [ -n "$1" ]; then
OPTIONS="-Dspark.cores.max=$1"
shift
fi
fi
done

# Set MASTER from spark-env if possible
if [ -z "$MASTER" ]; then
if [ -e "$FWDIR/conf/spark-env.sh" ]; then
. "$FWDIR/conf/spark-env.sh"
fi
if [[ "x" != "x$SPARK_MASTER_IP" && "y" != "y$SPARK_MASTER_PORT" ]]; then
MASTER="spark://${SPARK_MASTER_IP}:${SPARK_MASTER_PORT}"
export MASTER
fi
fi

# Copy restore-TTY-on-exit functions from Scala script so spark-shell exits properly even in
# binary distribution of Spark where Scala is not installed
exit_status=127
saved_stty=""

# restore stty settings (echo in particular)
function restoreSttySettings() {
stty $saved_stty
saved_stty=""
}

function onExit() {
if [[ "$saved_stty" != "" ]]; then
restoreSttySettings
fi
exit $exit_status
}

# to reenable echo if we are interrupted before completing.
trap onExit INT

# save terminal settings
saved_stty=$(stty -g 2>/dev/null)
# clear on error so we don't later try to restore them
if [[ ! $? ]]; then
saved_stty=""
fi

$FWDIR/run $OPTIONS spark.repl.Main "$@"

# record the exit status lest it be overwritten:
# then reenable echo and propagate the code.
exit_status=$?
onExit
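Based on the option parsing and the MASTER logic above, a typical invocation might look like the following; the spark-env.sh values are illustrative placeholders, not taken from the diff:

# conf/spark-env.sh (illustrative values; sourced by spark-shell if present)
SPARK_MASTER_IP=localhost
SPARK_MASTER_PORT=7077

# start the shell with a cap of 4 cores (sets -Dspark.cores.max=4)
./spark-shell -c 4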