Merge remote-tracking branch 'origin/pr/662'

Conflicts: bin/compute-classpath.sh
2013-07-13 19:10:00 -07:00 · 2013-07-13 19:10:00 -07:00 · cd28d9c147
--- a/bin/compute-classpath.sh
+++ b/bin/compute-classpath.sh
@ -24,46 +24,71 @@ PYSPARK_DIR="$FWDIR/python"

 # Build up classpath
 CLASSPATH="$SPARK_CLASSPATH"
-CLASSPATH="$CLASSPATH:$FWDIR/conf"
-CLASSPATH="$CLASSPATH:$CORE_DIR/target/scala-$SCALA_VERSION/classes"
-if [ -n "$SPARK_TESTING" ] ; then
-  CLASSPATH="$CLASSPATH:$CORE_DIR/target/scala-$SCALA_VERSION/test-classes"
-  CLASSPATH="$CLASSPATH:$STREAMING_DIR/target/scala-$SCALA_VERSION/test-classes"
-fi
-CLASSPATH="$CLASSPATH:$CORE_DIR/src/main/resources"
-CLASSPATH="$CLASSPATH:$REPL_DIR/target/scala-$SCALA_VERSION/classes"
-CLASSPATH="$CLASSPATH:$EXAMPLES_DIR/target/scala-$SCALA_VERSION/classes"
-CLASSPATH="$CLASSPATH:$STREAMING_DIR/target/scala-$SCALA_VERSION/classes"
-CLASSPATH="$CLASSPATH:$STREAMING_DIR/lib/org/apache/kafka/kafka/0.7.2-spark/*" # <-- our in-project Kafka Jar
-if [ -e "$FWDIR/lib_managed" ]; then
-  CLASSPATH="$CLASSPATH:$FWDIR/lib_managed/jars/*"
-  CLASSPATH="$CLASSPATH:$FWDIR/lib_managed/bundles/*"
-fi
-CLASSPATH="$CLASSPATH:$REPL_DIR/lib/*"
-# Add the shaded JAR for Maven builds
-if [ -e $REPL_BIN_DIR/target ]; then
-  for jar in `find "$REPL_BIN_DIR/target" -name 'spark-repl-*-shaded-hadoop*.jar'`; do
+
+function dev_classpath {
+  CLASSPATH="$CLASSPATH:$FWDIR/conf"
+  CLASSPATH="$CLASSPATH:$CORE_DIR/target/scala-$SCALA_VERSION/classes"
+  if [ -n "$SPARK_TESTING" ] ; then
+    CLASSPATH="$CLASSPATH:$CORE_DIR/target/scala-$SCALA_VERSION/test-classes"
+    CLASSPATH="$CLASSPATH:$STREAMING_DIR/target/scala-$SCALA_VERSION/test-classes"
+  fi
+  CLASSPATH="$CLASSPATH:$CORE_DIR/src/main/resources"
+  CLASSPATH="$CLASSPATH:$REPL_DIR/target/scala-$SCALA_VERSION/classes"
+  CLASSPATH="$CLASSPATH:$EXAMPLES_DIR/target/scala-$SCALA_VERSION/classes"
+  CLASSPATH="$CLASSPATH:$STREAMING_DIR/target/scala-$SCALA_VERSION/classes"
+  CLASSPATH="$CLASSPATH:$STREAMING_DIR/lib/org/apache/kafka/kafka/0.7.2-spark/*" # <-- our in-project Kafka Jar
+  if [ -e "$FWDIR/lib_managed" ]; then
+    CLASSPATH="$CLASSPATH:$FWDIR/lib_managed/jars/*"
+    CLASSPATH="$CLASSPATH:$FWDIR/lib_managed/bundles/*"
+  fi
+  CLASSPATH="$CLASSPATH:$REPL_DIR/lib/*"
+  # Add the shaded JAR for Maven builds
+  if [ -e $REPL_BIN_DIR/target ]; then
+    for jar in `find "$REPL_BIN_DIR/target" -name 'spark-repl-*-shaded-hadoop*.jar'`; do
+      CLASSPATH="$CLASSPATH:$jar"
+    done
+    # The shaded JAR doesn't contain examples, so include those separately
+    EXAMPLES_JAR=`ls "$EXAMPLES_DIR/target/spark-examples"*[0-9T].jar`
+    CLASSPATH+=":$EXAMPLES_JAR"
+  fi
+  CLASSPATH="$CLASSPATH:$BAGEL_DIR/target/scala-$SCALA_VERSION/classes"
+  CLASSPATH="$CLASSPATH:$MLLIB_DIR/target/scala-$SCALA_VERSION/classes"
+  for jar in `find $PYSPARK_DIR/lib -name '*jar'`; do
    CLASSPATH="$CLASSPATH:$jar"
  done
-  # The shaded JAR doesn't contain examples, so include those separately
-  EXAMPLES_JAR=`ls "$EXAMPLES_DIR/target/spark-examples"*[0-9T].jar`
-  CLASSPATH+=":$EXAMPLES_JAR"
-fi
-CLASSPATH="$CLASSPATH:$BAGEL_DIR/target/scala-$SCALA_VERSION/classes"
-CLASSPATH="$CLASSPATH:$MLLIB_DIR/target/scala-$SCALA_VERSION/classes"
-for jar in `find $PYSPARK_DIR/lib -name '*jar'`; do
-  CLASSPATH="$CLASSPATH:$jar"
-done

-# Figure out the JAR file that our examples were packaged into. This includes a bit of a hack
-# to avoid the -sources and -doc packages that are built by publish-local.
-if [ -e "$EXAMPLES_DIR/target/scala-$SCALA_VERSION/spark-examples"*[0-9T].jar ]; then
-  # Use the JAR from the SBT build
-  export SPARK_EXAMPLES_JAR=`ls "$EXAMPLES_DIR/target/scala-$SCALA_VERSION/spark-examples"*[0-9T].jar`
-fi
-if [ -e "$EXAMPLES_DIR/target/spark-examples"*[0-9T].jar ]; then
-  # Use the JAR from the Maven build
-  export SPARK_EXAMPLES_JAR=`ls "$EXAMPLES_DIR/target/spark-examples"*[0-9T].jar`
+  # Figure out the JAR file that our examples were packaged into. This includes a bit of a hack
+  # to avoid the -sources and -doc packages that are built by publish-local.
+  if [ -e "$EXAMPLES_DIR/target/scala-$SCALA_VERSION/spark-examples"*[0-9T].jar ]; then
+    # Use the JAR from the SBT build
+    export SPARK_EXAMPLES_JAR=`ls "$EXAMPLES_DIR/target/scala-$SCALA_VERSION/spark-examples"*[0-9T].jar`
+  fi
+  if [ -e "$EXAMPLES_DIR/target/spark-examples"*[0-9T].jar ]; then
+    # Use the JAR from the Maven build
+    export SPARK_EXAMPLES_JAR=`ls "$EXAMPLES_DIR/target/spark-examples"*[0-9T].jar`
+  fi
+
+  # Add Scala standard library
+  if [ -z "$SCALA_LIBRARY_PATH" ]; then
+    if [ -z "$SCALA_HOME" ]; then
+      echo "SCALA_HOME is not set" >&2
+      exit 1
+    fi
+    SCALA_LIBRARY_PATH="$SCALA_HOME/lib"
+  fi
+  CLASSPATH="$CLASSPATH:$SCALA_LIBRARY_PATH/scala-library.jar"
+  CLASSPATH="$CLASSPATH:$SCALA_LIBRARY_PATH/scala-compiler.jar"
+  CLASSPATH="$CLASSPATH:$SCALA_LIBRARY_PATH/jline.jar"
+}
+
+function release_classpath {
+  CLASSPATH="$CLASSPATH:$FWDIR/jars/*"
+}
+
+if [ -f "$FWDIR/RELEASE" ]; then
+  release_classpath
+else
+  dev_classpath
 fi

 # Add hadoop conf dir - else FileSystem.*, etc fail !
@ -76,16 +101,4 @@ if [ "x" != "x$YARN_CONF_DIR" ]; then
  CLASSPATH="$CLASSPATH:$YARN_CONF_DIR"
 fi

-# Add Scala standard library
-if [ -z "$SCALA_LIBRARY_PATH" ]; then
-  if [ -z "$SCALA_HOME" ]; then
-    echo "SCALA_HOME is not set" >&2
-    exit 1
-  fi
-  SCALA_LIBRARY_PATH="$SCALA_HOME/lib"
-fi
-CLASSPATH="$CLASSPATH:$SCALA_LIBRARY_PATH/scala-library.jar"
-CLASSPATH="$CLASSPATH:$SCALA_LIBRARY_PATH/scala-compiler.jar"
-CLASSPATH="$CLASSPATH:$SCALA_LIBRARY_PATH/jline.jar"
-
 echo "$CLASSPATH"
--- a/bin/start-slave.sh
+++ b/bin/start-slave.sh
@ -1,4 +1,7 @@
 #!/usr/bin/env bash
+#
+# Usage: start-slave.sh <worker#> <master-spark-URL>
+#   where <master-spark-URL> is like "spark://localhost:7077"

 bin=`dirname "$0"`
 bin=`cd "$bin"; pwd`
--- a/make-distribution.sh
+++ b/make-distribution.sh
@ -0,0 +1,39 @@
+#!/bin/bash
+#
+# Script to create a binary distribution for easy deploys of Spark.
+# The distribution directory defaults to dist/ but can be overridden below.
+# The distribution contains fat (assembly) jars that include the Scala library,
+# so it is completely self contained.
+# It does not contain source or *.class files.
+#
+# Recommended deploy/testing procedure (standalone mode):
+# 1) Rsync / deploy the dist/ dir to one host
+# 2) cd to deploy dir; ./bin/start-master.sh
+# 3) Verify the master is up by visiting its web page, i.e. http://master-ip:8080.  Note the spark:// URL.
+# 4) ./bin/start-slave.sh 1 <<spark:// URL>>
+# 5) MASTER="spark://my-master-ip:7077" ./spark-shell
+
+# Figure out where the Spark framework is installed (quoted so paths with spaces work)
+FWDIR="$(cd "$(dirname "$0")" && pwd)"
+DISTDIR="$FWDIR/dist"
+
+# Get version from SBT
+export TERM=dumb   # Prevents color codes in SBT output
+VERSION=$("$FWDIR/sbt/sbt" "show version" | tail -1 | cut -f 2)
+echo "Making distribution for Spark $VERSION in $DISTDIR..."
+
+# Build fat JAR; stop on failure so a broken build never wipes an existing dist/
+"$FWDIR/sbt/sbt" "repl/assembly" || exit 1
+
+# Make directories (RELEASE marks the tree as a binary distribution)
+rm -rf "$DISTDIR"
+mkdir -p "$DISTDIR/jars"
+echo "$VERSION" >"$DISTDIR/RELEASE"
+
+# Copy jars
+cp "$FWDIR"/repl/target/*.jar "$DISTDIR/jars/"
+
+# Copy other things
+cp -r "$FWDIR/bin" "$DISTDIR"
+cp -r "$FWDIR/conf" "$DISTDIR"
+cp "$FWDIR/run" "$FWDIR/spark-shell" "$DISTDIR"
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@ -197,7 +197,7 @@ object SparkBuild extends Build {
  def replSettings = sharedSettings ++ Seq(
    name := "spark-repl",
    libraryDependencies <+= scalaVersion("org.scala-lang" % "scala-compiler" % _)
-  )
+  ) ++ assemblySettings ++ extraAssemblySettings

  def examplesSettings = sharedSettings ++ Seq(
    name := "spark-examples",
--- a/44
+++ b/44
@ -1,7 +1,5 @@
 #!/bin/bash

-SCALA_VERSION=2.9.3
-
 # Figure out where the Scala framework is installed
 FWDIR="$(cd `dirname $0`; pwd)"

@ -77,7 +75,7 @@ else
      exit 1
    fi
  fi
-  if [ -z "$SCALA_LIBRARY_PATH" ]; then
+  if [[ ! -f "$FWDIR/RELEASE" && -z "$SCALA_LIBRARY_PATH" ]]; then
    if [ -z "$SCALA_HOME" ]; then
      echo "SCALA_HOME is not set" >&2
      exit 1
@ -104,43 +102,33 @@ fi
 export JAVA_OPTS
 # Attention: when changing the way the JAVA_OPTS are assembled, the change must be reflected in ExecutorRunner.scala!

-CORE_DIR="$FWDIR/core"
-EXAMPLES_DIR="$FWDIR/examples"
-REPL_DIR="$FWDIR/repl"
+if [ ! -f "$FWDIR/RELEASE" ]; then
+  CORE_DIR="$FWDIR/core"
+  EXAMPLES_DIR="$FWDIR/examples"
+  REPL_DIR="$FWDIR/repl"

-# Exit if the user hasn't compiled Spark
-if [ ! -e "$CORE_DIR/target" ]; then
-  echo "Failed to find Spark classes in $CORE_DIR/target" >&2
-  echo "You need to compile Spark before running this program" >&2
-  exit 1
-fi
+  # Exit if the user hasn't compiled Spark
+  if [ ! -e "$CORE_DIR/target" ]; then
+    echo "Failed to find Spark classes in $CORE_DIR/target" >&2
+    echo "You need to compile Spark before running this program" >&2
+    exit 1
+  fi

-if [[ "$@" = *repl* && ! -e "$REPL_DIR/target" ]]; then
-  echo "Failed to find Spark classes in $REPL_DIR/target" >&2
-  echo "You need to compile Spark repl module before running this program" >&2
-  exit 1
+  if [[ "$@" = *repl* && ! -e "$REPL_DIR/target" ]]; then
+    echo "Failed to find Spark classes in $REPL_DIR/target" >&2
+    echo "You need to compile Spark repl module before running this program" >&2
+    exit 1
+  fi
 fi

 # Compute classpath using external script
 CLASSPATH=`$FWDIR/bin/compute-classpath.sh`
 export CLASSPATH

-# Figure out the JAR file that our examples were packaged into. This includes a bit of a hack
-# to avoid the -sources and -doc packages that are built by publish-local.
-if [ -e "$EXAMPLES_DIR/target/scala-$SCALA_VERSION/spark-examples"*[0-9T].jar ]; then
-  # Use the JAR from the SBT build
-  export SPARK_EXAMPLES_JAR=`ls "$EXAMPLES_DIR/target/scala-$SCALA_VERSION/spark-examples"*[0-9T].jar`
-fi
-if [ -e "$EXAMPLES_DIR/target/spark-examples"*[0-9T].jar ]; then
-  # Use the JAR from the Maven build
-  export SPARK_EXAMPLES_JAR=`ls "$EXAMPLES_DIR/target/spark-examples"*[0-9T].jar`
-fi
-
 if [ "$SPARK_LAUNCH_WITH_SCALA" == "1" ]; then
  EXTRA_ARGS=""     # Java options will be passed to scala as JAVA_OPTS
 else
  # The JVM doesn't read JAVA_OPTS by default so we need to pass it in
  EXTRA_ARGS="$JAVA_OPTS"
 fi
-
 exec "$RUNNER" -cp "$CLASSPATH" $EXTRA_ARGS "$@"
--- a/67
+++ b/67
@ -1,4 +1,65 @@
-#!/bin/sh
+#!/bin/bash --posix
+#
+# Shell script for starting the Spark Shell REPL
+# Note that it will set MASTER to spark://${SPARK_MASTER_IP}:${SPARK_MASTER_PORT}
+# if those two env vars are set in spark-env.sh but MASTER is not.
+# Options:
+#    -c <cores>    Set the number of cores for REPL to use
+#
 FWDIR="`dirname $0`"
-export SPARK_LAUNCH_WITH_SCALA=1
-exec $FWDIR/run spark.repl.Main "$@"
+
+# Consume leading "-c <cores>" / "--cores <cores>" pairs; the first argument
+# that is not one of these flags ends option parsing and stays in "$@".
+while [ "$1" = "-c" ] || [ "$1" = "--cores" ]; do
+  shift
+  if [ -n "$1" ]; then
+    OPTIONS="-Dspark.cores.max=$1"
+    shift
+  fi
+done
+
+# Set MASTER from spark-env if possible
+if [ -z "$MASTER" ]; then
+  if [ -e "$FWDIR/conf/spark-env.sh" ]; then
+    . "$FWDIR/conf/spark-env.sh"
+  fi
+  if [[ "x" != "x$SPARK_MASTER_IP" && "y" != "y$SPARK_MASTER_PORT" ]]; then
+    MASTER="spark://${SPARK_MASTER_IP}:${SPARK_MASTER_PORT}"
+    export MASTER
+  fi
+fi
+
+# Copy restore-TTY-on-exit functions from Scala script so spark-shell exits properly even in
+# binary distribution of Spark where Scala is not installed
+exit_status=127
+saved_stty=""
+
+# restore stty settings (echo in particular)
+function restoreSttySettings() {
+  stty $saved_stty
+  saved_stty=""
+}
+
+function onExit() {
+  if [[ "$saved_stty" != "" ]]; then
+    restoreSttySettings
+  fi
+  exit $exit_status
+}
+
+# to reenable echo if we are interrupted before completing.
+trap onExit INT
+
+# save terminal settings (stty -g prints them in a form stty can read back)
+saved_stty=$(stty -g 2>/dev/null)
+# clear on a non-zero stty exit status so we don't later try to restore them
+if [[ $? -ne 0 ]]; then
+  saved_stty=""
+fi
+
+$FWDIR/run $OPTIONS spark.repl.Main "$@"
+
+# record the exit status lest it be overwritten:
+# then reenable echo and propagate the code.
+exit_status=$?
+onExit