Mirror of https://github.com/microsoft/spark.git
[SPARK-4501][Core] - Create build/mvn to automatically download maven/zinc/scalac
Creates a top level directory script (as `build/mvn`) to automatically download zinc and the specific version of scala used to easily build spark. This will also download and install maven if the user doesn't already have it, and all packages are hosted under the `build/` directory. Tested on both Linux and OSX and both work. All commands pass through to the maven binary so it acts exactly as a traditional maven call would.

Author: Brennon York <brennon.york@capitalone.com>

Closes #3707 from brennonyork/SPARK-4501 and squashes the following commits:

0e5a0e4 [Brennon York] minor incorrect doc verbage (with -> this)
9b79e38 [Brennon York] fixed merge conflicts with dev/run-tests, properly quoted args in sbt/sbt, fixed bug where relative paths would fail if passed in from build/mvn
d2d41b6 [Brennon York] added blurb about leverging zinc with build/mvn
b979c58 [Brennon York] updated the merge conflict
c5634de [Brennon York] updated documentation to overview build/mvn, updated all points where sbt/sbt was referenced with build/sbt
b8437ba [Brennon York] set progress bars for curl and wget when not run on jenkins, no progress bar when run on jenkins, moved sbt script to build/sbt, wrote stub and warning under sbt/sbt which calls build/sbt, modified build/sbt to use the correct directory, fixed bug in build/sbt-launch-lib.bash to correctly pull the sbt version
be11317 [Brennon York] added switch to silence download progress only if AMPLAB_JENKINS is set
28d0a99 [Brennon York] updated to remove the python dependency, uses grep instead
7e785a6 [Brennon York] added silent and quiet flags to curl and wget respectively, added single echo output to denote start of a download if download is needed
14a5da0 [Brennon York] removed unnecessary zinc output on startup
1af4a94 [Brennon York] fixed bug with uppercase vs lowercase variable
3e8b9b3 [Brennon York] updated to properly only restart zinc if it was freshly installed
a680d12 [Brennon York] Added comments to functions and tested various mvn calls
bb8cc9d [Brennon York] removed package files
ef017e6 [Brennon York] removed OS complexities, setup generic install_app call, removed extra file complexities, removed help, removed forced install (defaults now), removed double-dash from cli
07bf018 [Brennon York] Updated to specifically handle pulling down the correct scala version
f914dea [Brennon York] Beginning final portions of localized scala home
69c4e44 [Brennon York] working linux and osx installers for purely local mvn build
4a1609c [Brennon York] finalizing working linux install for maven to local ./build/apache-maven folder
cbfcc68 [Brennon York] Changed the default sbt/sbt to build/sbt and added a build/mvn which will automatically download, install, and execute maven with zinc for easier build capability
Parent: 080ceb771a
Commit: a3e51cc990
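As a quick usage sketch, the new wrapper is invoked exactly like `mvn` itself; the first command below is the example added to the build documentation in this commit, and the second shows the relocated SBT wrapper (the profiles shown are illustrative):

    build/mvn -Pyarn -Phadoop-2.4 -Dhadoop.version=2.4.0 -DskipTests clean package
    build/sbt -Pyarn -Phadoop-2.3 assembly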
.gitignore
@@ -8,16 +8,19 @@
 *.pyc
 .idea/
 .idea_modules/
-sbt/*.jar
+build/*.jar
 .settings
 .cache
+cache
 .generated-mima*
-/build/
 work/
 out/
 .DS_Store
 third_party/libmesos.so
 third_party/libmesos.dylib
+build/apache-maven*
+build/zinc*
+build/scala*
 conf/java-opts
 conf/*.sh
 conf/*.cmd
build/mvn (new file, 132 lines):

#!/usr/bin/env bash

# Determine the current working directory
_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
# Preserve the calling directory
_CALLING_DIR="$(pwd)"

# Installs any application tarball given a URL, the expected tarball name,
# and, optionally, a checkable binary path to determine if the binary has
# already been installed
## Arg1 - URL
## Arg2 - Tarball Name
## Arg3 - Checkable Binary
install_app() {
  local remote_tarball="$1/$2"
  local local_tarball="${_DIR}/$2"
  local binary="${_DIR}/$3"

  # setup `curl` and `wget` silent options if we're running on Jenkins
  local curl_opts=""
  local wget_opts=""
  if [ -n "$AMPLAB_JENKINS" ]; then
    curl_opts="-s"
    wget_opts="--quiet"
  else
    curl_opts="--progress-bar"
    wget_opts="--progress=bar:force"
  fi

  if [ -z "$3" -o ! -f "$binary" ]; then
    # check if we already have the tarball
    # check if we have curl installed
    # download application
    [ ! -f "${local_tarball}" ] && [ -n "`which curl 2>/dev/null`" ] && \
      echo "exec: curl ${curl_opts} ${remote_tarball}" && \
      curl ${curl_opts} "${remote_tarball}" > "${local_tarball}"
    # if the file still doesn't exist, lets try `wget` and cross our fingers
    [ ! -f "${local_tarball}" ] && [ -n "`which wget 2>/dev/null`" ] && \
      echo "exec: wget ${wget_opts} ${remote_tarball}" && \
      wget ${wget_opts} -O "${local_tarball}" "${remote_tarball}"
    # if both were unsuccessful, exit
    [ ! -f "${local_tarball}" ] && \
      echo -n "ERROR: Cannot download $2 with cURL or wget; " && \
      echo "please install manually and try again." && \
      exit 2
    cd "${_DIR}" && tar -xzf "$2"
    rm -rf "$local_tarball"
  fi
}

# Install maven under the build/ folder
install_mvn() {
  install_app \
    "http://apache.claz.org/maven/maven-3/3.2.3/binaries" \
    "apache-maven-3.2.3-bin.tar.gz" \
    "apache-maven-3.2.3/bin/mvn"
  MVN_BIN="${_DIR}/apache-maven-3.2.3/bin/mvn"
}

# Install zinc under the build/ folder
install_zinc() {
  local zinc_path="zinc-0.3.5.3/bin/zinc"
  [ ! -f "${zinc_path}" ] && ZINC_INSTALL_FLAG=1
  install_app \
    "http://downloads.typesafe.com/zinc/0.3.5.3" \
    "zinc-0.3.5.3.tgz" \
    "${zinc_path}"
  ZINC_BIN="${_DIR}/${zinc_path}"
}

# Determine the Scala version from the root pom.xml file, set the Scala URL,
# and, with that, download the specific version of Scala necessary under
# the build/ folder
install_scala() {
  # determine the Scala version used in Spark
  local scala_version=`grep "scala.version" "${_DIR}/../pom.xml" | \
    head -1 | cut -f2 -d'>' | cut -f1 -d'<'`
  local scala_bin="${_DIR}/scala-${scala_version}/bin/scala"

  install_app \
    "http://downloads.typesafe.com/scala/${scala_version}" \
    "scala-${scala_version}.tgz" \
    "scala-${scala_version}/bin/scala"

  SCALA_COMPILER="$(cd "$(dirname ${scala_bin})/../lib" && pwd)/scala-compiler.jar"
  SCALA_LIBRARY="$(cd "$(dirname ${scala_bin})/../lib" && pwd)/scala-library.jar"
}

# Determines if a given application is already installed. If not, will attempt
# to install
## Arg1 - application name
## Arg2 - Alternate path to local install under build/ dir
check_and_install_app() {
  # create the local environment variable in uppercase
  local app_bin="`echo $1 | awk '{print toupper(\$0)}'`_BIN"
  # some black magic to set the generated app variable (i.e. MVN_BIN) into the
  # environment
  eval "${app_bin}=`which $1 2>/dev/null`"

  if [ -z "`which $1 2>/dev/null`" ]; then
    install_$1
  fi
}

# Setup healthy defaults for the Zinc port if none were provided from
# the environment
ZINC_PORT=${ZINC_PORT:-"3030"}

# Check and install all applications necessary to build Spark
check_and_install_app "mvn"

# Install the proper version of Scala and Zinc for the build
install_zinc
install_scala

# Reset the current working directory
cd "${_CALLING_DIR}"

# Now that zinc is ensured to be installed, check its status and, if its
# not running or just installed, start it
if [ -n "${ZINC_INSTALL_FLAG}" -o -z "`${ZINC_BIN} -status`" ]; then
  ${ZINC_BIN} -shutdown
  ${ZINC_BIN} -start -port ${ZINC_PORT} \
    -scala-compiler "${SCALA_COMPILER}" \
    -scala-library "${SCALA_LIBRARY}" &>/dev/null
fi

# Set any `mvn` options if not already present
export MAVEN_OPTS=${MAVEN_OPTS:-"-Xmx2g -XX:MaxPermSize=512M -XX:ReservedCodeCacheSize=512m"}

# Last, call the `mvn` command as usual
${MVN_BIN} "$@"
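The `eval`/`toupper` indirection in `check_and_install_app` above simply turns the application name into the matching `*_BIN` variable. A minimal standalone sketch of the same pattern follows; the `mvn`/`MVN_BIN` names mirror the script, and `install_mvn` here stands in for the function defined above:

    app="mvn"
    app_bin="$(echo "$app" | awk '{print toupper($0)}')_BIN"   # yields the string "MVN_BIN"
    eval "${app_bin}=$(which "$app" 2>/dev/null)"              # MVN_BIN now holds the system mvn path, if any
    if [ -z "$(which "$app" 2>/dev/null)" ]; then
      install_mvn                                              # would point MVN_BIN at the local build/ copy instead
    fi
    echo "MVN_BIN=${MVN_BIN}"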
build/sbt (new file, 111 lines):

#!/usr/bin/env bash

# When creating new tests for Spark SQL Hive, the HADOOP_CLASSPATH must contain the hive jars so
# that we can run Hive to generate the golden answer. This is not required for normal development
# or testing.
for i in "$HIVE_HOME"/lib/*
do HADOOP_CLASSPATH="$HADOOP_CLASSPATH:$i"
done
export HADOOP_CLASSPATH

realpath () {
(
  TARGET_FILE="$1"

  cd "$(dirname "$TARGET_FILE")"
  TARGET_FILE="$(basename "$TARGET_FILE")"

  COUNT=0
  while [ -L "$TARGET_FILE" -a $COUNT -lt 100 ]
  do
    TARGET_FILE="$(readlink "$TARGET_FILE")"
    cd $(dirname "$TARGET_FILE")
    TARGET_FILE="$(basename $TARGET_FILE)"
    COUNT=$(($COUNT + 1))
  done

  echo "$(pwd -P)/"$TARGET_FILE""
)
}

. "$(dirname "$(realpath "$0")")"/sbt-launch-lib.bash


declare -r noshare_opts="-Dsbt.global.base=project/.sbtboot -Dsbt.boot.directory=project/.boot -Dsbt.ivy.home=project/.ivy"
declare -r sbt_opts_file=".sbtopts"
declare -r etc_sbt_opts_file="/etc/sbt/sbtopts"

usage() {
 cat <<EOM
Usage: $script_name [options]

  -h | -help         print this message
  -v | -verbose      this runner is chattier
  -d | -debug        set sbt log level to debug
  -no-colors         disable ANSI color codes
  -sbt-create        start sbt even if current directory contains no sbt project
  -sbt-dir   <path>  path to global settings/plugins directory (default: ~/.sbt)
  -sbt-boot  <path>  path to shared boot directory (default: ~/.sbt/boot in 0.11 series)
  -ivy       <path>  path to local Ivy repository (default: ~/.ivy2)
  -mem    <integer>  set memory options (default: $sbt_mem, which is $(get_mem_opts $sbt_mem))
  -no-share          use all local caches; no sharing
  -no-global         uses global caches, but does not use global ~/.sbt directory.
  -jvm-debug <port>  Turn on JVM debugging, open at the given port.
  -batch             Disable interactive mode

  # sbt version (default: from project/build.properties if present, else latest release)
  -sbt-version  <version>   use the specified version of sbt
  -sbt-jar      <path>      use the specified jar as the sbt launcher
  -sbt-rc                   use an RC version of sbt
  -sbt-snapshot             use a snapshot version of sbt

  # java version (default: java from PATH, currently $(java -version 2>&1 | grep version))
  -java-home <path>         alternate JAVA_HOME

  # jvm options and output control
  JAVA_OPTS          environment variable, if unset uses "$java_opts"
  SBT_OPTS           environment variable, if unset uses "$default_sbt_opts"
  .sbtopts           if this file exists in the current directory, it is
                     prepended to the runner args
  /etc/sbt/sbtopts   if this file exists, it is prepended to the runner args
  -Dkey=val          pass -Dkey=val directly to the java runtime
  -J-X               pass option -X directly to the java runtime
                     (-J is stripped)
  -S-X               add -X to sbt's scalacOptions (-S is stripped)
  -PmavenProfiles    Enable a maven profile for the build.

In the case of duplicated or conflicting options, the order above
shows precedence: JAVA_OPTS lowest, command line options highest.
EOM
}

process_my_args () {
  while [[ $# -gt 0 ]]; do
    case "$1" in
     -no-colors) addJava "-Dsbt.log.noformat=true" && shift ;;
     -no-share) addJava "$noshare_opts" && shift ;;
     -no-global) addJava "-Dsbt.global.base=$(pwd)/project/.sbtboot" && shift ;;
     -sbt-boot) require_arg path "$1" "$2" && addJava "-Dsbt.boot.directory=$2" && shift 2 ;;
     -sbt-dir) require_arg path "$1" "$2" && addJava "-Dsbt.global.base=$2" && shift 2 ;;
     -debug-inc) addJava "-Dxsbt.inc.debug=true" && shift ;;
     -batch) exec </dev/null && shift ;;

     -sbt-create) sbt_create=true && shift ;;

      *) addResidual "$1" && shift ;;
    esac
  done

  # Now, ensure sbt version is used.
  [[ "${sbt_version}XXX" != "XXX" ]] && addJava "-Dsbt.version=$sbt_version"
}

loadConfigFile() {
  cat "$1" | sed '/^\#/d'
}

# if sbtopts files exist, prepend their contents to $@ so it can be processed by this runner
[[ -f "$etc_sbt_opts_file" ]] && set -- $(loadConfigFile "$etc_sbt_opts_file") "$@"
[[ -f "$sbt_opts_file" ]] && set -- $(loadConfigFile "$sbt_opts_file") "$@"

run "$@"
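A usage sketch of the option styles documented by the usage text above; the profiles mirror the SBT examples in the updated build documentation, and the exact flag values are illustrative:

    build/sbt -Pyarn -Phadoop-2.3 assembly        # Maven profiles pass straight through to the build
    build/sbt -J-Xmx2g -no-colors compile         # -J-Xmx2g goes to the JVM; -no-colors disables ANSI color codes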
build/sbt-launch-lib.bash
@@ -37,10 +37,10 @@ dlog () {
 }

 acquire_sbt_jar () {
-  SBT_VERSION=`awk -F "=" '/sbt\\.version/ {print $2}' ./project/build.properties`
+  SBT_VERSION=`awk -F "=" '/sbt\.version/ {print $2}' ./project/build.properties`
   URL1=http://typesafe.artifactoryonline.com/typesafe/ivy-releases/org.scala-sbt/sbt-launch/${SBT_VERSION}/sbt-launch.jar
   URL2=http://repo.typesafe.com/typesafe/ivy-releases/org.scala-sbt/sbt-launch/${SBT_VERSION}/sbt-launch.jar
-  JAR=sbt/sbt-launch-${SBT_VERSION}.jar
+  JAR=build/sbt-launch-${SBT_VERSION}.jar

   sbt_jar=$JAR
@ -219,7 +219,7 @@ scp spark-* \
|
||||||
|
|
||||||
# Docs
|
# Docs
|
||||||
cd spark
|
cd spark
|
||||||
sbt/sbt clean
|
build/sbt clean
|
||||||
cd docs
|
cd docs
|
||||||
# Compile docs with Java 7 to use nicer format
|
# Compile docs with Java 7 to use nicer format
|
||||||
JAVA_HOME=$JAVA_7_HOME PRODUCTION=1 jekyll build
|
JAVA_HOME=$JAVA_7_HOME PRODUCTION=1 jekyll build
|
||||||
|
|
dev/mima
@@ -24,7 +24,7 @@ set -e
 FWDIR="$(cd "`dirname "$0"`"/..; pwd)"
 cd "$FWDIR"

-echo -e "q\n" | sbt/sbt oldDeps/update
+echo -e "q\n" | build/sbt oldDeps/update
 rm -f .generated-mima*

 # Generate Mima Ignore is called twice, first with latest built jars
@@ -38,7 +38,7 @@ echo "SPARK_CLASSPATH=$SPARK_CLASSPATH"

 ./bin/spark-class org.apache.spark.tools.GenerateMIMAIgnore

-echo -e "q\n" | sbt/sbt mima-report-binary-issues | grep -v -e "info.*Resolving"
+echo -e "q\n" | build/sbt mima-report-binary-issues | grep -v -e "info.*Resolving"
 ret_val=$?

 if [ $ret_val != 0 ]; then
dev/run-tests
@@ -151,7 +151,7 @@ CURRENT_BLOCK=$BLOCK_BUILD
 HIVE_12_BUILD_ARGS="$SBT_MAVEN_PROFILES_ARGS -Phive -Phive-thriftserver -Phive-0.12.0"
 echo "[info] Compile with Hive 0.12.0"
 echo -e "q\n" \
-  | sbt/sbt $HIVE_12_BUILD_ARGS clean hive/compile hive-thriftserver/compile \
+  | build/sbt $HIVE_12_BUILD_ARGS clean hive/compile hive-thriftserver/compile \
   | grep -v -e "info.*Resolving" -e "warn.*Merging" -e "info.*Including"

 # Then build with default Hive version (0.13.1) because tests are based on this version
@@ -160,7 +160,7 @@ CURRENT_BLOCK=$BLOCK_BUILD
 echo "[info] Building Spark with these arguments: $SBT_MAVEN_PROFILES_ARGS"\
   " -Phive -Phive-thriftserver"
 echo -e "q\n" \
-  | sbt/sbt $SBT_MAVEN_PROFILES_ARGS -Phive -Phive-thriftserver package assembly/assembly \
+  | build/sbt $SBT_MAVEN_PROFILES_ARGS -Phive -Phive-thriftserver package assembly/assembly \
   | grep -v -e "info.*Resolving" -e "warn.*Merging" -e "info.*Including"
 }

@@ -197,7 +197,7 @@ CURRENT_BLOCK=$BLOCK_SPARK_UNIT_TESTS
 # QUESTION: Why doesn't 'yes "q"' work?
 # QUESTION: Why doesn't 'grep -v -e "^\[info\] Resolving"' work?
 echo -e "q\n" \
-  | sbt/sbt $SBT_MAVEN_PROFILES_ARGS "${SBT_MAVEN_TEST_ARGS[@]}" \
+  | build/sbt $SBT_MAVEN_PROFILES_ARGS "${SBT_MAVEN_TEST_ARGS[@]}" \
   | grep -v -e "info.*Resolving" -e "warn.*Merging" -e "info.*Including"
 }
@@ -17,9 +17,9 @@
 # limitations under the License.
 #

-echo -e "q\n" | sbt/sbt -Phive -Phive-thriftserver scalastyle > scalastyle.txt
+echo -e "q\n" | build/sbt -Phive -Phive-thriftserver scalastyle > scalastyle.txt
 # Check style with YARN built too
-echo -e "q\n" | sbt/sbt -Pyarn -Phadoop-2.2 -Dhadoop.version=2.2.0 scalastyle \
+echo -e "q\n" | build/sbt -Pyarn -Phadoop-2.2 -Dhadoop.version=2.2.0 scalastyle \
   >> scalastyle.txt

 ERRORS=$(cat scalastyle.txt | awk '{if($1~/error/)print}')
@@ -60,7 +60,7 @@ We use Sphinx to generate Python API docs, so you will need to install it by run

 ## API Docs (Scaladoc and Sphinx)

-You can build just the Spark scaladoc by running `sbt/sbt doc` from the SPARK_PROJECT_ROOT directory.
+You can build just the Spark scaladoc by running `build/sbt doc` from the SPARK_PROJECT_ROOT directory.

 Similarly, you can build just the PySpark docs by running `make html` from the
 SPARK_PROJECT_ROOT/python/docs directory. Documentation is only generated for classes that are listed as
@@ -68,7 +68,7 @@ public in `__init__.py`.

 When you run `jekyll` in the `docs` directory, it will also copy over the scaladoc for the various
 Spark subprojects into the `docs` directory (and then also into the `_site` directory). We use a
-jekyll plugin to run `sbt/sbt doc` before building the site so if you haven't run it (recently) it
+jekyll plugin to run `build/sbt doc` before building the site so if you haven't run it (recently) it
 may take some time as it generates all of the scaladoc. The jekyll plugin also generates the
 PySpark docs [Sphinx](http://sphinx-doc.org/).
@@ -25,8 +25,8 @@ if not (ENV['SKIP_API'] == '1' or ENV['SKIP_SCALADOC'] == '1')
   curr_dir = pwd
   cd("..")

-  puts "Running 'sbt/sbt -Pkinesis-asl compile unidoc' from " + pwd + "; this may take a few minutes..."
-  puts `sbt/sbt -Pkinesis-asl compile unidoc`
+  puts "Running 'build/sbt -Pkinesis-asl compile unidoc' from " + pwd + "; this may take a few minutes..."
+  puts `build/sbt -Pkinesis-asl compile unidoc`

   puts "Moving back into docs dir."
   cd("docs")
@@ -9,6 +9,15 @@ redirect_from: "building-with-maven.html"

 Building Spark using Maven requires Maven 3.0.4 or newer and Java 6+.

+# Building with `build/mvn`
+
+Spark now comes packaged with a self-contained Maven installation to ease building and deployment of Spark from source located under the `build/` directory. This script will automatically download and setup all necessary build requirements ([Maven](https://maven.apache.org/), [Scala](http://www.scala-lang.org/), and [Zinc](https://github.com/typesafehub/zinc)) locally within the `build/` directory itself. It honors any `mvn` binary if present already, however, will pull down its own copy of Scala and Zinc regardless to ensure proper version requirements are met. `build/mvn` execution acts as a pass through to the `mvn` call allowing easy transition from previous build methods. As an example, one can build a version of Spark as follows:
+
+{% highlight bash %}
+build/mvn -Pyarn -Phadoop-2.4 -Dhadoop.version=2.4.0 -DskipTests clean package
+{% endhighlight %}
+
+Other build examples can be found below.
+
 # Setting up Maven's Memory Usage

@@ -28,7 +37,9 @@ If you don't run this, you may see errors like the following:

 You can fix this by setting the `MAVEN_OPTS` variable as discussed before.

-**Note:** *For Java 8 and above this step is not required.*
+**Note:**
+* *For Java 8 and above this step is not required.*
+* *If using `build/mvn` and `MAVEN_OPTS` were not already set, the script will automate this for you.*

 # Specifying the Hadoop Version

@@ -182,22 +193,22 @@ compilation. More advanced developers may wish to use SBT.
 The SBT build is derived from the Maven POM files, and so the same Maven profiles and variables
 can be set to control the SBT build. For example:

-    sbt/sbt -Pyarn -Phadoop-2.3 assembly
+    build/sbt -Pyarn -Phadoop-2.3 assembly

 # Testing with SBT

-Some of the tests require Spark to be packaged first, so always run `sbt/sbt assembly` the first time. The following is an example of a correct (build, test) sequence:
+Some of the tests require Spark to be packaged first, so always run `build/sbt assembly` the first time. The following is an example of a correct (build, test) sequence:

-    sbt/sbt -Pyarn -Phadoop-2.3 -Phive -Phive-thriftserver assembly
-    sbt/sbt -Pyarn -Phadoop-2.3 -Phive -Phive-thriftserver test
+    build/sbt -Pyarn -Phadoop-2.3 -Phive -Phive-thriftserver assembly
+    build/sbt -Pyarn -Phadoop-2.3 -Phive -Phive-thriftserver test

 To run only a specific test suite as follows:

-    sbt/sbt -Pyarn -Phadoop-2.3 -Phive -Phive-thriftserver "test-only org.apache.spark.repl.ReplSuite"
+    build/sbt -Pyarn -Phadoop-2.3 -Phive -Phive-thriftserver "test-only org.apache.spark.repl.ReplSuite"

 To run test suites of a specific sub project as follows:

-    sbt/sbt -Pyarn -Phadoop-2.3 -Phive -Phive-thriftserver core/test
+    build/sbt -Pyarn -Phadoop-2.3 -Phive -Phive-thriftserver core/test

 # Speeding up Compilation with Zinc

@@ -206,3 +217,9 @@ compiler. When run locally as a background process, it speeds up builds of Scala
 like Spark. Developers who regularly recompile Spark with Maven will be the most interested in
 Zinc. The project site gives instructions for building and running `zinc`; OS X users can
 install it using `brew install zinc`.
+
+If using the `build/mvn` package `zinc` will automatically be downloaded and leveraged for all
+builds. This process will auto-start after the first time `build/mvn` is called and bind to port
+3030 unless the `ZINC_PORT` environment variable is set. The `zinc` process can subsequently be
+shut down at any time by running `build/zinc-<version>/bin/zinc -shutdown` and will automatically
+restart whenever `build/mvn` is called.
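A minimal sketch of the Zinc controls described in the hunk above, assuming the zinc 0.3.5.3 layout that build/mvn installs; the alternate port value is only an example:

    ZINC_PORT=3031 build/mvn -DskipTests clean package   # start (or reuse) the background compiler on a non-default port
    build/zinc-0.3.5.3/bin/zinc -status                  # check whether zinc is running
    build/zinc-0.3.5.3/bin/zinc -shutdown                # stop it; the next build/mvn call restarts it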
@@ -50,7 +50,7 @@ the _exact_ Hadoop version you are running to avoid any compatibility errors.

 In SBT, the equivalent can be achieved by setting the the `hadoop.version` property:

-    sbt/sbt -Dhadoop.version=1.0.4 assembly
+    build/sbt -Dhadoop.version=1.0.4 assembly

 # Linking Applications to the Hadoop Version
@@ -8,7 +8,7 @@ to your Java location. The set-up depends a bit on the build system:
   `-java-home` to the sbt launch script. If a Java 8 JDK is detected sbt will automatically
   include the Java 8 test project.

-  `$ JAVA_HOME=/opt/jdk1.8.0/ sbt/sbt clean "test-only org.apache.spark.Java8APISuite"`
+  `$ JAVA_HOME=/opt/jdk1.8.0/ build/sbt clean "test-only org.apache.spark.Java8APISuite"`

 * For Maven users,
@@ -1671,7 +1671,7 @@ class HiveContext(SQLContext):
         except Py4JError as e:
             raise Exception("You must build Spark with Hive. "
                             "Export 'SPARK_HIVE=true' and run "
-                            "sbt/sbt assembly", e)
+                            "build/sbt assembly", e)

     def _get_hive_ctx(self):
         return self._jvm.HiveContext(self._jsc.sc())
sbt/sbt (rewritten): the previous 111-line launcher script, whose content now lives at build/sbt as shown above, is replaced by this 12-line stub:

#!/bin/bash

# Determine the current working directory
_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"

echo "NOTE: The sbt/sbt script has been relocated to build/sbt." >&2
echo "      Please update references to point to the new location." >&2
echo "" >&2
echo "      Invoking 'build/sbt $@' now ..." >&2
echo "" >&2

${_DIR}/../build/sbt "$@"
@@ -22,10 +22,10 @@ export HADOOP_HOME="<path to>/hadoop-1.0.4"

 Using the console
 =================
-An interactive scala console can be invoked by running `sbt/sbt hive/console`. From here you can execute queries and inspect the various stages of query optimization.
+An interactive scala console can be invoked by running `build/sbt hive/console`. From here you can execute queries and inspect the various stages of query optimization.

 ```scala
-catalyst$ sbt/sbt hive/console
+catalyst$ build/sbt hive/console

 [info] Starting scala interpreter...
 import org.apache.spark.sql.catalyst.analysis._