* allowing to specify worker path for all modes - this will help specifying prefix script for worker exe in CentOS
* fixing issue 467 - with this change Mobius can run in YARN cluster mode
* updating notes since YARN cluster mode is supported with the fix to issue #467
* added check to run OS-specific path tests
This commit is contained in:
Kaarthik Sivashanmugam 2016-07-01 11:21:33 -07:00 коммит произвёл GitHub
Родитель 2337e7cee2
Коммит 4004001732
4 изменённых файлов: 77 добавлений и 34 удалений

Просмотреть файл

@ -109,45 +109,21 @@ namespace Microsoft.Spark.CSharp.Configuration
return portNo;
}
private string workerPath;
/// <summary>
/// Gets the path of the CSharp external backend worker process.
/// This implementation returns <c>ProcFileName</c> as-is; the method is virtual so that
/// derived configuration classes can supply a different path.
/// </summary>
/// <returns>The value of <c>ProcFileName</c>.</returns>
internal virtual string GetCSharpWorkerExePath()
{
return ProcFileName;
}
}
/// <summary>
/// Configuration for SparkCLR jobs in ** Local ** mode
/// Needs some investigation to find out why Local mode behaves
/// different than standalone cluster mode for the configuration values
/// overridden here
/// </summary>
private class SparkCLRLocalConfiguration : SparkCLRConfiguration
{
private readonly ILoggerService logger = LoggerServiceFactory.GetLogger(typeof(SparkCLRLocalConfiguration));
internal SparkCLRLocalConfiguration(System.Configuration.Configuration configuration)
: base(configuration)
{ }
private string workerPath;
internal override string GetCSharpWorkerExePath()
{
// SparkCLR jar and driver, worker & dependencies are shipped using Spark file server.
// These files are available in the Spark executing directory at executor node.
if (workerPath != null) return workerPath; // Return cached value
KeyValueConfigurationElement workerPathConfig = appSettings.Settings[CSharpWorkerPathSettingKey];
var workerPathConfig = appSettings.Settings[CSharpWorkerPathSettingKey];
if (workerPathConfig == null)
{
// Path for the CSharpWorker.exe was not specified in App.config
// Try to work out where location relative to this class.
// Construct path based on well-known file name + directory this class was loaded from.
string procDir = Path.GetDirectoryName(GetType().Assembly.Location);
workerPath = Path.Combine(procDir, ProcFileName);
logger.LogDebug("Using SparkCLR Adapter dll path to construct CSharpWorkerPath : {0}", workerPath);
workerPath = GetCSharpProcFileName();
}
else
{
@ -157,6 +133,33 @@ namespace Microsoft.Spark.CSharp.Configuration
}
return workerPath;
}
/// <summary>
/// Returns <c>ProcFileName</c> unchanged. The method is virtual so that derived
/// configuration classes can substitute a different value (for example, a full path).
/// NOTE(review): appears to serve as the fallback worker path when the app settings
/// contain no entry for <c>CSharpWorkerPathSettingKey</c> - confirm against the caller.
/// </summary>
/// <returns>The value of <c>ProcFileName</c>.</returns>
internal virtual string GetCSharpProcFileName()
{
return ProcFileName;
}
}
/// <summary>
/// SparkCLR configuration used for jobs that run in ** Local ** mode.
/// </summary>
private class SparkCLRLocalConfiguration : SparkCLRConfiguration
{
    private readonly ILoggerService logger = LoggerServiceFactory.GetLogger(typeof(SparkCLRLocalConfiguration));

    /// <summary>
    /// Creates the local-mode configuration; the supplied configuration is handed to the base class.
    /// </summary>
    /// <param name="configuration">Application configuration passed through to the base constructor.</param>
    internal SparkCLRLocalConfiguration(System.Configuration.Configuration configuration)
        : base(configuration)
    {
    }

    /// <summary>
    /// Builds the worker executable path from the directory that the assembly containing
    /// this class was loaded from, combined with the well-known <c>ProcFileName</c>.
    /// </summary>
    /// <returns>The combined directory + file name path.</returns>
    internal override string GetCSharpProcFileName()
    {
        // Directory of the assembly this type lives in.
        string assemblyDirectory = Path.GetDirectoryName(GetType().Assembly.Location);

        // Well-known worker file name placed next to that assembly.
        string workerExePath = Path.Combine(assemblyDirectory, ProcFileName);

        logger.LogDebug("Using SparkCLR Adapter dll path to construct CSharpWorkerPath : {0}", workerExePath);
        return workerExePath;
    }
}
/// <summary>

Просмотреть файл

@ -51,8 +51,6 @@ If you are using CentOS, Fedora, or similar Linux distributions or OS X, follow
* Create a script (referred to as 'prefix script') that will use Mono to execute Mobius driver application. See the [linux-prefix-script.md](./linux-prefix-script.md) for a sample. The name of this script will be used in the place of the name of the mobius driver application when launching [sparkclr-submit.cmd](./linux-instructions.md#running-mobius-samples-in-linux)
* Update the CSharpWorkerPath setting in the Mobius application config (refer to the config files used in Mobius examples, such as the [config for the Pi example](https://github.com/skaarthik/Mobius/blob/linux/examples/Batch/pi/App.config#L61)) to point to [CSharpWorker.sh.exe](./linux-csharpworker-prefix-script.md) (make sure to set the value appropriate for the Spark mode to be used)
**Note** - only client mode is supported in Mobius on YARN in Linux. Support for [cluster mode](https://github.com/Microsoft/Mobius/issues/467) will be added soon.
### Mobius in Azure HDInsight Spark Cluster
* Mono version available in HDInsight cluster is 3.x. Mobius [requires](/notes/linux-instructions.md#prerequisites) 4.2 or above. So, Mono has to be upgraded in HDInsight cluster to use Mobius.
* Follow [instructions](./linux-instructions.md#requirements) for Ubuntu

Просмотреть файл

@ -6,14 +6,15 @@
package org.apache.spark.deploy.csharp
import java.io.File
import java.nio.file.Paths
import java.util.concurrent.{Semaphore, TimeUnit}
import org.apache.hadoop.fs.Path
import org.apache.spark.SparkConf
import org.apache.spark.SecurityManager
import org.apache.spark.api.csharp.CSharpBackend
import org.apache.spark.deploy.{SparkHadoopUtil, SparkSubmitArguments, PythonRunner}
import org.apache.spark.util.{Utils, RedirectThread}
import org.apache.spark.deploy.{PythonRunner, SparkHadoopUtil, SparkSubmitArguments}
import org.apache.spark.util.{RedirectThread, Utils}
import org.apache.spark.util.csharp.{Utils => CSharpSparkUtils}
/**
@ -70,8 +71,8 @@ object CSharpRunner {
otherArgs = args.slice(1, args.length)
}
var processParameters = new java.util.ArrayList[String]()
processParameters.add(csharpExecutable)
var processParameters = new java.util.ArrayList[String]
processParameters.add(formatPath(csharpExecutable))
otherArgs.foreach( arg => processParameters.add(arg) )
println("[CSharpRunner.main] Starting CSharpBackend!")
@ -140,6 +141,18 @@ object CSharpRunner {
}
}
/**
 * Normalizes the name of the C# executable before it is launched.
 *
 * When executing in YARN cluster mode the executable name is single-part
 * (just the exe name); such a name is prefixed with "." so that it refers to
 * the current directory. Absolute paths and multi-part relative paths are
 * returned exactly as supplied.
 *
 * @param csharpExecutable path or bare file name of the executable
 * @return the input prefixed with "." when it is a relative, single-part name;
 *         otherwise the input unchanged
 */
def formatPath(csharpExecutable: String): String = {
  val executablePath = Paths.get(csharpExecutable)
  val isBareName = !executablePath.isAbsolute && executablePath.getNameCount == 1
  if (isBareName) Paths.get(".", executablePath.toString).toString else csharpExecutable
}
/**
* Download HDFS file into the supplied directory and return its local path.
* Will throw an exception if there are errors during downloading.

Просмотреть файл

@ -0,0 +1,29 @@
/*
* Copyright (c) Microsoft. All rights reserved.
* Licensed under the MIT license. See LICENSE file in the project root for full license information.
*/
package org.apache.spark.deploy.csharp
import org.apache.commons.lang3.SystemUtils
import org.apache.spark.csharp.SparkCLRFunSuite
/** Unit tests for `CSharpRunner.formatPath`. */
class CSharpRunnerSuite extends SparkCLRFunSuite {

  test("formatPath") {
    // An absolute path must come back untouched; the sample path is picked
    // to match the OS the test is running on.
    val absolutePath =
      if (SystemUtils.IS_OS_WINDOWS) """c:\path\to\Mobius\application.exe"""
      else """/path/to/Mobius/application.sh.exe"""
    assert(CSharpRunner.formatPath(absolutePath) == absolutePath)

    // A non-absolute, single-part name is prefixed with "." on Windows and Linux alike.
    val bareName = """application.sh.exe"""
    val formatted = CSharpRunner.formatPath(bareName)
    assert(formatted.startsWith("."))
    assert(formatted.endsWith(bareName))
  }
}