From b9283ebdcbd958c88d23c692365afb3712d572ad Mon Sep 17 00:00:00 2001 From: Steve Suh Date: Wed, 24 Mar 2021 20:00:59 -0700 Subject: [PATCH] Add DOTNET_WORKER__DIR environment variable (#861) --- .../Microsoft.Spark.E2ETest/SparkFixture.cs | 2 +- .../ConfigurationServiceTests.cs | 175 ++++++++++++++++++ .../Services/ConfigurationService.cs | 73 ++++++-- 3 files changed, 236 insertions(+), 14 deletions(-) create mode 100644 src/csharp/Microsoft.Spark.UnitTest/ConfigurationServiceTests.cs diff --git a/src/csharp/Microsoft.Spark.E2ETest/SparkFixture.cs b/src/csharp/Microsoft.Spark.E2ETest/SparkFixture.cs index 78d66cd4..20f36900 100644 --- a/src/csharp/Microsoft.Spark.E2ETest/SparkFixture.cs +++ b/src/csharp/Microsoft.Spark.E2ETest/SparkFixture.cs @@ -35,7 +35,7 @@ namespace Microsoft.Spark.E2ETest /// /// This environment variable specifies the path where the DotNet worker is installed. /// - public const string WorkerDir = Services.ConfigurationService.WorkerDirEnvVarName; + public const string WorkerDir = Services.ConfigurationService.DefaultWorkerDirEnvVarName; } private readonly Process _process = new Process(); diff --git a/src/csharp/Microsoft.Spark.UnitTest/ConfigurationServiceTests.cs b/src/csharp/Microsoft.Spark.UnitTest/ConfigurationServiceTests.cs new file mode 100644 index 00000000..92fd9d78 --- /dev/null +++ b/src/csharp/Microsoft.Spark.UnitTest/ConfigurationServiceTests.cs @@ -0,0 +1,175 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System; +using System.IO; +using Microsoft.Spark.Services; +using Microsoft.Spark.Utils; +using Xunit; + +namespace Microsoft.Spark.UnitTest +{ + public class ConfigurationServiceTests : IDisposable + { + private readonly WorkerDirEnvVars _workerDirEnvVars; + + public ConfigurationServiceTests() + { + var version = new Version(AssemblyInfoProvider.MicrosoftSparkAssemblyInfo().AssemblyVersion); + _workerDirEnvVars = new WorkerDirEnvVars + { + WorkerDir = new EnvVar(ConfigurationService.DefaultWorkerDirEnvVarName), + WorkerMajorMinorBuildDir = new EnvVar( + string.Format( + ConfigurationService.WorkerVerDirEnvVarNameFormat, + $"{version.Major}_{version.Minor}_{version.Build}")), + WorkerMajorMinorDir = new EnvVar( + string.Format( + ConfigurationService.WorkerVerDirEnvVarNameFormat, + $"{version.Major}_{version.Minor}")), + WorkerMajorDir = new EnvVar( + string.Format(ConfigurationService.WorkerVerDirEnvVarNameFormat, version.Major)) + }; + + Environment.SetEnvironmentVariable(_workerDirEnvVars.WorkerDir.Name, null); + Environment.SetEnvironmentVariable(_workerDirEnvVars.WorkerMajorMinorBuildDir.Name, null); + Environment.SetEnvironmentVariable(_workerDirEnvVars.WorkerMajorMinorDir.Name, null); + Environment.SetEnvironmentVariable(_workerDirEnvVars.WorkerMajorDir.Name, null); + } + + [Fact] + public void TestWorkerExePathWithNoEnvVars() + { + var configService = new ConfigurationService(); + + Assert.False(IsEnvVarSet(_workerDirEnvVars.WorkerMajorMinorBuildDir.Name)); + Assert.False(IsEnvVarSet(_workerDirEnvVars.WorkerMajorMinorDir.Name)); + Assert.False(IsEnvVarSet(_workerDirEnvVars.WorkerMajorDir.Name)); + Assert.False(IsEnvVarSet(_workerDirEnvVars.WorkerDir.Name)); + + // Environment variables not set, only Microsoft.Spark.Worker filename should be returned. + Assert.Equal(ConfigurationService.ProcFileName, configService.GetWorkerExePath()); + } + + [Fact] + public void TestWorkerExePathWithWorkerDirEnvVar() + { + var configService = new ConfigurationService(); + string workerDir = "workerDir"; + Environment.SetEnvironmentVariable(_workerDirEnvVars.WorkerDir.Name, workerDir); + + Assert.False(IsEnvVarSet(_workerDirEnvVars.WorkerMajorMinorBuildDir.Name)); + Assert.False(IsEnvVarSet(_workerDirEnvVars.WorkerMajorMinorDir.Name)); + Assert.False(IsEnvVarSet(_workerDirEnvVars.WorkerMajorDir.Name)); + Assert.True(IsEnvVarSet(_workerDirEnvVars.WorkerDir.Name)); + + // Only WorkerDir is set, WorkerExePath will be built using it. + Assert.Equal( + Path.Combine(workerDir, ConfigurationService.ProcFileName), + configService.GetWorkerExePath()); + } + + [Fact] + public void TestWorkerExePathWithEnvVarPrecedence() + { + { + var configService = new ConfigurationService(); + string workerDir = "workerDir"; + Environment.SetEnvironmentVariable(_workerDirEnvVars.WorkerDir.Name, workerDir); + + Assert.False(IsEnvVarSet(_workerDirEnvVars.WorkerMajorMinorBuildDir.Name)); + Assert.False(IsEnvVarSet(_workerDirEnvVars.WorkerMajorMinorDir.Name)); + Assert.False(IsEnvVarSet(_workerDirEnvVars.WorkerMajorDir.Name)); + Assert.True(IsEnvVarSet(_workerDirEnvVars.WorkerDir.Name)); + Assert.Equal( + Path.Combine(workerDir, ConfigurationService.ProcFileName), + configService.GetWorkerExePath()); + } + + { + var configService = new ConfigurationService(); + string workerMajorDir = "workerMajorDir"; + Environment.SetEnvironmentVariable(_workerDirEnvVars.WorkerMajorDir.Name, workerMajorDir); + + Assert.False(IsEnvVarSet(_workerDirEnvVars.WorkerMajorMinorBuildDir.Name)); + Assert.False(IsEnvVarSet(_workerDirEnvVars.WorkerMajorMinorDir.Name)); + Assert.True(IsEnvVarSet(_workerDirEnvVars.WorkerMajorDir.Name)); + Assert.True(IsEnvVarSet(_workerDirEnvVars.WorkerDir.Name)); + Assert.Equal( + Path.Combine(workerMajorDir, ConfigurationService.ProcFileName), + configService.GetWorkerExePath()); + } + + { + var configService = new ConfigurationService(); + string workerMajorMinorDir = "workerMajorMinorDir"; + Environment.SetEnvironmentVariable( + _workerDirEnvVars.WorkerMajorMinorDir.Name, workerMajorMinorDir); + + Assert.False(IsEnvVarSet(_workerDirEnvVars.WorkerMajorMinorBuildDir.Name)); + Assert.True(IsEnvVarSet(_workerDirEnvVars.WorkerMajorMinorDir.Name)); + Assert.True(IsEnvVarSet(_workerDirEnvVars.WorkerMajorDir.Name)); + Assert.True(IsEnvVarSet(_workerDirEnvVars.WorkerDir.Name)); + Assert.Equal( + Path.Combine(workerMajorMinorDir, ConfigurationService.ProcFileName), + configService.GetWorkerExePath()); + } + + { + var configService = new ConfigurationService(); + string workerMajorMinorBuildDir = "workerMajorMinorBuildDir"; + Environment.SetEnvironmentVariable( + _workerDirEnvVars.WorkerMajorMinorBuildDir.Name, workerMajorMinorBuildDir); + + Assert.True(IsEnvVarSet(_workerDirEnvVars.WorkerMajorMinorBuildDir.Name)); + Assert.True(IsEnvVarSet(_workerDirEnvVars.WorkerMajorMinorDir.Name)); + Assert.True(IsEnvVarSet(_workerDirEnvVars.WorkerMajorDir.Name)); + Assert.True(IsEnvVarSet(_workerDirEnvVars.WorkerDir.Name)); + Assert.Equal( + Path.Combine(workerMajorMinorBuildDir, ConfigurationService.ProcFileName), + configService.GetWorkerExePath()); + } + } + + public void Dispose() + { + Environment.SetEnvironmentVariable( + _workerDirEnvVars.WorkerDir.Name, + _workerDirEnvVars.WorkerDir.Value); + Environment.SetEnvironmentVariable( + _workerDirEnvVars.WorkerMajorMinorBuildDir.Name, + _workerDirEnvVars.WorkerMajorMinorBuildDir.Value); + Environment.SetEnvironmentVariable( + _workerDirEnvVars.WorkerMajorMinorDir.Name, + _workerDirEnvVars.WorkerMajorMinorDir.Value); + Environment.SetEnvironmentVariable( + _workerDirEnvVars.WorkerMajorDir.Name, + _workerDirEnvVars.WorkerMajorDir.Value); + } + + public bool IsEnvVarSet(string name) => + !string.IsNullOrWhiteSpace(Environment.GetEnvironmentVariable(name)); + + private class WorkerDirEnvVars + { + public EnvVar WorkerDir { get; set; } + public EnvVar WorkerMajorMinorBuildDir { get; set; } + public EnvVar WorkerMajorMinorDir { get; set; } + public EnvVar WorkerMajorDir { get; set; } + } + + private class EnvVar + { + public string Name { get; } + public string Value { get; } + + public EnvVar(string name) + { + Name = name; + Value = Environment.GetEnvironmentVariable(name); + } + + } + } +} diff --git a/src/csharp/Microsoft.Spark/Services/ConfigurationService.cs b/src/csharp/Microsoft.Spark/Services/ConfigurationService.cs index 74098de5..505868fa 100644 --- a/src/csharp/Microsoft.Spark/Services/ConfigurationService.cs +++ b/src/csharp/Microsoft.Spark/Services/ConfigurationService.cs @@ -16,11 +16,13 @@ namespace Microsoft.Spark.Services /// internal sealed class ConfigurationService : IConfigurationService { - public const string WorkerDirEnvVarName = "DOTNET_WORKER_DIR"; public const string WorkerReadBufferSizeEnvVarName = "spark.dotnet.worker.readBufferSize"; public const string WorkerWriteBufferSizeEnvVarName = "spark.dotnet.worker.writeBufferSize"; + internal const string DefaultWorkerDirEnvVarName = "DOTNET_WORKER_DIR"; + internal const string WorkerVerDirEnvVarNameFormat = "DOTNET_WORKER_{0}_DIR"; + private const string DotnetBackendPortEnvVarName = "DOTNETBACKEND_PORT"; private const int DotnetBackendDebugPort = 5567; @@ -28,15 +30,58 @@ namespace Microsoft.Spark.Services private const int DotnetNumBackendThreadsDefault = 10; private static readonly string s_procBaseFileName = "Microsoft.Spark.Worker"; - private static readonly string s_procFileName = - RuntimeInformation.IsOSPlatform(OSPlatform.Windows) ? - $"{s_procBaseFileName}.exe" : - s_procBaseFileName; private readonly ILoggerService _logger = LoggerServiceFactory.GetLogger(typeof(ConfigurationService)); private string _workerPath; + private string _workerDirEnvVarName; + + /// + /// The Microsoft.Spark.Worker filename. + /// + internal static string ProcFileName { get; } = RuntimeInformation.IsOSPlatform(OSPlatform.Windows) ? + $"{s_procBaseFileName}.exe" : + s_procBaseFileName; + + /// + /// Returns the environment variable name that defines the path to the + /// Microsoft.Spark.Worker. Using the Microsoft.Spark assembly version + /// we use the first environment variable that is defined in the + /// following order and default to DOTNET_WORKER_DIR if not: + /// + /// - DOTNET_WORKER_{MAJOR}_{MINOR}_{BUILD}_DIR + /// - DOTNET_WORKER_{MAJOR}_{MINOR}_DIR + /// - DOTNET_WORKER_{MAJOR}_DIR + /// + internal string WorkerDirEnvVarName + { + get + { + if (_workerDirEnvVarName != null) + { + return _workerDirEnvVarName; + } + + var version = new Version(AssemblyInfoProvider.MicrosoftSparkAssemblyInfo().AssemblyVersion); + var versionComponents = new int[] { version.Major, version.Minor, version.Build }; + for (int i = versionComponents.Length; i > 0; --i) + { + var span = new ReadOnlySpan(versionComponents, 0, i); + string verEnvVarName = string.Format( + WorkerVerDirEnvVarNameFormat, + string.Join("_", span.ToArray())); + if (!string.IsNullOrWhiteSpace(GetEnvironmentVariable(verEnvVarName))) + { + _workerDirEnvVarName = verEnvVarName; + return _workerDirEnvVarName; + } + } + + _workerDirEnvVarName = DefaultWorkerDirEnvVarName; + return _workerDirEnvVarName; + } + } /// /// How often to run GC on JVM ThreadPool threads. Defaults to 5 minutes. @@ -45,7 +90,7 @@ namespace Microsoft.Spark.Services { get { - string envVar = Environment.GetEnvironmentVariable("DOTNET_JVM_THREAD_GC_INTERVAL"); + string envVar = GetEnvironmentVariable("DOTNET_JVM_THREAD_GC_INTERVAL"); return string.IsNullOrEmpty(envVar) ? TimeSpan.FromMinutes(5) : TimeSpan.Parse(envVar); } } @@ -59,7 +104,7 @@ namespace Microsoft.Spark.Services public int GetBackendPortNumber() { if (!int.TryParse( - Environment.GetEnvironmentVariable(DotnetBackendPortEnvVarName), + GetEnvironmentVariable(DotnetBackendPortEnvVarName), out int portNumber)) { _logger.LogInfo($"'{DotnetBackendPortEnvVarName}' environment variable is not set."); @@ -77,7 +122,7 @@ namespace Microsoft.Spark.Services public int GetNumBackendThreads() { if (!int.TryParse( - Environment.GetEnvironmentVariable(DotnetNumBackendThreadsEnvVarName), + GetEnvironmentVariable(DotnetNumBackendThreadsEnvVarName), out int numThreads)) { numThreads = DotnetNumBackendThreadsDefault; @@ -97,19 +142,21 @@ namespace Microsoft.Spark.Services return _workerPath; } - string workerDir = Environment.GetEnvironmentVariable(WorkerDirEnvVarName); - // If the WorkerDirEnvName environment variable is set, the worker path is constructed // based on it. + string workerDir = GetEnvironmentVariable(WorkerDirEnvVarName); if (!string.IsNullOrEmpty(workerDir)) { - _workerPath = Path.Combine(workerDir, s_procFileName); - _logger.LogDebug($"Using the environment variable to construct .NET worker path: {_workerPath}."); + _workerPath = Path.Combine(workerDir, ProcFileName); + _logger.LogDebug( + "Using the {0} environment variable to construct .NET worker path: {1}.", + WorkerDirEnvVarName, + _workerPath); return _workerPath; } // Otherwise, the worker executable name is returned meaning it should be PATH. - _workerPath = s_procFileName; + _workerPath = ProcFileName; return _workerPath; }