Fix the null reference exception
This commit is contained in:
Родитель
1899baafec
Коммит
f272dd54f5
|
@ -110,13 +110,13 @@ namespace Microsoft.Hpc.Communicators.LinuxCommunicator.HostsFile
|
|||
FileInfo fileInfo = new FileInfo(this.filepath);
|
||||
if (!fileInfo.Exists)
|
||||
{
|
||||
LinuxCommunicator.Instance.Tracer.TraceInfo("[HostsFileManager] The hosts file doesn't exists: {0}", this.filepath);
|
||||
LinuxCommunicator.Instance?.Tracer?.TraceInfo("[HostsFileManager] The hosts file doesn't exists: {0}", this.filepath);
|
||||
return;
|
||||
}
|
||||
|
||||
if (fileInfo.LastWriteTimeUtc <= this.lastModified)
|
||||
{
|
||||
LinuxCommunicator.Instance.Tracer.TraceInfo("[HostsFileManager] The hosts file isn't changed since last load");
|
||||
LinuxCommunicator.Instance?.Tracer?.TraceInfo("[HostsFileManager] The hosts file isn't changed since last load");
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -160,16 +160,16 @@ namespace Microsoft.Hpc.Communicators.LinuxCommunicator.HostsFile
|
|||
newHostList.AddRange(newEntries.Values.Where(entry => entry.Name.Contains('.')));
|
||||
this.ManagedEntries = newHostList;
|
||||
this.UpdateId = Guid.NewGuid();
|
||||
LinuxCommunicator.Instance.Tracer.TraceInfo("[HostsFileManager] The managed host entries updated, current update Id is {0}", this.UpdateId);
|
||||
LinuxCommunicator.Instance?.Tracer?.TraceInfo("[HostsFileManager] The managed host entries updated, current update Id is {0}", this.UpdateId);
|
||||
}
|
||||
else
|
||||
{
|
||||
LinuxCommunicator.Instance.Tracer.TraceInfo("[HostsFileManager] No update to HPC managed host entries, current update Id is {0}", this.UpdateId);
|
||||
LinuxCommunicator.Instance?.Tracer?.TraceInfo("[HostsFileManager] No update to HPC managed host entries, current update Id is {0}", this.UpdateId);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
LinuxCommunicator.Instance.Tracer.TraceWarning("[HostsFileManager] Hosts file was not managed by HPC");
|
||||
LinuxCommunicator.Instance?.Tracer?.TraceWarning("[HostsFileManager] Hosts file was not managed by HPC");
|
||||
this.ManagedEntries.Clear();
|
||||
}
|
||||
|
||||
|
@ -178,7 +178,7 @@ namespace Microsoft.Hpc.Communicators.LinuxCommunicator.HostsFile
|
|||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
LinuxCommunicator.Instance.Tracer.TraceWarning("[HostsFileManager] Failed to reload host file: {0}", e);
|
||||
LinuxCommunicator.Instance?.Tracer?.TraceWarning("[HostsFileManager] Failed to reload host file: {0}", e);
|
||||
}
|
||||
finally
|
||||
{
|
||||
|
@ -188,7 +188,7 @@ namespace Microsoft.Hpc.Communicators.LinuxCommunicator.HostsFile
|
|||
}
|
||||
catch (Exception te)
|
||||
{
|
||||
LinuxCommunicator.Instance.Tracer.TraceWarning("[HostsFileManager] Failed to restart reload timer: {0}", te);
|
||||
LinuxCommunicator.Instance?.Tracer?.TraceWarning("[HostsFileManager] Failed to restart reload timer: {0}", te);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -69,9 +69,9 @@ namespace Microsoft.Hpc.Communicators.LinuxCommunicator
|
|||
|
||||
/// <summary>
/// Releases the server, the monitoring configuration manager and the hosts file manager
/// held by this instance.
/// </summary>
/// <remarks>
/// Every member is disposed exactly once, and only through a null-conditional call, so
/// calling this on an instance whose members were never assigned does not throw a
/// <see cref="NullReferenceException"/>.
/// </remarks>
public void Dispose()
{
    this.server?.Dispose();
    this.MonitoringConfigManager?.Dispose();
    this.HostsManager?.Dispose();

    // Cleanup has been done explicitly; ask the GC not to run a finalizer for this object.
    GC.SuppressFinalize(this);
}
|
||||
|
||||
|
@ -127,8 +127,8 @@ namespace Microsoft.Hpc.Communicators.LinuxCommunicator
|
|||
{
|
||||
this.Tracer.TraceInfo("Initializing LinuxCommunicator.");
|
||||
|
||||
this.MonitoringConfigManager = new MonitoringConfigManager();
|
||||
Task.Run(() => this.MonitoringConfigManager.Initialize(this.headNodeFqdn.Value));
|
||||
this.MonitoringConfigManager = new MonitoringConfigManager(this.headNodeFqdn.Value);
|
||||
Task.Run(() => this.MonitoringConfigManager.Initialize());
|
||||
this.HostsManager = new HostsFileManager();
|
||||
|
||||
ServicePointManager.ServerCertificateValidationCallback += (s, cert, chain, sslPolicyErrors) =>
|
||||
|
@ -162,7 +162,7 @@ namespace Microsoft.Hpc.Communicators.LinuxCommunicator
|
|||
|
||||
/// <summary>
/// Starts the monitoring configuration manager, if one is set, and then delegates to
/// <c>Start(0)</c>.
/// </summary>
/// <returns>Whatever <c>Start(0)</c> returns.</returns>
public bool Start()
{
    // Started exactly once, and skipped when the manager reference is null.
    this.MonitoringConfigManager?.Start();
    return this.Start(0);
}
|
||||
|
||||
|
@ -177,12 +177,12 @@ namespace Microsoft.Hpc.Communicators.LinuxCommunicator
|
|||
return false;
|
||||
}
|
||||
|
||||
if (this.cancellationTokenSource != null) { this.cancellationTokenSource.Dispose(); }
|
||||
this.cancellationTokenSource?.Dispose();
|
||||
this.cancellationTokenSource = new CancellationTokenSource();
|
||||
|
||||
try
|
||||
{
|
||||
this.server.Start().Wait();
|
||||
this.server?.Start().Wait();
|
||||
}
|
||||
catch (AggregateException aggrEx)
|
||||
{
|
||||
|
@ -205,11 +205,11 @@ namespace Microsoft.Hpc.Communicators.LinuxCommunicator
|
|||
|
||||
/// <summary>
/// Stops the server and the monitoring configuration manager, then cancels and releases
/// the current cancellation token source.
/// </summary>
/// <remarks>
/// Every step is null-conditional, so the method can be called when some members were
/// never assigned or when it has already been called once.
/// </remarks>
/// <returns>Always <c>true</c>.</returns>
public bool Stop()
{
    this.Tracer?.TraceInfo("Stopping LinuxCommunicator.");
    this.server?.Stop();
    this.MonitoringConfigManager?.Stop();

    // Cancel must come before Dispose: Cancel on a disposed source throws
    // ObjectDisposedException. Each call is made only once.
    this.cancellationTokenSource?.Cancel();
    this.cancellationTokenSource?.Dispose();

    // Drop the reference so the disposed source cannot be cancelled or disposed again.
    this.cancellationTokenSource = null;
    return true;
}
|
||||
|
|
|
@ -7,6 +7,7 @@ using System.Threading;
|
|||
using System.Threading.Tasks;
|
||||
using System.Timers;
|
||||
using Microsoft.Hpc.Monitoring;
|
||||
using System.Net.Sockets;
|
||||
|
||||
namespace Microsoft.Hpc.Communicators.LinuxCommunicator.Monitoring
|
||||
{
|
||||
|
@ -18,7 +19,7 @@ namespace Microsoft.Hpc.Communicators.LinuxCommunicator.Monitoring
|
|||
|
||||
private Dictionary<string, string[]> schedulerInstanceMap = new Dictionary<string, string[]>(StringComparer.CurrentCultureIgnoreCase)
|
||||
{
|
||||
{ "HPCSchedulerJobs",
|
||||
{ "HPCSchedulerJobs",
|
||||
new string[]
|
||||
{
|
||||
PerformanceCounterNames.Scheduler_ClusterPerfCounter_NumberOfCanceledJobs_Name,
|
||||
|
@ -31,7 +32,7 @@ namespace Microsoft.Hpc.Communicators.LinuxCommunicator.Monitoring
|
|||
}
|
||||
},
|
||||
|
||||
{ "HPCSchedulerNodes",
|
||||
{ "HPCSchedulerNodes",
|
||||
new string[]
|
||||
{
|
||||
PerformanceCounterNames.Scheduler_ClusterPerfCounter_NumberOfDrainingNodes_Name,
|
||||
|
@ -42,7 +43,7 @@ namespace Microsoft.Hpc.Communicators.LinuxCommunicator.Monitoring
|
|||
}
|
||||
},
|
||||
|
||||
{ "HPCSchedulerCores",
|
||||
{ "HPCSchedulerCores",
|
||||
new string[]
|
||||
{
|
||||
PerformanceCounterNames.Scheduler_ClusterPerfCounter_NumberOfOnlineProcessors_Name,
|
||||
|
@ -54,7 +55,7 @@ namespace Microsoft.Hpc.Communicators.LinuxCommunicator.Monitoring
|
|||
}
|
||||
},
|
||||
|
||||
{ "HPCPoolUsage",
|
||||
{ "HPCPoolUsage",
|
||||
new string[]
|
||||
{
|
||||
PerformanceCounterNames.Scheduler_ClusterPerfCounter_PoolGaurantee_Name,
|
||||
|
@ -64,21 +65,27 @@ namespace Microsoft.Hpc.Communicators.LinuxCommunicator.Monitoring
|
|||
};
|
||||
|
||||
private MetricCountersConfig metricCountersConfig = new MetricCountersConfig();
|
||||
private string server;
|
||||
|
||||
public MonitoringConfigManager() { }
|
||||
/// <summary>
/// Creates a manager for the given server and configures the configuration-check timer
/// without enabling it.
/// </summary>
/// <param name="server">
/// Stored in the <c>server</c> field for later use when connecting to the monitoring store.
/// </param>
public MonitoringConfigManager(string server)
{
    const int MillisecondsPerMinute = 60 * 1000;

    this.server = server;

    // NOTE(review): assumes checkConfigTimer is a reference type (e.g. System.Timers.Timer),
    // so configuring it through a local alias affects the field's instance — confirm.
    var timer = this.checkConfigTimer;
    timer.AutoReset = true;
    timer.Interval = 5 * MillisecondsPerMinute;
    timer.Enabled = false;
    timer.Elapsed += this.checkConfigTimer_Elapsed;
}
|
||||
|
||||
public void Initialize(string server)
|
||||
|
||||
public void Initialize()
|
||||
{
|
||||
RetryManager rm = new RetryManager(new PeriodicRetryTimer(30 * 1000));
|
||||
while (true)
|
||||
{
|
||||
try
|
||||
{
|
||||
this.Store = MonitoringStoreConnection.Connect(server, "LinuxCommunicator");
|
||||
this.checkConfigTimer_Elapsed(this, null);
|
||||
this.checkConfigTimer.AutoReset = true;
|
||||
this.checkConfigTimer.Interval = 5 * 60 * 1000;
|
||||
this.checkConfigTimer.Elapsed += checkConfigTimer_Elapsed;
|
||||
this.Store = MonitoringStoreConnection.Connect(this.server, "LinuxCommunicator");
|
||||
this.CheckConfig();
|
||||
break;
|
||||
}
|
||||
catch (Exception e)
|
||||
|
@ -123,6 +130,21 @@ namespace Microsoft.Hpc.Communicators.LinuxCommunicator.Monitoring
|
|||
}
|
||||
|
||||
/// <summary>
/// Timer callback that runs the configuration check.
/// </summary>
/// <remarks>
/// If the check throws a <see cref="SocketException"/>, the manager is stopped, the
/// exception is traced (when a communicator instance and tracer are available), and the
/// manager is initialized and started again. Other exception types are not caught here.
/// </remarks>
/// <param name="sender">Not used.</param>
/// <param name="e">Not used.</param>
private void checkConfigTimer_Elapsed(object sender, ElapsedEventArgs e)
{
    try
    {
        this.CheckConfig();
    }
    catch (SocketException socketError)
    {
        // Order matters: stop first, then report, then re-initialize and restart.
        this.Stop();

        var tracer = LinuxCommunicator.Instance?.Tracer;
        tracer?.TraceException(socketError);

        this.Initialize();
        this.Start();
    }
}
|
||||
|
||||
private void CheckConfig()
|
||||
{
|
||||
var metrics = this.Store.GetMetrics(MetricTarget.ComputeNode);
|
||||
if (this.currentConfig.UpdateWhenChanged(metrics))
|
||||
|
|
Загрузка…
Ссылка в новой задаче