Fix the null reference exception

This commit is contained in:
Evan Cui 2017-01-09 21:43:54 +08:00
Родитель 1899baafec
Коммит f272dd54f5
3 изменённых файла: 53 добавления и 31 удаление

Просмотреть файл

@ -110,13 +110,13 @@ namespace Microsoft.Hpc.Communicators.LinuxCommunicator.HostsFile
FileInfo fileInfo = new FileInfo(this.filepath);
if (!fileInfo.Exists)
{
LinuxCommunicator.Instance.Tracer.TraceInfo("[HostsFileManager] The hosts file doesn't exists: {0}", this.filepath);
LinuxCommunicator.Instance?.Tracer?.TraceInfo("[HostsFileManager] The hosts file doesn't exists: {0}", this.filepath);
return;
}
if (fileInfo.LastWriteTimeUtc <= this.lastModified)
{
LinuxCommunicator.Instance.Tracer.TraceInfo("[HostsFileManager] The hosts file isn't changed since last load");
LinuxCommunicator.Instance?.Tracer?.TraceInfo("[HostsFileManager] The hosts file isn't changed since last load");
return;
}
@ -160,16 +160,16 @@ namespace Microsoft.Hpc.Communicators.LinuxCommunicator.HostsFile
newHostList.AddRange(newEntries.Values.Where(entry => entry.Name.Contains('.')));
this.ManagedEntries = newHostList;
this.UpdateId = Guid.NewGuid();
LinuxCommunicator.Instance.Tracer.TraceInfo("[HostsFileManager] The managed host entries updated, current update Id is {0}", this.UpdateId);
LinuxCommunicator.Instance?.Tracer?.TraceInfo("[HostsFileManager] The managed host entries updated, current update Id is {0}", this.UpdateId);
}
else
{
LinuxCommunicator.Instance.Tracer.TraceInfo("[HostsFileManager] No update to HPC managed host entries, current update Id is {0}", this.UpdateId);
LinuxCommunicator.Instance?.Tracer?.TraceInfo("[HostsFileManager] No update to HPC managed host entries, current update Id is {0}", this.UpdateId);
}
}
else
{
LinuxCommunicator.Instance.Tracer.TraceWarning("[HostsFileManager] Hosts file was not managed by HPC");
LinuxCommunicator.Instance?.Tracer?.TraceWarning("[HostsFileManager] Hosts file was not managed by HPC");
this.ManagedEntries.Clear();
}
@ -178,7 +178,7 @@ namespace Microsoft.Hpc.Communicators.LinuxCommunicator.HostsFile
}
catch (Exception e)
{
LinuxCommunicator.Instance.Tracer.TraceWarning("[HostsFileManager] Failed to reload host file: {0}", e);
LinuxCommunicator.Instance?.Tracer?.TraceWarning("[HostsFileManager] Failed to reload host file: {0}", e);
}
finally
{
@ -188,7 +188,7 @@ namespace Microsoft.Hpc.Communicators.LinuxCommunicator.HostsFile
}
catch (Exception te)
{
LinuxCommunicator.Instance.Tracer.TraceWarning("[HostsFileManager] Failed to restart reload timer: {0}", te);
LinuxCommunicator.Instance?.Tracer?.TraceWarning("[HostsFileManager] Failed to restart reload timer: {0}", te);
}
}
}

Просмотреть файл

@ -69,9 +69,9 @@ namespace Microsoft.Hpc.Communicators.LinuxCommunicator
public void Dispose()
{
this.server.Dispose();
this.MonitoringConfigManager.Dispose();
this.HostsManager.Dispose();
this.server?.Dispose();
this.MonitoringConfigManager?.Dispose();
this.HostsManager?.Dispose();
GC.SuppressFinalize(this);
}
@ -127,8 +127,8 @@ namespace Microsoft.Hpc.Communicators.LinuxCommunicator
{
this.Tracer.TraceInfo("Initializing LinuxCommunicator.");
this.MonitoringConfigManager = new MonitoringConfigManager();
Task.Run(() => this.MonitoringConfigManager.Initialize(this.headNodeFqdn.Value));
this.MonitoringConfigManager = new MonitoringConfigManager(this.headNodeFqdn.Value);
Task.Run(() => this.MonitoringConfigManager.Initialize());
this.HostsManager = new HostsFileManager();
ServicePointManager.ServerCertificateValidationCallback += (s, cert, chain, sslPolicyErrors) =>
@ -162,7 +162,7 @@ namespace Microsoft.Hpc.Communicators.LinuxCommunicator
public bool Start()
{
this.MonitoringConfigManager.Start();
this.MonitoringConfigManager?.Start();
return this.Start(0);
}
@ -177,12 +177,12 @@ namespace Microsoft.Hpc.Communicators.LinuxCommunicator
return false;
}
if (this.cancellationTokenSource != null) { this.cancellationTokenSource.Dispose(); }
this.cancellationTokenSource?.Dispose();
this.cancellationTokenSource = new CancellationTokenSource();
try
{
this.server.Start().Wait();
this.server?.Start().Wait();
}
catch (AggregateException aggrEx)
{
@ -205,11 +205,11 @@ namespace Microsoft.Hpc.Communicators.LinuxCommunicator
public bool Stop()
{
this.Tracer.TraceInfo("Stopping LinuxCommunicator.");
this.server.Stop();
this.MonitoringConfigManager.Stop();
this.cancellationTokenSource.Cancel();
this.cancellationTokenSource.Dispose();
this.Tracer?.TraceInfo("Stopping LinuxCommunicator.");
this.server?.Stop();
this.MonitoringConfigManager?.Stop();
this.cancellationTokenSource?.Cancel();
this.cancellationTokenSource?.Dispose();
this.cancellationTokenSource = null;
return true;
}

Просмотреть файл

@ -7,6 +7,7 @@ using System.Threading;
using System.Threading.Tasks;
using System.Timers;
using Microsoft.Hpc.Monitoring;
using System.Net.Sockets;
namespace Microsoft.Hpc.Communicators.LinuxCommunicator.Monitoring
{
@ -18,7 +19,7 @@ namespace Microsoft.Hpc.Communicators.LinuxCommunicator.Monitoring
private Dictionary<string, string[]> schedulerInstanceMap = new Dictionary<string, string[]>(StringComparer.CurrentCultureIgnoreCase)
{
{ "HPCSchedulerJobs",
{ "HPCSchedulerJobs",
new string[]
{
PerformanceCounterNames.Scheduler_ClusterPerfCounter_NumberOfCanceledJobs_Name,
@ -31,7 +32,7 @@ namespace Microsoft.Hpc.Communicators.LinuxCommunicator.Monitoring
}
},
{ "HPCSchedulerNodes",
{ "HPCSchedulerNodes",
new string[]
{
PerformanceCounterNames.Scheduler_ClusterPerfCounter_NumberOfDrainingNodes_Name,
@ -42,7 +43,7 @@ namespace Microsoft.Hpc.Communicators.LinuxCommunicator.Monitoring
}
},
{ "HPCSchedulerCores",
{ "HPCSchedulerCores",
new string[]
{
PerformanceCounterNames.Scheduler_ClusterPerfCounter_NumberOfOnlineProcessors_Name,
@ -54,7 +55,7 @@ namespace Microsoft.Hpc.Communicators.LinuxCommunicator.Monitoring
}
},
{ "HPCPoolUsage",
{ "HPCPoolUsage",
new string[]
{
PerformanceCounterNames.Scheduler_ClusterPerfCounter_PoolGaurantee_Name,
@ -64,21 +65,27 @@ namespace Microsoft.Hpc.Communicators.LinuxCommunicator.Monitoring
};
private MetricCountersConfig metricCountersConfig = new MetricCountersConfig();
private string server;
public MonitoringConfigManager() { }
public MonitoringConfigManager(string server)
{
this.server = server;
this.checkConfigTimer.AutoReset = true;
this.checkConfigTimer.Interval = 5 * 60 * 1000;
this.checkConfigTimer.Enabled = false;
this.checkConfigTimer.Elapsed += this.checkConfigTimer_Elapsed;
}
public void Initialize(string server)
public void Initialize()
{
RetryManager rm = new RetryManager(new PeriodicRetryTimer(30 * 1000));
while (true)
{
try
{
this.Store = MonitoringStoreConnection.Connect(server, "LinuxCommunicator");
this.checkConfigTimer_Elapsed(this, null);
this.checkConfigTimer.AutoReset = true;
this.checkConfigTimer.Interval = 5 * 60 * 1000;
this.checkConfigTimer.Elapsed += checkConfigTimer_Elapsed;
this.Store = MonitoringStoreConnection.Connect(this.server, "LinuxCommunicator");
this.CheckConfig();
break;
}
catch (Exception e)
@ -123,6 +130,21 @@ namespace Microsoft.Hpc.Communicators.LinuxCommunicator.Monitoring
}
private void checkConfigTimer_Elapsed(object sender, ElapsedEventArgs e)
{
try
{
this.CheckConfig();
}
catch (SocketException ex)
{
this.Stop();
LinuxCommunicator.Instance?.Tracer?.TraceException(ex);
this.Initialize();
this.Start();
}
}
private void CheckConfig()
{
var metrics = this.Store.GetMetrics(MetricTarget.ComputeNode);
if (this.currentConfig.UpdateWhenChanged(metrics))