FHProxy: IEnumerable<RepairData> support. Doc updates.

This commit is contained in:
Charles Torre 2022-04-19 13:52:50 -07:00
Родитель 84a213ae2f
Коммит 1aa006fed6
3 изменённых файлов: 122 добавлений и 71 удалений

Просмотреть файл

@ -1,6 +1,6 @@
# FabricHealerProxy
FabricHealerProxy is a .NET Standard 2.0 library that provides a very simple and reliable way to share Service Fabric entity repair information to FabricHealer service instances running in the same cluster. You can install FabricHealerProxy into your .NET Service Fabric service from the [nuget.org package gallery](...).
FabricHealerProxy is a .NET Standard 2.0 library that provides a very simple and reliable way for any .NET Service Fabric service to initiate Service Fabric entity repair by the FabricHealer service running in the same cluster.
### How to use FabricHealerProxy
@ -22,6 +22,7 @@ using System.Threading.Tasks;
using Microsoft.ServiceFabric.Services.Runtime;
using FabricHealerProxy;
using FabricHealerProxy.Exceptions;
using System.Collections.Generic;
namespace Stateless1
{
@ -45,7 +46,8 @@ namespace Stateless1
// This specifies that you want FabricHealer to repair a service instance deployed to a Fabric node named NodeName.
// FabricHealer supports both Replica and CodePackage restarts of services. The logic rules will dictate which one of these happens,
// so make sure to craft a specific logic rule that makes sense for you (and use some logic!).
// Note that, out of the box, FabricHealer's AppRules.guan file (located in the FabricHealer project's PackageRoot/Config/LogicRules folder) already has a restart replica catch-all (applies to any service) rule that will restart the primary replica of
// Note that, out of the box, FabricHealer's AppRules.guan file (located in the FabricHealer project's PackageRoot/Config/LogicRules folder)
// already has a restart replica catch-all (applies to any service) rule that will restart the primary replica of
// the specified service below, deployed to the specified Fabric node.
var repairDataServiceTarget = new RepairData
{
@ -53,6 +55,12 @@ namespace Stateless1
NodeName = "_Node_0"
};
var repairDataServiceTarget2 = new RepairData
{
ServiceName = "fabric:/HealthMetrics/BandActorServiceType",
NodeName = "_Node_0"
};
// This specifies that you want FabricHealer to repair a Fabric node named NodeName. The only supported repair in FabricHealer is a Restart.
// Related rules can be found in FabricNodeRepair.guan file in the FabricHealer project's PackageRoot/Config/LogicRules folder.
// So, implicitly, this means you want FabricHealer to restart _Node_0. You can of course modify the related logic rules to do something else. It's up to you!
@ -61,10 +69,27 @@ namespace Stateless1
NodeName = "_Node_0"
};
// Service repair.
// For use in the IEnumerable<RepairData> RepairEntityAsync overload.
List<RepairData> repairDataList = new List<RepairData>
{
repairDataNodeTarget,
repairDataServiceTarget,
repairDataServiceTarget2
};
// For use in the single instance RepairData RepairEntityAsync overload.
var repairDataServiceTargetSingle = new RepairData
{
ServiceName = "fabric:/HealthMetrics/HealthMetrics.WebServiceType",
NodeName = "_Node_0"
};
// This demonstrates which exceptions will be thrown by the API. The first three represent user error (most likely). The last two are internal SF issues which
// will be thrown only after a series of retries. How to handle these is up to you.
try
{
await FabricHealer.Proxy.RepairEntityAsync(repairDataServiceTarget, cancellationToken, TimeSpan.FromMinutes(5)).ConfigureAwait(false);
await FabricHealer.Proxy.RepairEntityAsync(repairDataServiceTargetSingle, cancellationToken, TimeSpan.FromMinutes(5)).ConfigureAwait(false);
await FabricHealer.Proxy.RepairEntityAsync(repairDataList, cancellationToken, TimeSpan.FromMinutes(5)).ConfigureAwait(false);
}
catch (MissingRepairDataException)
{
@ -89,30 +114,6 @@ namespace Stateless1
// ClusterManager service could be hammered (flooded with queries), for example. You could retry RepairEntityAsync again after you wait a bit..
}
// Node repair.
try
{
await FabricHealer.Proxy.RepairEntityAsync(repairDataNodeTarget, cancellationToken, TimeSpan.FromMinutes(5)).ConfigureAwait(false);
}
catch (FabricNodeNotFoundException)
{
// Check your spelling..
}
catch (FabricException)
{
// No-op unless you want to re-run RepairEntityAsync again.
}
catch (TimeoutException)
{
// ClusterManager service could be hammered (flooded with queries), for example. You could retry RepairEntityAsync again after you wait a bit..
}
var repairDataServiceTarget2 = new RepairData
{
ServiceName = "fabric:/HealthMetrics/BandActorServiceType",
NodeName = "_Node_0"
};
// Do nothing and wait.
while (!cancellationToken.IsCancellationRequested)
{
@ -125,8 +126,9 @@ namespace Stateless1
}
}
// Close the Proxy to clear internal state and remove health reports that are still active.
// Close the proxy. This cleans up state and removes any health reports that are currently active (not expired).
// Note: this does not cancel repairs that are in flight or in the FabricHealer internal repair queue.
await FabricHealer.Proxy.Close();
}
}

Просмотреть файл

@ -35,7 +35,8 @@ namespace FabricHealerProxy
// Use one FC for the lifetime of the consuming SF service process that loads FabricHealerProxy.dll.
private static readonly FabricClient fabricClient = new FabricClient(settings);
private static readonly object lockObj = new object();
private static readonly object instanceLock = new object();
private static readonly object writeLock = new object();
private static readonly TimeSpan defaultHealthReportTtl = TimeSpan.FromMinutes(15);
private static readonly TimeSpan maxDataLifeTime = defaultHealthReportTtl;
@ -60,7 +61,7 @@ namespace FabricHealerProxy
{
if (instance == null)
{
lock (lockObj)
lock (instanceLock)
{
if (instance == null)
{
@ -126,7 +127,30 @@ namespace FabricHealerProxy
repairData,
repairDataLifetime,
fabricClient,
cancellationToken));
cancellationToken)).ConfigureAwait(false);
}
/// <summary>
/// This function generates a specially-crafted Service Fabric Health Report that the FabricHealer service will understand and act upon given the facts supplied
/// in each RepairData instance. Use this function to supply a list or array of RepairData objects. The instances are processed sequentially, in enumeration order,
/// by the single-instance RepairEntityAsync overload; the first exception thrown stops processing of the remaining instances.
/// </summary>
/// <param name="repairDataCollection">A collection of RepairData instances. RepairData is a well-known (ITelemetryData) data type that contains facts which FabricHealer will use
/// in the execution of its entity repair logic rules and related mitigation functions.</param>
/// <param name="cancellationToken">CancellationToken used to ensure this function stops processing when the token is cancelled.</param>
/// <param name="repairDataLifetime">The amount of time for the repair data to remain active (TTL of associated health report). Default is 15 mins.</param>
/// <exception cref="ArgumentNullException">Thrown when repairDataCollection is null or when a RepairData instance is null.</exception>
/// <exception cref="FabricException">Thrown when an internal Service Fabric operation fails.</exception>
/// <exception cref="FabricNodeNotFoundException">Thrown when specified RepairData.NodeName does not exist in the cluster.</exception>
/// <exception cref="FabricServiceNotFoundException">Thrown when specified service doesn't exist in the cluster.</exception>
/// <exception cref="MissingRepairDataException">Thrown when RepairData instance is missing values for required non-null members (E.g., NodeName).</exception>
/// <exception cref="UriFormatException">Thrown when required ApplicationName or ServiceName value is a malformed Uri string.</exception>
/// <exception cref="TimeoutException">Thrown when internal Fabric client API calls timeout.</exception>
public async Task RepairEntityAsync(IEnumerable<RepairData> repairDataCollection, CancellationToken cancellationToken, TimeSpan repairDataLifetime = default)
{
    // Fail with the documented exception type rather than a NullReferenceException from foreach.
    if (repairDataCollection == null)
    {
        throw new ArgumentNullException(nameof(repairDataCollection));
    }

    foreach (var repairData in repairDataCollection)
    {
        // Delegate to the single-instance overload so validation and retry behavior stay in one place.
        await RepairEntityAsync(repairData, cancellationToken, repairDataLifetime).ConfigureAwait(false);
    }
}
private async Task RepairEntityAsyncInternal(RepairData repairData, TimeSpan repairDataLifetime, FabricClient fabricClient, CancellationToken cancellationToken)
@ -137,7 +161,10 @@ namespace FabricHealerProxy
}
// Remove expired repair data.
ManageRepairDataHistory();
lock (writeLock)
{
ManageRepairDataHistory();
}
if (string.IsNullOrWhiteSpace(repairData.ApplicationName))
{
@ -288,7 +315,10 @@ namespace FabricHealerProxy
}
// Add repairData to history.
repairDataHistory.Add((DateTime.UtcNow, repairData));
lock (writeLock)
{
repairDataHistory.Add((DateTime.UtcNow, repairData));
}
}
private void ManageRepairDataHistory()
@ -365,7 +395,7 @@ namespace FabricHealerProxy
break;
}
return await HealthReportExistsAsync(repairData, fabricClient, cancellationToken);
return await HealthReportExistsAsync(repairData, fabricClient, cancellationToken).ConfigureAwait(false);
}
private async Task<bool> HealthReportExistsAsync(RepairData repairData, FabricClient fabricClient, CancellationToken cancellationToken)
@ -466,7 +496,7 @@ namespace FabricHealerProxy
TimeSpan.FromSeconds(1),
TimeSpan.FromSeconds(3),
TimeSpan.FromSeconds(5)
}).ExecuteAsync(() => ClearHealthReportsInternalAsync());
}).ExecuteAsync(() => ClearHealthReportsInternalAsync()).ConfigureAwait(false);
}
private async Task ClearHealthReportsInternalAsync()
@ -534,7 +564,13 @@ namespace FabricHealerProxy
}
await Task.Delay(250);
await Task.Delay(250).ConfigureAwait(false);
lock (writeLock)
{
repairDataHistory.RemoveAt(i);
--i;
}
}
}
@ -544,9 +580,20 @@ namespace FabricHealerProxy
public async Task Close()
{
    // Clear health reports before dropping local state. ConfigureAwait(false) matches the other awaits
    // in this library: no need to resume on the caller's synchronization context.
    await ClearHealthReports().ConfigureAwait(false);

    // Reset the shared state under the same lock that guards writes to repairDataHistory.
    // The null-conditional call handles the case where the history was already cleared, so no
    // unlocked null pre-check is needed (such a check could observe stale state).
    lock (writeLock)
    {
        repairDataHistory?.Clear();
        repairDataHistory = null;
        instance = null;
    }
}
}
}

Просмотреть файл

@ -1,6 +1,6 @@
# FabricHealerProxy
FabricHealerProxy is a .NET Standard 2.0 library that provides a very simple and reliable way to share Service Fabric entity repair information to FabricHealer service instances running in the same cluster. You can install FabricHealerProxy into your .NET Service Fabric service from the [nuget.org package gallery](...).
FabricHealerProxy is a .NET Standard 2.0 library that provides a very simple and reliable way for any .NET Service Fabric service to initiate Service Fabric entity repair by the FabricHealer service running in the same cluster. You can install FabricHealerProxy into your .NET Service Fabric service from the [nuget.org package gallery](...).
### How to use FabricHealerProxy
@ -22,6 +22,7 @@ using System.Threading.Tasks;
using Microsoft.ServiceFabric.Services.Runtime;
using FabricHealerProxy;
using FabricHealerProxy.Exceptions;
using System.Collections.Generic;
namespace Stateless1
{
@ -45,7 +46,8 @@ namespace Stateless1
// This specifies that you want FabricHealer to repair a service instance deployed to a Fabric node named NodeName.
// FabricHealer supports both Replica and CodePackage restarts of services. The logic rules will dictate which one of these happens,
// so make sure to craft a specific logic rule that makes sense for you (and use some logic!).
// Note that, out of the box, FabricHealer's AppRules.guan file (located in the FabricHealer project's PackageRoot/Config/LogicRules folder) already has a restart replica catch-all (applies to any service) rule that will restart the primary replica of
// Note that, out of the box, FabricHealer's AppRules.guan file (located in the FabricHealer project's PackageRoot/Config/LogicRules folder)
// already has a restart replica catch-all (applies to any service) rule that will restart the primary replica of
// the specified service below, deployed to the specified Fabric node.
var repairDataServiceTarget = new RepairData
{
@ -53,6 +55,12 @@ namespace Stateless1
NodeName = "_Node_0"
};
var repairDataServiceTarget2 = new RepairData
{
ServiceName = "fabric:/HealthMetrics/BandActorServiceType",
NodeName = "_Node_0"
};
// This specifies that you want FabricHealer to repair a Fabric node named NodeName. The only supported repair in FabricHealer is a Restart.
// Related rules can be found in FabricNodeRepair.guan file in the FabricHealer project's PackageRoot/Config/LogicRules folder.
// So, implicitly, this means you want FabricHealer to restart _Node_0. You can of course modify the related logic rules to do something else. It's up to you!
@ -61,10 +69,27 @@ namespace Stateless1
NodeName = "_Node_0"
};
// Service repair.
// For use in the IEnumerable<RepairData> RepairEntityAsync overload.
List<RepairData> repairDataList = new List<RepairData>
{
repairDataNodeTarget,
repairDataServiceTarget,
repairDataServiceTarget2
};
// For use in the single instance RepairData RepairEntityAsync overload.
var repairDataServiceTargetSingle = new RepairData
{
ServiceName = "fabric:/HealthMetrics/HealthMetrics.WebServiceType",
NodeName = "_Node_0"
};
// This demonstrates which exceptions will be thrown by the API. The first three represent user error (most likely). The last two are internal SF issues which
// will be thrown only after a series of retries. How to handle these is up to you.
try
{
await FabricHealer.Proxy.RepairEntityAsync(repairDataServiceTarget, cancellationToken, TimeSpan.FromMinutes(5)).ConfigureAwait(false);
await FabricHealer.Proxy.RepairEntityAsync(repairDataServiceTargetSingle, cancellationToken, TimeSpan.FromMinutes(5)).ConfigureAwait(false);
await FabricHealer.Proxy.RepairEntityAsync(repairDataList, cancellationToken, TimeSpan.FromMinutes(5)).ConfigureAwait(false);
}
catch (MissingRepairDataException)
{
@ -89,30 +114,6 @@ namespace Stateless1
// ClusterManager service could be hammered (flooded with queries), for example. You could retry RepairEntityAsync again after you wait a bit..
}
// Node repair.
try
{
await FabricHealer.Proxy.RepairEntityAsync(repairDataNodeTarget, cancellationToken, TimeSpan.FromMinutes(5)).ConfigureAwait(false);
}
catch (FabricNodeNotFoundException)
{
// Check your spelling..
}
catch (FabricException)
{
// No-op unless you want to re-run RepairEntityAsync again.
}
catch (TimeoutException)
{
// ClusterManager service could be hammered (flooded with queries), for example. You could retry RepairEntityAsync again after you wait a bit..
}
var repairDataServiceTarget2 = new RepairData
{
ServiceName = "fabric:/HealthMetrics/BandActorServiceType",
NodeName = "_Node_0"
};
// Do nothing and wait.
while (!cancellationToken.IsCancellationRequested)
{
@ -125,8 +126,9 @@ namespace Stateless1
}
}
// Close the Proxy to clear internal state and remove health reports that are still active.
// Close the proxy. This cleans up state and removes any health reports that are currently active (not expired).
// Note: this does not cancel repairs that are in flight or in the FabricHealer internal repair queue.
await FabricHealer.Proxy.Close();
}
}