This commit is contained in:
Charles Torre 2022-04-19 11:24:49 -07:00
Родитель 59fa5fce8e
Коммит fa0751dc36
12 изменённых файлов: 225 добавлений и 70 удалений

Просмотреть файл

@ -1,15 +0,0 @@
$ErrorActionPreference = "Stop"
$Configuration="Release"
[string] $scriptPath = Split-Path -Parent $MyInvocation.MyCommand.Definition
try {
Push-Location $scriptPath
Remove-Item $scriptPath\FabricHealerLib\bin\release\netstandard2.0\ -Recurse -Force -EA SilentlyContinue
dotnet publish $scriptPath\FabricHealerLib\FabricHealerLib.csproj -o bin\release\netstandard2.0 -c $Configuration
}
finally {
Pop-Location
}

15
Build-FHProxy.ps1 Normal file
Просмотреть файл

@ -0,0 +1,15 @@
# Builds and publishes the FabricHealerProxy project (Release configuration).
# Treat errors as terminating so a failed step stops the script.
$ErrorActionPreference = "Stop"
$Configuration="Release"
# Directory that contains this script; the project paths below are rooted here.
[string] $scriptPath = Split-Path -Parent $MyInvocation.MyCommand.Definition
try {
# Work from the script directory; the finally block restores the caller's location.
Push-Location $scriptPath
# Remove the previous build output; -EA SilentlyContinue suppresses the error if the folder does not exist yet.
Remove-Item $scriptPath\FabricHealerProxy\bin\release\netstandard2.0\ -Recurse -Force -EA SilentlyContinue
# NOTE(review): the -o path is relative. Recent .NET SDKs resolve it against the current directory ($scriptPath here),
# not the project folder cleaned above - confirm the publish output lands where the packaging step expects it.
dotnet publish $scriptPath\FabricHealerProxy\FabricHealerProxy.csproj -o bin\release\netstandard2.0 -c $Configuration
}
finally {
# Always return to the location the caller started from, even when publish fails.
Pop-Location
}

Просмотреть файл

@ -20,13 +20,13 @@ function Build-Nuget {
$basePath
)
[string] $nugetSpecTemplate = [System.IO.File]::ReadAllText([System.IO.Path]::Combine($scriptPath, "FabricHealerLib.nuspec.template"))
[string] $nugetSpecTemplate = [System.IO.File]::ReadAllText([System.IO.Path]::Combine($scriptPath, "FabricHealerProxy.nuspec.template"))
[string] $nugetSpecPath = "$scriptPath\FabricHealerLib\bin\release\netstandard2.0\$($packageId).nuspec"
[string] $nugetSpecPath = "$scriptPath\FabricHealerProxy\bin\release\netstandard2.0\$($packageId).nuspec"
[System.IO.File]::WriteAllText($nugetSpecPath, $nugetSpecTemplate.Replace("%PACKAGE_ID%", $packageId).Replace("%ROOT_PATH%", $scriptPath))
.\nuget.exe pack $nugetSpecPath -basepath $basePath -OutputDirectory bin\release\FabricHealerLib\Nugets -properties NoWarn=NU5100,NU5128
.\nuget.exe pack $nugetSpecPath -basepath $basePath -OutputDirectory bin\release\FabricHealerProxy\Nugets -properties NoWarn=NU5100,NU5128
}
[string] $scriptPath = Split-Path -Parent $MyInvocation.MyCommand.Definition
@ -37,7 +37,7 @@ try {
Install-Nuget
Build-Nuget "Microsoft.ServiceFabricApps.FabricHealerLib" "$scriptPath\FabricHealerLib\bin\release\netstandard2.0"
Build-Nuget "Microsoft.ServiceFabricApps.FabricHealerProxy" "$scriptPath\FabricHealerProxy\bin\release\netstandard2.0"
}
finally {

Просмотреть файл

@ -5,7 +5,7 @@
<version>1.1.0</version>
<releaseNotes>
- FabricHealer no longer requires FabricObserver to be deployed in the same cluster.
- Any .NET SF service can now interoperate with FabricHealer using the FabricHealerLib .NET Standard library.
- Any .NET SF service can now interoperate with FabricHealer using the FabricHealerProxy .NET Standard 2.0 library.
- FabricHealer now requires Microsoft.ServiceFabric.Services Version 5.0.516 and higher. Runtime versions lower than 8.0.516 are no longer supported.
- FabricHealer can now be deployed via ARM, directly from the repo.
</releaseNotes>

Просмотреть файл

@ -10,13 +10,13 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution
.editorconfig = .editorconfig
.gitignore = .gitignore
Build-FabricHealer.ps1 = Build-FabricHealer.ps1
Build-FHLib.ps1 = Build-FHLib.ps1
Build-FHLibNupkg.ps1 = Build-FHLibNupkg.ps1
Build-FHProxy.ps1 = Build-FHProxy.ps1
Build-FHProxyNupkg.ps1 = Build-FHProxyNupkg.ps1
Build-NugetPackages.ps1 = Build-NugetPackages.ps1
Build-SFPKGs.ps1 = Build-SFPKGs.ps1
FabricHealer.nuspec.template = FabricHealer.nuspec.template
FabricHealerLib.nuspec.template = FabricHealerLib.nuspec.template
FabricHealerLibnuget.md = FabricHealerLibnuget.md
FabricHealerProxy.md = FabricHealerProxy.md
FabricHealerProxy.nuspec.template = FabricHealerProxy.nuspec.template
fhnuget.md = fhnuget.md
icon.png = icon.png
Documentation\LogicWorkflows.md = Documentation\LogicWorkflows.md

Просмотреть файл

@ -819,7 +819,7 @@ namespace FabricHealer
continue;
}
// Only FabricObserver can initiate Service Fabric System service repair. FabricHealerLib does not support this.
// Only FabricObserver can initiate Service Fabric System service repair. FabricHealerProxy does not support this.
if (repairData.ObserverName != RepairConstants.FabricSystemObserver)
{
continue;
@ -1347,11 +1347,11 @@ namespace FabricHealer
}
}
// FabricHealer only supports VM level repairs that are identified by FabricObserver. FabricHealerLib does not support communicating these types of repairs
// FabricHealer only supports VM level repairs that are identified by FabricObserver. FabricHealerProxy does not support communicating these types of repairs
// to FabricHealer from a non-FO service (TOTHINK: this should change?).
if (repairData.ObserverName == null && repairData.EntityType == EntityType.Node)
{
// FabricHealerLib-generated report, so a restart fabric node request, for example.
// FabricHealerProxy-generated report, so a restart fabric node request, for example.
await ProcessFabricNodeHealthAsync(evt, repairData);
continue;
}
@ -1812,7 +1812,7 @@ namespace FabricHealer
repairPolicySectionName = RepairConstants.VmRepairPolicySectionName;
break;
// Fabric Node repair (from FabricHealerLib, for example, where there is no concept of Observer).
// Fabric Node repair (from FabricHealerProxy, for example, where there is no concept of Observer).
case EntityType.Node when repairData.ObserverName == null && repairData.NodeName != null && repairData.NodeType != null:
repairPolicySectionName = RepairConstants.FabricNodeRepairPolicySectionName;
break;

Просмотреть файл

@ -1,3 +0,0 @@
# FabricHealerLib
### Getting Started

134
FabricHealerProxy.md Normal file
Просмотреть файл

@ -0,0 +1,134 @@
# FabricHealerProxy
FabricHealerProxy is a .NET Standard 2.0 library that provides a very simple and reliable way to share Service Fabric entity repair information to FabricHealer service instances running in the same cluster. You can install FabricHealerProxy into your .NET Service Fabric service from the [nuget.org package gallery](...).
### How to use FabricHealerProxy
- Deploy [FabricHealer](https://github.com/microsoft/service-fabric-healer/releases) [TODO: this will point to Deployment doc folder] to your cluster (Do note that if you deploy FabricHealer as a singleton partition 1 (versus -1), then FH will only conduct SF-related repairs).
- Install FabricHealerProxy nupkg into your own service from where you want to initiate repair of SF entities (stateful/stateless services, Fabric nodes).
FabricHealer will execute entity-related logic rules (housed in its FabricNodeRules.guan file in this case), and if any of the rules succeed, then FH will create a Repair Job with pre and post safety checks (default),
orchestrate RM through to repair completion (FH will be the executor of the repair), and emit repair step information via telemetry, local logging, and ETW.
### Sample application (Stateless Service)
stateless1.cs
```C#
using System;
using System.Fabric;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.ServiceFabric.Services.Runtime;
using FabricHealerProxy;
using FabricHealerProxy.Exceptions;
namespace Stateless1
{
/// <summary>
/// An instance of this class is created for each service instance by the Service Fabric runtime.
/// </summary>
internal sealed class Stateless1 : StatelessService
{
public Stateless1(StatelessServiceContext context)
: base(context)
{
}
/// <summary>
/// This is the main entry point for your service instance.
/// </summary>
/// <param name="cancellationToken">Canceled when Service Fabric needs to shut down this service instance.</param>
protected override async Task RunAsync(CancellationToken cancellationToken)
{
// This specifies that you want FabricHealer to repair a service instance deployed to a Fabric node named NodeName.
// FabricHealer supports both Replica and CodePackage restarts of services. The logic rules will dictate which one of these happens,
// so make sure to craft a specific logic rule that makes sense for you (and use some logic!).
// Note that, out of the box, FabricHealer's AppRules.guan file (located in the FabricHealer project's PackageRoot/Config/LogicRules folder) already has a restart replica catch-all (applies to any service) rule that will restart the primary replica of
// the specified service below, deployed to the specified Fabric node.
var repairDataServiceTarget = new RepairData
{
ServiceName = "fabric:/HealthMetrics/DoctorActorServiceType",
NodeName = "_Node_0"
};
// This specifies that you want FabricHealer to repair a Fabric node named NodeName. The only supported repair in FabricHealer is a Restart.
// Related rules can be found in FabricNodeRepair.guan file in the FabricHealer project's PackageRoot/Config/LogicRules folder.
// So, implicitly, this means you want FabricHealer to restart _Node_0. You can of course modify the related logic rules to do something else. It's up to you!
var repairDataNodeTarget = new RepairData
{
NodeName = "_Node_0"
};
// Service repair.
try
{
await FabricHealer.Proxy.RepairEntityAsync(repairDataServiceTarget, cancellationToken, TimeSpan.FromMinutes(5)).ConfigureAwait(false);
}
catch (MissingRepairDataException)
{
// This means a required non-null value for a RepairData property was not specified. For example, RepairData.NodeName was not set.
}
catch (FabricNodeNotFoundException)
{
// The Fabric node you specified in RepairData.NodeName does not exist.
}
catch (FabricServiceNotFoundException)
{
// The Fabric service you specified in RepairData.ServiceName does not exist.
}
catch (FabricException)
{
// Thrown when an internal Service Fabric operation fails. Internally, RepairEntityAsync will retry failed Fabric client operations 3 times.
// This will have already led to 3 internal retries before surfacing here.
}
catch (TimeoutException)
{
// Thrown when a Fabric client API call times out. This will have already led to 3 internal retries before surfacing here.
// ClusterManager service could be hammered (flooded with queries), for example. You could retry RepairEntityAsync again after you wait a bit..
}
// Node repair.
try
{
await FabricHealer.Proxy.RepairEntityAsync(repairDataNodeTarget, cancellationToken, TimeSpan.FromMinutes(5)).ConfigureAwait(false);
}
catch (FabricNodeNotFoundException)
{
// Check your spelling..
}
catch (FabricException)
{
// No-op unless you want to re-run RepairEntityAsync again.
}
catch (TimeoutException)
{
// ClusterManager service could be hammered (flooded with queries), for example. You could retry RepairEntityAsync again after you wait a bit..
}
var repairDataServiceTarget2 = new RepairData
{
ServiceName = "fabric:/HealthMetrics/BandActorServiceType",
NodeName = "_Node_0"
};
// Do nothing and wait.
while (!cancellationToken.IsCancellationRequested)
{
try
{
await Task.Delay(TimeSpan.FromSeconds(30), cancellationToken);
}
catch (TaskCanceledException)
{
}
}
// Close the Proxy to clear internal state and remove health reports that are still active.
await FabricHealer.Proxy.Close();
}
}
}
```

Просмотреть файл

@ -8,11 +8,11 @@
<authors>Microsoft</authors>
<license type="expression">MIT</license>
<requireLicenseAcceptance>true</requireLicenseAcceptance>
<title>FabricHealerLib: Utility library for communicating with FabricHealer service.</title>
<title>FabricHealerProxy: Utility library for communicating with FabricHealer service.</title>
<icon>icon.png</icon>
<readme>FabricHealerLibnuget.md</readme>
<readme>FabricHealerProxy.md</readme>
<language>en-US</language>
<description>This package contains FabricHealerLib, a .NET Standard 2.0 library that provides a very simple and reliable way to share Service Fabric entity repair information to FabricHealer service instances running in the same cluster.</description>
<description>This package contains FabricHealerProxy, a .NET Standard 2.0 library that provides a very simple and reliable way to share Service Fabric entity repair information to FabricHealer service instances running in the same cluster.</description>
<contentFiles>
<files include="**" buildAction="None" copyToOutput="true" />
</contentFiles>
@ -21,14 +21,14 @@
<dependency id="Microsoft.ServiceFabric.Services" version="5.0.516" />
<dependency id="Polly" version="7.2.3" />
</dependencies>
<projectUrl>https://github.com/microsoft/FabricHealerLib</projectUrl>
<tags>FabricHealerLib service-fabric netstandard20 netcore csharp</tags>
<projectUrl>https://github.com/microsoft/FabricHealerProxy</projectUrl>
<tags>FabricHealerProxy service-fabric netstandard20 netcore csharp</tags>
<copyright>© Microsoft Corporation. All rights reserved.</copyright>
</metadata>
<files>
<file src="FabricHealerLib.dll" target="lib\netstandard2.0" />
<file src="FabricHealerLib.xml" target="lib\netstandard2.0" />
<file src="FabricHealerProxy.dll" target="lib\netstandard2.0" />
<file src="FabricHealerProxy.xml" target="lib\netstandard2.0" />
<file src="%ROOT_PATH%\icon.png" target="" />
<file src="%ROOT_PATH%\FabricHealerLibnuget.md" target="" />
<file src="%ROOT_PATH%\FabricHealerProxy.md" target="" />
</files>
</package>

Просмотреть файл

@ -25,8 +25,6 @@ namespace FabricHealerProxy
public sealed class FabricHealer
{
private static FabricHealer instance;
private static readonly List<(DateTime DateAdded, RepairData RepairData)> repairDataHistory =
new List<(DateTime DateAdded, RepairData RepairData)>();
private static readonly FabricClientSettings settings = new FabricClientSettings
{
@ -35,15 +33,22 @@ namespace FabricHealerProxy
HealthReportRetrySendInterval = TimeSpan.FromSeconds(3),
};
// Use one FC for the lifetime of the consuming SF service process that loads FabricHealerLib.dll.
// Use one FC for the lifetime of the consuming SF service process that loads FabricHealerProxy.dll.
private static readonly FabricClient fabricClient = new FabricClient(settings);
private static readonly object lockObj = new object();
private static readonly TimeSpan defaultHealthReportTtl = TimeSpan.FromMinutes(15);
private static readonly TimeSpan maxDataLifeTime = defaultHealthReportTtl;
// Instance-level list of (DateAdded, RepairData) tuples that stores RepairData objects for a specified duration (defaultHealthReportTtl).
private List<(DateTime DateAdded, RepairData RepairData)> repairDataHistory =
new List<(DateTime DateAdded, RepairData RepairData)>();
private FabricHealer()
{
if (repairDataHistory == null)
{
repairDataHistory = new List<(DateTime DateAdded, RepairData RepairData)>();
}
}
/// <summary>
@ -166,7 +171,7 @@ namespace FabricHealerProxy
CodePackageActivationContext context =
await FabricRuntime.GetActivationContextAsync(TimeSpan.FromSeconds(30), cancellationToken);
repairData.Source = context.GetServiceManifestName() + "_" + "FabricHealerLib";
repairData.Source = context.GetServiceManifestName() + "_" + "FabricHealerProxy";
}
// Support for repair data that does not contain replica/partition facts for service level repair.
@ -451,11 +456,7 @@ namespace FabricHealerProxy
return false;
}
/// <summary>
/// Clears any active health reports generated by FabricHealerLib. You should call this when your service exits gracefully.
/// Note that this will not cancel repairs that are in flight.
/// </summary>
public async Task ClearHealthReports()
private async Task ClearHealthReports()
{
await Policy.Handle<FabricException>()
.Or<TimeoutException>()
@ -478,7 +479,7 @@ namespace FabricHealerProxy
{
var healthInformation = new HealthInformation(repairData.Source, repairData.Property, HealthState.Ok)
{
Description = "Clearing existing health reports from FabricHealerLib",
Description = "Clearing existing health reports from FabricHealerProxy",
TimeToLive = TimeSpan.FromMinutes(5),
RemoveWhenExpired = true
};
@ -536,5 +537,16 @@ namespace FabricHealerProxy
await Task.Delay(250);
}
}
/// <summary>
/// Releases resources used by FabricHealer.Proxy, including cleaning up any active health reports.
/// After the health reports are cleared, the stored repair data history is emptied and released,
/// and the static proxy instance reference is reset.
/// </summary>
/// <returns>A task that completes once the health reports have been cleared and internal state has been reset.</returns>
public async Task Close()
{
    // Library code: do not resume on the caller's synchronization context, so a consumer
    // that blocks on Close() from a context-bound thread cannot deadlock on this await.
    await ClearHealthReports().ConfigureAwait(false);

    // Drop the tracked repair data and release the list itself.
    // NOTE(review): a caller that cached this instance will see a null history after Close - confirm
    // that callers always re-acquire the proxy rather than reusing an old reference.
    repairDataHistory?.Clear();
    repairDataHistory = null;

    // Reset the static instance reference.
    instance = null;
}
}
}

Просмотреть файл

@ -1,11 +1,11 @@
# FabricHealerLib
# FabricHealerProxy
FabricHealerLib is a .NET Standard 2.0 library that provides a very simple and reliable way to share Service Fabric entity repair information to FabricHealer service instances running in the same cluster. You can install FabricHealerLib into your .NET Service Fabric service from the [nuget.org package gallery](...).
FabricHealerProxy is a .NET Standard 2.0 library that provides a very simple and reliable way to share Service Fabric entity repair information to FabricHealer service instances running in the same cluster. You can install FabricHealerProxy into your .NET Service Fabric service from the [nuget.org package gallery](...).
### How to use FabricHealerLib
### How to use FabricHealerProxy
- Deploy [FabricHealer](https://github.com/microsoft/service-fabric-healer/releases) [TODO: this will point to Deployment doc folder] to your cluster (Do note that if you deploy FabricHealer as a singleton partition 1 (versus -1), then FH will only conduct SF-related repairs).
- Install FabricHealerLib nupkg into your own service from where you want to initiate repair of SF entities (stateful/stateless services, Fabric nodes).
- Install FabricHealerProxy nupkg into your own service from where you want to initiate repair of SF entities (stateful/stateless services, Fabric nodes).
FabricHealer will execute entity-related logic rules (housed in its FabricNodeRules.guan file in this case), and if any of the rules succeed, then FH will create a Repair Job with pre and post safety checks (default),
orchestrate RM through to repair completion (FH will be the executor of the repair), and emit repair step information via telemetry, local logging, and ETW.
@ -20,8 +20,8 @@ using System.Fabric;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.ServiceFabric.Services.Runtime;
using FabricHealerLib;
using FabricHealerLib.Exceptions;
using FabricHealerProxy;
using FabricHealerProxy.Exceptions;
namespace Stateless1
{
@ -45,7 +45,7 @@ namespace Stateless1
// This specifies that you want FabricHealer to repair a service instance deployed to a Fabric node named NodeName.
// FabricHealer supports both Replica and CodePackage restarts of services. The logic rules will dictate which one of these happens,
// so make sure to craft a specific logic rule that makes sense for you (and use some logic!).
// Note that, out of the box, FabricHealer's AppRules.guan file already has a restart replica catch-all (applies to any service) rule that will restart the primary replica of
// Note that, out of the box, FabricHealer's AppRules.guan file (located in the FabricHealer project's PackageRoot/Config/LogicRules folder) already has a restart replica catch-all (applies to any service) rule that will restart the primary replica of
// the specified service below, deployed to the specified Fabric node.
var repairDataServiceTarget = new RepairData
{
@ -54,25 +54,21 @@ namespace Stateless1
};
// This specifies that you want FabricHealer to repair a Fabric node named NodeName. The only supported repair in FabricHealer is a Restart.
// So, implicitly, this means you want FabricHealer to restart _Node_0.
// Related rules can be found in FabricNodeRepair.guan file in the FabricHealer project's PackageRoot/Config/LogicRules folder.
// So, implicitly, this means you want FabricHealer to restart _Node_0. You can of course modify the related logic rules to do something else. It's up to you!
var repairDataNodeTarget = new RepairData
{
NodeName = "_Node_0"
};
// In this case, you must place this using declaration of FabricHealerProxy instance at function scope (so, not within the try below).
// Failure to do so will result in nothing happening as the FabricClient instance that FabricHealerProxy creates will have closed before
// Service Fabric's HealthManager has completed its related work.
// Service repair.
try
{
await FabricHealerProxy.RepairEntityAsync(repairDataServiceTarget, cancellationToken, TimeSpan.FromMinutes(5)).ConfigureAwait(false);
await FabricHealer.Proxy.RepairEntityAsync(repairDataServiceTarget, cancellationToken, TimeSpan.FromMinutes(5)).ConfigureAwait(false);
}
catch (MissingRequiredDataException)
catch (MissingRepairDataException)
{
// This means a required RepairData property was not specified. For example, RepairData.NodeName was not set.
// This means a required non-null value for a RepairData property was not specified. For example, RepairData.NodeName was not set.
}
catch (FabricNodeNotFoundException)
{
@ -96,7 +92,7 @@ namespace Stateless1
// Node repair.
try
{
await FabricHealerProxy.RepairEntityAsync(repairDataNodeTarget, cancellationToken, TimeSpan.FromMinutes(5)).ConfigureAwait(false);
await FabricHealer.Proxy.RepairEntityAsync(repairDataNodeTarget, cancellationToken, TimeSpan.FromMinutes(5)).ConfigureAwait(false);
}
catch (FabricNodeNotFoundException)
{
@ -111,11 +107,27 @@ namespace Stateless1
// ClusterManager service could be hammered (flooded with queries), for example. You could retry RepairEntityAsync again after you wait a bit..
}
var repairDataServiceTarget2 = new RepairData
{
ServiceName = "fabric:/HealthMetrics/BandActorServiceType",
NodeName = "_Node_0"
};
// Do nothing and wait.
while (!cancellationToken.IsCancellationRequested)
{
await Task.Delay(TimeSpan.FromSeconds(30), cancellationToken);
try
{
await Task.Delay(TimeSpan.FromSeconds(30), cancellationToken);
}
catch (TaskCanceledException)
{
}
}
// Close the Proxy to clear internal state and remove health reports that are still active.
await FabricHealer.Proxy.Close();
}
}
}

Просмотреть файл

@ -8,9 +8,9 @@ Repair workflow configuration is written as [Prolog](http://www.let.rug.nl/bos/l
FabricHealer's Configuration-as-Logic feature is made possible by a new logic programming library for .NET, [Guan](https://github.com/microsoft/guan).
The fun starts when FabricHealer detects supported error or warning health events reported by [FabricObserver](https://github.com/microsoft/service-fabric-observer), for example.
You can use FabricHealer if you don't also deploy FabricObserver. Just install FabricHealerLib into your .NET Service Fabric project and you can leverage the power of FH from there.
You can use FabricHealer if you don't also deploy FabricObserver. Just install FabricHealerProxy into your .NET Service Fabric project and you can leverage the power of FH from there.
There is a very simple "interface" to FabricHealer that begins with some service generating a Service Fabric Health Report. This health report must contain a specially-crafted
Description value: a serialized instance of a well-known (to FH) type (must implement ITelemetryData). As mentioned above, just use FabricHealerLib to push FH into motion from your
Description value: a serialized instance of a well-known (to FH) type (must implement ITelemetryData). As mentioned above, just use FabricHealerProxy to push FH into motion from your
Service Fabric service.
FabricHealer is implemented as a stateless singleton service that runs on all nodes in a Linux or Windows Service Fabric cluster.