This commit is contained in:
Charles Torre 2023-03-03 13:11:55 -08:00
Родитель 94e0a280fb
Коммит caa558fb51
12 изменённых файлов: 66 добавлений и 44 удалений

Просмотреть файл

@ -23,11 +23,11 @@ function Build-SFPkg {
try {
Push-Location $scriptPath
Build-SFPkg "Microsoft.ServiceFabricApps.FabricHealer.Linux.SelfContained.1.1.19" "$scriptPath\bin\release\FabricHealer\linux-x64\self-contained\FabricHealerType"
Build-SFPkg "Microsoft.ServiceFabricApps.FabricHealer.Linux.FrameworkDependent.1.1.19" "$scriptPath\bin\release\FabricHealer\linux-x64\framework-dependent\FabricHealerType"
Build-SFPkg "Microsoft.ServiceFabricApps.FabricHealer.Linux.SelfContained.1.1.20" "$scriptPath\bin\release\FabricHealer\linux-x64\self-contained\FabricHealerType"
Build-SFPkg "Microsoft.ServiceFabricApps.FabricHealer.Linux.FrameworkDependent.1.1.20" "$scriptPath\bin\release\FabricHealer\linux-x64\framework-dependent\FabricHealerType"
Build-SFPkg "Microsoft.ServiceFabricApps.FabricHealer.Windows.SelfContained.1.1.19" "$scriptPath\bin\release\FabricHealer\win-x64\self-contained\FabricHealerType"
Build-SFPkg "Microsoft.ServiceFabricApps.FabricHealer.Windows.FrameworkDependent.1.1.19" "$scriptPath\bin\release\FabricHealer\win-x64\framework-dependent\FabricHealerType"
Build-SFPkg "Microsoft.ServiceFabricApps.FabricHealer.Windows.SelfContained.1.1.20" "$scriptPath\bin\release\FabricHealer\win-x64\self-contained\FabricHealerType"
Build-SFPkg "Microsoft.ServiceFabricApps.FabricHealer.Windows.FrameworkDependent.1.1.20" "$scriptPath\bin\release\FabricHealer\win-x64\framework-dependent\FabricHealerType"
}
finally {
Pop-Location

Просмотреть файл

@ -11,7 +11,7 @@
},
"applicationTypeVersionFabricHealer": {
"type": "string",
"defaultValue": "1.1.19",
"defaultValue": "1.1.20",
"metadata": {
"description": "Provide the app version number of FabricHealer. This must be identical to the version specified in the sfpkg."
}

Просмотреть файл

@ -6,7 +6,7 @@
"value": "<YOUR-CLUSTER-RESOURCE-NAME>"
},
"applicationTypeVersionFabricHealer": {
"value": "1.1.19"
"value": "1.1.20"
},
"packageUrlFabricHealer": {
"value": "<PUBLIC-ACCESSIBLE-URL-FOR-FABRICHEALER-SFPKG>"

Просмотреть файл

@ -44,7 +44,7 @@ Here is a full example of exactly what is sent in one of these telemetry events,
"ClusterId": "00000000-1111-1111-0000-00f00d000d",
"ClusterType": "SFRP",
"NodeNameHash": "3e83569d4c6aad78083cd081215dafc81e5218556b6a46cb8dd2b183ed0095ad",
"FHVersion": "1.1.19",
"FHVersion": "1.1.20",
"UpTime": "00:00:00.2164523",
"Timestamp": "2023-02-07T21:45:25.2443014Z",
"OS": "Windows",

Просмотреть файл

@ -2,7 +2,7 @@
<package xmlns="http://schemas.microsoft.com/packaging/2013/05/nuspec.xsd">
<metadata minClientVersion="3.3.0">
<id>%PACKAGE_ID%</id>
<version>1.1.19</version>
<version>1.1.20</version>
<releaseNotes>
This release requires Service Fabric runtime version 9 and higher and at least Service Fabric SDK version 6.0.1017. There are several changes and improvements in this
release including a new machine repair model, updated logic rules, bug fixes, and many code improvements.

Просмотреть файл

@ -26,7 +26,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution
README.md = README.md
Documentation\Deployment\service-fabric-healer.json = Documentation\Deployment\service-fabric-healer.json
Documentation\Using.md = Documentation\Using.md
Documentation\Deployment\service-fabric-healer.v1.1.19.parameters.json = Documentation\Deployment\service-fabric-healer.v1.1.19.parameters.json
Documentation\Deployment\service-fabric-healer.v1.1.20.parameters.json = Documentation\Deployment\service-fabric-healer.v1.1.20.parameters.json
EndProjectSection
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "FHTest", "FHTest\FHTest.csproj", "{8D9712BF-C026-4A36-B6D1-6345137D3B6F}"

Просмотреть файл

@ -12,8 +12,8 @@
<RuntimeIdentifier>win-x64</RuntimeIdentifier>-->
<RuntimeIdentifiers>linux-x64;win-x64</RuntimeIdentifiers>
<Product>FabricHealer</Product>
<Version>1.1.19</Version>
<FileVersion>1.1.19</FileVersion>
<Version>1.1.20</Version>
<FileVersion>1.1.20</FileVersion>
<StartupObject>FabricHealer.Program</StartupObject>
<Platforms>x64</Platforms>
</PropertyGroup>

Просмотреть файл

@ -37,7 +37,7 @@ namespace FabricHealer
public static string CurrentlyExecutingLogicRulesFileName { get; set; }
// Folks often use their own version numbers. This is for internal diagnostic telemetry.
private const string InternalVersionNumber = "1.1.19";
private const string InternalVersionNumber = "1.1.20";
private static FabricHealerManager singleton;
private static FabricClient _fabricClient;
private bool disposedValue;
@ -1225,7 +1225,7 @@ namespace FabricHealer
{
continue;
}
else if (InstanceCount > 1)
else if (InstanceCount == -1 || InstanceCount > 1)
{
// Randomly wait to decrease chances of simultaneous ownership among FH instances.
await RandomWaitAsync();
@ -1256,14 +1256,19 @@ namespace FabricHealer
RepairConstants.FHTaskIdPrefix,
Token);
if (fhRepairTasks.Count > 0)
if (fhRepairTasks != null && fhRepairTasks?.Count > 0)
{
foreach (var repair in fhRepairTasks)
{
var executorData = JsonSerializationUtility.TryDeserializeObject(repair.ExecutorData, out RepairExecutorData exData) ? exData : null;
if (executorData == null)
{
continue;
}
if (executorData?.RepairPolicy?.RepairAction != RepairActionType.RestartFabricNode &&
executorData?.RepairPolicy?.RepairAction != RepairActionType.RestartProcess)
if (executorData.RepairPolicy?.RepairAction != RepairActionType.RestartFabricNode &&
executorData.RepairPolicy?.RepairAction != RepairActionType.RestartProcess)
{
continue;
}
@ -1347,17 +1352,21 @@ namespace FabricHealer
repairId = $"{repairData.NodeName}_{serviceProcessName}_{repairData.Metric?.Replace(" ", string.Empty)}";
// All FH repairs have serialized instances of RepairExecutorData set as the value for a RepairTask's ExecutorData property.
if (currentFHRepairs?.Count > 0)
if (currentFHRepairs != null && currentFHRepairs?.Count > 0)
{
// This prevents starting a new repair if another service running on a different node needs to be restarted, for example.
// Think of this as a UD Walk across nodes of service instances in need of repair.
RepairLogger.LogInfo($"In Rolling service restart section.. ConfigSettings == null: {ConfigSettings == null}");
if (ConfigSettings.EnableRollingServiceRestarts
&& !isOneNodeCluster
&& currentFHRepairs.Any(r => JsonSerializationUtility.TryDeserializeObject(r.ExecutorData, out RepairExecutorData execData)
&& execData?.RepairPolicy.ServiceName?.ToLower() == repairData.ServiceName.ToLower()))
&& currentFHRepairs.Any(r => !string.IsNullOrWhiteSpace(r.ExecutorData)
&& JsonSerializationUtility.TryDeserializeObject(r.ExecutorData, out RepairExecutorData execData)
&& execData?.RepairPolicy?.ServiceName?.ToLower() == repairData.ServiceName.ToLower()))
{
var repair = currentFHRepairs.FirstOrDefault(r => JsonSerializationUtility.TryDeserializeObject(r.ExecutorData, out RepairExecutorData execData)
&& execData.RepairPolicy.ServiceName.ToLower() == repairData.ServiceName.ToLower());
var repair =
currentFHRepairs.FirstOrDefault(r => !string.IsNullOrWhiteSpace(r.ExecutorData)
&& JsonSerializationUtility.TryDeserializeObject(r.ExecutorData, out RepairExecutorData execData)
&& execData?.RepairPolicy?.ServiceName?.ToLower() == repairData.ServiceName.ToLower());
await TelemetryUtilities.EmitTelemetryEtwHealthEventAsync(
LogLevel.Info,
@ -1370,11 +1379,13 @@ namespace FabricHealer
}
// For the case where a service repair is still not Completed (e.g., the repair status is Restoring, which would happen after the repair executor has completed
// its work, but RM is performing post safety checks; safety checks can be enabled/disabled in logic rules).
else if (currentFHRepairs.Any(r => JsonSerializationUtility.TryDeserializeObject(r.ExecutorData, out RepairExecutorData execData)
else if (currentFHRepairs.Any(r => !string.IsNullOrWhiteSpace(r.ExecutorData)
&& JsonSerializationUtility.TryDeserializeObject(r.ExecutorData, out RepairExecutorData execData)
&& execData?.RepairPolicy?.RepairId == repairId))
{
var repair = currentFHRepairs.FirstOrDefault(r => JsonSerializationUtility.TryDeserializeObject(r.ExecutorData, out RepairExecutorData execData)
&& execData.RepairPolicy.RepairId == repairId);
var repair = currentFHRepairs.FirstOrDefault(r => !string.IsNullOrWhiteSpace(r.ExecutorData)
&& JsonSerializationUtility.TryDeserializeObject(r.ExecutorData, out RepairExecutorData execData)
&& execData?.RepairPolicy?.RepairId == repairId);
await TelemetryUtilities.EmitTelemetryEtwHealthEventAsync(
LogLevel.Info,
@ -2290,19 +2301,21 @@ namespace FabricHealer
public static async Task TryClearExistingHealthReportsAsync()
{
var healthReporter = new FabricHealthReporter(RepairLogger);
var healthReport = new HealthReport
{
HealthMessage = "Clearing existing health reports as FabricHealer is stopping or updating.",
NodeName = ServiceContext.NodeContext.NodeName,
State = HealthState.Ok,
HealthReportTimeToLive = TimeSpan.FromMinutes(5),
};
// FH.
try
{
var healthReporter = new FabricHealthReporter(RepairLogger);
var healthReport = new HealthReport
{
HealthMessage = "Clearing existing health reports as FabricHealer is stopping or updating.",
NodeName = ServiceContext.NodeContext.NodeName,
State = HealthState.Ok,
HealthReportTimeToLive = TimeSpan.FromMinutes(5),
};
var appName = new Uri(RepairConstants.FabricHealerAppName);
var appHealth = await FabricClientSingleton.HealthManager.GetApplicationHealthAsync(appName);
var appHealth =
await FabricClientSingleton.HealthManager.GetApplicationHealthAsync(appName, ConfigSettings.AsyncTimeout, Token);
var FHAppEvents = appHealth.HealthEvents?.Where(s => s.HealthInformation.SourceId.Contains($"{RepairConstants.FabricHealer}."));
foreach (HealthEvent evt in FHAppEvents)
@ -2315,8 +2328,17 @@ namespace FabricHealer
healthReporter.ReportHealthToServiceFabric(healthReport);
Thread.Sleep(50);
}
}
catch (Exception e) when (e is ArgumentException || e is FabricException || e is TimeoutException)
{
var nodeHealth = await FabricClientSingleton.HealthManager.GetNodeHealthAsync(ServiceContext.NodeContext.NodeName);
}
// Node.
try
{
var nodeHealth =
await FabricClientSingleton.HealthManager.GetNodeHealthAsync(ServiceContext.NodeContext.NodeName, ConfigSettings.AsyncTimeout, Token);
var FHNodeEvents = nodeHealth.HealthEvents?.Where(s => s.HealthInformation.SourceId.Contains(RepairConstants.FabricHealer));
foreach (HealthEvent evt in FHNodeEvents)

Просмотреть файл

@ -1,6 +1,6 @@
<?xml version="1.0" encoding="utf-8"?>
<ServiceManifest Name="FabricHealerPkg"
Version="1.1.19"
Version="1.1.20"
xmlns="http://schemas.microsoft.com/2011/01/fabric"
xmlns:xsd="http://www.w3.org/2001/XMLSchema"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
@ -11,7 +11,7 @@
</ServiceTypes>
<!-- Code package is your service executable. -->
<CodePackage Name="Code" Version="1.1.19">
<CodePackage Name="Code" Version="1.1.20">
<EntryPoint>
<ExeHost>
<Program>FabricHealer</Program>
@ -21,5 +21,5 @@
<!-- Config package is the contents of the Config directory under PackageRoot that contains an
independently-updateable and versioned set of custom configuration settings for your service. -->
<ConfigPackage Name="Config" Version="1.1.19" />
<ConfigPackage Name="Config" Version="1.1.20" />
</ServiceManifest>

Просмотреть файл

@ -1,5 +1,5 @@
<?xml version="1.0" encoding="utf-8"?>
<ApplicationManifest xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" ApplicationTypeName="FabricHealerType" ApplicationTypeVersion="1.1.19" xmlns="http://schemas.microsoft.com/2011/01/fabric">
<ApplicationManifest xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" ApplicationTypeName="FabricHealerType" ApplicationTypeVersion="1.1.20" xmlns="http://schemas.microsoft.com/2011/01/fabric">
<Parameters>
<!-- FabricHealerManager Settings -->
<Parameter Name="AutoMitigationEnabled" DefaultValue="true" />
@ -31,7 +31,7 @@
should match the Name and Version attributes of the ServiceManifest element defined in the
ServiceManifest.xml file. -->
<ServiceManifestImport>
<ServiceManifestRef ServiceManifestName="FabricHealerPkg" ServiceManifestVersion="1.1.19" />
<ServiceManifestRef ServiceManifestName="FabricHealerPkg" ServiceManifestVersion="1.1.20" />
<ConfigOverrides>
<ConfigOverride Name="Config">
<Settings>

Просмотреть файл

@ -1,4 +1,4 @@
## FabricHealer 1.1.19
## FabricHealer 1.1.20
### Configuration as Logic and auto-mitigation in Service Fabric clusters
FabricHealer (FH) is a .NET 6 Service Fabric application that attempts to automatically fix a set of reliably solvable problems that can take place in Service Fabric
@ -78,7 +78,7 @@ Register-ServiceFabricApplicationType -ApplicationPathInImageStore FH1110
#Create FH application (if not already deployed at lesser version):
New-ServiceFabricApplication -ApplicationName fabric:/FabricHealer -ApplicationTypeName FabricHealerType -ApplicationTypeVersion 1.1.19
New-ServiceFabricApplication -ApplicationName fabric:/FabricHealer -ApplicationTypeName FabricHealerType -ApplicationTypeVersion 1.1.20
#Create the Service instance:
@ -87,7 +87,7 @@ New-ServiceFabricService -Stateless -PartitionSchemeSingleton -ApplicationName f
#OR if updating existing version:
Start-ServiceFabricApplicationUpgrade -ApplicationName fabric:/FabricHealer -ApplicationTypeVersion 1.1.19 -Monitored -FailureAction rollback
Start-ServiceFabricApplicationUpgrade -ApplicationName fabric:/FabricHealer -ApplicationTypeVersion 1.1.20 -Monitored -FailureAction rollback
```
## Using FabricHealer

Просмотреть файл

@ -1,4 +1,4 @@
## FabricHealer 1.1.19
## FabricHealer 1.1.20
### Configuration as Logic and auto-mitigation in Service Fabric clusters
FabricHealer (FH) is a .NET 6 Service Fabric application that attempts to automatically fix a set of reliably solvable problems that can take place in Service Fabric