diff --git a/FabricHealer/PackageRoot/Config/LogicRules/SystemServiceRules.guan b/FabricHealer/PackageRoot/Config/LogicRules/SystemServiceRules.guan index 37d538a..9031198 100644 --- a/FabricHealer/PackageRoot/Config/LogicRules/SystemServiceRules.guan +++ b/FabricHealer/PackageRoot/Config/LogicRules/SystemServiceRules.guan @@ -112,4 +112,9 @@ Mitigate(MetricName="FileHandles", OS="Linux", ProcessName=?SysProcName) :- not( ## Open File Handles - Linux, Fabric or FabricHost. In these cases, we want a safe (graceful) restart of the Fabric node; not just kill the process, which will restart the node, but not gracefully. ## Restart the Fabric node where the offending instance is running. -Mitigate(MetricName="FileHandles", OS="Linux", ProcessName="Fabric") :- TimeScopedRestartFabricNode(2, 08:00:00). \ No newline at end of file +Mitigate(MetricName="FileHandles", OS="Linux", ProcessName="Fabric") :- TimeScopedRestartFabricNode(2, 08:00:00). + +## Generic catch-all. +## Regardless of what the problem is, restart the service process unless it is Fabric or FabricHost (killing those restarts the node, but not gracefully). + +Mitigate(ProcessName=?SysProcName) :- not(?SysProcName == "Fabric" || ?SysProcName == "FabricHost"), TimeScopedRestartFabricSystemProcess(2, 08:00:00). \ No newline at end of file diff --git a/FabricHealer/Repair/RepairExecutor.cs b/FabricHealer/Repair/RepairExecutor.cs index 138641f..b0e6d50 100644 --- a/FabricHealer/Repair/RepairExecutor.cs +++ b/FabricHealer/Repair/RepairExecutor.cs @@ -600,12 +600,10 @@ namespace FabricHealer.Repair try { - // FO provided the offending process id in TelemetryData instance. Chances are good it will still be running. - // If the process with this id is no longer running, then we can assume it makes no sense to try to restart it: - // Just let the ArgumentException bubble out to the catch. + // FO/FHProxy provided the offending process id and name (or, in the case of FHProxy, the id or the name) in the TelemetryData instance. 
if (repairData.ProcessId > 0) { - p = Process.GetProcessById((int)repairData.ProcessId); + p = Process.GetProcessById((int)repairData.ProcessId); } else // We need to figure out the procId from the FO-supplied proc name. {