Add a net health recovery service to Qemu machines

There is a network stability issue in qemu + virtio, affecting
some users after long periods of usage, which can lead to
suspended queue delivery. Until the issue is resolved, add a
temporary recovery service which restarts networking when host
communication becomes inoperable.

[NO NEW TESTS NEEDED]

Signed-off-by: Jason T. Greene <jason.greene@redhat.com>
This commit is contained in:
Jason T. Greene
2024-01-15 17:53:30 -06:00
parent f1ea4fbb3d
commit 79fad91dbb
6 changed files with 100 additions and 19 deletions

View File

@ -62,6 +62,7 @@ type DynamicIgnition struct {
WritePath string
Cfg Config
Rootful bool
NetRecover bool
}
func (ign *DynamicIgnition) Write() error {
@ -97,7 +98,7 @@ func (ign *DynamicIgnition) GenerateIgnitionConfig() error {
ignStorage := Storage{
Directories: getDirs(ign.Name),
Files: getFiles(ign.Name, ign.UID, ign.Rootful, ign.VMType),
Files: getFiles(ign.Name, ign.UID, ign.Rootful, ign.VMType, ign.NetRecover),
Links: getLinks(ign.Name),
}
@ -231,6 +232,21 @@ func (ign *DynamicIgnition) GenerateIgnitionConfig() error {
}
ignSystemd.Units = append(ignSystemd.Units, qemuUnit)
}
if ign.NetRecover {
contents, err := GetNetRecoveryUnitFile().ToString()
if err != nil {
return err
}
recoveryUnit := Unit{
Enabled: BoolToPtr(true),
Name: "net-health-recovery.service",
Contents: &contents,
}
ignSystemd.Units = append(ignSystemd.Units, recoveryUnit)
}
// Only after all checks are done
// it's ready to create the ignConfig
ign.Cfg = Config{
@ -303,7 +319,7 @@ func getDirs(usrName string) []Directory {
return dirs
}
func getFiles(usrName string, uid int, rootful bool, vmtype define.VMType) []File {
func getFiles(usrName string, uid int, rootful bool, vmtype define.VMType, netRecover bool) []File {
files := make([]File, 0)
lingerExample := parser.NewUnitFile()
@ -574,6 +590,23 @@ Delegate=memory pids cpu io
},
})
// Only necessary for qemu on mac
if netRecover {
files = append(files, File{
Node: Node{
User: GetNodeUsr("root"),
Group: GetNodeGrp("root"),
Path: "/usr/local/bin/net-health-recovery.sh",
},
FileEmbedded1: FileEmbedded1{
Mode: IntToPtr(0755),
Contents: Resource{
Source: EncodeDataURLPtr(GetNetRecoveryFile()),
},
},
})
}
return files
}
@ -743,6 +776,37 @@ func (i *IgnitionBuilder) Build() error {
return i.dynamicIgnition.Write()
}
// GetNetRecoveryFile returns the text of the bash script that getFiles
// installs as /usr/local/bin/net-health-recovery.sh.
//
// The script waits 120 seconds, then every 30 seconds runs curl against
// http://192.168.127.1/health with a 30 second limit. When curl exits
// non-zero it logs a message and takes the enp0s1 interface down and back up.
func GetNetRecoveryFile() string {
	// The script is kept as a single raw literal so it is emitted verbatim.
	const recoveryScript = `#!/bin/bash
# Verify network health, and bounce the network device if host connectivity
# is lost. This is a temporary workaround for a known rare qemu/virtio issue
# that affects some systems
sleep 120 # allow time for network setup on initial boot
while true; do
sleep 30
curl -s -o /dev/null --max-time 30 http://192.168.127.1/health
if [ "$?" != "0" ]; then
echo "bouncing nic due to loss of connectivity with host"
ifconfig enp0s1 down; ifconfig enp0s1 up
fi
done
`
	return recoveryScript
}
// GetNetRecoveryUnitFile builds the systemd unit that runs
// /usr/local/bin/net-health-recovery.sh.
//
// The unit is ordered after sshd.socket and sshd.service, sends stdout and
// stderr to the journal, reads stdin from null, and is wanted by
// default.target.
func GetNetRecoveryUnitFile() *parser.UnitFile {
	// Entries are added in the order listed here.
	entries := []struct{ group, key, value string }{
		{"Unit", "Description", "Verifies health of network and recovers if necessary"},
		{"Unit", "After", "sshd.socket sshd.service"},
		{"Service", "ExecStart", "/usr/local/bin/net-health-recovery.sh"},
		{"Service", "StandardOutput", "journal"},
		{"Service", "StandardError", "journal"},
		{"Service", "StandardInput", "null"},
		{"Install", "WantedBy", "default.target"},
	}

	unit := parser.NewUnitFile()
	for _, e := range entries {
		unit.Add(e.group, e.key, e.value)
	}
	return unit
}
func DefaultReadyUnitFile() parser.UnitFile {
u := parser.NewUnitFile()
u.Add("Unit", "After", "remove-moby.service sshd.socket sshd.service")

View File

@ -202,6 +202,7 @@ func (v *MachineVM) Init(opts machine.InitOptions) (bool, error) {
WritePath: v.getIgnitionFile(),
UID: v.UID,
Rootful: v.Rootful,
NetRecover: useNetworkRecover(),
})
// If the user provides an ignition file, we need to

View File

@ -11,3 +11,7 @@ func getRuntimeDir() (string, error) {
}
return tmpDir, nil
}
// useNetworkRecover reports whether the net health recovery service should be
// set up for the machine. Init passes the result as the NetRecover setting.
// This variant turns the service on.
// NOTE(review): presumably the macOS build variant — file name not visible
// here; confirm.
func useNetworkRecover() bool {
	const enabled = true
	return enabled
}

View File

@ -11,3 +11,7 @@ func getRuntimeDir() (string, error) {
}
return tmpDir, nil
}
// useNetworkRecover reports whether the net health recovery service should be
// set up for the machine. Init passes the result as the NetRecover setting.
// This variant leaves the service off.
// NOTE(review): presumably a per-platform build variant — file name not
// visible here; confirm.
func useNetworkRecover() bool {
	const enabled = false
	return enabled
}

View File

@ -11,3 +11,7 @@ func getRuntimeDir() (string, error) {
}
return util.GetRootlessRuntimeDir()
}
// useNetworkRecover reports whether the net health recovery service should be
// set up for the machine. Init passes the result as the NetRecover setting.
// This variant leaves the service off.
// NOTE(review): presumably a per-platform build variant — file name not
// visible here; confirm.
func useNetworkRecover() bool {
	const enabled = false
	return enabled
}

View File

@ -11,3 +11,7 @@ func getRuntimeDir() (string, error) {
}
return tmpDir, nil
}
// useNetworkRecover reports whether the net health recovery service should be
// set up for the machine. Init passes the result as the NetRecover setting.
// This variant leaves the service off.
// NOTE(review): presumably a per-platform build variant — file name not
// visible here; confirm.
func useNetworkRecover() bool {
	const enabled = false
	return enabled
}