mirror of
https://github.com/containers/podman.git
synced 2025-10-16 18:53:19 +08:00
Add a net health recovery service to Qemu machines
There is a network stability issue in qemu + virtio, affecting some users after long periods of usage, which can lead to suspended queue delivery. Until the issue is resolved, add a temporary recovery service which restarts networking when host communication becomes inoperable. [NO NEW TESTS NEEDED] Signed-off-by: Jason T. Greene <jason.greene@redhat.com>
This commit is contained in:
@ -53,15 +53,16 @@ func GetNodeGrp(grpName string) NodeGroup {
|
||||
}
|
||||
|
||||
// DynamicIgnition holds the per-machine settings that GenerateIgnitionConfig
// reads (for example Name, UID, Rootful, VMType and NetRecover) while it
// assembles the ignition configuration, which it then stores in Cfg.
type DynamicIgnition struct {
|
||||
Name string
|
||||
Key string
|
||||
TimeZone string
|
||||
UID int
|
||||
VMName string
|
||||
VMType define.VMType
|
||||
WritePath string
|
||||
Cfg Config
|
||||
Rootful bool
|
||||
Name string
|
||||
Key string
|
||||
TimeZone string
|
||||
UID int
|
||||
VMName string
|
||||
VMType define.VMType
|
||||
WritePath string
|
||||
Cfg Config
|
||||
Rootful bool
|
||||
// NetRecover, when true, makes the generated config include the
// net-health-recovery.service unit and its
// /usr/local/bin/net-health-recovery.sh script.
NetRecover bool
|
||||
}
|
||||
|
||||
func (ign *DynamicIgnition) Write() error {
|
||||
@ -97,7 +98,7 @@ func (ign *DynamicIgnition) GenerateIgnitionConfig() error {
|
||||
|
||||
ignStorage := Storage{
|
||||
Directories: getDirs(ign.Name),
|
||||
Files: getFiles(ign.Name, ign.UID, ign.Rootful, ign.VMType),
|
||||
Files: getFiles(ign.Name, ign.UID, ign.Rootful, ign.VMType, ign.NetRecover),
|
||||
Links: getLinks(ign.Name),
|
||||
}
|
||||
|
||||
@ -231,6 +232,21 @@ func (ign *DynamicIgnition) GenerateIgnitionConfig() error {
|
||||
}
|
||||
ignSystemd.Units = append(ignSystemd.Units, qemuUnit)
|
||||
}
|
||||
|
||||
if ign.NetRecover {
|
||||
contents, err := GetNetRecoveryUnitFile().ToString()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
recoveryUnit := Unit{
|
||||
Enabled: BoolToPtr(true),
|
||||
Name: "net-health-recovery.service",
|
||||
Contents: &contents,
|
||||
}
|
||||
ignSystemd.Units = append(ignSystemd.Units, recoveryUnit)
|
||||
}
|
||||
|
||||
// Only after all checks are done
|
||||
// is it ready to create the ignition Config
|
||||
ign.Cfg = Config{
|
||||
@ -303,7 +319,7 @@ func getDirs(usrName string) []Directory {
|
||||
return dirs
|
||||
}
|
||||
|
||||
func getFiles(usrName string, uid int, rootful bool, vmtype define.VMType) []File {
|
||||
func getFiles(usrName string, uid int, rootful bool, vmtype define.VMType, netRecover bool) []File {
|
||||
files := make([]File, 0)
|
||||
|
||||
lingerExample := parser.NewUnitFile()
|
||||
@ -574,6 +590,23 @@ Delegate=memory pids cpu io
|
||||
},
|
||||
})
|
||||
|
||||
// Only necessary for qemu on mac
|
||||
if netRecover {
|
||||
files = append(files, File{
|
||||
Node: Node{
|
||||
User: GetNodeUsr("root"),
|
||||
Group: GetNodeGrp("root"),
|
||||
Path: "/usr/local/bin/net-health-recovery.sh",
|
||||
},
|
||||
FileEmbedded1: FileEmbedded1{
|
||||
Mode: IntToPtr(0755),
|
||||
Contents: Resource{
|
||||
Source: EncodeDataURLPtr(GetNetRecoveryFile()),
|
||||
},
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
return files
|
||||
}
|
||||
|
||||
@ -743,6 +776,37 @@ func (i *IgnitionBuilder) Build() error {
|
||||
return i.dynamicIgnition.Write()
|
||||
}
|
||||
|
||||
// GetNetRecoveryFile returns the text of the bash script that is installed in
// the guest as /usr/local/bin/net-health-recovery.sh.
//
// The script waits 120 seconds after start, then loops forever: every 30
// seconds it requests http://192.168.127.1/health with curl (30 second limit)
// and, if curl exits non-zero, takes the enp0s1 interface down and back up.
func GetNetRecoveryFile() string {
	// Kept as a single raw literal so the script is emitted exactly as written.
	const script = `#!/bin/bash
# Verify network health, and bounce the network device if host connectivity
# is lost. This is a temporary workaround for a known rare qemu/virtio issue
# that affects some systems

sleep 120 # allow time for network setup on initial boot
while true; do
sleep 30
curl -s -o /dev/null --max-time 30 http://192.168.127.1/health
if [ "$?" != "0" ]; then
echo "bouncing nic due to loss of connectivity with host"
ifconfig enp0s1 down; ifconfig enp0s1 up
fi
done
`
	return script
}
|
||||
|
||||
func GetNetRecoveryUnitFile() *parser.UnitFile {
|
||||
recoveryUnit := parser.NewUnitFile()
|
||||
recoveryUnit.Add("Unit", "Description", "Verifies health of network and recovers if necessary")
|
||||
recoveryUnit.Add("Unit", "After", "sshd.socket sshd.service")
|
||||
recoveryUnit.Add("Service", "ExecStart", "/usr/local/bin/net-health-recovery.sh")
|
||||
recoveryUnit.Add("Service", "StandardOutput", "journal")
|
||||
recoveryUnit.Add("Service", "StandardError", "journal")
|
||||
recoveryUnit.Add("Service", "StandardInput", "null")
|
||||
recoveryUnit.Add("Install", "WantedBy", "default.target")
|
||||
|
||||
return recoveryUnit
|
||||
}
|
||||
|
||||
func DefaultReadyUnitFile() parser.UnitFile {
|
||||
u := parser.NewUnitFile()
|
||||
u.Add("Unit", "After", "remove-moby.service sshd.socket sshd.service")
|
||||
|
@ -194,14 +194,15 @@ func (v *MachineVM) Init(opts machine.InitOptions) (bool, error) {
|
||||
}
|
||||
|
||||
builder := ignition.NewIgnitionBuilder(ignition.DynamicIgnition{
|
||||
Name: opts.Username,
|
||||
Key: key,
|
||||
VMName: v.Name,
|
||||
VMType: define.QemuVirt,
|
||||
TimeZone: opts.TimeZone,
|
||||
WritePath: v.getIgnitionFile(),
|
||||
UID: v.UID,
|
||||
Rootful: v.Rootful,
|
||||
Name: opts.Username,
|
||||
Key: key,
|
||||
VMName: v.Name,
|
||||
VMType: define.QemuVirt,
|
||||
TimeZone: opts.TimeZone,
|
||||
WritePath: v.getIgnitionFile(),
|
||||
UID: v.UID,
|
||||
Rootful: v.Rootful,
|
||||
NetRecover: useNetworkRecover(),
|
||||
})
|
||||
|
||||
// If the user provides an ignition file, we need to
|
||||
|
@ -11,3 +11,7 @@ func getRuntimeDir() (string, error) {
|
||||
}
|
||||
return tmpDir, nil
|
||||
}
|
||||
|
||||
// useNetworkRecover reports whether a QEMU machine's ignition config should
// include the net-health-recovery service. This variant enables it.
// NOTE(review): presumably the macOS build, since the recovery script is
// described elsewhere as only necessary for qemu on mac — confirm.
func useNetworkRecover() bool {
	const enabled = true
	return enabled
}
|
||||
|
@ -11,3 +11,7 @@ func getRuntimeDir() (string, error) {
|
||||
}
|
||||
return tmpDir, nil
|
||||
}
|
||||
|
||||
// useNetworkRecover reports whether a QEMU machine's ignition config should
// include the net-health-recovery service. This variant leaves it disabled.
// NOTE(review): which platform this variant is built for is not visible
// here — confirm against the file's build constraints.
func useNetworkRecover() bool {
	const enabled = false
	return enabled
}
|
||||
|
@ -11,3 +11,7 @@ func getRuntimeDir() (string, error) {
|
||||
}
|
||||
return util.GetRootlessRuntimeDir()
|
||||
}
|
||||
|
||||
// useNetworkRecover reports whether a QEMU machine's ignition config should
// include the net-health-recovery service. It is off in this variant.
// NOTE(review): the target platform of this variant is not visible here —
// confirm against the file's build constraints.
func useNetworkRecover() bool {
	// The zero value of bool is false, i.e. recovery stays disabled.
	var enabled bool
	return enabled
}
|
||||
|
@ -11,3 +11,7 @@ func getRuntimeDir() (string, error) {
|
||||
}
|
||||
return tmpDir, nil
|
||||
}
|
||||
|
||||
// useNetworkRecover reports whether a QEMU machine's ignition config should
// include the net-health-recovery service. Not enabled in this variant.
// NOTE(review): the target platform of this variant is not visible here —
// confirm against the file's build constraints.
func useNetworkRecover() bool {
	const recoveryEnabled = false
	return recoveryEnabled
}
|
||||
|
Reference in New Issue
Block a user