AppleHV - make gz ops sparse

Gz, by definition, is not able to preserve the sparse nature of files. Using some code from the crc project and gluing it together with our decompression code, we can re-create the sparseness of a file. One downside is that the operation is a little slower, but I think the gains from the sparse file are well worth it in IO alone.

There are a number of TODOs in this PR that would be ripe for quick-hitting fixes.

[NO NEW TESTS NEEDED]

Signed-off-by: Brent Baude <baude@redhat.com>
This commit is contained in:
Brent Baude
2024-02-04 08:32:41 -06:00
committed by Brent Baude
parent 85d8281484
commit d5eb8f3b71
5 changed files with 237 additions and 11 deletions

View File

@ -5,7 +5,6 @@ package applehv
import (
"fmt"
"os"
"os/exec"
"syscall"
"github.com/containers/common/pkg/strongunits"
@ -101,15 +100,7 @@ func checkProcessRunning(processName string, pid int) error {
// is assumed GiB
// resizeDisk resizes the machine's backing disk image at mc.ImagePath to
// newSize, which is expressed in GiB. On most filesystems truncating a file
// to a larger size extends it sparsely, so growing the image does not
// consume physical disk space up front.
func resizeDisk(mc *vmconfigs.MachineConfig, newSize strongunits.GiB) error {
	logrus.Debugf("resizing %s to %d bytes", mc.ImagePath.GetPath(), newSize.ToBytes())
	return os.Truncate(mc.ImagePath.GetPath(), int64(newSize.ToBytes()))
}
func generateSystemDFilesForVirtiofsMounts(mounts []machine.VirtIoFs) []ignition.Unit {

View File

@ -295,7 +295,6 @@ func (a AppleHVStubber) VMType() define.VMType {
return define.AppleHvVirt
}
func waitForGvProxy(gvproxySocket *define.VMFile) error {
backoffWait := gvProxyWaitBackoff
logrus.Debug("checking that gvproxy is running")

View File

@ -0,0 +1,117 @@
package compression
import (
"bytes"
"io"
"os"
)
// TODO: vendor this in ... the pkg/os directory is small and the code should be negligible
/*
NOTE: copy.go and copy_test.go were lifted from github.com/crc-org/crc because
I was having trouble getting Go to vendor it properly. All credit to them.
*/
// copyFile copies the file at src to dst. When sparse is true, runs of
// all-zero chunks are turned into holes in the destination via CopySparse;
// otherwise a plain io.Copy is used. The destination is given the same file
// mode as the source.
func copyFile(src, dst string, sparse bool) error {
	source, err := os.Open(src)
	if err != nil {
		return err
	}
	defer source.Close()

	target, err := os.Create(dst)
	if err != nil {
		return err
	}
	defer target.Close()

	// Select the copy strategy up front so the error handling is shared.
	doCopy := func() error {
		if sparse {
			_, err := CopySparse(target, source)
			return err
		}
		_, err := io.Copy(target, source)
		return err
	}
	if err := doCopy(); err != nil {
		return err
	}

	srcInfo, err := os.Stat(src)
	if err != nil {
		return err
	}
	if err := os.Chmod(dst, srcInfo.Mode()); err != nil {
		return err
	}

	// Close explicitly so any flush/write-back error is surfaced to the
	// caller; the deferred Close on an already-closed file is harmless.
	return target.Close()
}
// CopyFile copies the file at src to dst as a regular (non-sparse) copy.
// The destination is given the same file mode as the source.
func CopyFile(src, dst string) error {
	return copyFile(src, dst, false)
}
// CopyFileSparse copies the file at src to dst, re-creating sparse regions:
// all-zero chunks in the source become holes in the destination. The
// destination is given the same file mode as the source.
func CopyFileSparse(src, dst string) error {
	return copyFile(src, dst, true)
}
// CopySparse copies src into dst, seeking past all-zero chunks instead of
// writing them, so that filesystems which support holes produce a sparse
// file. It returns the logical number of bytes written, which includes the
// length of any skipped zero chunks.
func CopySparse(dst io.WriteSeeker, src io.Reader) (int64, error) {
	copyBuf := make([]byte, copyChunkSize)
	sparseWriter := newSparseWriter(dst)

	bytesWritten, err := io.CopyBuffer(sparseWriter, src, copyBuf)
	if err != nil {
		return bytesWritten, err
	}
	err = sparseWriter.Close()
	return bytesWritten, err
}

// sparseWriter wraps an io.WriteSeeker and converts writes of all-zero
// chunks into seeks, leaving holes in the underlying file.
type sparseWriter struct {
	writer io.WriteSeeker
	// lastChunkSparse records whether the most recent chunk was skipped,
	// so Close knows if a trailing hole must be materialized.
	lastChunkSparse bool
}

// newSparseWriter returns a sparseWriter writing to writer.
func newSparseWriter(writer io.WriteSeeker) *sparseWriter {
	return &sparseWriter{writer: writer}
}

// copyChunkSize is the granularity at which zero-chunk detection happens.
const copyChunkSize = 4096

var emptyChunk = make([]byte, copyChunkSize)

// isEmptyChunk reports whether p consists entirely of zero bytes.
func isEmptyChunk(p []byte) bool {
	// HasPrefix instead of bytes.Equal in order to handle the last chunk
	// of the file, which may be shorter than len(emptyChunk), and would
	// fail bytes.Equal()
	return bytes.HasPrefix(emptyChunk, p)
}

// Write implements io.Writer. All-zero chunks are skipped by seeking the
// underlying writer forward, which produces a hole in the output file on
// filesystems that support sparse files.
func (w *sparseWriter) Write(p []byte) (n int, err error) {
	if isEmptyChunk(p) {
		if _, err := w.writer.Seek(int64(len(p)), io.SeekCurrent); err != nil {
			w.lastChunkSparse = false
			return 0, err
		}
		w.lastChunkSparse = true
		return len(p), nil
	}
	w.lastChunkSparse = false
	return w.writer.Write(p)
}

// Close finalizes the output. If the file ended with a skipped zero chunk,
// a single zero byte is written at the final offset, because a seek alone
// does not extend the file to its full logical size.
func (w *sparseWriter) Close() error {
	if w.lastChunkSparse {
		if _, err := w.writer.Seek(-1, io.SeekCurrent); err != nil {
			return err
		}
		if _, err := w.writer.Write([]byte{0}); err != nil {
			return err
		}
	}
	return nil
}

View File

@ -0,0 +1,52 @@
package compression
import (
"os"
"path/filepath"
"testing"
)
// TestCopyFile verifies that CopyFile copies a file's contents exactly.
// Files are created under t.TempDir so they are removed automatically when
// the test finishes (the previous version leaked temp files into os.TempDir
// and ignored the source Write error).
func TestCopyFile(t *testing.T) {
	testStr := "test-machine"
	dir := t.TempDir()

	srcFilePath := filepath.Join(dir, "machine-test")
	if err := os.WriteFile(srcFilePath, []byte(testStr), 0o600); err != nil {
		t.Fatal(err)
	}

	destFilePath := filepath.Join(dir, "machine-copy-test")
	if err := CopyFile(srcFilePath, destFilePath); err != nil {
		t.Fatal(err)
	}

	data, err := os.ReadFile(destFilePath)
	if err != nil {
		t.Fatal(err)
	}
	if string(data) != testStr {
		t.Fatalf("expected data \"%s\"; received \"%s\"", testStr, string(data))
	}
}

View File

@ -3,6 +3,7 @@ package compression
import (
"archive/zip"
"bufio"
"compress/gzip"
"errors"
"io"
"os"
@ -19,12 +20,20 @@ import (
"github.com/ulikunitz/xz"
)
// Decompress is a generic wrapper for various decompression algos
// TODO this needs some love. in the various decompression functions that are
// called, the same uncompressed path is being opened multiple times.
func Decompress(localPath *define.VMFile, uncompressedPath string) error {
var isZip bool
uncompressedFileWriter, err := os.OpenFile(uncompressedPath, os.O_CREATE|os.O_RDWR, 0600)
if err != nil {
return err
}
defer func() {
if err := uncompressedFileWriter.Close(); err != nil {
logrus.Errorf("unable to to close decompressed file %s: %q", uncompressedPath, err)
}
}()
sourceFile, err := localPath.Read()
if err != nil {
return err
@ -44,6 +53,11 @@ func Decompress(localPath *define.VMFile, uncompressedPath string) error {
if isZip && runtime.GOOS == "windows" {
return decompressZip(prefix, localPath.GetPath(), uncompressedFileWriter)
}
// Unfortunately GZ is not sparse capable. Lets handle it differently
if compressionType == archive.Gzip && runtime.GOOS == "darwin" {
return decompressGzWithSparse(prefix, localPath, uncompressedPath)
}
return decompressEverythingElse(prefix, localPath.GetPath(), uncompressedFileWriter)
}
@ -182,3 +196,56 @@ func decompressZip(prefix string, src string, output io.WriteCloser) error {
p.Wait()
return err
}
// decompressGzWithSparse decompresses the gzip-compressed file at
// compressedPath into uncompressedPath while re-creating sparse regions:
// gzip cannot represent holes, so the decompressed stream is written through
// CopySparse, which seeks over all-zero chunks instead of writing them.
// prefix is currently unused (reserved for progress-bar output).
func decompressGzWithSparse(prefix string, compressedPath *define.VMFile, uncompressedPath string) error {
	stat, err := os.Stat(compressedPath.GetPath())
	if err != nil {
		return err
	}

	// Create the destination with the same mode as the compressed source.
	dstFile, err := os.OpenFile(uncompressedPath, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, stat.Mode())
	if err != nil {
		return err
	}
	defer func() {
		if err := dstFile.Close(); err != nil {
			logrus.Errorf("unable to close uncompressed file %s: %q", uncompressedPath, err)
		}
	}()

	f, err := os.Open(compressedPath.GetPath())
	if err != nil {
		return err
	}
	defer func() {
		if err := f.Close(); err != nil {
			logrus.Errorf("unable to close compressed file %s: %q", compressedPath.GetPath(), err)
		}
	}()

	gzReader, err := gzip.NewReader(f)
	if err != nil {
		return err
	}
	defer func() {
		if err := gzReader.Close(); err != nil {
			logrus.Errorf("unable to close gzip reader: %q", err)
		}
	}()

	// TODO remove the following line when progress bars work
	_ = prefix
	// p, bar := utils.ProgressBar(prefix, stat.Size(), prefix+": done")
	// proxyReader := bar.ProxyReader(f)
	// defer func() {
	// 	if err := proxyReader.Close(); err != nil {
	// 		logrus.Error(err)
	// 	}
	// }()

	logrus.Debugf("decompressing %s", compressedPath.GetPath())
	_, err = CopySparse(dstFile, gzReader)
	logrus.Debug("decompression complete")
	// p.Wait()
	return err
}