Refactor machine decompress.go

Added some tests to verify that file extraction works
with different compression formats.
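
The new tests can be run locally with (package path assumed from the
imports in the diff below):

  go test ./pkg/machine/compression/ -run Test_Decompress -v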

Created a decompressor interface with 2 main methods:
  reader(): returns an io.Reader for the specific compression algorithm
  copy(): extracts the compressed file into the destination file provided as a parameter
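
For reference, the full interface as declared in decompress.go (see the
diff below) also includes two small helper methods:

  type decompressor interface {
      srcFilePath() string                // path of the compressed source file
      reader() (io.Reader, error)         // io.Reader for the specific compression algorithm
      copy(w *os.File, r io.Reader) error // extracts the stream into the destination file
      close()                             // closes any files/readers opened during decompression
  }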

Created 5 decompressor types:
- gzip: extract gzip files
- xz: extract xz files
- zip: extract zip files
- generic: extract any other file using github.com/containers/image/v5/pkg/compression
- uncompressed: just copy the file as-is
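
The decompressor is picked in newDecompressor based on the compression
type detected from the file's first bytes, roughly (paraphrased from
decompress.go below, with "path" standing for the compressed file path):

  switch {
  case compressionType == archive.Xz:
      return newXzDecompressor(path)
  case compressionType == archive.Uncompressed && strings.HasSuffix(path, ".zip"):
      return newZipDecompressor(path)
  case compressionType == archive.Uncompressed:
      return newUncompressedDecompressor(path)
  case compressionType == archive.Gzip && runtime.GOOS == "darwin":
      // gzip gets the sparse-aware path on macOS
      return newGzipDecompressor(path)
  default:
      return newGenericDecompressor(path)
  }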

Minor fix to the progress bar instances: added a call to bar.Abort(false)
before Progress.Wait() to avoid hanging when a bar is not marked as
completed even though extraction is done.
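
The relevant ordering in runDecompression (deferred calls run last-in,
first-out, so the abort fires before the wait):

  p, bar := utils.ProgressBar(initMsg, stat.Size(), finalMsg)
  // Wait for bars to complete and then shut down the bars container.
  defer p.Wait()
  readProxy := bar.ProxyReader(decompressorReader)
  // bar.Abort(false) must run before p.Wait(), otherwise Wait can hang
  // on a bar that never reached 100%.
  defer bar.Abort(false)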

Signed-off-by: Mario Loriedo <mario.loriedo@gmail.com>
Mario Loriedo
2024-02-09 17:31:01 +01:00
parent d2c2e665e3
commit 88af8852db
13 changed files with 486 additions and 281 deletions


@@ -1,6 +1,11 @@
package compression
import "testing"
import (
"os"
"testing"
"github.com/containers/podman/v5/pkg/machine/define"
)
func Test_compressionFromFile(t *testing.T) {
type args struct {
@@ -89,3 +94,102 @@ func TestImageCompression_String(t *testing.T) {
})
}
}
func Test_Decompress(t *testing.T) {
type args struct {
src string
dst string
}
type want struct {
content string
}
tests := []struct {
name string
args args
want want
}{
{
name: "zip",
args: args{
src: "./testfiles/sample.zip",
dst: "./testfiles/hellozip",
},
want: want{
content: "zip\n",
},
},
{
name: "xz",
args: args{
src: "./testfiles/sample.xz",
dst: "./testfiles/helloxz",
},
want: want{
content: "xz\n",
},
},
{
name: "gzip",
args: args{
src: "./testfiles/sample.gz",
dst: "./testfiles/hellogz",
},
want: want{
content: "gzip\n",
},
},
{
name: "bzip2",
args: args{
src: "./testfiles/sample.bz2",
dst: "./testfiles/hellobz2",
},
want: want{
content: "bzip2\n",
},
},
{
name: "zstd",
args: args{
src: "./testfiles/sample.zst",
dst: "./testfiles/hellozstd",
},
want: want{
content: "zstd\n",
},
},
{
name: "uncompressed",
args: args{
src: "./testfiles/sample.uncompressed",
dst: "./testfiles/hellozuncompressed",
},
want: want{
content: "uncompressed\n",
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
srcVMFile := &define.VMFile{Path: tt.args.src}
dstFilePath := tt.args.dst
defer os.Remove(dstFilePath)
if err := Decompress(srcVMFile, dstFilePath); err != nil {
t.Fatalf("decompress() error = %v", err)
}
data, err := os.ReadFile(dstFilePath)
if err != nil {
t.Fatalf("ReadFile() error = %v", err)
}
if got := string(data); got != tt.want.content {
t.Fatalf("content = %v, want %v", got, tt.want.content)
}
})
}
}


@@ -1,313 +1,114 @@
package compression
import (
"archive/zip"
"bufio"
"compress/gzip"
"errors"
"fmt"
"io"
"os"
"os/exec"
"path/filepath"
"runtime"
"strings"
"github.com/containers/image/v5/pkg/compression"
"github.com/containers/podman/v5/pkg/machine/define"
"github.com/containers/podman/v5/utils"
"github.com/containers/storage/pkg/archive"
crcOs "github.com/crc-org/crc/v2/pkg/os"
"github.com/klauspost/compress/zstd"
"github.com/sirupsen/logrus"
"github.com/ulikunitz/xz"
)
// Decompress is a generic wrapper for various decompression algos
// TODO this needs some love. in the various decompression functions that are
// called, the same uncompressed path is being opened multiple times.
func Decompress(localPath *define.VMFile, uncompressedPath string) error {
var isZip bool
uncompressedFileWriter, err := os.OpenFile(uncompressedPath, os.O_CREATE|os.O_RDWR, 0600)
if err != nil {
return err
}
defer func() {
if err := uncompressedFileWriter.Close(); err != nil && !errors.Is(err, os.ErrClosed) {
logrus.Warnf("unable to close decompressed file %s: %q", uncompressedPath, err)
}
}()
sourceFile, err := localPath.Read()
if err != nil {
return err
}
if strings.HasSuffix(localPath.GetPath(), ".zip") {
isZip = true
}
compressionType := archive.DetectCompression(sourceFile)
const (
zipExt = ".zip"
progressBarPrefix = "Extracting compressed file"
macOs = "darwin"
)
prefix := "Extracting compressed file"
prefix += ": " + filepath.Base(uncompressedPath)
switch compressionType {
case archive.Xz:
return decompressXZ(prefix, localPath.GetPath(), uncompressedFileWriter)
case archive.Uncompressed:
if isZip && runtime.GOOS == "windows" {
return decompressZip(prefix, localPath.GetPath(), uncompressedFileWriter)
}
// here we should just do a copy
dstFile, err := os.Open(localPath.GetPath())
if err != nil {
return err
}
// darwin really struggles with sparse files. being diligent here
fmt.Printf("Copying uncompressed file %q to %q/n", localPath.GetPath(), dstFile.Name())
// Keeping CRC implementation for now, but ideally this could be pruned and
// sparsewriter could be used. in that case, this area needs rework or
// sparsewriter be made to honor the *file interface
_, err = crcOs.CopySparse(uncompressedFileWriter, dstFile)
return err
case archive.Gzip:
if runtime.GOOS == "darwin" {
return decompressGzWithSparse(prefix, localPath, uncompressedFileWriter)
}
fallthrough
case archive.Zstd:
if runtime.GOOS == "darwin" {
return decompressZstdWithSparse(prefix, localPath, uncompressedFileWriter)
}
fallthrough
default:
return decompressEverythingElse(prefix, localPath.GetPath(), uncompressedFileWriter)
}
// if compressionType != archive.Uncompressed || isZip {
// prefix = "Extracting compressed file"
// }
// prefix += ": " + filepath.Base(uncompressedPath)
// if compressionType == archive.Xz {
// return decompressXZ(prefix, localPath.GetPath(), uncompressedFileWriter)
// }
// if isZip && runtime.GOOS == "windows" {
// return decompressZip(prefix, localPath.GetPath(), uncompressedFileWriter)
// }
// Unfortunately GZ is not sparse capable. Lets handle it differently
// if compressionType == archive.Gzip && runtime.GOOS == "darwin" {
// return decompressGzWithSparse(prefix, localPath, uncompressedPath)
// }
// return decompressEverythingElse(prefix, localPath.GetPath(), uncompressedFileWriter)
type decompressor interface {
srcFilePath() string
reader() (io.Reader, error)
copy(w *os.File, r io.Reader) error
close()
}
// Will error out if file without .Xz already exists
// Maybe extracting then renaming is a good idea here..
// depends on Xz: not pre-installed on mac, so it becomes a brew dependency
func decompressXZ(prefix string, src string, output io.WriteCloser) error {
var read io.Reader
var cmd *exec.Cmd
func newDecompressor(compressedFilePath string, compressedFileContent []byte) decompressor {
compressionType := archive.DetectCompression(compressedFileContent)
os := runtime.GOOS
hasZipSuffix := strings.HasSuffix(compressedFilePath, zipExt)
stat, err := os.Stat(src)
switch {
case compressionType == archive.Xz:
return newXzDecompressor(compressedFilePath)
case compressionType == archive.Uncompressed && hasZipSuffix:
return newZipDecompressor(compressedFilePath)
case compressionType == archive.Uncompressed:
return newUncompressedDecompressor(compressedFilePath)
case compressionType == archive.Gzip && os == macOs:
return newGzipDecompressor(compressedFilePath)
default:
return newGenericDecompressor(compressedFilePath)
}
}
func Decompress(srcVMFile *define.VMFile, dstFilePath string) error {
srcFilePath := srcVMFile.GetPath()
// Are we reading the full image file?
// Only a few bytes are read to detect
// the compression type
srcFileContent, err := srcVMFile.Read()
if err != nil {
return err
}
file, err := os.Open(src)
d := newDecompressor(srcFilePath, srcFileContent)
return runDecompression(d, dstFilePath)
}
func runDecompression(d decompressor, dstFilePath string) error {
decompressorReader, err := d.reader()
if err != nil {
return err
}
defer file.Close()
defer d.close()
p, bar := utils.ProgressBar(prefix, stat.Size(), prefix+": done")
proxyReader := bar.ProxyReader(file)
stat, err := os.Stat(d.srcFilePath())
if err != nil {
return err
}
initMsg := progressBarPrefix + ": " + filepath.Base(dstFilePath)
finalMsg := initMsg + ": done"
// We are getting the compressed file size but
// the progress bar needs the full size of the
// decompressed file.
// As a result the progress bar shows 100%
// before the decompression completes.
// A workaround is to set the size to -1 but the
// side effect is that we won't see any advancement in
// the bar.
// An update in utils.ProgressBar is needed to improve
// the handling of size=-1 (i.e. unknown size).
p, bar := utils.ProgressBar(initMsg, stat.Size(), finalMsg)
// Wait for bars to complete and then shut down the bars container
defer p.Wait()
readProxy := bar.ProxyReader(decompressorReader)
// Interrupt the bar goroutine. It's important that
// bar.Abort(false) is called before p.Wait(), otherwise
// p.Wait() can hang.
defer bar.Abort(false)
dstFileWriter, err := os.OpenFile(dstFilePath, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, stat.Mode())
if err != nil {
logrus.Errorf("Unable to open destination file %s for writing: %q", dstFilePath, err)
return err
}
defer func() {
if err := proxyReader.Close(); err != nil {
logrus.Error(err)
if err := dstFileWriter.Close(); err != nil {
logrus.Errorf("Unable to to close destination file %s: %q", dstFilePath, err)
}
}()
// Prefer Xz utils for fastest performance, fallback to go xi2 impl
if _, err := exec.LookPath("xz"); err == nil {
cmd = exec.Command("xz", "-d", "-c")
cmd.Stdin = proxyReader
read, err = cmd.StdoutPipe()
if err != nil {
return err
}
cmd.Stderr = os.Stderr
} else {
// This XZ implementation is reliant on buffering. It is also 3x+ slower than XZ utils.
// Consider replacing with a faster implementation (e.g. xi2) if podman machine is
// updated with a larger image for the distribution base.
buf := bufio.NewReader(proxyReader)
read, err = xz.NewReader(buf)
if err != nil {
return err
}
err = d.copy(dstFileWriter, readProxy)
if err != nil {
logrus.Errorf("Error extracting compressed file: %q", err)
return err
}
done := make(chan bool)
go func() {
if _, err := io.Copy(output, read); err != nil {
logrus.Error(err)
}
output.Close()
done <- true
}()
if cmd != nil {
err := cmd.Start()
if err != nil {
return err
}
p.Wait()
return cmd.Wait()
}
<-done
p.Wait()
return nil
}
func decompressEverythingElse(prefix string, src string, output io.WriteCloser) error {
stat, err := os.Stat(src)
if err != nil {
return err
}
f, err := os.Open(src)
if err != nil {
return err
}
p, bar := utils.ProgressBar(prefix, stat.Size(), prefix+": done")
proxyReader := bar.ProxyReader(f)
defer func() {
if err := proxyReader.Close(); err != nil {
logrus.Error(err)
}
}()
uncompressStream, _, err := compression.AutoDecompress(proxyReader)
if err != nil {
return err
}
defer func() {
if err := uncompressStream.Close(); err != nil {
logrus.Error(err)
}
if err := output.Close(); err != nil {
logrus.Error(err)
}
}()
_, err = io.Copy(output, uncompressStream)
p.Wait()
return err
}
func decompressZip(prefix string, src string, output io.WriteCloser) error {
zipReader, err := zip.OpenReader(src)
if err != nil {
return err
}
if len(zipReader.File) != 1 {
return errors.New("machine image files should consist of a single compressed file")
}
f, err := zipReader.File[0].Open()
if err != nil {
return err
}
defer func() {
if err := f.Close(); err != nil {
logrus.Error(err)
}
}()
defer func() {
if err := output.Close(); err != nil {
logrus.Error(err)
}
}()
size := int64(zipReader.File[0].CompressedSize64)
p, bar := utils.ProgressBar(prefix, size, prefix+": done")
proxyReader := bar.ProxyReader(f)
defer func() {
if err := proxyReader.Close(); err != nil {
logrus.Error(err)
}
}()
_, err = io.Copy(output, proxyReader)
p.Wait()
return err
}
func decompressWithSparse(prefix string, compressedReader io.Reader, uncompressedFile *os.File) error {
dstFile := NewSparseWriter(uncompressedFile)
defer func() {
if err := dstFile.Close(); err != nil {
logrus.Errorf("unable to close uncompressed file %s: %q", uncompressedFile.Name(), err)
}
}()
// TODO remove the following line when progress bars work
_ = prefix
// p, bar := utils.ProgressBar(prefix, stat.Size(), prefix+": done")
// proxyReader := bar.ProxyReader(f)
// defer func() {
// if err := proxyReader.Close(); err != nil {
// logrus.Error(err)
// }
// }()
// p.Wait()
_, err := io.Copy(dstFile, compressedReader)
return err
}
func decompressGzWithSparse(prefix string, compressedPath *define.VMFile, uncompressedFileWriter *os.File) error {
logrus.Debugf("decompressing %s", compressedPath.GetPath())
f, err := os.Open(compressedPath.GetPath())
if err != nil {
return err
}
defer func() {
if err := f.Close(); err != nil {
logrus.Errorf("unable to close on compressed file %s: %q", compressedPath.GetPath(), err)
}
}()
gzReader, err := gzip.NewReader(f)
if err != nil {
return err
}
defer func() {
if err := gzReader.Close(); err != nil {
logrus.Errorf("unable to close gzreader: %q", err)
}
}()
// This way we get something to look at in debug mode
defer func() {
logrus.Debug("decompression complete")
}()
return decompressWithSparse(prefix, gzReader, uncompressedFileWriter)
}
func decompressZstdWithSparse(prefix string, compressedPath *define.VMFile, uncompressedFileWriter *os.File) error {
logrus.Debugf("decompressing %s", compressedPath.GetPath())
f, err := os.Open(compressedPath.GetPath())
if err != nil {
return err
}
defer func() {
if err := f.Close(); err != nil {
logrus.Errorf("unable to close on compressed file %s: %q", compressedPath.GetPath(), err)
}
}()
zstdReader, err := zstd.NewReader(f)
if err != nil {
return err
}
defer zstdReader.Close()
// This way we get something to look at in debug mode
defer func() {
logrus.Debug("decompression complete")
}()
return decompressWithSparse(prefix, zstdReader, uncompressedFileWriter)
}


@@ -0,0 +1,54 @@
package compression
import (
"io"
"os"
"github.com/containers/image/v5/pkg/compression"
"github.com/sirupsen/logrus"
)
type genericDecompressor struct {
compressedFilePath string
compressedFile *os.File
uncompressStream io.ReadCloser
}
func newGenericDecompressor(compressedFilePath string) decompressor {
return &genericDecompressor{
compressedFilePath: compressedFilePath,
}
}
func (d *genericDecompressor) srcFilePath() string {
return d.compressedFilePath
}
func (d *genericDecompressor) reader() (io.Reader, error) {
srcFile, err := os.Open(d.compressedFilePath)
if err != nil {
return nil, err
}
d.compressedFile = srcFile
return srcFile, nil
}
func (d *genericDecompressor) copy(w *os.File, r io.Reader) error {
uncompressStream, _, err := compression.AutoDecompress(r)
if err != nil {
return err
}
d.uncompressStream = uncompressStream
_, err = io.Copy(w, uncompressStream)
return err
}
func (d *genericDecompressor) close() {
if err := d.compressedFile.Close(); err != nil {
logrus.Errorf("Unable to close compressed file: %q", err)
}
if err := d.uncompressStream.Close(); err != nil {
logrus.Errorf("Unable to close uncompressed stream: %q", err)
}
}


@@ -0,0 +1,56 @@
package compression
import (
"compress/gzip"
"io"
"os"
crcOs "github.com/crc-org/crc/v2/pkg/os"
"github.com/sirupsen/logrus"
)
type gzDecompressor struct {
compressedFilePath string
compressedFile *os.File
gzReader *gzip.Reader
}
func newGzipDecompressor(compressedFilePath string) decompressor {
return &gzDecompressor{
compressedFilePath: compressedFilePath,
}
}
func (d *gzDecompressor) srcFilePath() string {
return d.compressedFilePath
}
func (d *gzDecompressor) reader() (io.Reader, error) {
srcFile, err := os.Open(d.compressedFilePath)
if err != nil {
return nil, err
}
d.compressedFile = srcFile
gzReader, err := gzip.NewReader(srcFile)
if err != nil {
return gzReader, err
}
d.gzReader = gzReader
return gzReader, nil
}
func (*gzDecompressor) copy(w *os.File, r io.Reader) error {
_, err := crcOs.CopySparse(w, r)
return err
}
func (d *gzDecompressor) close() {
if err := d.compressedFile.Close(); err != nil {
logrus.Errorf("Unable to close gz file: %q", err)
}
if err := d.gzReader.Close(); err != nil {
logrus.Errorf("Unable to close gz file: %q", err)
}
}

Binary file not shown.

Binary file not shown.


@@ -0,0 +1 @@
uncompressed

Binary file not shown.

Binary file not shown.

Binary file not shown.


@@ -0,0 +1,45 @@
package compression
import (
"io"
"os"
crcOs "github.com/crc-org/crc/v2/pkg/os"
"github.com/sirupsen/logrus"
)
type uncompressedDecompressor struct {
compressedFilePath string
compressedFile *os.File
}
func newUncompressedDecompressor(compressedFilePath string) decompressor {
return &uncompressedDecompressor{
compressedFilePath: compressedFilePath,
}
}
func (d *uncompressedDecompressor) srcFilePath() string {
return d.compressedFilePath
}
func (d *uncompressedDecompressor) reader() (io.Reader, error) {
srcFile, err := os.Open(d.compressedFilePath)
if err != nil {
return nil, err
}
d.compressedFile = srcFile
return srcFile, nil
}
func (*uncompressedDecompressor) copy(w *os.File, r io.Reader) error {
_, err := crcOs.CopySparse(w, r)
return err
}
func (d *uncompressedDecompressor) close() {
if err := d.compressedFile.Close(); err != nil {
logrus.Errorf("Unable to close gz file: %q", err)
}
}


@@ -0,0 +1,87 @@
package compression
import (
"bufio"
"io"
"os"
"os/exec"
"github.com/sirupsen/logrus"
"github.com/ulikunitz/xz"
)
type xzDecompressor struct {
compressedFilePath string
compressedFile *os.File
}
func newXzDecompressor(compressedFilePath string) decompressor {
return &xzDecompressor{
compressedFilePath: compressedFilePath,
}
}
func (d *xzDecompressor) srcFilePath() string {
return d.compressedFilePath
}
func (d *xzDecompressor) reader() (io.Reader, error) {
srcFile, err := os.Open(d.compressedFilePath)
if err != nil {
return nil, err
}
d.compressedFile = srcFile
return srcFile, nil
}
// Will error out if file without .Xz already exists
// Maybe extracting then renaming is a good idea here..
// depends on Xz: not pre-installed on mac, so it becomes a brew dependency
func (*xzDecompressor) copy(w *os.File, r io.Reader) error {
var cmd *exec.Cmd
var read io.Reader
// Prefer Xz utils for fastest performance, fallback to go xi2 impl
if _, err := exec.LookPath("xz"); err == nil {
cmd = exec.Command("xz", "-d", "-c")
cmd.Stdin = r
read, err = cmd.StdoutPipe()
if err != nil {
return err
}
cmd.Stderr = os.Stderr
} else {
// This XZ implementation is reliant on buffering. It is also 3x+ slower than XZ utils.
// Consider replacing with a faster implementation (e.g. xi2) if podman machine is
// updated with a larger image for the distribution base.
buf := bufio.NewReader(r)
read, err = xz.NewReader(buf)
if err != nil {
return err
}
}
done := make(chan bool)
go func() {
if _, err := io.Copy(w, read); err != nil {
logrus.Error(err)
}
done <- true
}()
if cmd != nil {
err := cmd.Start()
if err != nil {
return err
}
return cmd.Wait()
}
<-done
return nil
}
func (d *xzDecompressor) close() {
if err := d.compressedFile.Close(); err != nil {
logrus.Errorf("Unable to close xz file: %q", err)
}
}


@@ -0,0 +1,57 @@
package compression
import (
"archive/zip"
"errors"
"io"
"os"
"github.com/sirupsen/logrus"
)
type zipDecompressor struct {
compressedFilePath string
zipReader *zip.ReadCloser
fileReader io.ReadCloser
}
func newZipDecompressor(compressedFilePath string) decompressor {
return &zipDecompressor{
compressedFilePath: compressedFilePath,
}
}
func (d *zipDecompressor) srcFilePath() string {
return d.compressedFilePath
}
func (d *zipDecompressor) reader() (io.Reader, error) {
zipReader, err := zip.OpenReader(d.compressedFilePath)
if err != nil {
return nil, err
}
d.zipReader = zipReader
if len(zipReader.File) != 1 {
return nil, errors.New("machine image files should consist of a single compressed file")
}
z, err := zipReader.File[0].Open()
if err != nil {
return nil, err
}
d.fileReader = z
return z, nil
}
func (*zipDecompressor) copy(w *os.File, r io.Reader) error {
_, err := io.Copy(w, r)
return err
}
func (d *zipDecompressor) close() {
if err := d.zipReader.Close(); err != nil {
logrus.Errorf("Unable to close zip file: %q", err)
}
if err := d.fileReader.Close(); err != nil {
logrus.Errorf("Unable to close zip file: %q", err)
}
}