Files
podman/pkg/machine/compression/sparse_file_writer.go
Miloslav Trmač 5d303ca267 Reformulate sparseWriter to deal with starting/ending zeroes explicitly
... instead of using a multi-variable state machine.

The net effect of this code is exactly the same as the previous implementation,
except:
- the operation after Write() returns an error might differ
- If the file ends with zeroes, we don't Seek(-1), and
  we don't create a hole at all if it is too small, preferring
  to save a syscall.

But this formulation is hopefully easier to prove correct.

Signed-off-by: Miloslav Trmač <mitr@redhat.com>
2024-02-23 02:09:39 +01:00

139 lines
4.1 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package compression
import (
"errors"
"fmt"
"io"
)
// zerosThreshold is the length, in bytes, from which sparseWriter treats a run of
// zeroes as worth turning into a hole instead of writing it out.
const zerosThreshold = 1024
// WriteSeekCloser groups the three capabilities sparseWriter needs from its
// destination: writing data, seeking (used to skip over holes) and closing.
type WriteSeekCloser interface {
	io.Writer
	io.Seeker
	io.Closer
}
// sparseWriter wraps a WriteSeekCloser and keeps count of zeroes that were passed
// to Write but have not been written to the file yet.
type sparseWriter struct {
// file is the destination that Write, createHole and Close operate on; Close sets it to nil.
file WriteSeekCloser
// Invariant between method calls:
// The contents of the file match the contents passed to Write, except that pendingZeroes trailing zeroes have not been written.
// Also, the data that _has_ been written does not end with a zero byte (i.e. pendingZeroes is the largest possible value).
pendingZeroes int64
}
// NewSparseWriter returns a sparseWriter that sends its output to file and
// starts out with no pending zeroes.
func NewSparseWriter(file WriteSeekCloser) *sparseWriter {
	sw := new(sparseWriter) // pendingZeroes starts at its zero value
	sw.file = file
	return sw
}
// createHole advances the file position by size bytes, relative to the current
// position, without writing anything. It returns the error from Seek, if any.
func (sw *sparseWriter) createHole(size int64) error {
	if _, err := sw.file.Seek(size, io.SeekCurrent); err != nil {
		return err
	}
	return nil
}
// zeroSpanEnd returns the index just past the run of zero bytes that starts at
// b[i]. The result is i itself when b[i] is non-zero or i is not below len(b).
func zeroSpanEnd(b []byte, i int) int {
	for ; i < len(b); i++ {
		if b[i] != 0 {
			break
		}
	}
	return i
}
// nonzeroSpanEnd returns the index of the first zero byte at or after b[i], or
// len(b) when there is none. The result is i itself when b[i] is zero or i is
// not below len(b).
func nonzeroSpanEnd(b []byte, i int) int {
	for ; i < len(b); i++ {
		if b[i] == 0 {
			return i
		}
	}
	return i
}
// Write writes data to the file, creating holes for long sequences of zeros.
//
// Runs of zero bytes inside data that are at least zerosThreshold long, and a
// leading run that directly follows zeroes left pending by an earlier call, are
// skipped with createHole instead of being written. Zeroes at the very end of
// data are not written at all; they are only counted in sw.pendingZeroes and are
// dealt with by the next Write or by Close.
//
// It returns the number of bytes of data that were consumed (written, skipped as
// a hole, or recorded as pending) together with the first error encountered. As
// the io.Writer contract requires, the count is never negative: on failure it is
// the length of the prefix of data that had been dealt with before the error.
func (sw *sparseWriter) Write(data []byte) (int, error) {
	initialZeroSpanLength := zeroSpanEnd(data, 0)
	if initialZeroSpanLength == len(data) {
		// Nothing but zeroes (or nothing at all): just remember them.
		sw.pendingZeroes += int64(initialZeroSpanLength)
		return initialZeroSpanLength, nil
	}

	// We have _some_ non-zero data to write.
	// Think of the input as an alternating sequence of spans of zeroes / non-zeroes 0a0b…c0,
	// where the starting/ending span of zeroes may be empty.

	// data[:pendingWriteOffset] has already been dealt with (written, or skipped as a hole);
	// it is also the byte count reported if a later step fails.
	pendingWriteOffset := 0
	// The expected condition for creating a hole would be sw.pendingZeroes + initialZeroSpanLength >= zerosThreshold; but
	// if sw.pendingZeroes != 0, we are going to spend a syscall to deal with sw.pendingZeroes either way.
	// We might just as well make it a createHole(), even if the hole size is below zeroThreshold.
	if sw.pendingZeroes != 0 || initialZeroSpanLength >= zerosThreshold {
		if err := sw.createHole(sw.pendingZeroes + int64(initialZeroSpanLength)); err != nil {
			return 0, err
		}
		// The pending zeroes are now accounted for in the file position. Clear the counter right
		// away so that it is not stale (and not replayed by Close) if a later step of this call
		// fails; on success it is overwritten before returning anyway.
		sw.pendingZeroes = 0
		pendingWriteOffset = initialZeroSpanLength
	}

	current := initialZeroSpanLength
	for {
		// Invariant at this point of this loop:
		// - pendingWriteOffset <= current < len(data)
		// - data[current] != 0
		// - data[pendingWriteOffset:current] has not yet been written
		if pendingWriteOffset > current || current >= len(data) {
			return pendingWriteOffset, fmt.Errorf("internal error: sparseWriter invariant violation: %d <= %d < %d", pendingWriteOffset, current, len(data))
		}
		if b := data[current]; b == 0 {
			return pendingWriteOffset, fmt.Errorf("internal error: sparseWriter invariant violation: %d@%d", b, current)
		}

		dataEnd := nonzeroSpanEnd(data, current)
		if dataEnd == current {
			return pendingWriteOffset, fmt.Errorf("internal error: sparseWriters nonzeroSpanEnd didnt advance")
		}
		zeroEnd := zeroSpanEnd(data, dataEnd) // possibly == dataEnd
		zeroSpanLength := zeroEnd - dataEnd
		if zeroEnd < len(data) && zeroSpanLength < zerosThreshold {
			// Too small a hole, keep going
			current = zeroEnd
			continue
		}

		// We have either reached the end, or found an interesting hole. Issue a write.
		chunk := data[pendingWriteOffset:dataEnd]
		if n, err := sw.file.Write(chunk); err != nil {
			if n < 0 || n > len(chunk) {
				// The underlying writer reported an impossible count; claim nothing from this chunk.
				n = 0
			}
			return pendingWriteOffset + n, err
		}
		if zeroEnd == len(data) {
			// Trailing zeroes (possibly none) stay pending until the next Write or Close.
			sw.pendingZeroes = int64(zeroSpanLength)
			return zeroEnd, nil
		}

		if err := sw.createHole(int64(zeroSpanLength)); err != nil {
			// Everything up to dataEnd was written; only the hole is missing.
			return dataEnd, err
		}
		pendingWriteOffset = zeroEnd
		current = zeroEnd
	}
}
// Close flushes the zeroes still pending at the end of the stream and closes the
// SparseWriter's underlying file.
//
// When enough zeroes are pending, all but the last one are skipped with
// createHole and only the final zero byte is written; otherwise the pending
// zeroes are written out as they are. (Presumably the last byte is always written
// because a seek on its own would not extend the file — the usual lseek
// behaviour.)
//
// The underlying file is closed exactly once, whether or not flushing succeeds.
// Any later call returns a "file is already closed" error. If flushing fails,
// that error is returned and the error from closing the file is dropped.
func (sw *sparseWriter) Close() error {
	if sw.file == nil {
		return errors.New("file is already closed")
	}
	// Every path below closes the underlying file, so drop the reference on every
	// path as well; otherwise a repeated Close would seek/write on, and close, a
	// file that has already been closed.
	defer func() { sw.file = nil }()

	if sw.pendingZeroes != 0 {
		if holeSize := sw.pendingZeroes - 1; holeSize >= zerosThreshold {
			if err := sw.createHole(holeSize); err != nil {
				_ = sw.file.Close() // best effort; the seek failure is the error worth reporting
				return err
			}
			sw.pendingZeroes -= holeSize
		}
		// Here 1 <= sw.pendingZeroes <= zerosThreshold, so the slice below stays in range.
		var zeroArray [zerosThreshold]byte
		if _, err := sw.file.Write(zeroArray[:sw.pendingZeroes]); err != nil {
			_ = sw.file.Close() // best effort; the write failure is the error worth reporting
			return err
		}
	}
	return sw.file.Close()
}