Add LFS Migration and Mirror (#14726)

* Implemented LFS client.

* Implemented scanning for pointer files.

* Implemented downloading of lfs files.

* Moved model-dependent code into services.

* Removed models dependency. Added TryReadPointerFromBuffer.

* Migrated code from service to module.

* Centralised storage creation.

* Removed dependency from models.

* Moved ContentStore into modules.

* Share structs between server and client.

* Moved method to services.

* Implemented lfs download on clone.

* Implemented LFS sync on clone and mirror update.

* Added form fields.

* Updated templates.

* Fixed condition.

* Use alternate endpoint.

* Added missing methods.

* Fixed typo and make linter happy.

* Detached pointer parser from gogit dependency.

* Fixed TestGetLFSRange test.

* Added context to support cancellation.

* Use ReadFull to probably read more data.

* Removed duplicated code from models.

* Moved scan implementation into pointer_scanner_nogogit.

* Changed method name.

* Added comments.

* Added more/specific log/error messages.

* Embedded lfs.Pointer into models.LFSMetaObject.

* Moved code from models to module.

* Moved code from models to module.

* Moved code from models to module.

* Reduced pointer usage.

* Embedded type.

* Use promoted fields.

* Fixed unexpected eof.

* Added unit tests.

* Implemented migration of local file paths.

* Show an error on invalid LFS endpoints.

* Hide settings if not used.

* Added LFS info to mirror struct.

* Fixed comment.

* Check LFS endpoint.

* Manage LFS settings from mirror page.

* Fixed selector.

* Adjusted selector.

* Added more tests.

* Added local filesystem migration test.

* Fixed typo.

* Reset settings.

* Added special windows path handling.

* Added unit test for HTTPClient.

* Added unit test for BasicTransferAdapter.

* Moved into util package.

* Test if LFS endpoint is allowed.

* Added support for git://

* Just use a static placeholder as the displayed url may be invalid.

* Reverted to original code.

* Added "Advanced Settings".

* Updated wording.

* Added discovery info link.

* Implemented suggestion.

* Fixed missing format parameter.

* Added Pointer.IsValid().

* Always remove model on error.

* Added suggestions.

* Use channel instead of array.

* Update routers/repo/migrate.go

* fmt

Signed-off-by: Andrew Thornton <art27@cantab.net>

Co-authored-by: zeripath <art27@cantab.net>
This commit is contained in:
KN4CK3R
2021-04-09 00:25:57 +02:00
committed by GitHub
parent f544414a23
commit c03e488e14
75 changed files with 2159 additions and 711 deletions

View File

@ -13,14 +13,15 @@ import (
"io"
"os"
"code.gitea.io/gitea/models"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/storage"
)
var (
errHashMismatch = errors.New("Content hash does not match OID")
errSizeMismatch = errors.New("Content size does not match")
// ErrHashMismatch occurs if the content hash does not match OID
ErrHashMismatch = errors.New("Content hash does not match OID")
// ErrSizeMismatch occurs if the content size does not match
ErrSizeMismatch = errors.New("Content size does not match")
)
// ErrRangeNotSatisfiable represents an error where the requested range is not satisfiable.
@ -28,61 +29,67 @@ type ErrRangeNotSatisfiable struct {
FromByte int64
}
func (err ErrRangeNotSatisfiable) Error() string {
return fmt.Sprintf("Requested range %d is not satisfiable", err.FromByte)
}
// IsErrRangeNotSatisfiable reports whether the dynamic type of err is
// ErrRangeNotSatisfiable. The check is made on err itself; wrapped errors are
// not unwrapped, and a nil err yields false.
func IsErrRangeNotSatisfiable(err error) bool {
	switch err.(type) {
	case ErrRangeNotSatisfiable:
		return true
	default:
		return false
	}
}
func (err ErrRangeNotSatisfiable) Error() string {
return fmt.Sprintf("Requested range %d is not satisfiable", err.FromByte)
}
// ContentStore provides a simple file system based storage.
type ContentStore struct {
// The object storage is embedded, so its methods are promoted onto
// ContentStore; the methods below call Open, Save, Delete and Stat through it.
storage.ObjectStorage
}
// NewContentStore returns a new ContentStore whose embedded object storage is
// storage.LFS.
func NewContentStore() *ContentStore {
	return &ContentStore{ObjectStorage: storage.LFS}
}
// Get takes a Meta object and retrieves the content from the store, returning
// it as an io.ReadSeekCloser.
func (s *ContentStore) Get(meta *models.LFSMetaObject) (storage.Object, error) {
f, err := s.Open(meta.RelativePath())
func (s *ContentStore) Get(pointer Pointer) (storage.Object, error) {
f, err := s.Open(pointer.RelativePath())
if err != nil {
log.Error("Whilst trying to read LFS OID[%s]: Unable to open Error: %v", meta.Oid, err)
log.Error("Whilst trying to read LFS OID[%s]: Unable to open Error: %v", pointer.Oid, err)
return nil, err
}
return f, err
}
// Put takes a Meta object and an io.Reader and writes the content to the store.
func (s *ContentStore) Put(meta *models.LFSMetaObject, r io.Reader) error {
p := meta.RelativePath()
func (s *ContentStore) Put(pointer Pointer, r io.Reader) error {
p := pointer.RelativePath()
// Wrap the provided reader with an inline hashing and size checker
wrappedRd := newHashingReader(meta.Size, meta.Oid, r)
wrappedRd := newHashingReader(pointer.Size, pointer.Oid, r)
// now pass the wrapped reader to Save - if there is a size mismatch or hash mismatch then
// the errors returned by the newHashingReader should percolate up to here
written, err := s.Save(p, wrappedRd, meta.Size)
written, err := s.Save(p, wrappedRd, pointer.Size)
if err != nil {
log.Error("Whilst putting LFS OID[%s]: Failed to copy to tmpPath: %s Error: %v", meta.Oid, p, err)
log.Error("Whilst putting LFS OID[%s]: Failed to copy to tmpPath: %s Error: %v", pointer.Oid, p, err)
return err
}
// This shouldn't happen but it is sensible to test
if written != meta.Size {
if written != pointer.Size {
if err := s.Delete(p); err != nil {
log.Error("Cleaning the LFS OID[%s] failed: %v", meta.Oid, err)
log.Error("Cleaning the LFS OID[%s] failed: %v", pointer.Oid, err)
}
return errSizeMismatch
return ErrSizeMismatch
}
return nil
}
// Exists returns true if the object exists in the content store.
func (s *ContentStore) Exists(meta *models.LFSMetaObject) (bool, error) {
_, err := s.ObjectStorage.Stat(meta.RelativePath())
func (s *ContentStore) Exists(pointer Pointer) (bool, error) {
_, err := s.ObjectStorage.Stat(pointer.RelativePath())
if err != nil {
if os.IsNotExist(err) {
return false, nil
@ -93,19 +100,25 @@ func (s *ContentStore) Exists(meta *models.LFSMetaObject) (bool, error) {
}
// Verify returns true if the object exists in the content store and size is correct.
func (s *ContentStore) Verify(meta *models.LFSMetaObject) (bool, error) {
p := meta.RelativePath()
func (s *ContentStore) Verify(pointer Pointer) (bool, error) {
p := pointer.RelativePath()
fi, err := s.ObjectStorage.Stat(p)
if os.IsNotExist(err) || (err == nil && fi.Size() != meta.Size) {
if os.IsNotExist(err) || (err == nil && fi.Size() != pointer.Size) {
return false, nil
} else if err != nil {
log.Error("Unable stat file: %s for LFS OID[%s] Error: %v", p, meta.Oid, err)
log.Error("Unable stat file: %s for LFS OID[%s] Error: %v", p, pointer.Oid, err)
return false, err
}
return true, nil
}
// ReadMetaObject creates a default ContentStore and asks it for the content
// that the given pointer refers to, returning the result as a reader together
// with any error reported by the store.
func ReadMetaObject(pointer Pointer) (io.ReadCloser, error) {
	return NewContentStore().Get(pointer)
}
type hashingReader struct {
internal io.Reader
currentSize int64
@ -127,12 +140,12 @@ func (r *hashingReader) Read(b []byte) (int, error) {
if err != nil && err == io.EOF {
if r.currentSize != r.expectedSize {
return n, errSizeMismatch
return n, ErrSizeMismatch
}
shaStr := hex.EncodeToString(r.hash.Sum(nil))
if shaStr != r.expectedHash {
return n, errHashMismatch
return n, ErrHashMismatch
}
}