CloudMigrations: Query Grafana Migration Status for status while the snapshot is in the cloud (#90314)

* implement querying gms for snapshot status

* add some documentation

* provide snapshot resources after snapshot is created

* add rate limiting to backend

* fix compilation error

* fix typo

* add unit tests

* finish merge

* lint

* swagger gen

* more testing

* remove duplicate test

* address a couple PR comments

* update switch statement to a map

* add timeouts to gms client through the http client

* remove extra whitespace

* put method back where it was so the PR is less confusing

* fix tests

* add todo

* fix final unit test
This commit is contained in:
Michael Mandrus
2024-07-15 09:22:57 -04:00
committed by GitHub
parent 5beaae8561
commit 542a1bf3ac
19 changed files with 532 additions and 125 deletions

View File

@ -104,13 +104,11 @@ func ProvideService(
s.objectStorage = objectstorage.NewS3()
if !cfg.CloudMigration.IsDeveloperMode {
// get GMS path from the config
domain, err := s.parseCloudMigrationConfig()
c, err := gmsclient.NewGMSClient(cfg)
if err != nil {
return nil, fmt.Errorf("config parse error: %w", err)
return nil, fmt.Errorf("initializing GMS client: %w", err)
}
s.gmsClient = gmsclient.NewGMSClient(domain)
s.gmsClient = c
s.gcomService = gcom.New(gcom.Config{ApiURL: cfg.GrafanaComAPIURL, Token: cfg.CloudMigration.GcomAPIToken})
} else {
s.gmsClient = gmsclient.NewInMemoryClient()
@ -474,10 +472,8 @@ func (s *Service) CreateSnapshot(ctx context.Context, signedInUser *user.SignedI
return nil, fmt.Errorf("fetching migration session for uid %s: %w", sessionUid, err)
}
// query gms to establish new snapshot
timeoutCtx, cancel := context.WithTimeout(ctx, s.cfg.CloudMigration.StartSnapshotTimeout)
defer cancel()
initResp, err := s.gmsClient.StartSnapshot(timeoutCtx, *session)
// query gms to establish new snapshot s.cfg.CloudMigration.StartSnapshotTimeout
initResp, err := s.gmsClient.StartSnapshot(ctx, *session)
if err != nil {
return nil, fmt.Errorf("initializing snapshot with GMS for session %s: %w", sessionUid, err)
}
@ -489,7 +485,7 @@ func (s *Service) CreateSnapshot(ctx context.Context, signedInUser *user.SignedI
snapshot := cloudmigration.CloudMigrationSnapshot{
UID: util.GenerateShortUID(),
SessionUID: sessionUid,
Status: cloudmigration.SnapshotStatusInitializing,
Status: cloudmigration.SnapshotStatusCreating,
EncryptionKey: initResp.EncryptionKey,
UploadURL: initResp.UploadURL,
GMSSnapshotUID: initResp.SnapshotID,
@ -532,24 +528,43 @@ func (s *Service) GetSnapshot(ctx context.Context, query cloudmigration.GetSnaps
// ask GMS for status if it's in the cloud
snapshotMeta, err := s.gmsClient.GetSnapshotStatus(ctx, *session, *snapshot)
if err != nil {
return nil, fmt.Errorf("error fetching snapshot status from GMS: sessionUid: %s, snapshotUid: %s", sessionUid, snapshotUid)
return snapshot, fmt.Errorf("error fetching snapshot status from GMS: sessionUid: %s, snapshotUid: %s", sessionUid, snapshotUid)
}
if snapshotMeta.Status == cloudmigration.SnapshotStatusFinished {
// we need to update the snapshot in our db before reporting anything finished to the client
if err := s.store.UpdateSnapshot(ctx, cloudmigration.UpdateSnapshotCmd{
UID: snapshot.UID,
Status: cloudmigration.SnapshotStatusFinished,
Resources: snapshotMeta.Resources,
}); err != nil {
return nil, fmt.Errorf("error updating snapshot status: %w", err)
}
if snapshotMeta.State == cloudmigration.SnapshotStateUnknown {
// If a status from Grafana Migration Service is unavailable, return the snapshot as-is
return snapshot, nil
}
localStatus, ok := gmsStateToLocalStatus[snapshotMeta.State]
if !ok {
s.log.Error("unexpected GMS snapshot state: %s", snapshotMeta.State)
return snapshot, nil
}
// We need to update the snapshot in our db before reporting anything
if err := s.store.UpdateSnapshot(ctx, cloudmigration.UpdateSnapshotCmd{
UID: snapshot.UID,
Status: localStatus,
Resources: snapshotMeta.Results,
}); err != nil {
return nil, fmt.Errorf("error updating snapshot status: %w", err)
}
snapshot.Status = localStatus
snapshot.Resources = append(snapshot.Resources, snapshotMeta.Results...)
}
return snapshot, nil
}
var gmsStateToLocalStatus map[cloudmigration.SnapshotState]cloudmigration.SnapshotStatus = map[cloudmigration.SnapshotState]cloudmigration.SnapshotStatus{
cloudmigration.SnapshotStateInitialized: cloudmigration.SnapshotStatusPendingProcessing, // GMS has not yet received a notification for the data
cloudmigration.SnapshotStateProcessing: cloudmigration.SnapshotStatusProcessing, // GMS has received a notification and is migrating the data
cloudmigration.SnapshotStateFinished: cloudmigration.SnapshotStatusFinished, // GMS has completed the migration - all resources were attempted to be migrated
cloudmigration.SnapshotStateCanceled: cloudmigration.SnapshotStatusCanceled, // GMS has processed a cancelation request. Snapshot cancelation is not supported yet.
cloudmigration.SnapshotStateError: cloudmigration.SnapshotStatusError, // Something unrecoverable has occurred in the migration process.
}
func (s *Service) GetSnapshotList(ctx context.Context, query cloudmigration.ListSnapshotsQuery) ([]cloudmigration.CloudMigrationSnapshot, error) {
ctx, span := s.tracer.Start(ctx, "CloudMigrationService.GetSnapshotList")
defer span.End()
@ -599,15 +614,3 @@ func (s *Service) UploadSnapshot(ctx context.Context, sessionUid string, snapsho
func (s *Service) CancelSnapshot(ctx context.Context, sessionUid string, snapshotUid string) error {
panic("not implemented")
}
func (s *Service) parseCloudMigrationConfig() (string, error) {
if s.cfg == nil {
return "", fmt.Errorf("cfg cannot be nil")
}
section := s.cfg.Raw.Section("cloud_migration")
domain := section.Key("domain").MustString("")
if domain == "" {
return "", fmt.Errorf("cloudmigration domain not set")
}
return domain, nil
}