From 8a0cd99473cb64fd6252ac15096e931038845910 Mon Sep 17 00:00:00 2001 From: Wang Yan Date: Fri, 4 Jun 2021 15:56:53 +0800 Subject: [PATCH] fixes 13976 (#15047) Fixes #13976 for the quota exceed case, gc will print the untagged blobs for dry-run Signed-off-by: Wang Yan --- .../job/impl/gc/garbage_collection.go | 30 ++++++--- .../job/impl/gc/garbage_collection_test.go | 2 +- src/pkg/blob/manager.go | 7 +++ src/pkg/blob/manager_test.go | 61 +++++++++++++++++++ src/testing/pkg/blob/manager.go | 23 +++++++ 5 files changed, 114 insertions(+), 9 deletions(-) diff --git a/src/jobservice/job/impl/gc/garbage_collection.go b/src/jobservice/job/impl/gc/garbage_collection.go index c3e64b6f7..07d38b48a 100644 --- a/src/jobservice/job/impl/gc/garbage_collection.go +++ b/src/jobservice/job/impl/gc/garbage_collection.go @@ -189,14 +189,15 @@ func (gc *GarbageCollector) mark(ctx job.Context) error { // get gc candidates, and set the repositories. // AS the reference count is calculated by joining table project_blob and blob, here needs to call removeUntaggedBlobs to remove these non-used blobs from table project_blob firstly. - if !gc.dryRun { - gc.removeUntaggedBlobs(ctx) - } + untaggedBlobs := gc.markOrSweepUntaggedBlobs(ctx) blobs, err := gc.blobMgr.UselessBlobs(ctx.SystemContext(), gc.timeWindowHours) if err != nil { gc.logger.Errorf("failed to get gc candidate: %v", err) return err } + if len(untaggedBlobs) != 0 { + blobs = append(blobs, untaggedBlobs...) + } if len(blobs) == 0 { gc.logger.Info("no need to execute GC as there is no non referenced artifacts.") return nil @@ -415,8 +416,11 @@ func (gc *GarbageCollector) deletedArt(ctx job.Context) (map[string][]model.Arti return artMap, nil } -// clean the untagged blobs in each project, these blobs are not referenced by any manifest and will be cleaned by GC -func (gc *GarbageCollector) removeUntaggedBlobs(ctx job.Context) { +// mark or sweep the untagged blobs in each project, these blobs are not referenced by any manifest and will be cleaned by GC +// * dry-run, find and return the untagged blobs +// * non dry-run, remove the reference of the untagged blobs +func (gc *GarbageCollector) markOrSweepUntaggedBlobs(ctx job.Context) []*blob_models.Blob { + var untaggedBlobs []*blob_models.Blob for result := range project.ListAll(ctx.SystemContext(), 50, nil, project.Metadata(false)) { if result.Error != nil { gc.logger.Errorf("remove untagged blobs for all projects got error: %v", result.Error) @@ -451,9 +455,18 @@ func (gc *GarbageCollector) removeUntaggedBlobs(ctx job.Context) { gc.logger.Errorf("failed to get blobs of project, %v", err) break } - if err := gc.blobMgr.CleanupAssociationsForProject(ctx.SystemContext(), p.ProjectID, blobs); err != nil { - gc.logger.Errorf("failed to clean untagged blobs of project, %v", err) - break + if gc.dryRun { + unassociated, err := gc.blobMgr.FindBlobsShouldUnassociatedWithProject(ctx.SystemContext(), p.ProjectID, blobs) + if err != nil { + gc.logger.Errorf("failed to find untagged blobs of project, %v", err) + break + } + untaggedBlobs = append(untaggedBlobs, unassociated...) + } else { + if err := gc.blobMgr.CleanupAssociationsForProject(ctx.SystemContext(), p.ProjectID, blobs); err != nil { + gc.logger.Errorf("failed to clean untagged blobs of project, %v", err) + break + } } if len(blobs) < ps { break @@ -461,6 +474,7 @@ func (gc *GarbageCollector) removeUntaggedBlobs(ctx job.Context) { lastBlobID = blobs[len(blobs)-1].ID } } + return untaggedBlobs } // markDeleteFailed set the blob status to StatusDeleteFailed diff --git a/src/jobservice/job/impl/gc/garbage_collection_test.go b/src/jobservice/job/impl/gc/garbage_collection_test.go index 77e7d53f0..340af8d19 100644 --- a/src/jobservice/job/impl/gc/garbage_collection_test.go +++ b/src/jobservice/job/impl/gc/garbage_collection_test.go @@ -139,7 +139,7 @@ func (suite *gcTestSuite) TestRemoveUntaggedBlobs() { } suite.NotPanics(func() { - gc.removeUntaggedBlobs(ctx) + gc.markOrSweepUntaggedBlobs(ctx) }) } diff --git a/src/pkg/blob/manager.go b/src/pkg/blob/manager.go index cda6cc6d4..795a9b8fc 100644 --- a/src/pkg/blob/manager.go +++ b/src/pkg/blob/manager.go @@ -53,6 +53,9 @@ type Manager interface { // CleanupAssociationsForProject remove unneeded associations between blobs and project CleanupAssociationsForProject(ctx context.Context, projectID int64, blobs []*Blob) error + // FindBlobsShouldUnassociatedWithProject filter the blobs which should not be associated with the project + FindBlobsShouldUnassociatedWithProject(ctx context.Context, projectID int64, blobs []*models.Blob) ([]*models.Blob, error) + // Get get blob by digest Get(ctx context.Context, digest string) (*Blob, error) @@ -114,6 +117,10 @@ func (m *manager) CleanupAssociationsForProject(ctx context.Context, projectID i return m.dao.DeleteProjectBlob(ctx, projectID, blobIDs...) } +func (m *manager) FindBlobsShouldUnassociatedWithProject(ctx context.Context, projectID int64, blobs []*models.Blob) ([]*models.Blob, error) { + return m.dao.FindBlobsShouldUnassociatedWithProject(ctx, projectID, blobs) +} + func (m *manager) Get(ctx context.Context, digest string) (*Blob, error) { return m.dao.GetBlobByDigest(ctx, digest) } diff --git a/src/pkg/blob/manager_test.go b/src/pkg/blob/manager_test.go index 7a98bc40e..ea123f15e 100644 --- a/src/pkg/blob/manager_test.go +++ b/src/pkg/blob/manager_test.go @@ -192,6 +192,67 @@ func (suite *ManagerTestSuite) TestCleanupAssociationsForProject() { }) } +func (suite *ManagerTestSuite) TestFindBlobsShouldUnassociatedWithProject() { + ctx := suite.Context() + + suite.WithProject(func(projectID int64, projectName string) { + artifact1 := suite.DigestString() + artifact2 := suite.DigestString() + + sql := `INSERT INTO artifact ("type", media_type, manifest_media_type, digest, project_id, repository_id, repository_name) VALUES ('image', 'media_type', 'manifest_media_type', ?, ?, ?, 'library/hello-world')` + suite.ExecSQL(sql, artifact1, projectID, 11) + suite.ExecSQL(sql, artifact2, projectID, 11) + + defer suite.ExecSQL(`DELETE FROM artifact WHERE project_id = ?`, projectID) + + digest1 := suite.DigestString() + digest2 := suite.DigestString() + digest3 := suite.DigestString() + digest4 := suite.DigestString() + digest5 := suite.DigestString() + + var ol q.OrList + blobDigests := []string{digest1, digest2, digest3, digest4, digest5} + for _, digest := range blobDigests { + blobID, err := Mgr.Create(ctx, digest, "", 100) + if suite.Nil(err) { + Mgr.AssociateWithProject(ctx, blobID, projectID) + } + ol.Values = append(ol.Values, digest) + } + + blobs, err := Mgr.List(ctx, q.New(q.KeyWords{"digest": &ol})) + suite.Nil(err) + suite.Len(blobs, 5) + + for _, digest := range []string{digest1, digest2, digest3} { + Mgr.AssociateWithArtifact(ctx, digest, artifact1) + } + + for _, digest := range blobDigests { + Mgr.AssociateWithArtifact(ctx, digest, artifact2) + } + + { + results, err := Mgr.FindBlobsShouldUnassociatedWithProject(ctx, projectID, blobs) + suite.Nil(err) + suite.Len(results, 0) + } + + suite.ExecSQL(`DELETE FROM artifact WHERE digest = ?`, artifact2) + + { + results, err := Mgr.FindBlobsShouldUnassociatedWithProject(ctx, projectID, blobs) + suite.Nil(err) + if suite.Len(results, 2) { + suite.Contains([]string{results[0].Digest, results[1].Digest}, digest4) + suite.Contains([]string{results[0].Digest, results[1].Digest}, digest5) + } + + } + }) +} + func (suite *ManagerTestSuite) TestGet() { ctx := suite.Context() diff --git a/src/testing/pkg/blob/manager.go b/src/testing/pkg/blob/manager.go index 3b7f1b143..988e3bc98 100644 --- a/src/testing/pkg/blob/manager.go +++ b/src/testing/pkg/blob/manager.go @@ -163,6 +163,29 @@ func (_m *Manager) Delete(ctx context.Context, id int64) error { return r0 } +// FindBlobsShouldUnassociatedWithProject provides a mock function with given fields: ctx, projectID, blobs +func (_m *Manager) FindBlobsShouldUnassociatedWithProject(ctx context.Context, projectID int64, blobs []*models.Blob) ([]*models.Blob, error) { + ret := _m.Called(ctx, projectID, blobs) + + var r0 []*models.Blob + if rf, ok := ret.Get(0).(func(context.Context, int64, []*models.Blob) []*models.Blob); ok { + r0 = rf(ctx, projectID, blobs) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).([]*models.Blob) + } + } + + var r1 error + if rf, ok := ret.Get(1).(func(context.Context, int64, []*models.Blob) error); ok { + r1 = rf(ctx, projectID, blobs) + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} + // Get provides a mock function with given fields: ctx, digest func (_m *Manager) Get(ctx context.Context, digest string) (*models.Blob, error) { ret := _m.Called(ctx, digest)