add clean untagged blobs in gc job (#11248)

Fixes #11190: delete all non-referenced blobs of each project in the GC job, so that the quota
can be released.

Signed-off-by: wang yan <wangyan@vmware.com>
This commit is contained in:
Wang Yan 2020-04-01 18:01:46 +08:00 committed by GitHub
parent b0d0b292cd
commit 4594d58ba8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 132 additions and 8 deletions

View File

@ -16,9 +16,12 @@ package gc
import ( import (
"fmt" "fmt"
"github.com/goharbor/harbor/src/common/models"
"github.com/goharbor/harbor/src/controller/artifact" "github.com/goharbor/harbor/src/controller/artifact"
"github.com/goharbor/harbor/src/controller/project"
"github.com/goharbor/harbor/src/lib/q" "github.com/goharbor/harbor/src/lib/q"
"github.com/goharbor/harbor/src/pkg/artifactrash" "github.com/goharbor/harbor/src/pkg/artifactrash"
"github.com/goharbor/harbor/src/pkg/blob"
"os" "os"
"time" "time"
@ -63,6 +66,8 @@ const (
type GarbageCollector struct { type GarbageCollector struct {
artCtl artifact.Controller artCtl artifact.Controller
artrashMgr artifactrash.Manager artrashMgr artifactrash.Manager
blobMgr blob.Manager
projectCtl project.Controller
registryCtlClient client.Client registryCtlClient client.Client
logger logger.Interface logger logger.Interface
cfgMgr *config.CfgManager cfgMgr *config.CfgManager
@ -126,6 +131,7 @@ func (gc *GarbageCollector) Run(ctx job.Context, params job.Parameters) error {
gc.logger.Errorf("failed to get gc result: %v", err) gc.logger.Errorf("failed to get gc result: %v", err)
return err return err
} }
gc.removeUntaggedBlobs(ctx)
if err := gc.cleanCache(); err != nil { if err := gc.cleanCache(); err != nil {
return err return err
} }
@ -136,11 +142,15 @@ func (gc *GarbageCollector) Run(ctx job.Context, params job.Parameters) error {
func (gc *GarbageCollector) init(ctx job.Context, params job.Parameters) error { func (gc *GarbageCollector) init(ctx job.Context, params job.Parameters) error {
regCtlInit() regCtlInit()
gc.registryCtlClient = registryctl.RegistryCtlClient
gc.logger = ctx.GetLogger() gc.logger = ctx.GetLogger()
gc.artCtl = artifact.Ctl // UT will use the mock client, ctl and mgr
gc.artrashMgr = artifactrash.NewManager() if os.Getenv("UTTEST") != "true" {
gc.registryCtlClient = registryctl.RegistryCtlClient
gc.artCtl = artifact.Ctl
gc.artrashMgr = artifactrash.NewManager()
gc.blobMgr = blob.NewManager()
gc.projectCtl = project.Ctl
}
if err := gc.registryCtlClient.Health(); err != nil { if err := gc.registryCtlClient.Health(); err != nil {
gc.logger.Errorf("failed to start gc as registry controller is unreachable: %v", err) gc.logger.Errorf("failed to start gc as registry controller is unreachable: %v", err)
return err return err
@ -250,3 +260,54 @@ func (gc *GarbageCollector) deleteCandidates(ctx job.Context) error {
flushTrash = true flushTrash = true
return nil return nil
} }
// removeUntaggedBlobs cleans the untagged blobs in each project; these blobs are
// not referenced by any manifest and will be cleaned by GC.
//
// Projects are listed page by page in a background goroutine and streamed over
// a buffered channel. For every project received, all of its blobs are listed
// and handed to blobMgr.CleanupAssociationsForProject.
//
// The method is best-effort: it returns nothing, failures are only logged, and
// a project whose blobs cannot be listed or cleaned is skipped. If listing the
// projects itself fails, the projects already streamed are still processed and
// the remaining pages are not fetched.
func (gc *GarbageCollector) removeUntaggedBlobs(ctx job.Context) {
	// get all projects
	projects := func(chunkSize int) <-chan *models.Project {
		ch := make(chan *models.Project, chunkSize)

		go func() {
			// Closing the channel is what terminates the consumer loop below,
			// on the normal path as well as on a listing error.
			defer close(ch)

			params := &models.ProjectQueryParam{
				Pagination: &models.Pagination{Page: 1, Size: int64(chunkSize)},
			}

			for {
				results, err := gc.projectCtl.List(ctx.SystemContext(), params, project.Metadata(false))
				if err != nil {
					gc.logger.Errorf("list projects failed, page: %v, error: %v", params.Pagination.Page, err)
					return
				}

				for _, p := range results {
					ch <- p
				}

				// A page shorter than the requested size is the last one.
				if len(results) < chunkSize {
					return
				}
				params.Pagination.Page++
			}
		}()

		return ch
	}(50)

	// The loop variable is intentionally not called "project": that name would
	// shadow the imported project package used above.
	for p := range projects {
		blobs, err := gc.blobMgr.List(ctx.SystemContext(), blob.ListParams{
			ProjectID: p.ProjectID,
		})
		if err != nil {
			gc.logger.Errorf("failed to get blobs of project %d, %v", p.ProjectID, err)
			continue
		}

		if err := gc.blobMgr.CleanupAssociationsForProject(ctx.SystemContext(), p.ProjectID, blobs); err != nil {
			gc.logger.Errorf("failed to clean untagged blobs of project %d, %v", p.ProjectID, err)
		}
	}
}

View File

@ -2,15 +2,19 @@ package gc
import ( import (
"github.com/goharbor/harbor/src/common/config" "github.com/goharbor/harbor/src/common/config"
"github.com/goharbor/harbor/src/common/models"
commom_regctl "github.com/goharbor/harbor/src/common/registryctl" commom_regctl "github.com/goharbor/harbor/src/common/registryctl"
"github.com/goharbor/harbor/src/pkg/artifact" "github.com/goharbor/harbor/src/pkg/artifact"
"github.com/goharbor/harbor/src/pkg/artifactrash/model" "github.com/goharbor/harbor/src/pkg/artifactrash/model"
artifacttesting "github.com/goharbor/harbor/src/testing/controller/artifact" artifacttesting "github.com/goharbor/harbor/src/testing/controller/artifact"
projecttesting "github.com/goharbor/harbor/src/testing/controller/project"
mockjobservice "github.com/goharbor/harbor/src/testing/jobservice" mockjobservice "github.com/goharbor/harbor/src/testing/jobservice"
"github.com/goharbor/harbor/src/testing/mock" "github.com/goharbor/harbor/src/testing/mock"
trashtesting "github.com/goharbor/harbor/src/testing/pkg/artifactrash" trashtesting "github.com/goharbor/harbor/src/testing/pkg/artifactrash"
"github.com/goharbor/harbor/src/testing/pkg/blob"
"github.com/goharbor/harbor/src/testing/registryctl" "github.com/goharbor/harbor/src/testing/registryctl"
"github.com/stretchr/testify/suite" "github.com/stretchr/testify/suite"
"os"
"testing" "testing"
) )
@ -19,6 +23,8 @@ type gcTestSuite struct {
artifactCtl *artifacttesting.Controller artifactCtl *artifacttesting.Controller
artrashMgr *trashtesting.FakeManager artrashMgr *trashtesting.FakeManager
registryCtlClient *registryctl.Mockclient registryCtlClient *registryctl.Mockclient
projectCtl *projecttesting.Controller
blobMgr *blob.Manager
regCtlInit func() regCtlInit func()
setReadOnly func(cfgMgr *config.CfgManager, switcher bool) error setReadOnly func(cfgMgr *config.CfgManager, switcher bool) error
@ -29,6 +35,8 @@ func (suite *gcTestSuite) SetupTest() {
suite.artifactCtl = &artifacttesting.Controller{} suite.artifactCtl = &artifacttesting.Controller{}
suite.artrashMgr = &trashtesting.FakeManager{} suite.artrashMgr = &trashtesting.FakeManager{}
suite.registryCtlClient = &registryctl.Mockclient{} suite.registryCtlClient = &registryctl.Mockclient{}
suite.blobMgr = &blob.Manager{}
suite.projectCtl = &projecttesting.Controller{}
regCtlInit = func() { commom_regctl.RegistryCtlClient = suite.registryCtlClient } regCtlInit = func() { commom_regctl.RegistryCtlClient = suite.registryCtlClient }
setReadOnly = func(cfgMgr *config.CfgManager, switcher bool) error { return nil } setReadOnly = func(cfgMgr *config.CfgManager, switcher bool) error { return nil }
@ -72,13 +80,47 @@ func (suite *gcTestSuite) TestDeleteCandidates() {
suite.Nil(gc.deleteCandidates(ctx)) suite.Nil(gc.deleteCandidates(ctx))
} }
// TestRemoveUntaggedBlobs runs removeUntaggedBlobs against mocked project and
// blob dependencies that return one project and one blob, and checks that the
// call completes without panicking.
func (suite *gcTestSuite) TestRemoveUntaggedBlobs() {
	// Fixtures returned by the mocked controller and manager.
	projects := []*models.Project{
		{
			ProjectID: 1234,
			Name:      "test GC",
		},
	}
	blobs := []*models.Blob{
		{
			ID:     1234,
			Digest: "sha256:1234",
			Size:   1234,
		},
	}

	jobCtx := &mockjobservice.MockJobContext{}
	jobCtx.On("GetLogger").Return(&mockjobservice.MockJobLogger{})

	mock.OnAnything(suite.projectCtl, "List").Return(projects, nil)
	mock.OnAnything(suite.blobMgr, "List").Return(blobs, nil)
	mock.OnAnything(suite.blobMgr, "CleanupAssociationsForProject").Return(nil)

	// Only the dependencies that removeUntaggedBlobs touches on the success
	// path are wired in.
	collector := &GarbageCollector{
		projectCtl: suite.projectCtl,
		blobMgr:    suite.blobMgr,
	}

	suite.NotPanics(func() {
		collector.removeUntaggedBlobs(jobCtx)
	})
}
func (suite *gcTestSuite) TestInit() { func (suite *gcTestSuite) TestInit() {
ctx := &mockjobservice.MockJobContext{} ctx := &mockjobservice.MockJobContext{}
logger := &mockjobservice.MockJobLogger{} logger := &mockjobservice.MockJobLogger{}
mock.OnAnything(ctx, "Get").Return("core url", true) mock.OnAnything(ctx, "Get").Return("core url", true)
ctx.On("GetLogger").Return(logger) ctx.On("GetLogger").Return(logger)
gc := &GarbageCollector{} gc := &GarbageCollector{
registryCtlClient: suite.registryCtlClient,
}
params := map[string]interface{}{ params := map[string]interface{}{
"delete_untagged": true, "delete_untagged": true,
"redis_url_reg": "redis url", "redis_url_reg": "redis url",
@ -123,10 +165,30 @@ func (suite *gcTestSuite) TestRun() {
suite.artifactCtl.On("Delete").Return(nil) suite.artifactCtl.On("Delete").Return(nil)
suite.artrashMgr.On("Filter").Return([]model.ArtifactTrash{}, nil) suite.artrashMgr.On("Filter").Return([]model.ArtifactTrash{}, nil)
mock.OnAnything(suite.projectCtl, "List").Return([]*models.Project{
{
ProjectID: 12345,
Name: "test GC",
},
}, nil)
mock.OnAnything(suite.blobMgr, "List").Return([]*models.Blob{
{
ID: 12345,
Digest: "sha256:12345",
Size: 12345,
},
}, nil)
mock.OnAnything(suite.blobMgr, "CleanupAssociationsForProject").Return(nil)
gc := &GarbageCollector{ gc := &GarbageCollector{
artCtl: suite.artifactCtl, artCtl: suite.artifactCtl,
artrashMgr: suite.artrashMgr, artrashMgr: suite.artrashMgr,
cfgMgr: config.NewInMemoryManager(), cfgMgr: config.NewInMemoryManager(),
projectCtl: suite.projectCtl,
blobMgr: suite.blobMgr,
registryCtlClient: suite.registryCtlClient,
} }
params := map[string]interface{}{ params := map[string]interface{}{
"delete_untagged": false, "delete_untagged": false,
@ -138,5 +200,6 @@ func (suite *gcTestSuite) TestRun() {
} }
func TestGCTestSuite(t *testing.T) { func TestGCTestSuite(t *testing.T) {
os.Setenv("UTTEST", "true")
suite.Run(t, &gcTestSuite{}) suite.Run(t, &gcTestSuite{})
} }