Refactor scan job service to make it easy to add new scan types (#20177)

Signed-off-by: stonezdj <daojunz@vmware.com>
Signed-off-by: stonezdj(Daojun Zhang) <stonezdj@gmail.com>
Co-authored-by: stonezdj <daojunz@vmware.com>
This commit is contained in:
stonezdj(Daojun Zhang) 2024-04-09 16:05:30 +08:00 committed by GitHub
parent ff1a5056d7
commit be648ea47f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
14 changed files with 303 additions and 58 deletions

View File

@ -25,7 +25,6 @@ import (
"github.com/google/uuid"
"github.com/goharbor/harbor/src/common/rbac"
ar "github.com/goharbor/harbor/src/controller/artifact"
"github.com/goharbor/harbor/src/controller/event/operator"
"github.com/goharbor/harbor/src/controller/robot"
@ -91,6 +90,7 @@ type launchScanJobParam struct {
Artifact *ar.Artifact
Tag string
Reports []*scan.Report
Type string
}
// basicController is default implementation of api.Controller interface
@ -287,6 +287,7 @@ func (bc *basicController) Scan(ctx context.Context, artifact *ar.Artifact, opti
Artifact: art,
Tag: tag,
Reports: reports,
Type: opts.GetScanType(),
})
}
}
@ -912,7 +913,7 @@ func (bc *basicController) GetVulnerable(ctx context.Context, artifact *ar.Artif
}
// makeRobotAccount creates a robot account based on the arguments for scanning.
func (bc *basicController) makeRobotAccount(ctx context.Context, projectID int64, repository string, registration *scanner.Registration) (*robot.Robot, error) {
func (bc *basicController) makeRobotAccount(ctx context.Context, projectID int64, repository string, registration *scanner.Registration, permission []*types.Policy) (*robot.Robot, error) {
// Use uuid as name to avoid duplicated entries.
UUID, err := bc.uuid()
if err != nil {
@ -934,16 +935,7 @@ func (bc *basicController) makeRobotAccount(ctx context.Context, projectID int64
{
Kind: "project",
Namespace: projectName,
Access: []*types.Policy{
{
Resource: rbac.ResourceRepository,
Action: rbac.ActionPull,
},
{
Resource: rbac.ResourceRepository,
Action: rbac.ActionScannerPull,
},
},
Access: permission,
},
},
}
@ -980,7 +972,12 @@ func (bc *basicController) launchScanJob(ctx context.Context, param *launchScanJ
return errors.Wrap(err, "scan controller: launch scan job")
}
robot, err := bc.makeRobotAccount(ctx, param.Artifact.ProjectID, param.Artifact.RepositoryName, param.Registration)
// Get Scanner handler by scan type to separate the scan logic for different scan types
handler := sca.GetScanHandler(param.Type)
if handler == nil {
return fmt.Errorf("failed to get scan handler, type is %v", param.Type)
}
robot, err := bc.makeRobotAccount(ctx, param.Artifact.ProjectID, param.Artifact.RepositoryName, param.Registration, handler.RequiredPermissions())
if err != nil {
return errors.Wrap(err, "scan controller: launch scan job")
}

View File

@ -45,6 +45,7 @@ import (
"github.com/goharbor/harbor/src/pkg/scan/dao/scanner"
v1 "github.com/goharbor/harbor/src/pkg/scan/rest/v1"
"github.com/goharbor/harbor/src/pkg/scan/vuln"
_ "github.com/goharbor/harbor/src/pkg/scan/vulnerability"
"github.com/goharbor/harbor/src/pkg/task"
artifacttesting "github.com/goharbor/harbor/src/testing/controller/artifact"
robottesting "github.com/goharbor/harbor/src/testing/controller/robot"

View File

@ -14,6 +14,8 @@
package scan
import v1 "github.com/goharbor/harbor/src/pkg/scan/rest/v1"
// Options keep the settings/configurations for scanning.
type Options struct {
ExecutionID int64 // The execution id to scan artifact
@ -24,7 +26,7 @@ type Options struct {
// GetScanType returns the scan type of the options.
// For backward compatibility an empty ScanType is treated as a vulnerability
// scan. The default is also written back to the receiver, so later reads of
// o.ScanType observe it.
func (o *Options) GetScanType() string {
	if len(o.ScanType) == 0 {
		o.ScanType = v1.ScanTypeVulnerability
	}
	return o.ScanType
}

View File

@ -70,6 +70,7 @@ import (
"github.com/goharbor/harbor/src/pkg/oidc"
"github.com/goharbor/harbor/src/pkg/scan"
"github.com/goharbor/harbor/src/pkg/scan/dao/scanner"
_ "github.com/goharbor/harbor/src/pkg/scan/vulnerability"
pkguser "github.com/goharbor/harbor/src/pkg/user"
"github.com/goharbor/harbor/src/pkg/version"
"github.com/goharbor/harbor/src/server"

View File

@ -36,6 +36,7 @@ import (
_ "github.com/goharbor/harbor/src/pkg/accessory/model/subject"
_ "github.com/goharbor/harbor/src/pkg/config/inmemory"
_ "github.com/goharbor/harbor/src/pkg/config/rest"
_ "github.com/goharbor/harbor/src/pkg/scan/vulnerability"
)
func main() {

44
src/pkg/scan/handler.go Normal file
View File

@ -0,0 +1,44 @@
// Copyright Project Harbor Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package scan
import (
"time"
"github.com/goharbor/harbor/src/jobservice/job"
"github.com/goharbor/harbor/src/pkg/permission/types"
"github.com/goharbor/harbor/src/pkg/robot/model"
"github.com/goharbor/harbor/src/pkg/scan/dao/scan"
v1 "github.com/goharbor/harbor/src/pkg/scan/rest/v1"
)
// handlerRegistry maps a scan request type to the Handler registered for it.
// It is written by RegisterScanHanlder and read by GetScanHandler.
// NOTE(review): access is not synchronized; the registration visible in this
// change happens from a package init function — confirm no runtime writers exist.
var handlerRegistry = map[string]Handler{}
// RegisterScanHanlder registers handler as the scan handler for requestType.
// Registering the same requestType again replaces the previously stored handler.
// NOTE(review): "Hanlder" is a misspelling of "Handler"; the name is kept
// because callers reference it, renaming requires updating them together.
func RegisterScanHanlder(requestType string, handler Handler) {
handlerRegistry[requestType] = handler
}
// GetScanHandler looks up the Handler registered for requestType.
// It returns nil when no handler has been registered for that type, so
// callers must check the result before using it.
func GetScanHandler(requestType string) Handler {
	if registered, found := handlerRegistry[requestType]; found {
		return registered
	}
	return nil
}
// Handler holds the scan-type specific parts of a scan job. It can be
// implemented by different scan types, such as vulnerability or sbom, and is
// looked up by scan type through GetScanHandler.
type Handler interface {
// RequiredPermissions returns the access policies for the robot account
// that is created to run a scan of this type.
RequiredPermissions() []*types.Policy
// PostScan defines the operation after scan. It receives the scan request,
// the stored report placeholder, the raw report fetched from the scanner,
// the job start time and the robot account, and returns the report data
// to be saved for the placeholder.
PostScan(ctx job.Context, sr *v1.ScanRequest, rp *scan.Report, rawReport string, startTime time.Time, robot *model.Robot) (string, error)
}

View File

@ -16,6 +16,7 @@ package scan
import (
"bytes"
"context"
"encoding/base64"
"encoding/json"
"fmt"
@ -34,8 +35,8 @@ import (
"github.com/goharbor/harbor/src/lib/config"
"github.com/goharbor/harbor/src/lib/errors"
"github.com/goharbor/harbor/src/pkg/robot/model"
"github.com/goharbor/harbor/src/pkg/scan/dao/scan"
"github.com/goharbor/harbor/src/pkg/scan/dao/scanner"
"github.com/goharbor/harbor/src/pkg/scan/postprocessors"
"github.com/goharbor/harbor/src/pkg/scan/report"
v1 "github.com/goharbor/harbor/src/pkg/scan/rest/v1"
)
@ -145,6 +146,7 @@ func (j *Job) Validate(params job.Parameters) error {
func (j *Job) Run(ctx job.Context, params job.Parameters) error {
// Get logger
myLogger := ctx.GetLogger()
startTime := time.Now()
// shouldStop checks if the job should be stopped
shouldStop := func() bool {
@ -160,6 +162,11 @@ func (j *Job) Run(ctx job.Context, params job.Parameters) error {
r, _ := extractRegistration(params)
req, _ := ExtractScanReq(params)
mimeTypes, _ := extractMimeTypes(params)
scanType := v1.ScanTypeVulnerability
if len(req.RequestType) > 0 {
scanType = req.RequestType[0].Type
}
handler := GetScanHandler(scanType)
// Print related infos to log
printJSONParameter(JobParamRegistration, removeRegistrationAuthInfo(r), myLogger)
@ -235,30 +242,19 @@ func (j *Job) Run(ctx job.Context, params job.Parameters) error {
}
myLogger.Debugf("check scan report for mime %s at %s", m, t.Format("2006/01/02 15:04:05"))
rawReport, err := client.GetScanReport(resp.ID, m)
rawReport, err := fetchScanReportFromScanner(client, resp.ID, m)
if err != nil {
// Not ready yet
if notReadyErr, ok := err.(*v1.ReportNotReadyError); ok {
// Reset to the new check interval
tm.Reset(time.Duration(notReadyErr.RetryAfter) * time.Second)
myLogger.Infof("Report with mime type %s is not ready yet, retry after %d seconds", m, notReadyErr.RetryAfter)
continue
}
errs[i] = errors.Wrap(err, fmt.Sprintf("check scan report with mime type %s", m))
errs[i] = errors.Wrap(err, fmt.Sprintf("scan job: fetch scan report, mimetype %v", m))
return
}
// Make sure the data is aligned with the v1 spec.
if _, err = report.ResolveData(m, []byte(rawReport)); err != nil {
errs[i] = errors.Wrap(err, "scan job: resolve report data")
return
}
rawReports[i] = rawReport
return
case <-ctx.SystemContext().Done():
// Terminated by system
@ -292,33 +288,19 @@ func (j *Job) Run(ctx job.Context, params job.Parameters) error {
// Log error to the job log
if err != nil {
myLogger.Error(err)
return err
}
for i, mimeType := range mimeTypes {
reports, err := report.Mgr.GetBy(ctx.SystemContext(), req.Artifact.Digest, r.UUID, []string{mimeType})
rp, err := getReportPlaceholder(ctx.SystemContext(), req.Artifact.Digest, r.UUID, mimeType, myLogger)
if err != nil {
myLogger.Error("Failed to get report for artifact %s of mimetype %s, error %v", req.Artifact.Digest, mimeType, err)
return err
}
myLogger.Debugf("Converting report ID %s to the new V2 schema", rp.UUID)
if len(reports) == 0 {
myLogger.Error("No report found for artifact %s of mimetype %s, error %v", req.Artifact.Digest, mimeType, err)
return errors.NotFoundError(nil).WithMessage("no report found to update data")
}
rp := reports[0]
logger.Debugf("Converting report ID %s to the new V2 schema", rp.UUID)
// use a new ormer here to use the short db connection
_, reportData, err := postprocessors.Converter.ToRelationalSchema(ctx.SystemContext(), rp.UUID, rp.RegistrationUUID, rp.Digest, rawReports[i])
reportData, err := handler.PostScan(ctx, req, rp, rawReports[i], startTime, robotAccount)
if err != nil {
myLogger.Errorf("Failed to convert vulnerability data to new schema for report %s, error %v", rp.UUID, err)
return err
}
@ -328,7 +310,6 @@ func (j *Job) Run(ctx job.Context, params job.Parameters) error {
// would be redundant
if err := report.Mgr.UpdateReportData(ctx.SystemContext(), rp.UUID, reportData); err != nil {
myLogger.Errorf("Failed to update report data for report %s, error %v", rp.UUID, err)
return err
}
@ -338,6 +319,31 @@ func (j *Job) Run(ctx job.Context, params job.Parameters) error {
return nil
}
// getReportPlaceholder loads the stored report record that the scan result
// will be written to.
//
// It queries report.Mgr.GetBy with the artifact digest, reportUUID and the
// single mimeType, and returns the first match. Despite its name, reportUUID
// is passed in the position where the callers in this package supply the
// scanner registration UUID.
//
// It returns the query error unchanged, or a not-found error when the query
// succeeds but yields no report. Both failures are also written to logger.
func getReportPlaceholder(ctx context.Context, digest string, reportUUID string, mimeType string, logger logger.Interface) (*scan.Report, error) {
	reports, err := report.Mgr.GetBy(ctx, digest, reportUUID, []string{mimeType})
	if err != nil {
		// Errorf, not Error: the message contains formatting verbs.
		logger.Errorf("Failed to get report for artifact %s of mimetype %s, error %v", digest, mimeType, err)
		return nil, err
	}
	if len(reports) == 0 {
		// err is always nil on this path, so it is not part of the message.
		logger.Errorf("No report found for artifact %s of mimetype %s", digest, mimeType)
		return nil, errors.NotFoundError(nil).WithMessage("no report found to update data")
	}
	return reports[0], nil
}
// fetchScanReportFromScanner retrieves the raw report for requestID with mime
// type m from the scanner client and validates it before handing it back.
//
// Any error from the client (including a "not ready" error the caller may
// want to inspect) or from report.ResolveData is returned unchanged together
// with an empty report string.
func fetchScanReportFromScanner(client v1.Client, requestID string, m string) (rawReport string, err error) {
	fetched, fetchErr := client.GetScanReport(requestID, m)
	if fetchErr != nil {
		return "", fetchErr
	}
	// The resolved value is discarded: resolving only serves to reject data
	// that is not aligned with the v1 spec.
	if _, resolveErr := report.ResolveData(m, []byte(fetched)); resolveErr != nil {
		return "", resolveErr
	}
	return fetched, nil
}
// ExtractScanReq extracts the scan request from the job parameters.
func ExtractScanReq(params job.Parameters) (*v1.ScanRequest, error) {
v, ok := params[JobParameterRequest]

View File

@ -19,15 +19,19 @@ import (
"testing"
"time"
"github.com/stretchr/testify/mock"
"github.com/stretchr/testify/require"
"github.com/stretchr/testify/suite"
"github.com/goharbor/harbor/src/controller/robot"
"github.com/goharbor/harbor/src/jobservice/job"
"github.com/goharbor/harbor/src/pkg/robot/model"
"github.com/goharbor/harbor/src/pkg/scan/dao/scan"
"github.com/goharbor/harbor/src/pkg/scan/dao/scanner"
"github.com/goharbor/harbor/src/pkg/scan/report"
v1 "github.com/goharbor/harbor/src/pkg/scan/rest/v1"
"github.com/goharbor/harbor/src/pkg/scan/vuln"
htesting "github.com/goharbor/harbor/src/testing"
mockjobservice "github.com/goharbor/harbor/src/testing/jobservice"
mocktesting "github.com/goharbor/harbor/src/testing/mock"
v1testing "github.com/goharbor/harbor/src/testing/pkg/scan/rest/v1"
@ -35,10 +39,11 @@ import (
// JobTestSuite is a test suite to test the scan job.
type JobTestSuite struct {
suite.Suite
htesting.Suite
defaultClientPool v1.ClientPool
mcp *v1testing.ClientPool
reportIDs []string
}
// TestJob is the entry of JobTestSuite.
@ -48,6 +53,7 @@ func TestJob(t *testing.T) {
// SetupSuite sets up test env for JobTestSuite.
func (suite *JobTestSuite) SetupSuite() {
suite.Suite.SetupSuite()
mcp := &v1testing.ClientPool{}
suite.defaultClientPool = v1.DefaultClientPool
v1.DefaultClientPool = mcp
@ -55,9 +61,12 @@ func (suite *JobTestSuite) SetupSuite() {
suite.mcp = mcp
}
// TeraDownSuite clears test env for TeraDownSuite.
func (suite *JobTestSuite) TeraDownSuite() {
// TearDownSuite clears test env for TearDownSuite.
func (suite *JobTestSuite) TearDownSuite() {
v1.DefaultClientPool = suite.defaultClientPool
for _, id := range suite.reportIDs {
_ = report.Mgr.Delete(suite.Context(), id)
}
}
// TestJob tests the scan job
@ -151,3 +160,59 @@ func (suite *JobTestSuite) TestJob() {
err = j.Run(ctx, jp)
require.NoError(suite.T(), err)
}
// TestgetReportPlaceholder stores a report record and verifies that
// getReportPlaceholder finds it by digest, scanner UUID and mime type.
func (suite *JobTestSuite) TestgetReportPlaceholder() {
	dgst := "sha256:mydigest"
	uuid := `7f20b1b9-6117-4a2e-820b-e4cc0401f15e`
	scannerUUID := `7f20b1b9-6117-4a2e-820b-e4cc0401f15f`
	rpt := &scan.Report{
		UUID:             uuid,
		RegistrationUUID: scannerUUID,
		Digest:           dgst,
		MimeType:         v1.MimeTypeDockerArtifact,
	}
	ctx := suite.Context()
	rptID, err := report.Mgr.Create(ctx, rpt)
	// Check the error first so a failed Create never queues a bogus ID for
	// deletion in TearDownSuite.
	require.NoError(suite.T(), err)
	suite.reportIDs = append(suite.reportIDs, rptID)

	jobLogger := &mockjobservice.MockJobLogger{}
	// Named placeholder rather than report to avoid shadowing the report package.
	placeholder, err := getReportPlaceholder(ctx, dgst, scannerUUID, v1.MimeTypeDockerArtifact, jobLogger)
	require.NoError(suite.T(), err)
	require.NotNil(suite.T(), placeholder)
}
// TestfetchScanReportFromScanner verifies that fetchScanReportFromScanner
// returns the raw report served by the scanner client unchanged when the
// report is a valid generic vulnerability report.
func (suite *JobTestSuite) TestfetchScanReportFromScanner() {
	vulnRpt := &vuln.Report{
		GeneratedAt: time.Now().UTC().String(),
		Scanner: &v1.Scanner{
			Name:    "Trivy",
			Vendor:  "Harbor",
			Version: "0.1.0",
		},
		Severity: vuln.High,
	}
	rptContent, err := json.Marshal(vulnRpt)
	require.NoError(suite.T(), err)
	rawContent := string(rptContent)

	ctx := suite.Context()
	dgst := "sha256:mydigest"
	uuid := `7f20b1b9-6117-4a2e-820b-e4cc0401f15a`
	scannerUUID := `7f20b1b9-6117-4a2e-820b-e4cc0401f15b`
	rpt := &scan.Report{
		UUID:             uuid,
		RegistrationUUID: scannerUUID,
		Digest:           dgst,
		MimeType:         v1.MimeTypeDockerArtifact,
		Report:           rawContent,
	}
	// NOTE(review): fetchScanReportFromScanner only receives the client, the
	// request ID and the mime type; this stored record looks unnecessary for
	// the assertion below — confirm before removing it.
	rptID, err := report.Mgr.Create(ctx, rpt)
	// Check the error first so a failed Create never queues a bogus ID for
	// deletion in TearDownSuite.
	require.NoError(suite.T(), err)
	suite.reportIDs = append(suite.reportIDs, rptID)

	client := &v1testing.Client{}
	client.On("GetScanReport", mock.Anything, v1.MimeTypeGenericVulnerabilityReport).Return(rawContent, nil)
	rawRept, err := fetchScanReportFromScanner(client, "abc", v1.MimeTypeGenericVulnerabilityReport)
	require.NoError(suite.T(), err)
	require.Equal(suite.T(), rawContent, rawRept)
}

View File

@ -21,6 +21,13 @@ import (
"github.com/goharbor/harbor/src/lib/errors"
)
// Scan type identifiers. These are the values carried in ScanType.Type.
const (
// ScanTypeVulnerability is the scan type identifier for vulnerability scans.
ScanTypeVulnerability = "vulnerability"
// ScanTypeSbom is the scan type identifier for SBOM scans.
ScanTypeSbom = "sbom"
)
// Scanner represents metadata of a Scanner Adapter which allow Harbor to lookup a scanner capable of
// scanning a given Artifact stored in its registry and making sure that it can interpret a
// returned result.
@ -173,6 +180,18 @@ type ScanRequest struct {
Registry *Registry `json:"registry"`
// Artifact to be scanned.
Artifact *Artifact `json:"artifact"`
// RequestType
RequestType []*ScanType `json:"enabled_capabilities"`
}
// ScanType represents the type of a scan request together with its settings.
type ScanType struct {
// Type sets the type of the scan, it could be sbom or vulnerability, default is vulnerability
Type string `json:"type"`
// ProducesMimeTypes lists report mime types for this scan type.
// NOTE(review): presumably the mime types the scanner is asked to produce —
// confirm against the scanner API spec.
ProducesMimeTypes []string `json:"produces_mime_types"`
// Parameters carries extra, scan type specific parameters.
Parameters map[string]interface{} `json:"parameters"`
}
// FromJSON parses ScanRequest from json data

View File

@ -30,7 +30,7 @@ import (
"github.com/opencontainers/go-digest"
ocispec "github.com/opencontainers/image-spec/specs-go/v1"
"github.com/goharbor/harbor/src/controller/robot"
"github.com/goharbor/harbor/src/pkg/robot/model"
v1sq "github.com/goharbor/harbor/src/pkg/scan/rest/v1"
)
@ -49,7 +49,7 @@ type referrer struct {
}
// GenAccessoryArt composes the accessory oci object and push it back to harbor core as an accessory of the scanned artifact.
func GenAccessoryArt(sq v1sq.ScanRequest, accData []byte, accAnnotations map[string]string, mediaType string, robot robot.Robot) (string, error) {
func GenAccessoryArt(sq v1sq.ScanRequest, accData []byte, accAnnotations map[string]string, mediaType string, robot *model.Robot) (string, error) {
accArt, err := mutate.Append(empty.Image, mutate.Addendum{
Layer: static.NewLayer(accData, ocispec.MediaTypeImageLayer),
History: v1.History{

View File

@ -22,8 +22,7 @@ import (
"github.com/google/go-containerregistry/pkg/registry"
"github.com/stretchr/testify/assert"
"github.com/goharbor/harbor/src/controller/robot"
rm "github.com/goharbor/harbor/src/pkg/robot/model"
"github.com/goharbor/harbor/src/pkg/robot/model"
v1sq "github.com/goharbor/harbor/src/pkg/scan/rest/v1"
)
@ -47,11 +46,9 @@ func TestGenAccessoryArt(t *testing.T) {
Digest: "sha256:d37ada95d47ad12224c205a938129df7a3e52345828b4fa27b03a98825d1e2e7",
},
}
r := robot.Robot{
Robot: rm.Robot{
Name: "admin",
Secret: "Harbor12345",
},
r := &model.Robot{
Name: "admin",
Secret: "Harbor12345",
}
annotations := map[string]string{

View File

@ -33,6 +33,9 @@ type Report struct {
Vulnerabilities []*VulnerabilityItem `json:"vulnerabilities"`
vulnerabilityItemList *VulnerabilityItemList
// SBOM sbom content
SBOM map[string]interface{} `json:"sbom,omitempty"`
}
// GetVulnerabilityItemList returns VulnerabilityItemList from the Vulnerabilities of report

View File

@ -0,0 +1,57 @@
// Copyright Project Harbor Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package vulnerability
import (
"time"
"github.com/goharbor/harbor/src/common/rbac"
"github.com/goharbor/harbor/src/jobservice/job"
"github.com/goharbor/harbor/src/pkg/permission/types"
"github.com/goharbor/harbor/src/pkg/robot/model"
scanJob "github.com/goharbor/harbor/src/pkg/scan"
"github.com/goharbor/harbor/src/pkg/scan/dao/scan"
"github.com/goharbor/harbor/src/pkg/scan/postprocessors"
v1 "github.com/goharbor/harbor/src/pkg/scan/rest/v1"
)
// init registers ScanHandler as the handler for the vulnerability scan type.
// Because registration happens at package initialization, importing this
// package (even as a blank import) is what makes the handler available.
func init() {
scanJob.RegisterScanHanlder(v1.ScanTypeVulnerability, &ScanHandler{})
}
// ScanHandler is the scan handler for vulnerability scans. It has no fields;
// its behavior is provided by the RequiredPermissions and PostScan methods.
type ScanHandler struct {
}
// RequiredPermissions lists the access policies granted to the robot account
// used for a vulnerability scan: pull and scanner-pull on the repository
// resource, in that order.
func (v *ScanHandler) RequiredPermissions() []*types.Policy {
	pull := &types.Policy{Resource: rbac.ResourceRepository, Action: rbac.ActionPull}
	scannerPull := &types.Policy{Resource: rbac.ResourceRepository, Action: rbac.ActionScannerPull}
	return []*types.Policy{pull, scannerPull}
}
// PostScan converts the raw vulnerability report into the relational schema
// through postprocessors.Converter, keyed by the UUID, registration UUID and
// digest of origRp, and returns the report data produced by the conversion.
//
// The scan request, start time and robot account are not used by this
// handler. The converter's error, if any, is returned as-is alongside
// whatever report data the converter returned.
// NOTE(review): the original comment mentioned using a new ormer for a short
// db connection; nothing here does that explicitly — presumably it happens
// inside the converter, confirm.
func (v *ScanHandler) PostScan(ctx job.Context, _ *v1.ScanRequest, origRp *scan.Report, rawReport string, _ time.Time, _ *model.Robot) (string, error) {
	sysCtx := ctx.SystemContext()
	_, converted, convErr := postprocessors.Converter.ToRelationalSchema(sysCtx, origRp.UUID, origRp.RegistrationUUID, origRp.Digest, rawReport)
	return converted, convErr
}

View File

@ -0,0 +1,52 @@
package vulnerability
import (
"testing"
"time"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/mock"
"github.com/goharbor/harbor/src/common/rbac"
"github.com/goharbor/harbor/src/pkg/permission/types"
"github.com/goharbor/harbor/src/pkg/robot/model"
"github.com/goharbor/harbor/src/pkg/scan/dao/scan"
"github.com/goharbor/harbor/src/pkg/scan/postprocessors"
v1 "github.com/goharbor/harbor/src/pkg/scan/rest/v1"
"github.com/goharbor/harbor/src/testing/jobservice"
postprocessorstesting "github.com/goharbor/harbor/src/testing/pkg/scan/postprocessors"
)
func TestRequiredPermissions(t *testing.T) {
v := &ScanHandler{}
expected := []*types.Policy{
{
Resource: rbac.ResourceRepository,
Action: rbac.ActionPull,
},
{
Resource: rbac.ResourceRepository,
Action: rbac.ActionScannerPull,
},
}
result := v.RequiredPermissions()
assert.Equal(t, expected, result, "RequiredPermissions should return correct permissions")
}
// TestPostScan runs PostScan against a mocked report converter and checks
// the returned report data and error.
func TestPostScan(t *testing.T) {
	v := &ScanHandler{}
	ctx := &jobservice.MockJobContext{}
	artifact := &v1.Artifact{}
	origRp := &scan.Report{}
	rawReport := ""

	mocker := &postprocessorstesting.ScanReportV1ToV2Converter{}
	mocker.On("ToRelationalSchema", mock.Anything, mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(nil, "original report", nil)

	// postprocessors.Converter is package-level state; put the previous
	// converter back when the test ends so other tests are not affected.
	prevConverter := postprocessors.Converter
	t.Cleanup(func() { postprocessors.Converter = prevConverter })
	postprocessors.Converter = mocker

	sr := &v1.ScanRequest{Artifact: artifact}
	refreshedReport, err := v.PostScan(ctx, sr, origRp, rawReport, time.Now(), &model.Robot{})

	// NOTE(review): the stub above is configured to return "original report",
	// yet the expected value here is "". Presumably the mock implementation
	// ignores the configured return and echoes the (empty) rawReport — verify
	// against the mock before changing either side.
	assert.Equal(t, "", refreshedReport, "PostScan should return the refreshed report")
	assert.Nil(t, err, "PostScan should not return an error")
}