harbor/src/pkg/scan/job.go

603 lines
16 KiB
Go

// Copyright Project Harbor Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package scan
import (
"bytes"
"context"
"encoding/base64"
"encoding/json"
"fmt"
"io"
"net/http"
"net/url"
"reflect"
"sync"
"time"
"github.com/goharbor/harbor/src/common"
commonhttp "github.com/goharbor/harbor/src/common/http"
"github.com/goharbor/harbor/src/common/models"
"github.com/goharbor/harbor/src/jobservice/job"
"github.com/goharbor/harbor/src/jobservice/logger"
"github.com/goharbor/harbor/src/lib/config"
"github.com/goharbor/harbor/src/lib/errors"
"github.com/goharbor/harbor/src/pkg/robot/model"
"github.com/goharbor/harbor/src/pkg/scan/dao/scan"
"github.com/goharbor/harbor/src/pkg/scan/dao/scanner"
"github.com/goharbor/harbor/src/pkg/scan/report"
v1 "github.com/goharbor/harbor/src/pkg/scan/rest/v1"
)
const (
// JobParamRegistration ...
JobParamRegistration = "registration"
// JobParameterRequest ...
JobParameterRequest = "scanRequest"
// JobParameterMimes ...
JobParameterMimes = "mimeTypes"
// JobParameterAuthType ...
JobParameterAuthType = "authType"
// JobParameterRobot ...
JobParameterRobot = "robotAccount"
checkTimeout = 30 * time.Minute
firstCheckInterval = 2 * time.Second
authorizationBearer = "Bearer"
authorizationBasic = "Basic"
service = "harbor-registry"
)
// CheckInReport defines model for checking in the scan report with specified mime.
type CheckInReport struct {
Digest string `json:"digest"`
RegistrationUUID string `json:"registration_uuid"`
MimeType string `json:"mime_type"`
RawReport string `json:"raw_report"`
}
// FromJSON parse json to CheckInReport
func (cir *CheckInReport) FromJSON(jsonData string) error {
if len(jsonData) == 0 {
return errors.New("empty JSON data")
}
return json.Unmarshal([]byte(jsonData), cir)
}
// ToJSON marshal CheckInReport to JSON
func (cir *CheckInReport) ToJSON() (string, error) {
jsonData, err := json.Marshal(cir)
if err != nil {
return "", errors.Wrap(err, "To JSON: CheckInReport")
}
return string(jsonData), nil
}
// Job for running scan in the job service with async way
type Job struct{}
// MaxFails for defining the number of retries
func (j *Job) MaxFails() uint {
return 1
}
// MaxCurrency is implementation of same method in Interface.
func (j *Job) MaxCurrency() uint {
return 0
}
// ShouldRetry indicates if the job should be retried
func (j *Job) ShouldRetry() bool {
return false
}
// Validate the parameters of this job
func (j *Job) Validate(params job.Parameters) error {
if params == nil {
// Params are required
return errors.New("missing parameter of scan job")
}
if _, err := extractRegistration(params); err != nil {
return errors.Wrap(err, "job validate")
}
if _, err := ExtractScanReq(params); err != nil {
return errors.Wrap(err, "job validate")
}
if _, err := extractMimeTypes(params); err != nil {
return errors.Wrap(err, "job validate")
}
if _, err := extractRobotAccount(params); err != nil {
return errors.Wrap(err, "job validate")
}
authType, err := extractAuthType(params)
if err != nil {
return errors.Wrap(err, "job validate")
}
if authType != authorizationBearer && authType != authorizationBasic {
return errors.Wrapf(err, "job validate: not support auth type %s", authType)
}
return nil
}
// Run the job
func (j *Job) Run(ctx job.Context, params job.Parameters) error {
// Get logger
myLogger := ctx.GetLogger()
startTime := time.Now()
// shouldStop checks if the job should be stopped
shouldStop := func() bool {
if cmd, ok := ctx.OPCommand(); ok && cmd == job.StopCommand {
myLogger.Info("scan job being stopped")
return true
}
return false
}
// Ignore errors as they have been validated already
r, _ := extractRegistration(params)
req, _ := ExtractScanReq(params)
mimeTypes, _ := extractMimeTypes(params)
scanType := v1.ScanTypeVulnerability
if len(req.RequestType) > 0 {
scanType = req.RequestType[0].Type
}
handler := GetScanHandler(scanType)
// Print related infos to log
printJSONParameter(JobParamRegistration, removeRegistrationAuthInfo(r), myLogger)
printJSONParameter(JobParameterRequest, removeScanAuthInfo(req), myLogger)
myLogger.Infof("Report mime types: %v\n", mimeTypes)
if shouldStop() {
return nil
}
// Submit scan request to the scanner adapter
client, err := r.Client(v1.DefaultClientPool)
if err != nil {
return logAndWrapError(myLogger, err, "scan job: get client")
}
// Ignore the namespace ID here
req.Artifact.NamespaceID = 0
robotAccount, _ := extractRobotAccount(params)
var authorization string
var tokenURL string
authType, _ := extractAuthType(params)
if authType == authorizationBearer {
tokenURL, err = getInternalTokenServiceEndpoint(ctx)
if err != nil {
return errors.Wrap(err, "scan job: get token service endpoint")
}
authorization, err = makeBearerAuthorization(robotAccount, tokenURL, req.Artifact.Repository)
} else {
authorization, err = makeBasicAuthorization(robotAccount)
}
if err != nil {
_ = logAndWrapError(myLogger, err, "scan job: make authorization")
}
if shouldStop() {
return nil
}
req.Registry.Authorization = authorization
resp, err := client.SubmitScan(req)
if err != nil {
return logAndWrapError(myLogger, err, "scan job: submit scan request")
}
// For collecting errors
errs := make([]error, len(mimeTypes))
rawReports := make([]string, len(mimeTypes))
// Concurrently retrieving report by different mime types
wg := &sync.WaitGroup{}
wg.Add(len(mimeTypes))
for i, mimeType := range mimeTypes {
go func(i int, m string) {
defer wg.Done()
// Log info
myLogger.Infof("Get report for mime type: %s", m)
// Loop check if the report is ready
tm := time.NewTimer(firstCheckInterval)
defer tm.Stop()
for {
select {
case t := <-tm.C:
if shouldStop() {
return
}
myLogger.Debugf("check scan report for mime %s at %s", m, t.Format("2006/01/02 15:04:05"))
rawReport, err := fetchScanReportFromScanner(client, resp.ID, m)
if err != nil {
// Not ready yet
if notReadyErr, ok := err.(*v1.ReportNotReadyError); ok {
// Reset to the new check interval
tm.Reset(time.Duration(notReadyErr.RetryAfter) * time.Second)
myLogger.Infof("Report with mime type %s is not ready yet, retry after %d seconds", m, notReadyErr.RetryAfter)
continue
}
errs[i] = errors.Wrap(err, fmt.Sprintf("scan job: fetch scan report, mimetype %v", m))
return
}
rawReports[i] = rawReport
return
case <-ctx.SystemContext().Done():
// Terminated by system
return
case <-time.After(checkTimeout):
errs[i] = errors.New("check scan report timeout")
return
}
}
}(i, mimeType)
}
// Wait for all the retrieving routines are completed
wg.Wait()
if shouldStop() {
return nil
}
// Merge errors
for _, e := range errs {
if e != nil {
if err != nil {
err = errors.Wrap(e, err.Error())
} else {
err = e
}
}
}
// Log error to the job log
if err != nil {
myLogger.Error(err)
return err
}
for i, mimeType := range mimeTypes {
rp, err := getReportPlaceholder(ctx.SystemContext(), req.Artifact.Digest, r.UUID, mimeType, myLogger)
if err != nil {
return err
}
myLogger.Debugf("Converting report ID %s to the new V2 schema", rp.UUID)
reportData, err := handler.PostScan(ctx, req, rp, rawReports[i], startTime, robotAccount)
if err != nil {
myLogger.Errorf("Failed to convert vulnerability data to new schema for report %s, error %v", rp.UUID, err)
return err
}
// update the original report with the new summarized report with all vulnerability data removed.
// this is required since the top level layers relay on the vuln.Report struct that
// contains additional metadata within the report which if stored in the new columns within the scan_report table
// would be redundant
if err := report.Mgr.UpdateReportData(ctx.SystemContext(), rp.UUID, reportData); err != nil {
myLogger.Errorf("Failed to update report data for report %s, error %v", rp.UUID, err)
return err
}
myLogger.Debugf("Converted report ID %s to the new V2 schema", rp.UUID)
}
return nil
}
func getReportPlaceholder(ctx context.Context, digest string, reportUUID string, mimeType string, logger logger.Interface) (*scan.Report, error) {
reports, err := report.Mgr.GetBy(ctx, digest, reportUUID, []string{mimeType})
if err != nil {
logger.Error("Failed to get report for artifact %s of mimetype %s, error %v", digest, mimeType, err)
return nil, err
}
if len(reports) == 0 {
logger.Errorf("No report found for artifact %s of mimetype %s, error %v", digest, mimeType, err)
return nil, errors.NotFoundError(nil).WithMessage("no report found to update data")
}
return reports[0], nil
}
func fetchScanReportFromScanner(client v1.Client, requestID string, m string) (rawReport string, err error) {
rawReport, err = client.GetScanReport(requestID, m)
if err != nil {
return "", err
}
// Make sure the data is aligned with the v1 spec.
if _, err = report.ResolveData(m, []byte(rawReport)); err != nil {
return "", err
}
return rawReport, nil
}
// ExtractScanReq extracts the scan request from the job parameters.
func ExtractScanReq(params job.Parameters) (*v1.ScanRequest, error) {
v, ok := params[JobParameterRequest]
if !ok {
return nil, errors.Errorf("missing job parameter '%s'", JobParameterRequest)
}
jsonData, ok := v.(string)
if !ok {
return nil, errors.Errorf(
"malformed job parameter '%s', expecting string but got %s",
JobParameterRequest,
reflect.TypeOf(v).String(),
)
}
req := &v1.ScanRequest{}
if err := req.FromJSON(jsonData); err != nil {
return nil, err
}
if err := req.Validate(); err != nil {
return nil, err
}
return req, nil
}
func logAndWrapError(logger logger.Interface, err error, message string) error {
e := errors.Wrap(err, message)
logger.Error(e)
return e
}
func printJSONParameter(parameter string, v string, logger logger.Interface) {
logger.Debugf("%s:\n", parameter)
printPrettyJSON([]byte(v), logger)
}
func printPrettyJSON(in []byte, logger logger.Interface) {
var out bytes.Buffer
if err := json.Indent(&out, in, "", " "); err != nil {
logger.Errorf("Print pretty JSON error: %s", err)
return
}
logger.Infof("%s\n", out.String())
}
func removeScanAuthInfo(sr *v1.ScanRequest) string {
req := &v1.ScanRequest{
Artifact: sr.Artifact,
Registry: &v1.Registry{
URL: sr.Registry.URL,
Authorization: "[HIDDEN]",
},
}
str, err := req.ToJSON()
if err != nil {
logger.Error(errors.Wrap(err, "scan job: remove auth for scan request"))
}
return str
}
func removeRegistrationAuthInfo(sr *scanner.Registration) string {
req := &scanner.Registration{
ID: sr.ID,
UUID: sr.UUID,
Name: sr.Name,
Description: sr.Description,
URL: sr.URL,
Disabled: sr.Disabled,
IsDefault: sr.IsDefault,
Health: sr.Health,
Auth: sr.Auth,
AccessCredential: "[HIDDEN]",
SkipCertVerify: sr.SkipCertVerify,
UseInternalAddr: sr.UseInternalAddr,
Immutable: sr.Immutable,
Adapter: sr.Adapter,
Vendor: sr.Vendor,
Version: sr.Version,
Metadata: sr.Metadata,
CreateTime: sr.CreateTime,
UpdateTime: sr.UpdateTime,
}
str, err := req.ToJSON()
if err != nil {
logger.Error(errors.Wrap(err, "scan job: remove auth for registration"))
}
return str
}
func extractRegistration(params job.Parameters) (*scanner.Registration, error) {
v, ok := params[JobParamRegistration]
if !ok {
return nil, errors.Errorf("missing job parameter '%s'", JobParamRegistration)
}
jsonData, ok := v.(string)
if !ok {
return nil, errors.Errorf(
"malformed job parameter '%s', expecting string but got %s",
JobParamRegistration,
reflect.TypeOf(v).String(),
)
}
r := &scanner.Registration{}
if err := r.FromJSON(jsonData); err != nil {
return nil, err
}
if err := r.Validate(true); err != nil {
return nil, err
}
return r, nil
}
func extractRobotAccount(params job.Parameters) (*model.Robot, error) {
v, ok := params[JobParameterRobot]
if !ok {
return nil, errors.Errorf("missing job parameter '%s'", JobParameterRobot)
}
jsonData, ok := v.(string)
if !ok {
return nil, errors.Errorf(
"malformed job parameter '%s', expecting string but got %s",
JobParameterRobot,
reflect.TypeOf(v).String(),
)
}
r := &model.Robot{}
if err := r.FromJSON(jsonData); err != nil {
return nil, err
}
return r, nil
}
func extractMimeTypes(params job.Parameters) ([]string, error) {
v, ok := params[JobParameterMimes]
if !ok {
return nil, errors.Errorf("missing job parameter '%s'", JobParameterMimes)
}
l, ok := v.([]interface{})
if !ok {
return nil, errors.Errorf(
"malformed job parameter '%s', expecting []interface{} but got %s",
JobParameterMimes,
reflect.TypeOf(v).String(),
)
}
mimes := make([]string, 0)
for _, v := range l {
mime, ok := v.(string)
if !ok {
return nil, errors.Errorf("expect string but got %s", reflect.TypeOf(v).String())
}
mimes = append(mimes, mime)
}
return mimes, nil
}
func extractAuthType(params job.Parameters) (string, error) {
v, ok := params[JobParameterAuthType]
if !ok {
return "", errors.Errorf("missing job parameter '%s'", JobParameterAuthType)
}
authType, ok := v.(string)
if !ok {
return "", errors.Errorf(
"malformed job parameter '%s', expecting string but got %s",
JobParameterAuthType,
reflect.TypeOf(v).String(),
)
}
return authType, nil
}
func getInternalTokenServiceEndpoint(ctx job.Context) (string, error) {
cfgMgr, ok := config.FromContext(ctx.SystemContext())
if !ok {
return "", errors.Errorf("failed to get config manager")
}
return cfgMgr.Get(ctx.SystemContext(), common.CoreURL).GetString() + "/service/token", nil
}
// makeBasicAuthorization creates authorization from a robot account based on the arguments for scanning.
func makeBasicAuthorization(robotAccount *model.Robot) (string, error) {
basic := fmt.Sprintf("%s:%s", robotAccount.Name, robotAccount.Secret)
encoded := base64.StdEncoding.EncodeToString([]byte(basic))
return fmt.Sprintf("Basic %s", encoded), nil
}
// makeBearerAuthorization creates bearer token from a robot account
func makeBearerAuthorization(robotAccount *model.Robot, tokenURL string, repository string) (string, error) {
u, err := url.Parse(tokenURL)
if err != nil {
return "", err
}
query := u.Query()
query.Add("service", service)
query.Add("scope", fmt.Sprintf("repository:%s:pull", repository))
u.RawQuery = query.Encode()
req, err := http.NewRequest(http.MethodGet, u.String(), nil)
if err != nil {
return "", err
}
auth, _ := makeBasicAuthorization(robotAccount)
req.Header.Set("Authorization", auth)
client := &http.Client{
Transport: commonhttp.GetHTTPTransport(commonhttp.WithInsecure(true)),
}
resp, err := client.Do(req)
if err != nil {
return "", err
}
defer resp.Body.Close()
data, err := io.ReadAll(resp.Body)
if err != nil {
return "", err
}
if resp.StatusCode != http.StatusOK {
return "", fmt.Errorf("get bearer token failed, %s", string(data))
}
token := &models.Token{}
if err = json.Unmarshal(data, token); err != nil {
return "", err
}
return fmt.Sprintf("Bearer %s", token.GetToken()), nil
}