mirror of https://github.com/goharbor/harbor.git
381 lines
12 KiB
Go
381 lines
12 KiB
Go
// Copyright Project Harbor Authors
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package scandataexport
|
|
|
|
import (
|
|
"encoding/json"
|
|
"fmt"
|
|
"os"
|
|
"path/filepath"
|
|
"strconv"
|
|
"strings"
|
|
|
|
"github.com/gocarina/gocsv"
|
|
"github.com/opencontainers/go-digest"
|
|
|
|
"github.com/goharbor/harbor/src/jobservice/job"
|
|
"github.com/goharbor/harbor/src/lib/errors"
|
|
"github.com/goharbor/harbor/src/pkg/project"
|
|
"github.com/goharbor/harbor/src/pkg/scan/export"
|
|
"github.com/goharbor/harbor/src/pkg/systemartifact"
|
|
"github.com/goharbor/harbor/src/pkg/systemartifact/model"
|
|
"github.com/goharbor/harbor/src/pkg/task"
|
|
)
|
|
|
|
// ScanDataExport is the struct to implement the scan data export.
|
|
// implements the Job interface
|
|
type ScanDataExport struct {
|
|
execMgr task.ExecutionManager
|
|
scanDataExportDirPath string
|
|
exportMgr export.Manager
|
|
digestCalculator export.ArtifactDigestCalculator
|
|
filterProcessor export.FilterProcessor
|
|
vulnDataSelector export.VulnerabilityDataSelector
|
|
projectMgr project.Manager
|
|
sysArtifactMgr systemartifact.Manager
|
|
}
|
|
|
|
func (sde *ScanDataExport) MaxFails() uint {
|
|
return 1
|
|
}
|
|
|
|
// MaxCurrency of the job. Unlike the WorkerPool concurrency, it controls the limit on the number jobs of that type
|
|
// that can be active at one time by within a single redis instance.
|
|
// The default value is 0, which means "no limit on job concurrency".
|
|
func (sde *ScanDataExport) MaxCurrency() uint {
|
|
return 1
|
|
}
|
|
|
|
// ShouldRetry tells worker if retry the failed job when the fails is
|
|
// still less that the number declared by the method 'MaxFails'.
|
|
//
|
|
// Returns:
|
|
//
|
|
// true for retry and false for none-retry
|
|
func (sde *ScanDataExport) ShouldRetry() bool {
|
|
return true
|
|
}
|
|
|
|
// Validate Indicate whether the parameters of job are valid.
|
|
// Return:
|
|
// error if parameters are not valid. NOTES: If no parameters needed, directly return nil.
|
|
func (sde *ScanDataExport) Validate(_ job.Parameters) error {
|
|
return nil
|
|
}
|
|
|
|
// Run the business logic here.
|
|
// The related arguments will be injected by the workerpool.
|
|
//
|
|
// ctx Context : Job execution context.
|
|
// params map[string]interface{} : parameters with key-pair style for the job execution.
|
|
//
|
|
// Returns:
|
|
//
|
|
// error if failed to run. NOTES: If job is stopped or cancelled, a specified error should be returned
|
|
func (sde *ScanDataExport) Run(ctx job.Context, params job.Parameters) error {
|
|
if _, ok := params[export.JobModeKey]; !ok {
|
|
return errors.Errorf("no mode specified for scan data export execution")
|
|
}
|
|
|
|
mode := params[export.JobModeKey].(string)
|
|
logger := ctx.GetLogger()
|
|
logger.Infof("Scan data export job started in mode : %v", mode)
|
|
sde.init()
|
|
fileName := fmt.Sprintf("%s/scandata_export_%s.csv", sde.scanDataExportDirPath, params[export.JobID])
|
|
|
|
// ensure that CSV files are cleared post the completion of the Run.
|
|
defer sde.cleanupCsvFile(ctx, fileName, params)
|
|
err := sde.writeCsvFile(ctx, params, fileName)
|
|
if err != nil {
|
|
logger.Errorf("error when writing data to CSV: %v", err)
|
|
return err
|
|
}
|
|
|
|
hash, err := sde.calculateFileHash(fileName)
|
|
if err != nil {
|
|
logger.Errorf("Error when calculating checksum for generated file: %v", err)
|
|
return err
|
|
}
|
|
logger.Infof("Export Job Id = %s, FileName = %s, Hash = %v", params[export.JobID], fileName, hash)
|
|
|
|
csvFile, err := os.OpenFile(fileName, os.O_RDONLY, os.ModePerm)
|
|
if err != nil {
|
|
logger.Errorf(
|
|
"Export Job Id = %s. Error when moving report file %s to persistent storage: %v", params[export.JobID], fileName, err)
|
|
return err
|
|
}
|
|
baseFileName := filepath.Base(fileName)
|
|
repositoryName := strings.TrimSuffix(baseFileName, filepath.Ext(baseFileName))
|
|
logger.Infof("Creating repository for CSV file with blob : %s", repositoryName)
|
|
stat, err := os.Stat(fileName)
|
|
if err != nil {
|
|
logger.Errorf("Error when fetching file size: %v", err)
|
|
return err
|
|
}
|
|
logger.Infof("Export Job Id = %s. CSV file size: %d", params[export.JobID], stat.Size())
|
|
// earlier return and update status message if the file size is 0, unnecessary to push a empty system artifact.
|
|
if stat.Size() == 0 {
|
|
extra := map[string]interface{}{
|
|
export.StatusMessageAttribute: "No vulnerabilities found or matched",
|
|
}
|
|
updateErr := sde.updateExecAttributes(ctx, params, extra)
|
|
if updateErr != nil {
|
|
logger.Errorf("Export Job Id = %s. Error when updating the exec extra attributes 'status_message' to 'No vulnerabilities found or matched': %v", params[export.JobID], updateErr)
|
|
}
|
|
|
|
logger.Infof("Export Job Id = %s. Exported CSV file is empty, skip to push system artifact, exit job", params[export.JobID])
|
|
return nil
|
|
}
|
|
|
|
csvExportArtifactRecord := model.SystemArtifact{Repository: repositoryName, Digest: hash.String(), Size: stat.Size(), Type: "ScanData_CSV", Vendor: strings.ToLower(export.Vendor)}
|
|
artID, err := sde.sysArtifactMgr.Create(ctx.SystemContext(), &csvExportArtifactRecord, csvFile)
|
|
if err != nil {
|
|
logger.Errorf(
|
|
"Export Job Id = %s. Error when persisting report file %s to persistent storage: %v", params[export.JobID], fileName, err)
|
|
return err
|
|
}
|
|
|
|
logger.Infof("Export Job Id = %s. Created system artifact: %v for report file %s to persistent storage: %v", params[export.JobID], artID, fileName, err)
|
|
err = sde.updateExecAttributes(ctx, params, map[string]interface{}{export.DigestKey: hash.String()})
|
|
if err != nil {
|
|
logger.Errorf("Export Job Id = %s. Error when updating execution record : %v", params[export.JobID], err)
|
|
return err
|
|
}
|
|
logger.Info("Scan data export job completed")
|
|
|
|
return nil
|
|
}
|
|
|
|
func (sde *ScanDataExport) updateExecAttributes(ctx job.Context, params job.Parameters, attrs map[string]interface{}) error {
|
|
logger := ctx.GetLogger()
|
|
execID, err := strconv.ParseInt(params[export.JobID].(string), 10, 64)
|
|
if err != nil {
|
|
logger.Errorf("Export Job Id = %s. Error when parse execution id from params: %v", params[export.JobID], err)
|
|
return err
|
|
}
|
|
|
|
exec, err := sde.execMgr.Get(ctx.SystemContext(), execID)
|
|
if err != nil {
|
|
logger.Errorf("Export Job Id = %s. Error when fetching execution record for update : %v", params[export.JobID], err)
|
|
return err
|
|
}
|
|
// copy old extra
|
|
attrsToUpdate := exec.ExtraAttrs
|
|
for k, v := range attrs {
|
|
attrsToUpdate[k] = v
|
|
}
|
|
return sde.execMgr.UpdateExtraAttrs(ctx.SystemContext(), execID, attrsToUpdate)
|
|
}
|
|
|
|
func (sde *ScanDataExport) writeCsvFile(ctx job.Context, params job.Parameters, fileName string) error {
|
|
csvFile, err := os.OpenFile(fileName, os.O_WRONLY|os.O_CREATE|os.O_APPEND, os.ModePerm)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
systemContext := ctx.SystemContext()
|
|
defer csvFile.Close()
|
|
|
|
logger := ctx.GetLogger()
|
|
if err != nil {
|
|
logger.Errorf("Failed to create CSV export file %s. Error : %v", fileName, err)
|
|
return err
|
|
}
|
|
logger.Infof("Created CSV export file %s", csvFile.Name())
|
|
|
|
var exportParams export.Params
|
|
var artIDGroups [][]int64
|
|
|
|
if criteria, ok := params[export.JobRequest]; ok {
|
|
logger.Infof("Request for export : %v", criteria)
|
|
filterCriteria, err := sde.extractCriteria(params)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
projectIds := filterCriteria.Projects
|
|
if len(projectIds) == 0 {
|
|
return nil
|
|
}
|
|
|
|
// extract the repository ids if any repositories have been specified
|
|
repoIds, err := sde.filterProcessor.ProcessRepositoryFilter(systemContext, filterCriteria.Repositories, projectIds)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
if len(repoIds) == 0 {
|
|
logger.Infof("No repositories found with specified names: %v", filterCriteria.Repositories)
|
|
return nil
|
|
}
|
|
|
|
// filter artifacts by tags
|
|
arts, err := sde.filterProcessor.ProcessTagFilter(systemContext, filterCriteria.Tags, repoIds)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
if len(arts) == 0 {
|
|
logger.Infof("No artifacts found with specified names: %v and tags: %v", filterCriteria.Repositories, filterCriteria.Tags)
|
|
return nil
|
|
}
|
|
|
|
// filter artifacts by labels
|
|
arts, err = sde.filterProcessor.ProcessLabelFilter(systemContext, filterCriteria.Labels, arts)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
if len(arts) == 0 {
|
|
logger.Infof("No artifacts found with specified labels: %v", filterCriteria.Labels)
|
|
return nil
|
|
}
|
|
|
|
size := export.ArtifactGroupSize
|
|
artIDGroups = make([][]int64, len(arts)/size+1)
|
|
for i, art := range arts {
|
|
// group artIDs to improve performance and avoid spliced sql over
|
|
// max length
|
|
artIDGroups[i/size] = append(artIDGroups[i/size], art.ID)
|
|
}
|
|
|
|
exportParams = export.Params{
|
|
CVEIds: filterCriteria.CVEIds,
|
|
}
|
|
}
|
|
|
|
for groupID, artIDGroup := range artIDGroups {
|
|
// fetch data by group
|
|
if len(artIDGroup) == 0 {
|
|
continue
|
|
}
|
|
|
|
exportParams.ArtifactIDs = artIDGroup
|
|
exportParams.PageNumber = 1
|
|
exportParams.PageSize = export.QueryPageSize
|
|
|
|
for {
|
|
data, err := sde.exportMgr.Fetch(systemContext, exportParams)
|
|
if err != nil {
|
|
logger.Error("Encountered error reading from the report table", err)
|
|
return err
|
|
}
|
|
if len(data) == 0 {
|
|
logger.Infof("No more data to fetch. Exiting...")
|
|
break
|
|
}
|
|
logger.Infof("Export Group Id = %d, Job Id = %s, Page Number = %d, Page Size = %d Num Records = %d", groupID, params[export.JobID], exportParams.PageNumber, exportParams.PageSize, len(data))
|
|
|
|
// for the first page write the CSV with the headers
|
|
if exportParams.PageNumber == 1 && groupID == 0 {
|
|
err = gocsv.Marshal(data, csvFile)
|
|
} else {
|
|
err = gocsv.MarshalWithoutHeaders(data, csvFile)
|
|
}
|
|
if err != nil {
|
|
return nil
|
|
}
|
|
|
|
exportParams.PageNumber = exportParams.PageNumber + 1
|
|
// break earlier if this is last page
|
|
if len(data) < int(exportParams.PageSize) {
|
|
break
|
|
}
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (sde *ScanDataExport) extractCriteria(params job.Parameters) (*export.Request, error) {
|
|
filterMap, ok := params[export.JobRequest].(map[string]interface{})
|
|
if !ok {
|
|
return nil, errors.Errorf("malformed criteria '%v'", params[export.JobRequest])
|
|
}
|
|
jsonData, err := json.Marshal(filterMap)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
criteria := &export.Request{}
|
|
err = criteria.FromJSON(string(jsonData))
|
|
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// sterilize trim spaces for some fields.
|
|
sterilize := func(c *export.Request) *export.Request {
|
|
if c != nil {
|
|
space, empty := " ", ""
|
|
c.Repositories = strings.ReplaceAll(c.Repositories, space, empty)
|
|
c.Tags = strings.ReplaceAll(c.Tags, space, empty)
|
|
c.CVEIds = strings.ReplaceAll(c.CVEIds, space, empty)
|
|
}
|
|
|
|
return c
|
|
}
|
|
|
|
return sterilize(criteria), nil
|
|
}
|
|
|
|
func (sde *ScanDataExport) calculateFileHash(fileName string) (digest.Digest, error) {
|
|
return sde.digestCalculator.Calculate(fileName)
|
|
}
|
|
|
|
func (sde *ScanDataExport) init() {
|
|
if sde.execMgr == nil {
|
|
sde.execMgr = task.NewExecutionManager()
|
|
}
|
|
|
|
if sde.scanDataExportDirPath == "" {
|
|
sde.scanDataExportDirPath = export.ScanDataExportDir
|
|
}
|
|
|
|
if sde.exportMgr == nil {
|
|
sde.exportMgr = export.NewManager()
|
|
}
|
|
|
|
if sde.digestCalculator == nil {
|
|
sde.digestCalculator = &export.SHA256ArtifactDigestCalculator{}
|
|
}
|
|
|
|
if sde.filterProcessor == nil {
|
|
sde.filterProcessor = export.NewFilterProcessor()
|
|
}
|
|
|
|
if sde.vulnDataSelector == nil {
|
|
sde.vulnDataSelector = export.NewVulnerabilityDataSelector()
|
|
}
|
|
|
|
if sde.projectMgr == nil {
|
|
sde.projectMgr = project.New()
|
|
}
|
|
|
|
if sde.sysArtifactMgr == nil {
|
|
sde.sysArtifactMgr = systemartifact.Mgr
|
|
}
|
|
}
|
|
|
|
func (sde *ScanDataExport) cleanupCsvFile(ctx job.Context, fileName string, params job.Parameters) {
|
|
logger := ctx.GetLogger()
|
|
if _, err := os.Stat(fileName); os.IsNotExist(err) {
|
|
logger.Infof("Export Job Id = %s, CSV Export File = %s does not exist. Nothing to do", params[export.JobID], fileName)
|
|
return
|
|
}
|
|
err := os.Remove(fileName)
|
|
if err != nil {
|
|
logger.Errorf("Export Job Id = %s, CSV Export File = %s could not deleted. Error = %v", params[export.JobID], fileName, err)
|
|
return
|
|
}
|
|
}
|