mirror of https://github.com/goharbor/harbor.git
492 lines
13 KiB
Go
492 lines
13 KiB
Go
// Copyright Project Harbor Authors
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package cworker
|
|
|
|
import (
|
|
"fmt"
|
|
"reflect"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/gocraft/work"
|
|
"github.com/gomodule/redigo/redis"
|
|
|
|
"github.com/goharbor/harbor/src/jobservice/common/utils"
|
|
"github.com/goharbor/harbor/src/jobservice/env"
|
|
"github.com/goharbor/harbor/src/jobservice/errs"
|
|
"github.com/goharbor/harbor/src/jobservice/job"
|
|
"github.com/goharbor/harbor/src/jobservice/lcm"
|
|
"github.com/goharbor/harbor/src/jobservice/logger"
|
|
"github.com/goharbor/harbor/src/jobservice/period"
|
|
"github.com/goharbor/harbor/src/jobservice/runner"
|
|
"github.com/goharbor/harbor/src/jobservice/worker"
|
|
"github.com/goharbor/harbor/src/lib"
|
|
"github.com/goharbor/harbor/src/lib/errors"
|
|
)
|
|
|
|
// workerPoolDeadTime is the maximum age of a worker pool heartbeat. Stats
// reports a pool as dead once its last heartbeat is older than this.
var workerPoolDeadTime = 10 * time.Second
|
|
|
|
const (
	// Status labels reported for a worker pool by Stats.
	workerPoolStatusHealthy = "Healthy"
	workerPoolStatusDead    = "Dead"

	// pingRedisMaxTimes is the upper bound of ping attempts made against
	// redis when the worker starts.
	pingRedisMaxTimes = 10

	// defaultWorkerCount is the worker count NewWorker falls back to when
	// the caller passes 0.
	defaultWorkerCount uint = 10
)
|
|
|
|
// basicWorker is the worker implementation based on gocraft/work powered by redis.
|
|
type basicWorker struct {
|
|
namespace string
|
|
redisPool *redis.Pool
|
|
pool *work.WorkerPool
|
|
enqueuer *work.Enqueuer
|
|
client *work.Client
|
|
context *env.Context
|
|
scheduler period.Scheduler
|
|
ctl lcm.Controller
|
|
reaper *reaper
|
|
|
|
// key is name of known job
|
|
// value is the type of known job
|
|
knownJobs *sync.Map
|
|
}
|
|
|
|
// workerContext is the context type handed to the gocraft/work worker pool.
// It carries no data so far: it is only a placeholder and the receiver of
// the job logging middleware.
type workerContext struct{}
|
|
|
|
// log the job
|
|
func (rpc *workerContext) logJob(job *work.Job, next work.NextMiddlewareFunc) error {
|
|
jobCopy := *job
|
|
// as the args may contain sensitive information, ignore them when logging the detail
|
|
jobCopy.Args = nil
|
|
jobInfo, _ := utils.SerializeJob(&jobCopy)
|
|
logger.Infof("Job incoming: %s", jobInfo)
|
|
|
|
return next()
|
|
}
|
|
|
|
// NewWorker is constructor of worker
|
|
func NewWorker(ctx *env.Context, namespace string, workerCount uint, redisPool *redis.Pool, ctl lcm.Controller) worker.Interface {
|
|
wc := defaultWorkerCount
|
|
if workerCount > 0 {
|
|
wc = workerCount
|
|
}
|
|
|
|
return &basicWorker{
|
|
namespace: namespace,
|
|
redisPool: redisPool,
|
|
pool: work.NewWorkerPool(workerContext{}, wc, namespace, redisPool),
|
|
enqueuer: work.NewEnqueuer(namespace, redisPool),
|
|
client: work.NewClient(namespace, redisPool),
|
|
scheduler: period.NewScheduler(ctx.SystemContext, namespace, redisPool, ctl),
|
|
ctl: ctl,
|
|
context: ctx,
|
|
knownJobs: new(sync.Map),
|
|
reaper: &reaper{
|
|
context: ctx.SystemContext,
|
|
namespace: namespace,
|
|
pool: redisPool,
|
|
lcmCtl: ctl,
|
|
jobTypes: make([]string, 0), // Append data later (at the start step)
|
|
},
|
|
}
|
|
}
|
|
|
|
// Start to serve
|
|
// Unblock action
|
|
func (w *basicWorker) Start() error {
|
|
if w.redisPool == nil {
|
|
return errors.New("missing redis pool")
|
|
}
|
|
|
|
if utils.IsEmptyStr(w.namespace) {
|
|
return errors.New("missing namespace")
|
|
}
|
|
|
|
if w.context == nil || w.context.SystemContext == nil {
|
|
// report and exit
|
|
return errors.New("missing context")
|
|
}
|
|
|
|
if w.ctl == nil {
|
|
return errors.New("missing job life cycle controller")
|
|
}
|
|
|
|
// Test the redis connection
|
|
if err := w.ping(); err != nil {
|
|
return err
|
|
}
|
|
|
|
// Start the periodic scheduler
|
|
w.scheduler.Start()
|
|
|
|
// Listen to the system signal
|
|
w.context.WG.Add(1)
|
|
go func() {
|
|
defer func() {
|
|
w.context.WG.Done()
|
|
logger.Infof("Basic worker is stopped")
|
|
}()
|
|
|
|
<-w.context.SystemContext.Done()
|
|
w.pool.Stop()
|
|
}()
|
|
|
|
// Start the backend worker pool
|
|
// Add middleware
|
|
w.pool.Middleware((*workerContext).logJob)
|
|
// Non blocking call
|
|
w.pool.Start()
|
|
logger.Infof("Basic worker is started")
|
|
|
|
// Start the reaper
|
|
w.knownJobs.Range(func(k interface{}, v interface{}) bool {
|
|
w.reaper.jobTypes = append(w.reaper.jobTypes, k.(string))
|
|
|
|
return true
|
|
})
|
|
w.reaper.start()
|
|
|
|
return nil
|
|
}
|
|
|
|
// GetPoolID returns the worker pool id
|
|
func (w *basicWorker) GetPoolID() string {
|
|
v := reflect.ValueOf(*w.pool)
|
|
return v.FieldByName("workerPoolID").String()
|
|
}
|
|
|
|
// RegisterJobs is used to register multiple jobs to worker.
|
|
func (w *basicWorker) RegisterJobs(jobs map[string]interface{}) error {
|
|
if len(jobs) == 0 {
|
|
// Do nothing
|
|
return nil
|
|
}
|
|
|
|
for name, j := range jobs {
|
|
if err := w.registerJob(name, j); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// Enqueue job
|
|
func (w *basicWorker) Enqueue(jobName string, params job.Parameters, isUnique bool, webHook string) (*job.Stats, error) {
|
|
var (
|
|
j *work.Job
|
|
err error
|
|
)
|
|
|
|
// As the job is declared to be unique,
|
|
// check the uniqueness of the job,
|
|
// Here we only need to make sure only 1 job with the same type and parameters in the queue
|
|
// For the uniqueness of executing, it can be checked in the running stage
|
|
if isUnique {
|
|
if j, err = w.enqueuer.EnqueueUnique(jobName, params); err != nil {
|
|
return nil, err
|
|
}
|
|
} else {
|
|
// Enqueue job
|
|
if j, err = w.enqueuer.Enqueue(jobName, params); err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
|
|
// avoid backend worker bug
|
|
if j == nil {
|
|
return nil, fmt.Errorf("job '%s' can not be enqueued, please check the job metatdata", jobName)
|
|
}
|
|
|
|
return generateResult(j, job.KindGeneric, isUnique, params, webHook), nil
|
|
}
|
|
|
|
// Schedule job
|
|
func (w *basicWorker) Schedule(jobName string, params job.Parameters, runAfterSeconds uint64, isUnique bool, webHook string) (*job.Stats, error) {
|
|
var (
|
|
j *work.ScheduledJob
|
|
err error
|
|
)
|
|
|
|
// As the job is declared to be unique,
|
|
// check the uniqueness of the job,
|
|
// Here we only need to make sure only 1 job with the same type and parameters in the queue
|
|
// For the uniqueness of executing, it can be checked in the running stage
|
|
if isUnique {
|
|
if j, err = w.enqueuer.EnqueueUniqueIn(jobName, int64(runAfterSeconds), params); err != nil {
|
|
return nil, err
|
|
}
|
|
} else {
|
|
// Enqueue job in
|
|
if j, err = w.enqueuer.EnqueueIn(jobName, int64(runAfterSeconds), params); err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
|
|
// avoid backend worker bug
|
|
if j == nil {
|
|
return nil, fmt.Errorf("job '%s' can not be enqueued, please check the job metatdata", jobName)
|
|
}
|
|
|
|
res := generateResult(j.Job, job.KindScheduled, isUnique, params, webHook)
|
|
res.Info.RunAt = j.RunAt
|
|
res.Info.Status = job.ScheduledStatus.String()
|
|
|
|
return res, nil
|
|
}
|
|
|
|
// PeriodicallyEnqueue job
|
|
func (w *basicWorker) PeriodicallyEnqueue(jobName string, params job.Parameters, cronSetting string, _ bool, webHook string) (*job.Stats, error) {
|
|
p := &period.Policy{
|
|
ID: utils.MakeIdentifier(),
|
|
JobName: jobName,
|
|
CronSpec: cronSetting,
|
|
JobParameters: params,
|
|
WebHookURL: webHook,
|
|
}
|
|
|
|
id, err := w.scheduler.Schedule(p)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
res := &job.Stats{
|
|
Info: &job.StatsInfo{
|
|
JobID: p.ID,
|
|
JobName: jobName,
|
|
Status: job.ScheduledStatus.String(),
|
|
JobKind: job.KindPeriodic,
|
|
CronSpec: cronSetting,
|
|
WebHookURL: webHook,
|
|
NumericPID: id,
|
|
EnqueueTime: time.Now().Unix(),
|
|
UpdateTime: time.Now().Unix(),
|
|
RefLink: fmt.Sprintf("/api/v1/jobs/%s", p.ID),
|
|
Parameters: params,
|
|
},
|
|
}
|
|
|
|
return res, nil
|
|
}
|
|
|
|
// Info of worker
|
|
func (w *basicWorker) Stats() (*worker.Stats, error) {
|
|
// Get the status of worker pool via client
|
|
hbs, err := w.client.WorkerPoolHeartbeats()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// Find the heartbeat of this worker via pid
|
|
stats := make([]*worker.StatsData, 0)
|
|
for _, hb := range hbs {
|
|
if hb.HeartbeatAt == 0 {
|
|
continue // invalid ones
|
|
}
|
|
|
|
wPoolStatus := workerPoolStatusHealthy
|
|
if time.Unix(hb.HeartbeatAt, 0).Add(workerPoolDeadTime).Before(time.Now()) {
|
|
wPoolStatus = workerPoolStatusDead
|
|
}
|
|
stat := &worker.StatsData{
|
|
WorkerPoolID: hb.WorkerPoolID,
|
|
StartedAt: hb.StartedAt,
|
|
HeartbeatAt: hb.HeartbeatAt,
|
|
JobNames: hb.JobNames,
|
|
Concurrency: hb.Concurrency,
|
|
Status: wPoolStatus,
|
|
}
|
|
stats = append(stats, stat)
|
|
}
|
|
|
|
if len(stats) == 0 {
|
|
return nil, errors.New("failed to get stats of worker pools")
|
|
}
|
|
|
|
return &worker.Stats{
|
|
Pools: stats,
|
|
}, nil
|
|
}
|
|
|
|
// StopJob will stop the job
|
|
func (w *basicWorker) StopJob(jobID string) error {
|
|
if utils.IsEmptyStr(jobID) {
|
|
return errors.New("empty job ID to stop")
|
|
}
|
|
|
|
t, err := w.ctl.Track(jobID)
|
|
if err != nil && !errs.IsObjectNotFoundError(err) {
|
|
// For none not found error, directly return
|
|
return err
|
|
}
|
|
|
|
// For periodical job and stats not found cases
|
|
if errs.IsObjectNotFoundError(err) || (t != nil && t.Job().Info.JobKind == job.KindPeriodic) {
|
|
// If the job kind is periodic or
|
|
// if the original job stats tracker is not found (the scheduler will have a try based on other data under this case)
|
|
return w.scheduler.UnSchedule(jobID)
|
|
}
|
|
|
|
// General or scheduled job
|
|
if job.RunningStatus.Before(job.Status(t.Job().Info.Status)) {
|
|
// Job has been in the final states
|
|
logger.Warningf("Trying to stop a(n) %s job: ID=%s, Kind=%s", t.Job().Info.Status, jobID, t.Job().Info.JobKind)
|
|
// Under this situation, the non-periodic job we're trying to stop has already been in the "non-running(stopped)" status.
|
|
// As the goal of stopping the job running has achieved, we directly return nil here.
|
|
return nil
|
|
}
|
|
|
|
// Mark status to stopped
|
|
if err := t.Stop(); err != nil {
|
|
return err
|
|
}
|
|
|
|
// Do more for scheduled job kind
|
|
if t.Job().Info.JobKind == job.KindScheduled {
|
|
// We need to delete the scheduled job in the queue if it is not running yet
|
|
if err := w.client.DeleteScheduledJob(t.Job().Info.RunAt, jobID); err != nil {
|
|
// Job is already running?
|
|
logger.Warningf("scheduled job %s (run at = %d) is not found in the queue, is it running?", lib.TrimLineBreaks(jobID), t.Job().Info.RunAt)
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// RetryJob retry the job
|
|
func (w *basicWorker) RetryJob(_ string) error {
|
|
return errors.New("not implemented")
|
|
}
|
|
|
|
// IsKnownJob ...
|
|
func (w *basicWorker) IsKnownJob(name string) (interface{}, bool) {
|
|
return w.knownJobs.Load(name)
|
|
}
|
|
|
|
// ValidateJobParameters ...
|
|
func (w *basicWorker) ValidateJobParameters(jobType interface{}, params job.Parameters) error {
|
|
if jobType == nil {
|
|
return errors.New("nil job type")
|
|
}
|
|
|
|
theJ := runner.Wrap(jobType)
|
|
return theJ.Validate(params)
|
|
}
|
|
|
|
// RegisterJob is used to register the job to the worker.
|
|
// j is the type of job
|
|
func (w *basicWorker) registerJob(name string, j interface{}) (err error) {
|
|
if utils.IsEmptyStr(name) || j == nil {
|
|
return errors.New("job can not be registered with empty name or nil interface")
|
|
}
|
|
|
|
// j must be job.Interface
|
|
if _, ok := j.(job.Interface); !ok {
|
|
return errors.Errorf("job must implement the job.Interface: %s", reflect.TypeOf(j).String())
|
|
}
|
|
|
|
// 1:1 constraint
|
|
if jInList, ok := w.knownJobs.Load(name); ok {
|
|
return fmt.Errorf("job name %s has been already registered with %s", name, reflect.TypeOf(jInList).String())
|
|
}
|
|
|
|
// Same job implementation can be only registered with one name
|
|
w.knownJobs.Range(func(jName interface{}, jInList interface{}) bool {
|
|
jobImpl := reflect.TypeOf(j).String()
|
|
if reflect.TypeOf(jInList).String() == jobImpl {
|
|
err = errors.Errorf("job %s has been already registered with name %s", jobImpl, jName)
|
|
return false
|
|
}
|
|
|
|
return true
|
|
})
|
|
|
|
// Something happened in the range
|
|
if err != nil {
|
|
return
|
|
}
|
|
|
|
// Wrap job
|
|
redisJob := runner.NewRedisJob(j, w.context, w.ctl)
|
|
// Get more info from j
|
|
theJ := runner.Wrap(j)
|
|
// Put into the pool
|
|
w.pool.JobWithOptions(
|
|
name,
|
|
work.JobOptions{
|
|
MaxFails: theJ.MaxFails(),
|
|
MaxConcurrency: theJ.MaxCurrency(),
|
|
Priority: job.Priority().For(name),
|
|
SkipDead: true,
|
|
},
|
|
// Use generic handler to handle as we do not accept context with this way.
|
|
func(job *work.Job) error {
|
|
return redisJob.Run(job)
|
|
},
|
|
)
|
|
// Keep the name of registered jobs as known jobs for future validation
|
|
w.knownJobs.Store(name, j)
|
|
|
|
logger.Infof("Register job %s with name %s", reflect.TypeOf(j).String(), name)
|
|
|
|
return nil
|
|
}
|
|
|
|
// Ping the redis server
|
|
func (w *basicWorker) ping() error {
|
|
conn := w.redisPool.Get()
|
|
defer func() {
|
|
_ = conn.Close()
|
|
}()
|
|
|
|
var err error
|
|
for count := 1; count <= pingRedisMaxTimes; count++ {
|
|
if _, err = conn.Do("ping"); err == nil {
|
|
return nil
|
|
}
|
|
|
|
time.Sleep(time.Duration(count+4) * time.Second)
|
|
}
|
|
|
|
return fmt.Errorf("connect to redis server timeout: %s", err.Error())
|
|
}
|
|
|
|
// generate the job stats data
|
|
func generateResult(
|
|
j *work.Job,
|
|
jobKind string,
|
|
isUnique bool,
|
|
jobParameters job.Parameters,
|
|
webHook string,
|
|
) *job.Stats {
|
|
return &job.Stats{
|
|
Info: &job.StatsInfo{
|
|
JobID: j.ID,
|
|
JobName: j.Name,
|
|
JobKind: jobKind,
|
|
IsUnique: isUnique,
|
|
Status: job.PendingStatus.String(),
|
|
EnqueueTime: j.EnqueuedAt,
|
|
UpdateTime: time.Now().Unix(),
|
|
RefLink: fmt.Sprintf("/api/v1/jobs/%s", j.ID),
|
|
Parameters: jobParameters,
|
|
WebHookURL: webHook,
|
|
},
|
|
}
|
|
}
|