From 1f481e492c82964cd76b16963e979a397efa6775 Mon Sep 17 00:00:00 2001 From: Steven Zou Date: Thu, 18 Apr 2019 16:02:49 +0800 Subject: [PATCH] Refactor job servcie primary logic to fix related bugs Signed-off-by: Steven Zou --- src/chartserver/handler_repo.go | 4 +- src/common/dao/dao_test.go | 4 +- src/common/dao/replication_job.go | 4 +- src/common/dao/watch_item.go | 2 +- src/common/job/models/models.go | 2 +- src/common/utils/uaa/client.go | 2 +- src/core/config/config.go | 2 +- src/jobservice/api/authenticator.go | 2 +- src/jobservice/api/handler.go | 65 +- src/jobservice/api/handler_test.go | 2 +- src/jobservice/api/server.go | 50 +- .../{pool/mem_pool.go => common/query/q.go} | 8 +- src/jobservice/{utils => common/rds}/keys.go | 48 +- src/jobservice/common/rds/utils.go | 120 +++ src/jobservice/{ => common}/utils/utils.go | 55 +- src/jobservice/config.yml | 9 +- src/jobservice/config/config.go | 18 +- src/jobservice/config_test.yml | 6 +- src/jobservice/core/controller.go | 184 ++-- src/jobservice/core/interface.go | 38 +- src/jobservice/env/context.go | 5 +- src/jobservice/errs/errors.go | 30 +- src/jobservice/hook/hook_agent.go | 328 +++++++ src/jobservice/hook/hook_agent_test.go | 177 ++++ src/jobservice/hook/hook_client.go | 132 +++ .../{opm => hook}/hook_client_test.go | 31 +- .../{env/job_context.go => job/context.go} | 35 +- src/jobservice/job/impl/context.go | 133 +-- src/jobservice/job/impl/default_context.go | 127 +-- .../job/impl/default_context_test.go | 2 +- .../job/impl/{demo_job.go => sample/job.go} | 59 +- src/jobservice/job/interface.go | 24 +- src/jobservice/job/job_status.go | 32 - src/jobservice/job/{job_kinds.go => kinds.go} | 12 +- src/jobservice/job/known_jobs.go | 35 + src/jobservice/job/models.go | 126 +++ src/jobservice/job/op_cmd.go | 28 + src/jobservice/job/status.go | 82 ++ src/jobservice/job/tracker.go | 598 +++++++++++++ .../job/{impl/known_jobs.go => web_hook.go} | 10 +- src/jobservice/lcm/controller.go | 77 ++ 
src/jobservice/lcm/controller_test.go | 73 ++ src/jobservice/logger/getter/file_getter.go | 2 +- src/jobservice/main.go | 14 +- src/jobservice/models/models.go | 99 --- src/jobservice/opm/hook_client.go | 107 --- src/jobservice/opm/hook_store.go | 69 -- src/jobservice/opm/job_stats_mgr.go | 137 --- src/jobservice/opm/op_commands.go | 178 ---- src/jobservice/opm/redis_job_stats_mgr.go | 826 ------------------ .../opm/redis_job_stats_mgr_test.go | 342 -------- src/jobservice/period/basic_scheduler.go | 277 ++++++ ...eduler_test.go => basic_scheduler_test.go} | 6 +- src/jobservice/period/enqueuer.go | 396 +++++---- src/jobservice/period/enqueuer_test.go | 6 +- src/jobservice/period/interface.go | 71 -- src/jobservice/period/job_policy.go | 118 --- src/jobservice/period/policy_store.go | 321 +++++++ ...ob_policy_test.go => policy_store_test.go} | 0 src/jobservice/period/redis_scheduler.go | 349 -------- src/jobservice/period/scheduler.go | 47 + src/jobservice/period/sweeper.go | 109 --- src/jobservice/period/sweeper_test.go | 60 -- src/jobservice/pool/interface.go | 146 ---- src/jobservice/pool/message_server.go | 203 ----- src/jobservice/pool/message_server_test.go | 211 ----- src/jobservice/pool/redis_job_wrapper.go | 267 ------ src/jobservice/pool/redis_pool.go | 739 ---------------- src/jobservice/runner/redis.go | 190 ++++ .../redis_test.go} | 6 +- .../{pool/runner.go => runner/wrapper.go} | 2 +- src/jobservice/runtime/bootstrap.go | 217 ++--- src/jobservice/utils/gocarft_work.go | 75 -- .../{pool => worker/cworker}/de_duplicator.go | 34 +- .../cworker}/de_duplicator_test.go | 8 +- src/jobservice/worker/cworker/redis_pool.go | 475 ++++++++++ .../cworker}/redis_pool_test.go | 34 +- src/jobservice/worker/interface.go | 127 +++ .../hook_store_test.go => worker/models.go} | 29 +- 79 files changed, 3982 insertions(+), 5096 deletions(-) rename src/jobservice/{pool/mem_pool.go => common/query/q.go} (84%) rename src/jobservice/{utils => common/rds}/keys.go (66%) create 
mode 100644 src/jobservice/common/rds/utils.go rename src/jobservice/{ => common}/utils/utils.go (76%) create mode 100644 src/jobservice/hook/hook_agent.go create mode 100644 src/jobservice/hook/hook_agent_test.go create mode 100644 src/jobservice/hook/hook_client.go rename src/jobservice/{opm => hook}/hook_client_test.go (66%) rename src/jobservice/{env/job_context.go => job/context.go} (63%) rename src/jobservice/job/impl/{demo_job.go => sample/job.go} (58%) delete mode 100644 src/jobservice/job/job_status.go rename src/jobservice/job/{job_kinds.go => kinds.go} (73%) create mode 100644 src/jobservice/job/known_jobs.go create mode 100644 src/jobservice/job/models.go create mode 100644 src/jobservice/job/op_cmd.go create mode 100644 src/jobservice/job/status.go create mode 100644 src/jobservice/job/tracker.go rename src/jobservice/job/{impl/known_jobs.go => web_hook.go} (79%) create mode 100644 src/jobservice/lcm/controller.go create mode 100644 src/jobservice/lcm/controller_test.go delete mode 100644 src/jobservice/models/models.go delete mode 100644 src/jobservice/opm/hook_client.go delete mode 100644 src/jobservice/opm/hook_store.go delete mode 100644 src/jobservice/opm/job_stats_mgr.go delete mode 100644 src/jobservice/opm/op_commands.go delete mode 100644 src/jobservice/opm/redis_job_stats_mgr.go delete mode 100644 src/jobservice/opm/redis_job_stats_mgr_test.go create mode 100644 src/jobservice/period/basic_scheduler.go rename src/jobservice/period/{redis_scheduler_test.go => basic_scheduler_test.go} (92%) delete mode 100644 src/jobservice/period/interface.go delete mode 100644 src/jobservice/period/job_policy.go create mode 100644 src/jobservice/period/policy_store.go rename src/jobservice/period/{job_policy_test.go => policy_store_test.go} (100%) delete mode 100644 src/jobservice/period/redis_scheduler.go create mode 100644 src/jobservice/period/scheduler.go delete mode 100644 src/jobservice/period/sweeper.go delete mode 100644 
src/jobservice/period/sweeper_test.go delete mode 100644 src/jobservice/pool/interface.go delete mode 100644 src/jobservice/pool/message_server.go delete mode 100644 src/jobservice/pool/message_server_test.go delete mode 100644 src/jobservice/pool/redis_job_wrapper.go delete mode 100644 src/jobservice/pool/redis_pool.go create mode 100644 src/jobservice/runner/redis.go rename src/jobservice/{pool/redis_job_wrapper_test.go => runner/redis_test.go} (96%) rename src/jobservice/{pool/runner.go => runner/wrapper.go} (98%) delete mode 100644 src/jobservice/utils/gocarft_work.go rename src/jobservice/{pool => worker/cworker}/de_duplicator.go (70%) rename src/jobservice/{pool => worker/cworker}/de_duplicator_test.go (65%) create mode 100644 src/jobservice/worker/cworker/redis_pool.go rename src/jobservice/{pool => worker/cworker}/redis_pool_test.go (92%) create mode 100644 src/jobservice/worker/interface.go rename src/jobservice/{opm/hook_store_test.go => worker/models.go} (53%) diff --git a/src/chartserver/handler_repo.go b/src/chartserver/handler_repo.go index 01f3b6f77..13ea46a40 100644 --- a/src/chartserver/handler_repo.go +++ b/src/chartserver/handler_repo.go @@ -65,7 +65,7 @@ func (c *Controller) getIndexYaml(namespaces []string) (*helm_repo.IndexFile, er // Retrieve index.yaml for repositories workerPool := make(chan struct{}, initialItemCount) - // Add initial tokens to the pool + // Add initial tokens to the worker for i := 0; i < initialItemCount; i++ { workerPool <- struct{}{} } @@ -103,7 +103,7 @@ LOOP: go func(ns string) { defer func() { waitGroup.Done() // done - // Return the worker back to the pool + // Return the worker back to the worker workerPool <- struct{}{} }() diff --git a/src/common/dao/dao_test.go b/src/common/dao/dao_test.go index fbd9f7bf6..6b6ddbcf1 100644 --- a/src/common/dao/dao_test.go +++ b/src/common/dao/dao_test.go @@ -905,8 +905,8 @@ func TestAddRepJob(t *testing.T) { return } if j.Status != models.JobPending || j.Repository != 
"library/ubuntu" || j.PolicyID != policyID || j.Operation != "transfer" || len(j.TagList) != 3 { - t.Errorf("Expected data of job, id: %d, Status: %s, Repository: library/ubuntu, PolicyID: %d, Operation: transfer, taglist length 3"+ - "but in returned data:, Status: %s, Repository: %s, Operation: %s, PolicyID: %d, TagList: %v", id, models.JobPending, policyID, j.Status, j.Repository, j.Operation, j.PolicyID, j.TagList) + t.Errorf("Expected data of job, id: %d, Status: %s, Repository: library/ubuntu, ID: %d, Operation: transfer, taglist length 3"+ + "but in returned data:, Status: %s, Repository: %s, Operation: %s, ID: %d, TagList: %v", id, models.JobPending, policyID, j.Status, j.Repository, j.Operation, j.PolicyID, j.TagList) return } } diff --git a/src/common/dao/replication_job.go b/src/common/dao/replication_job.go index e5cd2b109..79a29b3f7 100644 --- a/src/common/dao/replication_job.go +++ b/src/common/dao/replication_job.go @@ -350,7 +350,7 @@ func repJobQueryConditions(query ...*models.RepJobQuery) orm.QuerySeter { q := query[0] if q.PolicyID != 0 { - qs = qs.Filter("PolicyID", q.PolicyID) + qs = qs.Filter("ID", q.PolicyID) } if len(q.OpUUID) > 0 { qs = qs.Filter("OpUUID__exact", q.OpUUID) @@ -382,7 +382,7 @@ func DeleteRepJob(id int64) error { // DeleteRepJobs deletes replication jobs by policy ID func DeleteRepJobs(policyID int64) error { - _, err := GetOrmer().QueryTable(&models.RepJob{}).Filter("PolicyID", policyID).Delete() + _, err := GetOrmer().QueryTable(&models.RepJob{}).Filter("ID", policyID).Delete() return err } diff --git a/src/common/dao/watch_item.go b/src/common/dao/watch_item.go index 53ea74d3a..e70b285c3 100644 --- a/src/common/dao/watch_item.go +++ b/src/common/dao/watch_item.go @@ -52,7 +52,7 @@ func (d *DatabaseWatchItemDAO) Add(item *models.WatchItem) (int64, error) { // DeleteByPolicyID deletes the WatchItem specified by policy ID func (d *DatabaseWatchItemDAO) DeleteByPolicyID(policyID int64) error { - _, err := 
GetOrmer().QueryTable(&models.WatchItem{}).Filter("PolicyID", policyID).Delete() + _, err := GetOrmer().QueryTable(&models.WatchItem{}).Filter("ID", policyID).Delete() return err } diff --git a/src/common/job/models/models.go b/src/common/job/models/models.go index 15a5e7a25..d11ac6826 100644 --- a/src/common/job/models/models.go +++ b/src/common/job/models/models.go @@ -54,7 +54,7 @@ type JobPoolStats struct { Pools []*JobPoolStatsData `json:"worker_pools"` } -// JobPoolStatsData represent the healthy and status of the worker pool. +// JobPoolStatsData represent the healthy and status of the worker worker. type JobPoolStatsData struct { WorkerPoolID string `json:"worker_pool_id"` StartedAt int64 `json:"started_at"` diff --git a/src/common/utils/uaa/client.go b/src/common/utils/uaa/client.go index 83f76d22e..d4b4c5dcd 100644 --- a/src/common/utils/uaa/client.go +++ b/src/common/utils/uaa/client.go @@ -192,7 +192,7 @@ func (dc *defaultClient) UpdateConfig(cfg *ClientConfig) error { pool := x509.NewCertPool() // Do not throw error if the certificate is malformed, so we can put a place holder. 
if ok := pool.AppendCertsFromPEM(content); !ok { - log.Warningf("Failed to append certificate to cert pool, cert path: %s", cfg.CARootPath) + log.Warningf("Failed to append certificate to cert worker, cert path: %s", cfg.CARootPath) } else { tc.RootCAs = pool } diff --git a/src/core/config/config.go b/src/core/config/config.go index 0b6049091..f490d8957 100644 --- a/src/core/config/config.go +++ b/src/core/config/config.go @@ -113,7 +113,7 @@ func initProjectManager() error { } pool := x509.NewCertPool() if ok := pool.AppendCertsFromPEM(content); !ok { - return fmt.Errorf("failed to append cert content into cert pool") + return fmt.Errorf("failed to append cert content into cert worker") } AdmiralClient = &http.Client{ Transport: &http.Transport{ diff --git a/src/jobservice/api/authenticator.go b/src/jobservice/api/authenticator.go index 2267ddcea..d75413683 100644 --- a/src/jobservice/api/authenticator.go +++ b/src/jobservice/api/authenticator.go @@ -20,8 +20,8 @@ import ( "net/http" "strings" + "github.com/goharbor/harbor/src/jobservice/common/utils" "github.com/goharbor/harbor/src/jobservice/config" - "github.com/goharbor/harbor/src/jobservice/utils" ) const ( diff --git a/src/jobservice/api/handler.go b/src/jobservice/api/handler.go index 5f495f4dc..654c22b3b 100644 --- a/src/jobservice/api/handler.go +++ b/src/jobservice/api/handler.go @@ -26,9 +26,10 @@ import ( "github.com/goharbor/harbor/src/jobservice/core" "github.com/goharbor/harbor/src/jobservice/errs" + "github.com/goharbor/harbor/src/jobservice/job" + "github.com/goharbor/harbor/src/jobservice/lcm" "github.com/goharbor/harbor/src/jobservice/logger" - "github.com/goharbor/harbor/src/jobservice/models" - "github.com/goharbor/harbor/src/jobservice/opm" + "github.com/pkg/errors" ) // Handler defines approaches to handle the http requests. 
@@ -74,8 +75,8 @@ func (dh *DefaultHandler) HandleLaunchJobReq(w http.ResponseWriter, req *http.Re } // unmarshal data - jobReq := models.JobRequest{} - if err = json.Unmarshal(data, &jobReq); err != nil { + jobReq := &job.Request{} + if err = json.Unmarshal(data, jobReq); err != nil { dh.handleError(w, req, http.StatusInternalServerError, errs.HandleJSONDataError(err)) return } @@ -136,48 +137,28 @@ func (dh *DefaultHandler) HandleJobActionReq(w http.ResponseWriter, req *http.Re } // unmarshal data - jobActionReq := models.JobActionRequest{} - if err = json.Unmarshal(data, &jobActionReq); err != nil { + jobActionReq := &job.ActionRequest{} + if err = json.Unmarshal(data, jobActionReq); err != nil { dh.handleError(w, req, http.StatusInternalServerError, errs.HandleJSONDataError(err)) return } - switch jobActionReq.Action { - case opm.CtlCommandStop: - if err := dh.controller.StopJob(jobID); err != nil { - code := http.StatusInternalServerError - backErr := errs.StopJobError(err) - if errs.IsObjectNotFoundError(err) { - code = http.StatusNotFound - backErr = err - } - dh.handleError(w, req, code, backErr) - return + // Only support stop command now + cmd := lcm.OPCommand(jobActionReq.Action) + if !cmd.IsStop() { + dh.handleError(w, req, http.StatusNotImplemented, errs.UnknownActionNameError(fmt.Errorf("command: %s", jobActionReq.Action))) + return + } + + // Stop job + if err := dh.controller.StopJob(jobID); err != nil { + code := http.StatusInternalServerError + backErr := errs.StopJobError(err) + if errs.IsObjectNotFoundError(err) { + code = http.StatusNotFound + backErr = err } - case opm.CtlCommandCancel: - if err := dh.controller.CancelJob(jobID); err != nil { - code := http.StatusInternalServerError - backErr := errs.CancelJobError(err) - if errs.IsObjectNotFoundError(err) { - code = http.StatusNotFound - backErr = err - } - dh.handleError(w, req, code, backErr) - return - } - case opm.CtlCommandRetry: - if err := dh.controller.RetryJob(jobID); err != nil { - 
code := http.StatusInternalServerError - backErr := errs.RetryJobError(err) - if errs.IsObjectNotFoundError(err) { - code = http.StatusNotFound - backErr = err - } - dh.handleError(w, req, code, backErr) - return - } - default: - dh.handleError(w, req, http.StatusNotImplemented, errs.UnknownActionNameError(fmt.Errorf("%s", jobID))) + dh.handleError(w, req, code, backErr) return } @@ -211,7 +192,7 @@ func (dh *DefaultHandler) HandleJobLogReq(w http.ResponseWriter, req *http.Reque jobID := vars["job_id"] if strings.Contains(jobID, "..") || strings.ContainsRune(jobID, os.PathSeparator) { - dh.handleError(w, req, http.StatusBadRequest, fmt.Errorf("Invalid Job ID: %s", jobID)) + dh.handleError(w, req, http.StatusBadRequest, errors.Errorf("invalid Job ID: %s", jobID)) return } diff --git a/src/jobservice/api/handler_test.go b/src/jobservice/api/handler_test.go index 1bd5fb933..54516bef5 100644 --- a/src/jobservice/api/handler_test.go +++ b/src/jobservice/api/handler_test.go @@ -232,7 +232,7 @@ func TestCheckStatus(t *testing.T) { } if poolStats.Pools[0].WorkerPoolID != "fake_pool_ID" { - t.Fatalf("expect pool ID 'fake_pool_ID' but got '%s'", poolStats.Pools[0].WorkerPoolID) + t.Fatalf("expect worker ID 'fake_pool_ID' but got '%s'", poolStats.Pools[0].WorkerPoolID) } server.Stop() diff --git a/src/jobservice/api/server.go b/src/jobservice/api/server.go index a808fe7e0..c926a327c 100644 --- a/src/jobservice/api/server.go +++ b/src/jobservice/api/server.go @@ -15,15 +15,13 @@ package api import ( - "context" "crypto/tls" "fmt" "net/http" "time" + "context" "github.com/goharbor/harbor/src/jobservice/config" - "github.com/goharbor/harbor/src/jobservice/env" - "github.com/goharbor/harbor/src/jobservice/logger" ) // Server serves the http requests. @@ -38,7 +36,7 @@ type Server struct { config ServerConfig // The context - context *env.Context + context context.Context } // ServerConfig contains the configurations of Server. 
@@ -57,7 +55,7 @@ type ServerConfig struct { } // NewServer is constructor of Server. -func NewServer(ctx *env.Context, router Router, cfg ServerConfig) *Server { +func NewServer(ctx context.Context, router Router, cfg ServerConfig) *Server { apiServer := &Server{ router: router, config: cfg, @@ -96,39 +94,19 @@ func NewServer(ctx *env.Context, router Router, cfg ServerConfig) *Server { } // Start the server to serve requests. -func (s *Server) Start() { - s.context.WG.Add(1) - - go func() { - var err error - defer func() { - s.context.WG.Done() - logger.Infof("API server is gracefully shutdown") - }() - - if s.config.Protocol == config.JobServiceProtocolHTTPS { - err = s.httpServer.ListenAndServeTLS(s.config.Cert, s.config.Key) - } else { - err = s.httpServer.ListenAndServe() - } - - if err != nil { - s.context.ErrorChan <- err - } - }() +// Blocking call +func (s *Server) Start() error { + if s.config.Protocol == config.JobServiceProtocolHTTPS { + return s.httpServer.ListenAndServeTLS(s.config.Cert, s.config.Key) + } else { + return s.httpServer.ListenAndServe() + } } // Stop server gracefully. 
-func (s *Server) Stop() { - go func() { - defer func() { - logger.Info("Stop API server done!") - }() - shutDownCtx, cancel := context.WithTimeout(s.context.SystemContext, 10*time.Second) - defer cancel() +func (s *Server) Stop() error { + shutDownCtx, cancel := context.WithTimeout(s.context, 15*time.Second) + defer cancel() - if err := s.httpServer.Shutdown(shutDownCtx); err != nil { - logger.Errorf("Shutdown API server failed with error: %s\n", err) - } - }() + return s.httpServer.Shutdown(shutDownCtx) } diff --git a/src/jobservice/pool/mem_pool.go b/src/jobservice/common/query/q.go similarity index 84% rename from src/jobservice/pool/mem_pool.go rename to src/jobservice/common/query/q.go index fbc7939a9..45884f7e9 100644 --- a/src/jobservice/pool/mem_pool.go +++ b/src/jobservice/common/query/q.go @@ -12,4 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. -package pool +package query + +// Parameter for getting executions +type Parameter struct { + PageNumber uint + PageSize uint +} diff --git a/src/jobservice/utils/keys.go b/src/jobservice/common/rds/keys.go similarity index 66% rename from src/jobservice/utils/keys.go rename to src/jobservice/common/rds/keys.go index b216c75a8..fde3e745a 100644 --- a/src/jobservice/utils/keys.go +++ b/src/jobservice/common/rds/keys.go @@ -12,27 +12,33 @@ // See the License for the specific language governing permissions and // limitations under the License. -package utils +package rds import ( "fmt" - "math/rand" "strings" - "time" ) -func generateScore() int64 { - ticks := time.Now().Unix() - rand := rand.New(rand.NewSource(ticks)) - return ticks + rand.Int63n(1000) // Double confirm to avoid potential duplications +// Functions defined here are mainly from dep lib "github.com/gocraft/work". +// Only for compatible + +// RedisNamespacePrefix ... Same with 'KeyNamespacePrefix', only for compatibility. 
+func RedisNamespacePrefix(namespace string) string { + return KeyNamespacePrefix(namespace) } -// MakePeriodicPolicyUUID returns an UUID for the periodic policy. -func MakePeriodicPolicyUUID() (string, int64) { - score := generateScore() - return MakeIdentifier(), score +// RedisKeyScheduled returns key of scheduled job. +func RedisKeyScheduled(namespace string) string { + return RedisNamespacePrefix(namespace) + "scheduled" } +// RedisKeyLastPeriodicEnqueue returns key of timestamp if last periodic enqueue. +func RedisKeyLastPeriodicEnqueue(namespace string) string { + return RedisNamespacePrefix(namespace) + "last_periodic_enqueue" +} + +//---------------------------------------------------------- + // KeyNamespacePrefix returns the based key based on the namespace. func KeyNamespacePrefix(namespace string) string { ns := strings.TrimSpace(namespace) @@ -53,16 +59,6 @@ func KeyPeriodicPolicy(namespace string) string { return fmt.Sprintf("%s:%s", KeyPeriod(namespace), "policies") } -// KeyPeriodicPolicyScore returns the key of policy key and score mapping. -func KeyPeriodicPolicyScore(namespace string) string { - return fmt.Sprintf("%s:%s", KeyPeriod(namespace), "key_score") -} - -// KeyPeriodicJobTimeSlots returns the key of the time slots of scheduled jobs. -func KeyPeriodicJobTimeSlots(namespace string) string { - return fmt.Sprintf("%s:%s", KeyPeriod(namespace), "scheduled_slots") -} - // KeyPeriodicNotification returns the key of periodic pub/sub channel. func KeyPeriodicNotification(namespace string) string { return fmt.Sprintf("%s:%s", KeyPeriodicPolicy(namespace), "notifications") @@ -78,12 +74,12 @@ func KeyJobStats(namespace string, jobID string) string { return fmt.Sprintf("%s%s:%s", KeyNamespacePrefix(namespace), "job_stats", jobID) } -// KeyJobCtlCommands returns the key for publishing ctl commands like 'stop' etc. 
-func KeyJobCtlCommands(namespace string, jobID string) string { - return fmt.Sprintf("%s%s:%s", KeyNamespacePrefix(namespace), "ctl_commands", jobID) -} - // KeyUpstreamJobAndExecutions returns the key for persisting executions. func KeyUpstreamJobAndExecutions(namespace, upstreamJobID string) string { return fmt.Sprintf("%s%s:%s", KeyNamespacePrefix(namespace), "executions", upstreamJobID) } + +// KeyHookEventRetryQueue returns the key of hook event retrying queue +func KeyHookEventRetryQueue(namespace string) string { + return fmt.Sprintf("%s:%s", KeyNamespacePrefix(namespace), "hook_events") +} diff --git a/src/jobservice/common/rds/utils.go b/src/jobservice/common/rds/utils.go new file mode 100644 index 000000000..d73388a33 --- /dev/null +++ b/src/jobservice/common/rds/utils.go @@ -0,0 +1,120 @@ +package rds + +import ( + "fmt" + "github.com/garyburd/redigo/redis" + "github.com/goharbor/harbor/src/jobservice/common/utils" + "github.com/pkg/errors" + "time" +) + +// HmSet sets the properties of hash map +func HmSet(conn redis.Conn, key string, fieldAndValues ...interface{}) error { + if conn == nil { + return errors.New("nil redis connection") + } + + if utils.IsEmptyStr(key) { + errors.New("no key specified to do HMSET") + } + + if len(fieldAndValues) == 0 { + errors.New("no properties specified to do HMSET") + } + + args := make([]interface{}, 0, len(fieldAndValues)+2) + + args = append(args, key) + args = append(args, fieldAndValues...) + args = append(args, "update_time", time.Now().Unix()) // Add update timestamp + + _, err := conn.Do("HMSET", args...) 
+ + return err +} + +// HmGet gets values of multiple fields +// Values have same order with the provided fields +func HmGet(conn redis.Conn, key string, fields ...interface{}) ([]interface{}, error) { + if conn == nil { + return nil, errors.New("nil redis connection") + } + + if utils.IsEmptyStr(key) { + return nil, errors.New("no key specified to do HMGET") + } + + if len(fields) == 0 { + return nil, errors.New("no fields specified to do HMGET") + } + + args := make([]interface{}, 0, len(fields)+1) + args = append(args, key) + args = append(args, fields...) + + return redis.Values(conn.Do("HMGET", args...)) +} + +// JobScore represents the data item with score in the redis db. +type JobScore struct { + JobBytes []byte + Score int64 +} + +// GetZsetByScore get the items from the zset filtered by the specified score scope. +func GetZsetByScore(conn redis.Conn, key string, scores []int64) ([]JobScore, error) { + if conn == nil { + return nil, errors.New("nil redis conn when getting zset by score") + } + + if utils.IsEmptyStr(key) { + return nil, errors.New("missing key when getting zset by score") + } + + if len(scores) < 2 { + return nil, errors.New("bad arguments: not enough scope scores provided") + } + + values, err := redis.Values(conn.Do("ZRANGEBYSCORE", key, scores[0], scores[1], "WITHSCORES")) + if err != nil { + return nil, err + } + + var jobsWithScores []JobScore + + if err := redis.ScanSlice(values, &jobsWithScores); err != nil { + return nil, err + } + + return jobsWithScores, nil +} + +// AcquireLock acquires a redis lock with specified expired time +func AcquireLock(conn redis.Conn, lockerKey string, lockerID string, expireTime int64) error { + args := []interface{}{lockerKey, lockerID, "NX", "EX", expireTime} + res, err := conn.Do("SET", args...) 
+ if err != nil { + return err + } + // Existing, the value can not be override + if res == nil { + return fmt.Errorf("key %s is already set with value %v", lockerKey, lockerID) + } + + return nil +} + +// ReleaseLock releases the acquired lock +func ReleaseLock(conn redis.Conn, lockerKey string, lockerID string) error { + theID, err := redis.String(conn.Do("GET", lockerKey)) + if err != nil { + return err + } + + if theID == lockerID { + _, err := conn.Do("DEL", lockerKey) + return err + } + + return errors.New("locker ID mismatch") +} diff --git a/src/jobservice/utils/utils.go b/src/jobservice/common/utils/utils.go similarity index 76% rename from src/jobservice/utils/utils.go rename to src/jobservice/common/utils/utils.go index c46315b0e..8d3098165 100644 --- a/src/jobservice/utils/utils.go +++ b/src/jobservice/common/utils/utils.go @@ -16,24 +16,37 @@ package utils import ( - "errors" + "crypto/rand" + "encoding/json" "fmt" + "github.com/gocraft/work" + "io" "net/url" "os" "strconv" "strings" - - "github.com/gomodule/redigo/redis" ) // CtlContextKey is used to keep controller reference in the system context type CtlContextKey string +// NodeIDContextKey is used to keep node ID in the system context +type NodeIDContextKey string + const ( - // CtlKeyOfLaunchJobFunc is context key to keep the ctl launch job func - CtlKeyOfLaunchJobFunc CtlContextKey = "controller_launch_job_func" + NodeID NodeIDContextKey = "node_id" ) +// MakeIdentifier creates uuid for job. +func MakeIdentifier() string { + b := make([]byte, 12) + _, err := io.ReadFull(rand.Reader, b) + if err != nil { + return "" + } + return fmt.Sprintf("%x", b) +} + // IsEmptyStr check if the specified str is empty (len ==0) after triming prefix and suffix spaces. 
func IsEmptyStr(str string) bool { return len(strings.TrimSpace(str)) == 0 @@ -127,31 +140,15 @@ func TranslateRedisAddress(commaFormat string) (string, bool) { return strings.Join(urlParts, ""), true } -// JobScore represents the data item with score in the redis db. -type JobScore struct { - JobBytes []byte - Score int64 +// SerializeJob encodes work.Job to json data. +func SerializeJob(job *work.Job) ([]byte, error) { + return json.Marshal(job) } -// GetZsetByScore get the items from the zset filtered by the specified score scope. -func GetZsetByScore(pool *redis.Pool, key string, scores []int64) ([]JobScore, error) { - if pool == nil || IsEmptyStr(key) || len(scores) < 2 { - return nil, errors.New("bad arguments") - } +// DeSerializeJob decodes bytes to ptr of work.Job. +func DeSerializeJob(jobBytes []byte) (*work.Job, error) { + var j work.Job + err := json.Unmarshal(jobBytes, &j) - conn := pool.Get() - defer conn.Close() - - values, err := redis.Values(conn.Do("ZRANGEBYSCORE", key, scores[0], scores[1], "WITHSCORES")) - if err != nil { - return nil, err - } - - var jobsWithScores []JobScore - - if err := redis.ScanSlice(values, &jobsWithScores); err != nil { - return nil, err - } - - return jobsWithScores, nil + return &j, err } diff --git a/src/jobservice/config.yml b/src/jobservice/config.yml index fa0c5a7cf..562317698 100644 --- a/src/jobservice/config.yml +++ b/src/jobservice/config.yml @@ -10,7 +10,7 @@ https_config: #Server listening port port: 9443 -#Worker pool +#Worker worker worker_pool: #Worker concurrency workers: 10 @@ -29,17 +29,14 @@ job_loggers: - name: "FILE" level: "DEBUG" settings: # Customized settings of logger - base_dir: "/tmp/job_logs" + base_dir: "/Users/szou/tmp/job_logs" sweeper: duration: 1 #days settings: # Customized settings of sweeper - work_dir: "/tmp/job_logs" + work_dir: "/Users/szou/tmp/job_logs" #Loggers for the job service loggers: - name: "STD_OUTPUT" # Same with above level: "DEBUG" -#Admin server endpoint 
-admin_server: "http://adminserver:9010/" - diff --git a/src/jobservice/config/config.go b/src/jobservice/config/config.go index 5e128cdd3..2753ce49f 100644 --- a/src/jobservice/config/config.go +++ b/src/jobservice/config/config.go @@ -23,7 +23,7 @@ import ( "strconv" "strings" - "github.com/goharbor/harbor/src/jobservice/utils" + "github.com/goharbor/harbor/src/jobservice/common/utils" yaml "gopkg.in/yaml.v2" ) @@ -68,7 +68,7 @@ type Configuration struct { // Additional config when using https HTTPSConfig *HTTPSConfig `yaml:"https_config,omitempty"` - // Configurations of worker pool + // Configurations of worker worker PoolConfig *PoolConfig `yaml:"worker_pool,omitempty"` // Job logger configurations @@ -84,13 +84,13 @@ type HTTPSConfig struct { Key string `yaml:"key"` } -// RedisPoolConfig keeps redis pool info. +// RedisPoolConfig keeps redis worker info. type RedisPoolConfig struct { RedisURL string `yaml:"redis_url"` Namespace string `yaml:"namespace"` } -// PoolConfig keeps worker pool configurations. +// PoolConfig keeps worker worker configurations. 
type PoolConfig struct { // Worker concurrency WorkerCount uint `yaml:"workers"` @@ -274,20 +274,20 @@ func (c *Configuration) validate() error { } if c.PoolConfig == nil { - return errors.New("no worker pool is configured") + return errors.New("no worker worker is configured") } if c.PoolConfig.Backend != JobServicePoolBackendRedis { - return fmt.Errorf("worker pool backend %s does not support", c.PoolConfig.Backend) + return fmt.Errorf("worker worker backend %s does not support", c.PoolConfig.Backend) } // When backend is redis if c.PoolConfig.Backend == JobServicePoolBackendRedis { if c.PoolConfig.RedisPoolCfg == nil { - return fmt.Errorf("redis pool must be configured when backend is set to '%s'", c.PoolConfig.Backend) + return fmt.Errorf("redis worker must be configured when backend is set to '%s'", c.PoolConfig.Backend) } if utils.IsEmptyStr(c.PoolConfig.RedisPoolCfg.RedisURL) { - return errors.New("URL of redis pool is empty") + return errors.New("URL of redis worker is empty") } if !strings.HasPrefix(c.PoolConfig.RedisPoolCfg.RedisURL, redisSchema) { @@ -299,7 +299,7 @@ func (c *Configuration) validate() error { } if utils.IsEmptyStr(c.PoolConfig.RedisPoolCfg.Namespace) { - return errors.New("namespace of redis pool is required") + return errors.New("namespace of redis worker is required") } } diff --git a/src/jobservice/config_test.yml b/src/jobservice/config_test.yml index 057a35934..a988e3882 100644 --- a/src/jobservice/config_test.yml +++ b/src/jobservice/config_test.yml @@ -10,7 +10,7 @@ https_config: #Server listening port port: 9444 -#Worker pool +#Worker worker worker_pool: #Worker concurrency workers: 10 @@ -39,7 +39,3 @@ job_loggers: loggers: - name: "STD_OUTPUT" # Same with above level: "DEBUG" - -#Admin server endpoint -admin_server: "http://127.0.0.1:8888" - diff --git a/src/jobservice/core/controller.go b/src/jobservice/core/controller.go index 71fce9d42..df4da649d 100644 --- a/src/jobservice/core/controller.go +++ 
b/src/jobservice/core/controller.go @@ -15,127 +15,117 @@ package core import ( - "errors" "fmt" + "github.com/pkg/errors" "github.com/goharbor/harbor/src/jobservice/logger" + "github.com/goharbor/harbor/src/jobservice/common/query" + "github.com/goharbor/harbor/src/jobservice/common/utils" "github.com/goharbor/harbor/src/jobservice/job" - "github.com/goharbor/harbor/src/jobservice/models" - "github.com/goharbor/harbor/src/jobservice/pool" - "github.com/goharbor/harbor/src/jobservice/utils" + "github.com/goharbor/harbor/src/jobservice/lcm" + "github.com/goharbor/harbor/src/jobservice/worker" "github.com/robfig/cron" ) -const ( - hookActivated = "activated" - hookDeactivated = "error" -) - -// Controller implement the core interface and provides related job handle methods. -// Controller will coordinate the lower components to complete the process as a commander role. -type Controller struct { - // Refer the backend pool - backendPool pool.Interface +// basicController implement the core interface and provides related job handle methods. +// basicController will coordinate the lower components to complete the process as a commander role. +type basicController struct { + // Refer the backend worker + backendWorker worker.Interface + // Refer the job life cycle management controller + ctl lcm.Controller } -// NewController is constructor of Controller. -func NewController(backendPool pool.Interface) *Controller { - return &Controller{ - backendPool: backendPool, +// NewController is constructor of basicController. +func NewController(backendWorker worker.Interface, ctl lcm.Controller) Interface { + return &basicController{ + backendWorker: backendWorker, + ctl: ctl, } } // LaunchJob is implementation of same method in core interface. 
-func (c *Controller) LaunchJob(req models.JobRequest) (models.JobStats, error) { +func (bc *basicController) LaunchJob(req *job.Request) (res *job.Stats, err error) { if err := validJobReq(req); err != nil { - return models.JobStats{}, err + return nil, err } // Validate job name - jobType, isKnownJob := c.backendPool.IsKnownJob(req.Job.Name) + jobType, isKnownJob := bc.backendWorker.IsKnownJob(req.Job.Name) if !isKnownJob { - return models.JobStats{}, fmt.Errorf("job with name '%s' is unknown", req.Job.Name) + return nil, errors.Errorf("job with name '%s' is unknown", req.Job.Name) } // Validate parameters - if err := c.backendPool.ValidateJobParameters(jobType, req.Job.Parameters); err != nil { - return models.JobStats{}, err + if err := bc.backendWorker.ValidateJobParameters(jobType, req.Job.Parameters); err != nil { + return nil, err } // Enqueue job regarding of the kind - var ( - res models.JobStats - err error - ) switch req.Job.Metadata.JobKind { - case job.JobKindScheduled: - res, err = c.backendPool.Schedule( + case job.KindScheduled: + res, err = bc.backendWorker.Schedule( req.Job.Name, req.Job.Parameters, req.Job.Metadata.ScheduleDelay, - req.Job.Metadata.IsUnique) - case job.JobKindPeriodic: - res, err = c.backendPool.PeriodicallyEnqueue( + req.Job.Metadata.IsUnique, + req.Job.StatusHook, + ) + case job.KindPeriodic: + res, err = bc.backendWorker.PeriodicallyEnqueue( req.Job.Name, req.Job.Parameters, - req.Job.Metadata.Cron) + req.Job.Metadata.Cron, + req.Job.Metadata.IsUnique, + req.Job.StatusHook, + ) default: - res, err = c.backendPool.Enqueue(req.Job.Name, req.Job.Parameters, req.Job.Metadata.IsUnique) + res, err = bc.backendWorker.Enqueue( + req.Job.Name, + req.Job.Parameters, + req.Job.Metadata.IsUnique, + req.Job.StatusHook, + ) } - // Register status hook? 
- if err == nil { - if !utils.IsEmptyStr(req.Job.StatusHook) { - if err := c.backendPool.RegisterHook(res.Stats.JobID, req.Job.StatusHook); err != nil { - res.Stats.HookStatus = hookDeactivated - } else { - res.Stats.HookStatus = hookActivated - } - } - } - - return res, err + return } // GetJob is implementation of same method in core interface. -func (c *Controller) GetJob(jobID string) (models.JobStats, error) { +func (bc *basicController) GetJob(jobID string) (*job.Stats, error) { if utils.IsEmptyStr(jobID) { - return models.JobStats{}, errors.New("empty job ID") + return nil, errors.New("empty job ID") } - return c.backendPool.GetJobStats(jobID) + t, err := bc.ctl.Track(jobID) + if err != nil { + return nil, err + } + + return t.Job(), nil } // StopJob is implementation of same method in core interface. -func (c *Controller) StopJob(jobID string) error { +func (bc *basicController) StopJob(jobID string) error { if utils.IsEmptyStr(jobID) { return errors.New("empty job ID") } - return c.backendPool.StopJob(jobID) -} - -// CancelJob is implementation of same method in core interface. -func (c *Controller) CancelJob(jobID string) error { - if utils.IsEmptyStr(jobID) { - return errors.New("empty job ID") - } - - return c.backendPool.CancelJob(jobID) + return bc.backendWorker.StopJob(jobID) } // RetryJob is implementation of same method in core interface. 
-func (c *Controller) RetryJob(jobID string) error { +func (bc *basicController) RetryJob(jobID string) error { if utils.IsEmptyStr(jobID) { return errors.New("empty job ID") } - return c.backendPool.RetryJob(jobID) + return bc.backendWorker.RetryJob(jobID) } // GetJobLogData is used to return the log text data for the specified job if exists -func (c *Controller) GetJobLogData(jobID string) ([]byte, error) { +func (bc *basicController) GetJobLogData(jobID string) ([]byte, error) { if utils.IsEmptyStr(jobID) { return nil, errors.New("empty job ID") } @@ -149,12 +139,46 @@ func (c *Controller) GetJobLogData(jobID string) ([]byte, error) { } // CheckStatus is implementation of same method in core interface. -func (c *Controller) CheckStatus() (models.JobPoolStats, error) { - return c.backendPool.Stats() +func (bc *basicController) CheckStatus() (*worker.Stats, error) { + return bc.backendWorker.Stats() } -func validJobReq(req models.JobRequest) error { - if req.Job == nil { +// GetPeriodicExecutions gets the periodic executions for the specified periodic job +func (bc *basicController) GetPeriodicExecutions(periodicJobID string, query *query.Parameter) ([]*job.Stats, int64, error) { + if utils.IsEmptyStr(periodicJobID) { + return nil, 0, errors.New("nil periodic job ID") + } + + t, err := bc.ctl.Track(periodicJobID) + if err != nil { + return nil, 0, err + } + + eIDs, total, err := t.Executions(query) + if err != nil { + return nil, 0, err + } + + res := make([]*job.Stats, 0) + for _, eID := range eIDs { + et, err := bc.ctl.Track(eID) + if err != nil { + return nil, 0, err + } + + res = append(res, et.Job()) + } + + return res, total, nil +} + +// ScheduledJobs returns the scheduled jobs by page +func (bc *basicController) ScheduledJobs(query *query.Parameter) ([]*job.Stats, int64, error) { + return bc.backendWorker.ScheduledJobs(query) +} + +func validJobReq(req *job.Request) error { + if req == nil || req.Job == nil { return errors.New("empty job request is not 
allowed") } @@ -166,29 +190,29 @@ func validJobReq(req models.JobRequest) error { return errors.New("metadata of job is missing") } - if req.Job.Metadata.JobKind != job.JobKindGeneric && - req.Job.Metadata.JobKind != job.JobKindPeriodic && - req.Job.Metadata.JobKind != job.JobKindScheduled { - return fmt.Errorf( + if req.Job.Metadata.JobKind != job.KindGeneric && + req.Job.Metadata.JobKind != job.KindPeriodic && + req.Job.Metadata.JobKind != job.KindScheduled { + return errors.Errorf( "job kind '%s' is not supported, only support '%s','%s','%s'", req.Job.Metadata.JobKind, - job.JobKindGeneric, - job.JobKindScheduled, - job.JobKindPeriodic) + job.KindGeneric, + job.KindScheduled, + job.KindPeriodic) } - if req.Job.Metadata.JobKind == job.JobKindScheduled && + if req.Job.Metadata.JobKind == job.KindScheduled && req.Job.Metadata.ScheduleDelay == 0 { - return fmt.Errorf("'schedule_delay' must be specified if the job kind is '%s'", job.JobKindScheduled) + return errors.Errorf("'schedule_delay' must be specified for %s job", job.KindScheduled) } - if req.Job.Metadata.JobKind == job.JobKindPeriodic { + if req.Job.Metadata.JobKind == job.KindPeriodic { if utils.IsEmptyStr(req.Job.Metadata.Cron) { - return fmt.Errorf("'cron_spec' must be specified if the job kind is '%s'", job.JobKindPeriodic) + return fmt.Errorf("'cron_spec' must be specified if the %s job", job.KindPeriodic) } if _, err := cron.Parse(req.Job.Metadata.Cron); err != nil { - return fmt.Errorf("'cron_spec' is not correctly set: %s", err) + return fmt.Errorf("'cron_spec' is not correctly set: %s: %s", req.Job.Metadata.Cron, err) } } diff --git a/src/jobservice/core/interface.go b/src/jobservice/core/interface.go index 640bd1188..176a87c57 100644 --- a/src/jobservice/core/interface.go +++ b/src/jobservice/core/interface.go @@ -16,28 +16,30 @@ package core import ( - "github.com/goharbor/harbor/src/jobservice/models" + "github.com/goharbor/harbor/src/jobservice/common/query" + 
"github.com/goharbor/harbor/src/jobservice/job" + "github.com/goharbor/harbor/src/jobservice/worker" ) // Interface defines the related main methods of job operation. type Interface interface { // LaunchJob is used to handle the job submission request. // - // req JobRequest : Job request contains related required information of queuing job. + // req *job.Request : Job request contains related required information of queuing job. // // Returns: - // JobStats: Job status info with ID and self link returned if job is successfully launched. - // error : Error returned if failed to launch the specified job. - LaunchJob(req models.JobRequest) (models.JobStats, error) + // job.Stats : Job status info with ID and self link returned if job is successfully launched. + // error : Error returned if failed to launch the specified job. + LaunchJob(req *job.Request) (*job.Stats, error) // GetJob is used to handle the job stats query request. // // jobID string: ID of job. // // Returns: - // JobStats: Job status info if job exists. - // error : Error returned if failed to get the specified job. - GetJob(jobID string) (models.JobStats, error) + // *job.Stats : Job status info if job exists. + // error : Error returned if failed to get the specified job. + GetJob(jobID string) (*job.Stats, error) // StopJob is used to handle the job stopping request. // @@ -55,17 +57,19 @@ type Interface interface { // error : Error returned if failed to retry the specified job. RetryJob(jobID string) error - // Cancel the job - // - // jobID string : ID of the enqueued job - // - // Returns: - // error : error returned if meet any problems - CancelJob(jobID string) error - // CheckStatus is used to handle the job service healthy status checking request. 
- CheckStatus() (models.JobPoolStats, error) + CheckStatus() (*worker.Stats, error) // GetJobLogData is used to return the log text data for the specified job if exists GetJobLogData(jobID string) ([]byte, error) + + // Get the periodic executions for the specified periodic job. + // Pagination by query is supported. + // The total number is also returned. + GetPeriodicExecutions(periodicJobID string, query *query.Parameter) ([]*job.Stats, int64, error) + + // Get the scheduled jobs by page + // The page number in the query will be ignored, default 20 is used. This is the limitation of backend lib. + // The total number is also returned. + ScheduledJobs(query *query.Parameter) ([]*job.Stats, int64, error) } diff --git a/src/jobservice/env/context.go b/src/jobservice/env/context.go index 5d468a25e..49d1bc492 100644 --- a/src/jobservice/env/context.go +++ b/src/jobservice/env/context.go @@ -16,6 +16,7 @@ package env import ( "context" + "github.com/goharbor/harbor/src/jobservice/job" "sync" ) @@ -33,6 +34,6 @@ type Context struct { ErrorChan chan error // The base job context reference - // It will be the parent conetext of job execution context - JobContext JobContext + // It will be the parent context of job execution context + JobContext job.Context } diff --git a/src/jobservice/errs/errors.go b/src/jobservice/errs/errors.go index a85015b93..69c3de440 100644 --- a/src/jobservice/errs/errors.go +++ b/src/jobservice/errs/errors.go @@ -33,14 +33,12 @@ const ( MissingBackendHandlerErrorCode // LaunchJobErrorCode is code for the error of launching job LaunchJobErrorCode - // CheckStatsErrorCode is code for the error of checking stats of worker pool + // CheckStatsErrorCode is code for the error of checking stats of the worker CheckStatsErrorCode // GetJobStatsErrorCode is code for the error of getting stats of enqueued job GetJobStatsErrorCode // StopJobErrorCode is code for the error of stopping job StopJobErrorCode - // CancelJobErrorCode is code for the error of
cancelling job - CancelJobErrorCode // RetryJobErrorCode is code for the error of retrying job RetryJobErrorCode // UnknownActionNameErrorCode is code for the case of unknown action name @@ -115,11 +113,6 @@ func StopJobError(err error) error { return New(StopJobErrorCode, "Stop job failed with error", err.Error()) } -// CancelJobError is error for the case of cancelling job failed -func CancelJobError(err error) error { - return New(CancelJobErrorCode, "Cancel job failed with error", err.Error()) -} - // RetryJobError is error for the case of retrying job failed func RetryJobError(err error) error { return New(RetryJobErrorCode, "Retry job failed with error", err.Error()) @@ -155,21 +148,6 @@ func JobStoppedError() error { } } -// jobCancelledError is designed for the case of cancelling job. -type jobCancelledError struct { - baseError -} - -// JobCancelledError is error wrapper for the case of cancelling job. -func JobCancelledError() error { - return jobCancelledError{ - baseError{ - Code: JobStoppedErrorCode, - Err: "Job is cancelled", - }, - } -} - // objectNotFound is designed for the case of no object found type objectNotFoundError struct { baseError @@ -208,12 +186,6 @@ func IsJobStoppedError(err error) bool { return ok } -// IsJobCancelledError return true if the error is jobCancelledError -func IsJobCancelledError(err error) bool { - _, ok := err.(jobCancelledError) - return ok -} - // IsObjectNotFoundError return true if the error is objectNotFoundError func IsObjectNotFoundError(err error) bool { _, ok := err.(objectNotFoundError) diff --git a/src/jobservice/hook/hook_agent.go b/src/jobservice/hook/hook_agent.go new file mode 100644 index 000000000..785f496ce --- /dev/null +++ b/src/jobservice/hook/hook_agent.go @@ -0,0 +1,328 @@ +// Copyright Project Harbor Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package hook + +import ( + "context" + "encoding/json" + "errors" + "math/rand" + "net/url" + "time" + + "github.com/goharbor/harbor/src/jobservice/job" + + "github.com/goharbor/harbor/src/jobservice/common/rds" + "github.com/goharbor/harbor/src/jobservice/logger" + "github.com/gomodule/redigo/redis" + "math" +) + +const ( + // Influenced by the worker number setting + maxEventChanBuffer = 1024 + // Max concurrent client handlers + maxHandlers = 5 + // The max time for expiring the retrying events + // 180 days + maxEventExpireTime = 3600 * 24 * 180 + // Interval for retrying loop + retryInterval = 2 * time.Minute + // Number for splitting the event list to sub set for popping out + defaultShardNum = 3 +) + +// Agent is designed to handle the hook events with reasonable numbers of concurrent threads +type Agent interface { + // Trigger hooks + Trigger(evt *Event) error + // Serves events now + Serve() +} + +// Event contains the hook URL and the data +type Event struct { + URL string `json:"url"` + Message string `json:"message"` // meaningful text for event + Data *job.StatusChange `json:"data"` // generic data + Timestamp int64 `json:"timestamp"` // Use as time threshold of discarding the event (unit: second) +} + +// Validate event +func (e *Event) Validate() error { + _, err := url.Parse(e.URL) + if err != nil { + return err + } + + if e.Data == nil { + return errors.New("nil hook data") + } + + return nil +} + +// Serialize event to bytes +func (e *Event) Serialize() ([]byte, error) { + return json.Marshal(e) +} + +// Deserialize the bytes 
to event +func (e *Event) Deserialize(bytes []byte) error { + return json.Unmarshal(bytes, e) +} + +// Basic agent for usage +type basicAgent struct { + context context.Context + namespace string + client Client + events chan *Event + tokens chan bool + redisPool *redis.Pool +} + +// NewAgent is constructor of basic agent +func NewAgent(ctx context.Context, ns string, redisPool *redis.Pool) Agent { + tks := make(chan bool, maxHandlers) + // Put tokens + for i := 0; i < maxHandlers; i++ { + tks <- true + } + return &basicAgent{ + context: ctx, + namespace: ns, + client: NewClient(ctx), + events: make(chan *Event, maxEventChanBuffer), + tokens: tks, + redisPool: redisPool, + } +} + +// Trigger implements the same method of interface @Agent +func (ba *basicAgent) Trigger(evt *Event) error { + if evt == nil { + return errors.New("nil event") + } + + if err := evt.Validate(); err != nil { + return err + } + + ba.events <- evt + + return nil +} + +// Serve starts the basic agent +// Termination depends on the system context +// Non-blocking call: the retry loop and the event loop run in their own goroutines +func (ba *basicAgent) Serve() { + go ba.looplyRetry() + logger.Info("Hook event retrying loop is started!") + go ba.serve() + logger.Info("Basic hook agent is started") + +} + +func (ba *basicAgent) serve() { + defer func() { + logger.Info("Basic hook agent is stopped") + }() + + for { + select { + case evt := <-ba.events: + // if exceed, wait here + // avoid too many request connections at the same time + <-ba.tokens + go func(evt *Event) { + defer func() { + ba.tokens <- true // return token + }() + + if err := ba.client.SendEvent(evt); err != nil { + logger.Errorf("Send hook event '%s' to '%s' failed with error: %s; push to the queue for retrying later", evt.Message, evt.URL, err) + // Push event to the retry queue + if err := ba.pushForRetry(evt); err != nil { + // Failed to push to the retry queue, let's directly push it + // to the event channel of this node with reasonable backoff time.
+ logger.Errorf("Failed to push hook event to the retry queue: %s", err) + + // Put to the event chan now + // In a separate goroutine to avoid occupying the token long time + go func() { + // As 'pushForRetry' has checked the timestamp and expired event + // will be directly discarded and nil error is returned, no need to + // check it again here. + <-time.After(time.Duration((rand.Int31n(60) + 5)) * time.Second) + ba.events <- evt + }() + } + } + }(evt) + + case <-ba.context.Done(): + return + } + } +} + +func (ba *basicAgent) pushForRetry(evt *Event) error { + if evt == nil { + // do nothing + return nil + } + + // Anyway we'll need the raw JSON, let's try to serialize it here + rawJSON, err := evt.Serialize() + if err != nil { + return err + } + + now := time.Now().Unix() + if evt.Timestamp > 0 && now-evt.Timestamp >= maxEventExpireTime { + // Expired, do not need to push back to the retry queue + logger.Warningf("Event is expired: %s\n", rawJSON) + + return nil + } + + conn := ba.redisPool.Get() + defer conn.Close() + + key := rds.KeyHookEventRetryQueue(ba.namespace) + args := make([]interface{}, 0) + + // Use nano time to get more accurate timestamp + score := time.Now().UnixNano() + args = append(args, key, "NX", score, rawJSON) + + _, err = conn.Do("ZADD", args...) 
+ if err != nil { + return err + } + + return nil +} + +func (ba *basicAgent) looplyRetry() { + defer func() { + logger.Info("Hook event retrying loop exit!") + }() + + // Append random seconds to avoid working in the same time slot + tk := time.NewTicker(retryInterval + time.Duration(rand.Int31n(13)+3)*time.Second) + defer tk.Stop() + + for { + select { + case <-tk.C: + if err := ba.popMinOnes(); err != nil { + logger.Errorf("Retrying to send hook events failed with error: %s", err.Error()) + } + case <-ba.context.Done(): + return + } + } +} + +func (ba *basicAgent) popMinOnes() error { + conn := ba.redisPool.Get() + defer conn.Close() + + key := rds.KeyHookEventRetryQueue(ba.namespace) + // Get total events + total, err := redis.Int(conn.Do("ZCARD", key)) + if err != nil { + return err + } + + // Get sharding ones + poppedNum := math.Ceil(float64(total) / float64(defaultShardNum)) + rawContent, err := redis.Values(conn.Do("ZPOPMIN", key, poppedNum)) + if err != nil { + return err + } + + for i, l := 0, len(rawContent); i < l; i = i + 2 { + rawEvent := rawContent[i].([]byte) + evt := &Event{} + + if err := evt.Deserialize(rawEvent); err != nil { + // Partially failed + logger.Warningf("Invalid event data when retrying to send hook event: %s", err.Error()) + continue + } + + // Compare with current job status if it is still valid hook events + // If it is already out of date, then directly discard it + // If it is still valid, then retry to send it + // Get the current status of job + jobID, status, err := extractJobID(evt.Data) + if err != nil { + logger.Warning(err.Error()) + continue + } + + latestStatus, err := ba.getJobStatus(jobID) + if err != nil { + logger.Warning(err.Error()) + continue + } + + if status.Compare(latestStatus) < 0 { + // Already out of date + logger.Debugf("Abandon out dated status update retrying action: %s", evt.Message) + continue + } + + // Put to the event chan for sending with a separate goroutine to avoid long time + // waiting + go 
func(evt *Event) { + ba.events <- evt + }(evt) + } + + return nil +} + +func (ba *basicAgent) getJobStatus(jobID string) (job.Status, error) { + conn := ba.redisPool.Get() + defer conn.Close() + + key := rds.KeyJobStats(ba.namespace, jobID) + status, err := redis.String(conn.Do("HGET", key, "status")) + if err != nil { + return job.PendingStatus, err + } + + return job.Status(status), nil +} + +// Extract the job ID and status from the event data field +// First return is job ID +// Second return is job status +// Last one is error +func extractJobID(data *job.StatusChange) (string, job.Status, error) { + if data != nil && len(data.JobID) > 0 { + status := job.Status(data.Status) + if status.Validate() == nil { + return data.JobID, status, nil + } + } + + return "", "", errors.New("invalid job status change data to extract job ID") +} diff --git a/src/jobservice/hook/hook_agent_test.go b/src/jobservice/hook/hook_agent_test.go new file mode 100644 index 000000000..989e754df --- /dev/null +++ b/src/jobservice/hook/hook_agent_test.go @@ -0,0 +1,177 @@ +// Copyright Project Harbor Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package hook + +import ( + "context" + "fmt" + "net/http" + "net/http/httptest" + "sync/atomic" + "testing" + "time" + + "github.com/goharbor/harbor/src/jobservice/common/rds" + "github.com/goharbor/harbor/src/jobservice/job" + "github.com/goharbor/harbor/src/jobservice/tests" +) + +func TestEventSending(t *testing.T) { + done := make(chan bool, 1) + + expected := uint32(1300) // >1024 max + count := uint32(0) + counter := &count + + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + defer func() { + c := atomic.AddUint32(counter, 1) + if c == expected { + done <- true + } + }() + fmt.Fprintln(w, "ok") + })) + defer ts.Close() + + // in case test failed and avoid dead lock + go func() { + <-time.After(time.Duration(10) * time.Second) + done <- true // time out + }() + + ctx, cancel := context.WithCancel(context.Background()) + + ns := tests.GiveMeTestNamespace() + pool := tests.GiveMeRedisPool() + + conn := pool.Get() + defer tests.ClearAll(ns, conn) + + agent := NewAgent(ctx, ns, pool) + agent.Serve() + + go func() { + defer func() { + cancel() + }() + + for i := uint32(0); i < expected; i++ { + changeData := &job.StatusChange{ + JobID: fmt.Sprintf("job-%d", i), + Status: "running", + } + + evt := &Event{ + URL: ts.URL, + Message: fmt.Sprintf("status of job %s change to %s", changeData.JobID, changeData.Status), + Data: changeData, + Timestamp: time.Now().Unix(), + } + + if err := agent.Trigger(evt); err != nil { + t.Fatal(err) + } + } + + // Check results + <-done + if count != expected { + t.Fatalf("expected %d hook events but only got %d", expected, count) + } + }() + + // Wait + <-ctx.Done() +} + +func TestRetryAndPopMin(t *testing.T) { + ctx := context.Background() + ns := tests.GiveMeTestNamespace() + pool := tests.GiveMeRedisPool() + + conn := pool.Get() + defer tests.ClearAll(ns, conn) + + tks := make(chan bool, maxHandlers) + // Put tokens + for i := 0; i < maxHandlers; i++ { + tks <- true + } + + agent := 
&basicAgent{ + context: ctx, + namespace: ns, + client: NewClient(), + events: make(chan *Event, maxEventChanBuffer), + tokens: tks, + redisPool: pool, + } + + changeData := &job.StatusChange{ + JobID: "fake_job_ID", + Status: job.RunningStatus.String(), + } + + evt := &Event{ + URL: "https://fake.js.com", + Message: fmt.Sprintf("status of job %s change to %s", changeData.JobID, changeData.Status), + Data: changeData, + Timestamp: time.Now().Unix(), + } + + // Mock job stats + conn = pool.Get() + defer conn.Close() + + key := rds.KeyJobStats(ns, "fake_job_ID") + _, err := conn.Do("HSET", key, "status", job.SuccessStatus.String()) + if err != nil { + t.Fatal(err) + } + + if err := agent.pushForRetry(evt); err != nil { + t.Fatal(err) + } + + if err := agent.popMinOnes(); err != nil { + t.Fatal(err) + } + + // Check results + if len(agent.events) > 0 { + t.Error("the hook event should be discard but actually not") + } + + // Change status + _, err = conn.Do("HSET", key, "status", job.PendingStatus.String()) + if err != nil { + t.Fatal(err) + } + + if err := agent.pushForRetry(evt); err != nil { + t.Fatal(err) + } + + if err := agent.popMinOnes(); err != nil { + t.Fatal(err) + } + + <-time.After(time.Duration(1) * time.Second) + + if len(agent.events) != 1 { + t.Errorf("the hook event should be requeued but actually not: %d", len(agent.events)) + } +} diff --git a/src/jobservice/hook/hook_client.go b/src/jobservice/hook/hook_client.go new file mode 100644 index 000000000..9e5c30518 --- /dev/null +++ b/src/jobservice/hook/hook_client.go @@ -0,0 +1,132 @@ +// Copyright Project Harbor Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package hook + +import ( + "encoding/json" + "errors" + "fmt" + "io/ioutil" + "net" + "net/http" + "net/url" + "os" + "strings" + "time" + + "context" + "github.com/goharbor/harbor/src/jobservice/common/utils" +) + +const ( + proxyEnvHTTP = "http_proxy" + proxyEnvHTTPS = "https_proxy" +) + +// Client for handling the hook events +type Client interface { + // SendEvent send the event to the subscribed parties + SendEvent(evt *Event) error +} + +// Client is used to post the related data to the interested parties. +type basicClient struct { + client *http.Client +} + +// NewClient return the ptr of the new hook client +func NewClient(ctx context.Context) Client { + // Create transport + transport := &http.Transport{ + MaxIdleConns: 20, + IdleConnTimeout: 30 * time.Second, + DialContext: (&net.Dialer{ + Timeout: 30 * time.Second, + KeepAlive: 30 * time.Second, + }).DialContext, + TLSHandshakeTimeout: 10 * time.Second, + ResponseHeaderTimeout: 10 * time.Second, + ExpectContinueTimeout: 1 * time.Second, + } + + // Get the http/https proxies + proxyAddr, ok := os.LookupEnv(proxyEnvHTTP) + if !ok { + proxyAddr, ok = os.LookupEnv(proxyEnvHTTPS) + } + + if ok && !utils.IsEmptyStr(proxyAddr) { + proxyURL, err := url.Parse(proxyAddr) + if err == nil { + transport.Proxy = http.ProxyURL(proxyURL) + } + } + + client := &http.Client{ + Timeout: 15 * time.Second, + Transport: transport, + } + + return &basicClient{ + client: client, + } +} + +// ReportStatus reports the status change info to the subscribed party. 
+// The status includes 'checkin' info with format 'check_in:' +func (bc *basicClient) SendEvent(evt *Event) error { + if evt == nil { + return errors.New("nil event") + } + + if err := evt.Validate(); err != nil { + return err + } + + // Marshal data + data, err := json.Marshal(evt.Data) + if err != nil { + return err + } + + // New post request + req, err := http.NewRequest(http.MethodPost, evt.URL, strings.NewReader(string(data))) + if err != nil { + return err + } + + res, err := bc.client.Do(req) + if err != nil { + return err + } + + defer res.Body.Close() // close connection for reuse + + // Should be 200 + if res.StatusCode != http.StatusOK { + if res.ContentLength > 0 { + // read error content and return + dt, err := ioutil.ReadAll(res.Body) + if err != nil { + return err + } + return errors.New(string(dt)) + } + + return fmt.Errorf("failed to report status change via hook, expect '200' but got '%d'", res.StatusCode) + } + + return nil +} diff --git a/src/jobservice/opm/hook_client_test.go b/src/jobservice/hook/hook_client_test.go similarity index 66% rename from src/jobservice/opm/hook_client_test.go rename to src/jobservice/hook/hook_client_test.go index 1d88b9cfc..fc65725ed 100644 --- a/src/jobservice/opm/hook_client_test.go +++ b/src/jobservice/hook/hook_client_test.go @@ -11,27 +11,36 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
-package opm +package hook import ( "fmt" + "github.com/goharbor/harbor/src/jobservice/job" "net/http" "net/http/httptest" "testing" - - "github.com/goharbor/harbor/src/jobservice/models" + "time" ) +var testClient = NewClient() + func TestHookClient(t *testing.T) { ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { fmt.Fprintln(w, "ok") })) defer ts.Close() - err := DefaultHookClient.ReportStatus(ts.URL, models.JobStatusChange{ + changeData := &job.StatusChange{ JobID: "fake_job_ID", Status: "running", - }) + } + evt := &Event{ + URL: ts.URL, + Data: changeData, + Message: fmt.Sprintf("Status of job %s changed to: %s", changeData.JobID, changeData.Status), + Timestamp: time.Now().Unix(), + } + err := testClient.SendEvent(evt) if err != nil { t.Fatal(err) } @@ -44,10 +53,18 @@ func TestReportStatusFailed(t *testing.T) { })) defer ts.Close() - err := DefaultHookClient.ReportStatus(ts.URL, models.JobStatusChange{ + changeData := &job.StatusChange{ JobID: "fake_job_ID", Status: "running", - }) + } + evt := &Event{ + URL: ts.URL, + Data: changeData, + Message: fmt.Sprintf("Status of job %s changed to: %s", changeData.JobID, changeData.Status), + Timestamp: time.Now().Unix(), + } + + err := testClient.SendEvent(evt) if err == nil { t.Fatal("expect error but got nil") } diff --git a/src/jobservice/env/job_context.go b/src/jobservice/job/context.go similarity index 63% rename from src/jobservice/env/job_context.go rename to src/jobservice/job/context.go index c2a9e3281..19e501882 100644 --- a/src/jobservice/env/job_context.go +++ b/src/jobservice/job/context.go @@ -12,27 +12,26 @@ // See the License for the specific language governing permissions and // limitations under the License. -package env +package job import ( "context" "github.com/goharbor/harbor/src/jobservice/logger" - "github.com/goharbor/harbor/src/jobservice/models" ) -// JobContext is combination of BaseContext and other job specified resources. 
-// JobContext will be the real execution context for one job. -type JobContext interface { +// Context is combination of BaseContext and other job specified resources. +// Context will be the real execution context for one job. +type Context interface { // Build the context based on the parent context // - // dep JobData : Dependencies for building the context, just in case that the build - // function need some external info + // A new job context will be generated based on the current context + // for the provided job. // // Returns: - // new JobContext based on the parent one + // new Context based on the parent one // error if meet any problems - Build(dep JobData) (JobContext, error) + Build(tracker Tracker) (Context, error) // Get property from the context // @@ -57,27 +56,19 @@ type JobContext interface { // error if meet any problems Checkin(status string) error - // OPCommand return the control operational command like stop/cancel if have + // OPCommand return the control operational command like stop if have // // Returns: // op command if have // flag to indicate if have command - OPCommand() (string, bool) + OPCommand() (OPCommand, bool) // Return the logger GetLogger() logger.Interface - // Launch sub jobs - LaunchJob(req models.JobRequest) (models.JobStats, error) -} - -// JobData defines job context dependencies. 
-type JobData struct { - ID string - Name string - Args map[string]interface{} - ExtraData map[string]interface{} + // Get tracker + Tracker() Tracker } // JobContextInitializer is a func to initialize the concrete job context -type JobContextInitializer func(ctx *Context) (JobContext, error) +type JobContextInitializer func(ctx context.Context) (Context, error) diff --git a/src/jobservice/job/impl/context.go b/src/jobservice/job/impl/context.go index cfcef2a94..5c0d30956 100644 --- a/src/jobservice/job/impl/context.go +++ b/src/jobservice/job/impl/context.go @@ -16,22 +16,17 @@ package impl import ( "context" - "errors" "fmt" "math" - "reflect" "time" - "github.com/goharbor/harbor/src/common" + "errors" comcfg "github.com/goharbor/harbor/src/common/config" "github.com/goharbor/harbor/src/common/dao" - "github.com/goharbor/harbor/src/common/models" "github.com/goharbor/harbor/src/jobservice/config" - "github.com/goharbor/harbor/src/jobservice/env" "github.com/goharbor/harbor/src/jobservice/job" "github.com/goharbor/harbor/src/jobservice/logger" "github.com/goharbor/harbor/src/jobservice/logger/sweeper" - jmodel "github.com/goharbor/harbor/src/jobservice/models" ) const ( @@ -42,24 +37,14 @@ const ( type Context struct { // System context sysContext context.Context - // Logger for job logger logger.Interface - - // op command func - opCommandFunc job.CheckOPCmdFunc - - // checkin func - checkInFunc job.CheckInFunc - - // launch job - launchJobFunc job.LaunchJobFunc - // other required information properties map[string]interface{} - // admin server client cfgMgr comcfg.CfgManager + // job life cycle tracker + tracker job.Tracker } // NewContext ... 
@@ -107,7 +92,11 @@ func (c *Context) Init() error { // Build implements the same method in env.JobContext interface // This func will build the job execution context before running -func (c *Context) Build(dep env.JobData) (env.JobContext, error) { +func (c *Context) Build(tracker job.Tracker) (job.Context, error) { + if tracker == nil { + return nil, errors.New("nil job tracker") + } + jContext := &Context{ sysContext: c.sysContext, cfgMgr: c.cfgMgr, @@ -132,46 +121,11 @@ func (c *Context) Build(dep env.JobData) (env.JobContext, error) { } // Set loggers for job - if err := setLoggers(func(lg logger.Interface) { - jContext.logger = lg - }, dep.ID); err != nil { + lg, err := createLoggers(tracker.Job().Info.JobID) + if err != nil { return nil, err } - - if opCommandFunc, ok := dep.ExtraData["opCommandFunc"]; ok { - if reflect.TypeOf(opCommandFunc).Kind() == reflect.Func { - if funcRef, ok := opCommandFunc.(job.CheckOPCmdFunc); ok { - jContext.opCommandFunc = funcRef - } - } - } - if jContext.opCommandFunc == nil { - return nil, errors.New("failed to inject opCommandFunc") - } - - if checkInFunc, ok := dep.ExtraData["checkInFunc"]; ok { - if reflect.TypeOf(checkInFunc).Kind() == reflect.Func { - if funcRef, ok := checkInFunc.(job.CheckInFunc); ok { - jContext.checkInFunc = funcRef - } - } - } - - if jContext.checkInFunc == nil { - return nil, errors.New("failed to inject checkInFunc") - } - - if launchJobFunc, ok := dep.ExtraData["launchJobFunc"]; ok { - if reflect.TypeOf(launchJobFunc).Kind() == reflect.Func { - if funcRef, ok := launchJobFunc.(job.LaunchJobFunc); ok { - jContext.launchJobFunc = funcRef - } - } - } - - if jContext.launchJobFunc == nil { - return nil, errors.New("failed to inject launchJobFunc") - } + jContext.logger = lg return jContext, nil } @@ -189,22 +143,21 @@ func (c *Context) SystemContext() context.Context { // Checkin is bridge func for reporting detailed status func (c *Context) Checkin(status string) error { - if 
c.checkInFunc != nil
{ - c.checkInFunc(status) - } else { - return errors.New("nil check in function") - } - - return nil + return c.tracker.CheckIn(status) } // OPCommand return the control operational command like stop/cancel if have -func (c *Context) OPCommand() (string, bool) { - if c.opCommandFunc != nil { - return c.opCommandFunc() +func (c *Context) OPCommand() (job.OPCommand, bool) { + latest, err := c.tracker.Status() + if err != nil { + return job.NilCommand, false } - return "", false + if job.StoppedStatus == latest { + return job.StopCommand, true + } + + return job.NilCommand, false } // GetLogger returns the logger @@ -212,38 +165,15 @@ func (c *Context) GetLogger() logger.Interface { return c.logger } -// LaunchJob launches sub jobs -func (c *Context) LaunchJob(req jmodel.JobRequest) (jmodel.JobStats, error) { - if c.launchJobFunc == nil { - return jmodel.JobStats{}, errors.New("nil launch job function") - } - - return c.launchJobFunc(req) +// Tracker returns the job tracker attached with the context +func (c *Context) Tracker() job.Tracker { + return c.tracker } -func getDBFromConfig(cfg map[string]interface{}) *models.Database { - database := &models.Database{} - database.Type = cfg[common.DatabaseType].(string) - postgresql := &models.PostGreSQL{} - postgresql.Host = cfg[common.PostGreSQLHOST].(string) - postgresql.Port = int(cfg[common.PostGreSQLPort].(float64)) - postgresql.Username = cfg[common.PostGreSQLUsername].(string) - postgresql.Password = cfg[common.PostGreSQLPassword].(string) - postgresql.Database = cfg[common.PostGreSQLDatabase].(string) - postgresql.SSLMode = cfg[common.PostGreSQLSSLMode].(string) - database.PostGreSQL = postgresql - - return database -} - -// create loggers based on the configurations and set it to the job executing context. -func setLoggers(setter func(lg logger.Interface), jobID string) error { - if setter == nil { - return errors.New("missing setter func") - } - +// create loggers based on the configurations. 
+func createLoggers(jobID string) (logger.Interface, error) { // Init job loggers here - lOptions := []logger.Option{} + lOptions := make([]logger.Option, 0) for _, lc := range config.DefaultConfig.JobLoggerConfigs { // For running job, the depth should be 5 if lc.Name == logger.LoggerNameFile || lc.Name == logger.LoggerNameStdOutput || lc.Name == logger.LoggerNameDB { @@ -273,14 +203,7 @@ func setLoggers(setter func(lg logger.Interface), jobID string) error { } } // Get logger for the job - lg, err := logger.GetLogger(lOptions...) - if err != nil { - return fmt.Errorf("initialize job logger error: %s", err) - } - - setter(lg) - - return nil + return logger.GetLogger(lOptions...) } func initDBCompleted() error { diff --git a/src/jobservice/job/impl/default_context.go b/src/jobservice/job/impl/default_context.go index 34243059b..434a1339c 100644 --- a/src/jobservice/job/impl/default_context.go +++ b/src/jobservice/job/impl/default_context.go @@ -17,144 +17,97 @@ package impl import ( "context" "errors" - "reflect" - - "github.com/goharbor/harbor/src/jobservice/env" "github.com/goharbor/harbor/src/jobservice/job" "github.com/goharbor/harbor/src/jobservice/logger" - jmodel "github.com/goharbor/harbor/src/jobservice/models" ) // DefaultContext provides a basic job context type DefaultContext struct { // System context sysContext context.Context - // Logger for job logger logger.Interface - - // op command func - opCommandFunc job.CheckOPCmdFunc - - // checkin func - checkInFunc job.CheckInFunc - - // launch job - launchJobFunc job.LaunchJobFunc - - // other required information + // Other required information properties map[string]interface{} + // Track the job attached with the context + tracker job.Tracker } // NewDefaultContext is constructor of building DefaultContext -func NewDefaultContext(sysCtx context.Context) env.JobContext { +func NewDefaultContext(sysCtx context.Context) job.Context { return &DefaultContext{ sysContext: sysCtx, properties: 
make(map[string]interface{}), } } -// Build implements the same method in env.JobContext interface +// Build implements the same method in job.Context interface // This func will build the job execution context before running -func (c *DefaultContext) Build(dep env.JobData) (env.JobContext, error) { +func (dc *DefaultContext) Build(t job.Tracker) (job.Context, error) { + if t == nil { + return nil, errors.New("nil job tracker") + } + jContext := &DefaultContext{ - sysContext: c.sysContext, + sysContext: dc.sysContext, + tracker: t, properties: make(map[string]interface{}), } // Copy properties - if len(c.properties) > 0 { - for k, v := range c.properties { + if len(dc.properties) > 0 { + for k, v := range dc.properties { jContext.properties[k] = v } } // Set loggers for job - if err := setLoggers(func(lg logger.Interface) { - jContext.logger = lg - }, dep.ID); err != nil { + lg, err := createLoggers(t.Job().Info.JobID) + if err != nil { return nil, err } - if opCommandFunc, ok := dep.ExtraData["opCommandFunc"]; ok { - if reflect.TypeOf(opCommandFunc).Kind() == reflect.Func { - if funcRef, ok := opCommandFunc.(job.CheckOPCmdFunc); ok { - jContext.opCommandFunc = funcRef - } - } - } - if jContext.opCommandFunc == nil { - return nil, errors.New("failed to inject opCommandFunc") - } - - if checkInFunc, ok := dep.ExtraData["checkInFunc"]; ok { - if reflect.TypeOf(checkInFunc).Kind() == reflect.Func { - if funcRef, ok := checkInFunc.(job.CheckInFunc); ok { - jContext.checkInFunc = funcRef - } - } - } - - if jContext.checkInFunc == nil { - return nil, errors.New("failed to inject checkInFunc") - } - - if launchJobFunc, ok := dep.ExtraData["launchJobFunc"]; ok { - if reflect.TypeOf(launchJobFunc).Kind() == reflect.Func { - if funcRef, ok := launchJobFunc.(job.LaunchJobFunc); ok { - jContext.launchJobFunc = funcRef - } - } - } - - if jContext.launchJobFunc == nil { - return nil, errors.New("failed to inject launchJobFunc") - } + jContext.logger = lg 
return jContext, nil }
-// Get implements the same method in env.JobContext interface -func (c *DefaultContext) Get(prop string) (interface{}, bool) { - v, ok := c.properties[prop] +// Get implements the same method in job.Context interface +func (dc *DefaultContext) Get(prop string) (interface{}, bool) { + v, ok := dc.properties[prop] return v, ok } -// SystemContext implements the same method in env.JobContext interface -func (c *DefaultContext) SystemContext() context.Context { - return c.sysContext +// SystemContext implements the same method in job.Context interface +func (dc *DefaultContext) SystemContext() context.Context { + return dc.sysContext } // Checkin is bridge func for reporting detailed status -func (c *DefaultContext) Checkin(status string) error { - if c.checkInFunc != nil { - c.checkInFunc(status) - } else { - return errors.New("nil check in function") - } - - return nil +func (dc *DefaultContext) Checkin(status string) error { + return dc.tracker.CheckIn(status) } -// OPCommand return the control operational command like stop/cancel if have -func (c *DefaultContext) OPCommand() (string, bool) { - if c.opCommandFunc != nil { - return c.opCommandFunc() +// OPCommand returns the stop command when the tracker reports the job as stopped, otherwise the nil command +func (dc *DefaultContext) OPCommand() (job.OPCommand, bool) { + latest, err := dc.tracker.Status() + if err != nil { + return job.NilCommand, false } - return "", false + if job.StoppedStatus == latest { + return job.StopCommand, true + } + + return job.NilCommand, false } // GetLogger returns the logger -func (c *DefaultContext) GetLogger() logger.Interface { - return c.logger +func (dc *DefaultContext) GetLogger() logger.Interface { + return dc.logger } -// LaunchJob launches sub jobs -func (c *DefaultContext) LaunchJob(req jmodel.JobRequest) (jmodel.JobStats, error) { - if c.launchJobFunc == nil { - return jmodel.JobStats{}, errors.New("nil launch job function") - } - - return 
c.launchJobFunc(req) +// Tracker returns the tracker tracking the job
attached with the context +func (dc *DefaultContext) Tracker() job.Tracker { + return dc.tracker } diff --git a/src/jobservice/job/impl/default_context_test.go b/src/jobservice/job/impl/default_context_test.go index c37d63d0a..4c6e69e48 100644 --- a/src/jobservice/job/impl/default_context_test.go +++ b/src/jobservice/job/impl/default_context_test.go @@ -46,7 +46,7 @@ func TestDefaultContext(t *testing.T) { JobID: "fake_sub_job_id", Status: "pending", JobName: "DEMO", - JobKind: job.JobKindGeneric, + JobKind: job.KindGeneric, EnqueueTime: time.Now().Unix(), UpdateTime: time.Now().Unix(), }, diff --git a/src/jobservice/job/impl/demo_job.go b/src/jobservice/job/impl/sample/job.go similarity index 58% rename from src/jobservice/job/impl/demo_job.go rename to src/jobservice/job/impl/sample/job.go index 603c937de..970292a8d 100644 --- a/src/jobservice/job/impl/demo_job.go +++ b/src/jobservice/job/impl/sample/job.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package impl +package sample import ( "errors" @@ -20,26 +20,25 @@ import ( "strings" "time" - "github.com/goharbor/harbor/src/jobservice/env" "github.com/goharbor/harbor/src/jobservice/errs" - "github.com/goharbor/harbor/src/jobservice/opm" + "github.com/goharbor/harbor/src/jobservice/job" ) -// DemoJob is the job to demostrate the job interface. -type DemoJob struct{} +// Job is a sample to show how to implement a job. +type Job struct{} // MaxFails is implementation of same method in Interface. -func (dj *DemoJob) MaxFails() uint { +func (j *Job) MaxFails() uint { return 3 } // ShouldRetry ... -func (dj *DemoJob) ShouldRetry() bool { +func (j *Job) ShouldRetry() bool { return true } // Validate is implementation of same method in Interface. 
-func (dj *DemoJob) Validate(params map[string]interface{}) error { +func (j *Job) Validate(params map[string]interface{}) error { if params == nil || len(params) == 0 { return errors.New("parameters required for replication job") } @@ -56,7 +55,7 @@ func (dj *DemoJob) Validate(params map[string]interface{}) error { } // Run the replication logic here. -func (dj *DemoJob) Run(ctx env.JobContext, params map[string]interface{}) error { +func (j *Job) Run(ctx job.Context, params job.Parameters) error { logger := ctx.GetLogger() defer func() { @@ -64,22 +63,19 @@ func (dj *DemoJob) Run(ctx env.JobContext, params map[string]interface{}) error }() fmt.Println("I'm running") - logger.Infof("params: %#v\n", params) - logger.Infof("context: %#v\n", ctx) + logger.Infof("Params: %#v\n", params) + logger.Infof("Context: %#v\n", ctx) if v, ok := ctx.Get("email_from"); ok { fmt.Printf("Get prop form context: email_from=%s\n", v) } - /*if u, err := dao.GetUser(models.User{}); err == nil { - fmt.Printf("u=%#+v\n", u) - }*/ - logger.Info("check in 30%") + logger.Info("Check in 30%") ctx.Checkin("30%") time.Sleep(2 * time.Second) - logger.Warning("check in 60%") + logger.Warning("Check in 60%") ctx.Checkin("60%") time.Sleep(2 * time.Second) - logger.Debug("check in 100%") + logger.Debug("Check in 100%") ctx.Checkin("100%") time.Sleep(1 * time.Second) @@ -90,37 +86,10 @@ func (dj *DemoJob) Run(ctx env.JobContext, params map[string]interface{}) error if cmd, ok := ctx.OPCommand(); ok { logger.Infof("cmd=%s\n", cmd) fmt.Printf("Receive OP command: %s\n", cmd) - if cmd == opm.CtlCommandCancel { - logger.Info("exit for receiving cancel signal") - return errs.JobCancelledError() - } - - logger.Info("exit for receiving stop signal") + logger.Info("Exit for receiving stop signal") return errs.JobStoppedError() } - /*fmt.Println("Launch sub job") - jobParams := make(map[string]interface{}) - jobParams["image"] = "demo:1.7" - subDemoJob := models.JobRequest{ - Job: &models.JobData{ - Name: 
"DEMO", - Parameters: jobParams, - Metadata: &models.JobMetadata{ - JobKind: job.JobKindGeneric, - }, - }, - } - - subJob, err := ctx.LaunchJob(subDemoJob) - if err != nil { - fmt.Printf("Create sub job failed with error: %s\n", err) - logger.Error(err) - return - } - - fmt.Printf("Sub job: %v", subJob)*/ - fmt.Println("I'm close to end") return nil diff --git a/src/jobservice/job/interface.go b/src/jobservice/job/interface.go index 85e8579b7..40683a553 100644 --- a/src/jobservice/job/interface.go +++ b/src/jobservice/job/interface.go @@ -14,22 +14,6 @@ package job -import ( - "github.com/goharbor/harbor/src/jobservice/env" - "github.com/goharbor/harbor/src/jobservice/models" -) - -// CheckOPCmdFunc is the function to check if the related operation commands -// like STOP or CANCEL is fired for the specified job. If yes, return the -// command code for job to determine if take corresponding action. -type CheckOPCmdFunc func() (string, bool) - -// CheckInFunc is designed for job to report more detailed progress info -type CheckInFunc func(message string) - -// LaunchJobFunc is designed to launch sub jobs in the job -type LaunchJobFunc func(req models.JobRequest) (models.JobStats, error) - // Interface defines the related injection and run entry methods. type Interface interface { // Declare how many times the job can be retried if failed. @@ -38,7 +22,7 @@ type Interface interface { // uint: the failure count allowed. If it is set to 0, then default value 4 is used. MaxFails() uint - // Tell the worker pool if retry the failed job when the fails is + // Tell the worker whether to retry the failed job when the fails is // still less that the number declared by the method 'MaxFails'. // // Returns: @@ -49,16 +33,16 @@ type Interface interface { // // Return: // error if parameters are not valid. NOTES: If no parameters needed, directly return nil. 
- Validate(params map[string]interface{}) error + Validate(params Parameters) error // Run the business logic here.
// The related arguments will be injected by the workerpool. // - // ctx env.JobContext : Job execution context. + // ctx Context : Job execution context. // params map[string]interface{} : parameters with key-pair style for the job execution. // // Returns: // error if failed to run. NOTES: If job is stopped or cancelled, a specified error should be returned // - Run(ctx env.JobContext, params map[string]interface{}) error + Run(ctx Context, params Parameters) error } diff --git a/src/jobservice/job/job_status.go b/src/jobservice/job/job_status.go deleted file mode 100644 index d87a13138..000000000 --- a/src/jobservice/job/job_status.go +++ /dev/null @@ -1,32 +0,0 @@ -// Copyright Project Harbor Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package job - -const ( - // JobStatusPending : job status pending - JobStatusPending = "Pending" - // JobStatusRunning : job status running - JobStatusRunning = "Running" - // JobStatusStopped : job status stopped - JobStatusStopped = "Stopped" - // JobStatusCancelled : job status cancelled - JobStatusCancelled = "Cancelled" - // JobStatusError : job status error - JobStatusError = "Error" - // JobStatusSuccess : job status success - JobStatusSuccess = "Success" - // JobStatusScheduled : job status scheduled - JobStatusScheduled = "Scheduled" -) diff --git a/src/jobservice/job/job_kinds.go b/src/jobservice/job/kinds.go similarity index 73% rename from src/jobservice/job/job_kinds.go rename to src/jobservice/job/kinds.go index 9d993e8c4..ec2f31236 100644 --- a/src/jobservice/job/job_kinds.go +++ b/src/jobservice/job/kinds.go @@ -15,10 +15,10 @@ package job const ( - // JobKindGeneric : Kind of generic job - JobKindGeneric = "Generic" - // JobKindScheduled : Kind of scheduled job - JobKindScheduled = "Scheduled" - // JobKindPeriodic : Kind of periodic job - JobKindPeriodic = "Periodic" + // KindGeneric : Kind of generic job + KindGeneric = "Generic" + // KindScheduled : Kind of scheduled job + KindScheduled = "Scheduled" + // KindPeriodic : Kind of periodic job + KindPeriodic = "Periodic" ) diff --git a/src/jobservice/job/known_jobs.go b/src/jobservice/job/known_jobs.go new file mode 100644 index 000000000..5f3f19650 --- /dev/null +++ b/src/jobservice/job/known_jobs.go @@ -0,0 +1,35 @@ +// Copyright Project Harbor Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package job + +// Define the register name constants of known jobs + +const ( + // SampleJob is name of demo job + SampleJob = "DEMO" + + // ImageScanJob is name of scan job it will be used as key to register to job service. + ImageScanJob = "IMAGE_SCAN" + // ImageScanAllJob is the name of "scanall" job in job service + ImageScanAllJob = "IMAGE_SCAN_ALL" + // ImageTransfer : the name of image transfer job in job service + ImageTransfer = "IMAGE_TRANSFER" + // ImageDelete : the name of image delete job in job service + ImageDelete = "IMAGE_DELETE" + // ImageReplicate : the name of image replicate job in job service + ImageReplicate = "IMAGE_REPLICATE" + // ImageGC the name of image garbage collection job in job service + ImageGC = "IMAGE_GC" +) diff --git a/src/jobservice/job/models.go b/src/jobservice/job/models.go new file mode 100644 index 000000000..23aae672e --- /dev/null +++ b/src/jobservice/job/models.go @@ -0,0 +1,126 @@ +// Copyright Project Harbor Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package job + +import ( + "github.com/goharbor/harbor/src/jobservice/common/utils" + "github.com/pkg/errors" +) + +// Parameters for job execution. +type Parameters map[string]interface{} + +// Request is the request of launching a job. +type Request struct { + Job *RequestBody `json:"job"` +} + +// RequestBody keeps the basic info. +type RequestBody struct { + Name string `json:"name"` + Parameters Parameters `json:"parameters"` + Metadata *Metadata `json:"metadata"` + StatusHook string `json:"status_hook"` +} + +// Metadata stores the metadata of job. +type Metadata struct { + JobKind string `json:"kind"` + ScheduleDelay uint64 `json:"schedule_delay,omitempty"` + Cron string `json:"cron_spec,omitempty"` + IsUnique bool `json:"unique"` +} + +// Stats keeps the result of job launching. +type Stats struct { + Info *StatsInfo `json:"job"` +} + +// StatsInfo keeps the stats of job +type StatsInfo struct { + JobID string `json:"id"` + Status string `json:"status"` + JobName string `json:"name"` + JobKind string `json:"kind"` + IsUnique bool `json:"unique"` + RefLink string `json:"ref_link,omitempty"` + CronSpec string `json:"cron_spec,omitempty"` + EnqueueTime int64 `json:"enqueue_time"` + UpdateTime int64 `json:"update_time"` + RunAt int64 `json:"run_at,omitempty"` + CheckIn string `json:"check_in,omitempty"` + CheckInAt int64 `json:"check_in_at,omitempty"` + DieAt int64 `json:"die_at,omitempty"` + WebHookURL string `json:"web_hook_url,omitempty"` + UpstreamJobID string `json:"upstream_job_id,omitempty"` // Ref the upstream job if existing + NumericPID int64 `json:"numeric_policy_id,omitempty"` // The numeric policy ID of the periodic job +} + +// ActionRequest defines for triggering job action like stop/cancel. +type ActionRequest struct { + Action string `json:"action"` +} + +// StatusChange is designed for reporting the status change via hook. 
+type StatusChange struct { + JobID string `json:"job_id"` + Status string `json:"status"` + CheckIn string `json:"check_in,omitempty"` + Metadata *StatsInfo `json:"metadata,omitempty"` +} + +// Validate checks that the job stats carry the required fields; it returns the first violation found +func (st *Stats) Validate() error { + if st.Info == nil { + return errors.New("nil stats body") + } + + if utils.IsEmptyStr(st.Info.JobID) { + return errors.New("missing job ID in job stats") + } + + if utils.IsEmptyStr(st.Info.JobName) { + return errors.New("missing job name in job stats") + } + + if utils.IsEmptyStr(st.Info.JobKind) { + return errors.New("missing job kind in job stats") + } + + if st.Info.JobKind != KindGeneric && + st.Info.JobKind != KindPeriodic && + st.Info.JobKind != KindScheduled { + return errors.Errorf("job kind is not supported: %s", st.Info.JobKind) + } + + status := Status(st.Info.Status) + if err := status.Validate(); err != nil { + return err + } + + if st.Info.JobKind == KindPeriodic { + if utils.IsEmptyStr(st.Info.CronSpec) { + return errors.New("missing cron spec for periodic job") + } + } + + if st.Info.JobKind == KindScheduled { + if st.Info.RunAt == 0 { + return errors.New("enqueue timestamp missing for scheduled job") + } + } + + return nil +} diff --git a/src/jobservice/job/op_cmd.go b/src/jobservice/job/op_cmd.go new file mode 100644 index 000000000..5cb0ed713 --- /dev/null +++ b/src/jobservice/job/op_cmd.go @@ -0,0 +1,28 @@ +// Copyright Project Harbor Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and +// limitations under the License. + +package job + +const ( + StopCommand OPCommand = "stop" + NilCommand OPCommand = "nil" +) + +// OPCommand is the type of job operation commands +type OPCommand string + +// IsStop return if the op command is stop +func (oc OPCommand) IsStop() bool { + return oc == "stop" +} diff --git a/src/jobservice/job/status.go b/src/jobservice/job/status.go new file mode 100644 index 000000000..2c89f9ac2 --- /dev/null +++ b/src/jobservice/job/status.go @@ -0,0 +1,82 @@ +// Copyright Project Harbor Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package job + +import "fmt" + +const ( + // PendingStatus : job status pending + PendingStatus Status = "Pending" + // RunningStatus : job status running + RunningStatus Status = "Running" + // StoppedStatus : job status stopped + StoppedStatus Status = "Stopped" + // ErrorStatus : job status error + ErrorStatus Status = "Error" + // SuccessStatus : job status success + SuccessStatus Status = "Success" + // ScheduledStatus : job status scheduled + ScheduledStatus Status = "Scheduled" +) + +// Status of job +type Status string + +// Validate the status +// If it's valid, then return nil error +// otherwise an non nil error is returned +func (s Status) Validate() error { + if s.Code() == -1 { + return fmt.Errorf("%s is not valid job status", s) + } + + return nil +} + +// Code of job status +func (s Status) Code() int { + switch s { + case "Pending": + return 0 + case "Scheduled": + return 1 + case "Running": + return 2 + // All final status share the same code + // Each job will have only 1 final status + case "Stopped": + return 3 + case "Error": + return 3 + case "Success": + return 3 + default: + } + + return -1 +} + +// Compare the two job status +// if < 0, s before another status +// if == 0, same status +// if > 0, s after another status +func (s Status) Compare(another Status) int { + return s.Code() - another.Code() +} + +// String returns the raw string value of the status +func (s Status) String() string { + return string(s) +} diff --git a/src/jobservice/job/tracker.go b/src/jobservice/job/tracker.go new file mode 100644 index 000000000..be0df0bf9 --- /dev/null +++ b/src/jobservice/job/tracker.go @@ -0,0 +1,598 @@ +// Copyright Project Harbor Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package job + +import ( + "context" + "fmt" + "github.com/goharbor/harbor/src/jobservice/common/query" + "github.com/goharbor/harbor/src/jobservice/common/rds" + "github.com/goharbor/harbor/src/jobservice/common/utils" + "github.com/goharbor/harbor/src/jobservice/logger" + "github.com/gomodule/redigo/redis" + "github.com/pkg/errors" + "math/rand" + "strconv" + "time" +) + +const ( + // Try best to keep the job stats data but anyway clear it after a long time (180 days, in seconds) + statDataExpireTime = 180 * 24 * 3600 + // Default page size for querying + defaultPageSize = 25 +) + +// Tracker is designed to track the life cycle of the job described by the stats +// The status change is linear, so each status has a strict predecessor and successor +// Check should be enforced before switching +// +// Pending is default status when creating job, so no need to switch +type Tracker interface { + // Save the job stats tracked by this tracker to the backend + // + // Return: + // non-nil error returned if any issues happened + Save() error + + // Load the job stats tracked by this tracker from the backend data + // + // Return: + // non-nil error returned if any issues happened + Load() error + + // Job returns the job stats tracked by this tracker + // + // Returns: + // *Stats : job stats data + Job() *Stats + + // Update the properties of the job stats + // + // fieldAndValues ...interface{} : One or more properties being updated + // + // Returns: + // error if update failed + Update(fieldAndValues ...interface{}) error + + // Executions returns the executions of
the job tracked by this tracker. + // Please pay attention, this only for periodic job. + // + // Returns: + // job execution IDs matched the query + // the total number + // error if any issues happened + Executions(q *query.Parameter) ([]string, int64, error) + + // NumericID returns the numeric ID of periodic job. + // Please pay attention, this only for periodic job. + NumericID() (int64, error) + + // Mark the periodic job execution to done by update the score + // of the relation between its periodic policy and execution to -1. + PeriodicExecutionDone() error + + // Check in message + CheckIn(message string) error + + // The current status of job + Status() (Status, error) + + // Expire the job stats data + Expire() error + + // Switch status to running + Run() error + + // Switch status to scheduled + Schedule() error + + // Switch status to stopped + Stop() error + + // Switch the status to error + Fail() error + + // Switch the status to success + Succeed() error +} + +// basicTracker implements Tracker interface based on redis +type basicTracker struct { + namespace string + context context.Context + pool *redis.Pool + jobID string + jobStats *Stats + callback HookCallback +} + +// NewBasicTrackerWithID builds a tracker with the provided job ID +func NewBasicTrackerWithID( + jobID string, + ctx context.Context, + ns string, + pool *redis.Pool, + callback HookCallback, +) Tracker { + return &basicTracker{ + namespace: ns, + context: ctx, + pool: pool, + jobID: jobID, + callback: callback, + } +} + +// NewBasicTrackerWithStats builds a tracker with the provided job stats +func NewBasicTrackerWithStats( + stats *Stats, + ctx context.Context, + ns string, + pool *redis.Pool, + callback HookCallback, +) Tracker { + return &basicTracker{ + namespace: ns, + context: ctx, + pool: pool, + jobStats: stats, + jobID: stats.Info.JobID, + callback: callback, + } +} + +// Refresh the job stats which tracked by this tracker +func (bt *basicTracker) Load() error { + 
return bt.retrieve() +} + +// Job returns the job stats which tracked by this tracker +func (bt *basicTracker) Job() *Stats { + return bt.jobStats +} + +// Update the properties of the job stats, an error is returned if no properties are provided +func (bt *basicTracker) Update(fieldAndValues ...interface{}) error { + if len(fieldAndValues) == 0 { + return errors.New("no properties specified to update") + } + + conn := bt.pool.Get() + defer conn.Close() + + key := rds.KeyJobStats(bt.namespace, bt.jobID) + args := []interface{}{"update_time", time.Now().Unix()} // update timestamp + args = append(args, fieldAndValues...) + + return rds.HmSet(conn, key, args...) +} + +// Status returns the current status of job tracked by this tracker +func (bt *basicTracker) Status() (Status, error) { + // Retrieve the latest status again in case get the outdated one. + conn := bt.pool.Get() + defer conn.Close() + + rootKey := rds.KeyJobStats(bt.namespace, bt.jobID) + return getStatus(conn, rootKey) +} + +// NumericID returns the numeric ID of the periodic job +func (bt *basicTracker) NumericID() (int64, error) { + if bt.jobStats.Info.NumericPID > 0 { + return bt.jobStats.Info.NumericPID, nil + } + + return -1, errors.Errorf("numeric ID not found for job: %s", bt.jobID) +} + +// PeriodicExecutionDone mark the execution done +func (bt *basicTracker) PeriodicExecutionDone() error { + if utils.IsEmptyStr(bt.jobStats.Info.UpstreamJobID) { + return errors.Errorf("%s is not periodic job execution", bt.jobID) + } + + key := rds.KeyUpstreamJobAndExecutions(bt.namespace, bt.jobStats.Info.UpstreamJobID) + + conn := bt.pool.Get() + defer conn.Close() + + args := []interface{}{key, "XX", -1, bt.jobID} + _, err := conn.Do("ZADD", args...)
+ + return err +} + +// Check in message +func (bt *basicTracker) CheckIn(message string) error { + if utils.IsEmptyStr(message) { + return errors.New("check in error: empty message") + } + + // Fire the hook first; keep its error so a successful update cannot hide it + hookErr := bt.fireHook(Status(bt.jobStats.Info.Status), message) + // Update always refreshes "update_time" itself, no need to pass it again + if err := bt.Update("check_in", message, "check_in_at", time.Now().Unix()); err != nil { + return err + } + + return hookErr +} + +// Executions of the tracked job +func (bt *basicTracker) Executions(q *query.Parameter) ([]string, int64, error) { + if bt.jobStats.Info.JobKind != KindPeriodic { + return nil, 0, errors.New("only periodic job has executions") + } + + conn := bt.pool.Get() + defer conn.Close() + + key := rds.KeyUpstreamJobAndExecutions(bt.namespace, bt.jobID) + + // Pagination + var pageNumber, pageSize uint = 1, defaultPageSize + if q != nil { + if q.PageNumber > 0 { + pageNumber = q.PageNumber + } + if q.PageSize > 0 { + pageSize = q.PageSize + } + } + + // Get total first + total, err := redis.Int64(conn.Do("ZCARD", key)) + if err != nil { + return nil, 0, err + } + + // No items + result := make([]string, 0) + if total == 0 || (int64)((pageNumber-1)*pageSize) >= total { + return result, total, nil + } + + min, max := (pageNumber-1)*pageSize, pageNumber*pageSize-1 + args := []interface{}{key, min, max} + list, err := redis.Values(conn.Do("ZREVRANGE", args...)) + if err != nil { + return nil, 0, err + } + + for _, item := range list { + if eID, ok := item.([]byte); ok { + result = append(result, string(eID)) + } + } + + return result, total, nil +} + +// Expire job stats +func (bt *basicTracker) Expire() error { + conn := bt.pool.Get() + defer conn.Close() + + key := rds.KeyJobStats(bt.namespace, bt.jobID) + num, err := redis.Int(conn.Do("EXPIRE", key, statDataExpireTime)) + if err != nil { + return err + } + + if num == 0 { + return errors.Errorf("job stats for expiring %s does not exist", bt.jobID) + } + + return nil +} + +// Run job +func (bt *basicTracker) Run() error { + return
bt.compareAndSet(RunningStatus) +} + +// Schedule job +func (bt *basicTracker) Schedule() error { + return bt.compareAndSet(ScheduledStatus) +} + +// Stop job. Stop is a final status: if switching fails, retries are enforced. +// If either the hook or the status update fails, a non-nil error is returned. +func (bt *basicTracker) Stop() error { + hookErr := bt.fireHook(StoppedStatus) + if err := bt.updateStatusWithRetry(StoppedStatus); err != nil { + return err + } + return hookErr +} + +// Fail job. Fail is a final status: if switching fails, retries are enforced. +// If either the hook or the status update fails, a non-nil error is returned. +func (bt *basicTracker) Fail() error { + hookErr := bt.fireHook(ErrorStatus) + if err := bt.updateStatusWithRetry(ErrorStatus); err != nil { + return err + } + return hookErr +} + +// Succeed job. Succeed is a final status: if switching fails, retries are enforced. +// If either the hook or the status update fails, a non-nil error is returned. +func (bt *basicTracker) Succeed() error { + hookErr := bt.fireHook(SuccessStatus) + if err := bt.updateStatusWithRetry(SuccessStatus); err != nil { + return err + } + return hookErr +} + +// Save the stats of job tracked by this tracker +func (bt *basicTracker) Save() (err error) { + if bt.jobStats == nil { + return errors.New("nil job stats to save") + } + + conn := bt.pool.Get() + defer conn.Close() + + // Alias + stats := bt.jobStats + + key := rds.KeyJobStats(bt.namespace, stats.Info.JobID) + args := make([]interface{}, 0) + args = append(args, key) + args = append(args, + "id", stats.Info.JobID, + "name", stats.Info.JobName, + "kind", stats.Info.JobKind, + "unique", stats.Info.IsUnique, + "status", stats.Info.Status, + "ref_link", stats.Info.RefLink, + "enqueue_time", stats.Info.EnqueueTime, + "run_at", stats.Info.RunAt, + "cron_spec", stats.Info.CronSpec, + "web_hook_url", stats.Info.WebHookURL, + "numeric_policy_id", stats.Info.NumericPID, + ) + if stats.Info.CheckInAt > 0 && !utils.IsEmptyStr(stats.Info.CheckIn) { + args = append(args, + "check_in", stats.Info.CheckIn, + "check_in_at", stats.Info.CheckInAt, + ) + } + if
stats.Info.DieAt > 0 { + args = append(args, "die_at", stats.Info.DieAt) + } + + if !utils.IsEmptyStr(stats.Info.UpstreamJobID) { + args = append(args, "upstream_job_id", stats.Info.UpstreamJobID) + } + // Set update timestamp + args = append(args, "update_time", time.Now().Unix()) + + // Do it in a transaction + err = conn.Send("MULTI") + err = conn.Send("HMSET", args...) + + // If job kind is periodic job, expire time should not be set + // If job kind is scheduled job, expire time should be runAt+ + if stats.Info.JobKind != KindPeriodic { + var expireTime int64 = statDataExpireTime + if stats.Info.JobKind == KindScheduled { + nowTime := time.Now().Unix() + future := stats.Info.RunAt - nowTime + if future > 0 { + expireTime += future + } + } + expireTime += rand.Int63n(15) // Avoid lots of keys being expired at the same time + err = conn.Send("EXPIRE", key, expireTime) + } + + // Link with its upstream job if upstream job ID exists for future querying + if !utils.IsEmptyStr(stats.Info.UpstreamJobID) { + k := rds.KeyUpstreamJobAndExecutions(bt.namespace, stats.Info.UpstreamJobID) + zargs := []interface{}{k, "NX", stats.Info.RunAt, stats.Info.JobID} + err = conn.Send("ZADD", zargs...) + } + + // Check command send error only once here before executing + if err != nil { + return + } + + _, err = conn.Do("EXEC") + + return +} + +// Fire the hook event +func (bt *basicTracker) fireHook(status Status, checkIn ...string) error { + // Check if hook URL is registered + if utils.IsEmptyStr(bt.jobStats.Info.WebHookURL) { + // Do nothing + return nil + } + + change := &StatusChange{ + JobID: bt.jobID, + Status: status.String(), + Metadata: bt.jobStats.Info, + } + + if len(checkIn) > 0 { + change.CheckIn = checkIn[0] + } + + // If callback is registered, then trigger now + if bt.callback != nil { + return bt.callback(bt.jobStats.Info.WebHookURL, change) + } + + return nil +} + +// If update status failed, then retry if permitted. 
+// Try best to do +func (bt *basicTracker) updateStatusWithRetry(targetStatus Status) error { + err := bt.compareAndSet(targetStatus) + if err != nil { + // If still need to retry + // Check the update timestamp + if time.Now().Unix()-bt.jobStats.Info.UpdateTime < 2*24*3600 { + // Keep on retrying + go func() { + select { + case <-time.After(time.Duration(5)*time.Minute + time.Duration(rand.Int31n(13))*time.Second): + if err := bt.updateStatusWithRetry(targetStatus); err != nil { + logger.Errorf("Retry of updating status of job %s error: %s", bt.jobID, err) + } + case <-bt.context.Done(): + return // terminated + } + }() + } + } + + return err +} + +func (bt *basicTracker) compareAndSet(targetStatus Status) error { + conn := bt.pool.Get() + defer conn.Close() + + rootKey := rds.KeyJobStats(bt.namespace, bt.jobID) + + st, err := getStatus(conn, rootKey) + if err != nil { + return err + } + + if st.Compare(targetStatus) >= 0 { + return fmt.Errorf("mismatch job status: current %s, setting to %s", st, targetStatus) + } + + return setStatus(conn, rootKey, targetStatus) +} + +// retrieve the stats of job tracked by this tracker from the backend data +func (bt *basicTracker) retrieve() error { + conn := bt.pool.Get() + defer conn.Close() + + key := rds.KeyJobStats(bt.namespace, bt.jobID) + vals, err := redis.Strings(conn.Do("HGETALL", key)) + if err != nil { + return err + } + + if vals == nil || len(vals) == 0 { + return errors.Errorf("nothing got from backend for job '%s'", bt.jobID) + } + + res := &Stats{ + Info: &StatsInfo{}, + } + + for i, l := 0, len(vals); i < l; i = i + 2 { + prop := vals[i] + value := vals[i+1] + switch prop { + case "id": + res.Info.JobID = value + break + case "name": + res.Info.JobName = value + break + case "kind": + res.Info.JobKind = value + case "unique": + v, err := strconv.ParseBool(value) + if err != nil { + v = false + } + res.Info.IsUnique = v + case "status": + res.Info.Status = value + break + case "ref_link": + res.Info.RefLink = 
value + break + case "enqueue_time": + v, _ := strconv.ParseInt(value, 10, 64) + res.Info.EnqueueTime = v + break + case "update_time": + v, _ := strconv.ParseInt(value, 10, 64) + res.Info.UpdateTime = v + break + case "run_at": + v, _ := strconv.ParseInt(value, 10, 64) + res.Info.RunAt = v + break + case "check_in_at": + v, _ := strconv.ParseInt(value, 10, 64) + res.Info.CheckInAt = v + break + case "check_in": + res.Info.CheckIn = value + break + case "cron_spec": + res.Info.CronSpec = value + break + case "web_hook_url": + res.Info.WebHookURL = value + break + case "die_at": + v, _ := strconv.ParseInt(value, 10, 64) + res.Info.DieAt = v + case "upstream_job_id": + res.Info.UpstreamJobID = value + break + case "numeric_policy_id": + v, _ := strconv.ParseInt(value, 10, 64) + res.Info.NumericPID = v + break + default: + break + } + } + + bt.jobStats = res + + return nil +} + +func getStatus(conn redis.Conn, key string) (Status, error) { + values, err := rds.HmGet(conn, key, "status") + if err != nil { + return "", err + } + + if len(values) == 1 { + raw, _ := values[0].([]byte) // checked assertion: a nil (missing) value must not panic + if st := Status(raw); st.Validate() == nil { + return st, nil + } + } + + return "", errors.New("malformed status data returned") +} + +func setStatus(conn redis.Conn, key string, status Status) error { + return rds.HmSet(conn, key, "status", status.String(), "update_time", time.Now().Unix()) +} diff --git a/src/jobservice/job/impl/known_jobs.go b/src/jobservice/job/web_hook.go similarity index 79% rename from src/jobservice/job/impl/known_jobs.go rename to src/jobservice/job/web_hook.go index e19a07cd6..3c52f6b68 100644 --- a/src/jobservice/job/impl/known_jobs.go +++ b/src/jobservice/job/web_hook.go @@ -12,11 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License.
-package impl +package job -// Define the register name constants of known jobs - -const ( - // KnownJobDemo is name of demo job - KnownJobDemo = "DEMO" -) +// HookCallback defines a callback to trigger when hook events happened +type HookCallback func(hookURL string, change *StatusChange) error diff --git a/src/jobservice/lcm/controller.go b/src/jobservice/lcm/controller.go new file mode 100644 index 000000000..2b9c9466d --- /dev/null +++ b/src/jobservice/lcm/controller.go @@ -0,0 +1,77 @@ +// Copyright Project Harbor Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package lcm + +import ( + "context" + "github.com/goharbor/harbor/src/jobservice/job" + "github.com/gomodule/redigo/redis" + "github.com/pkg/errors" +) + +// Controller is designed to control the life cycle of the job +type Controller interface { + // New creates a tracker for the provided stats + New(stats *job.Stats) (job.Tracker, error) + + // Track the life cycle of the specified existing job + Track(jobID string) (job.Tracker, error) +} + +// basicController is default implementation of Controller based on redis +type basicController struct { + context context.Context + namespace string + pool *redis.Pool + callback job.HookCallback +} + +// NewController is the constructor of basic controller +func NewController(ctx context.Context, ns string, pool *redis.Pool, callback job.HookCallback) Controller { + return &basicController{ + context: ctx, + namespace: ns, + pool: pool, + callback: callback, + } +} + +// New validates the stats, builds a tracker for them and saves them via the tracker +func (bc *basicController) New(stats *job.Stats) (job.Tracker, error) { + if stats == nil { + return nil, errors.New("nil stats when creating job tracker") + } + + if err := stats.Validate(); err != nil { + return nil, errors.Errorf("error occurred when creating job tracker: %s", err) + } + + bt := job.NewBasicTrackerWithStats(stats, bc.context, bc.namespace, bc.pool, bc.callback) + if err := bt.Save(); err != nil { + return nil, err + } + + return bt, nil +} + +// Track builds a tracker for the existing job and loads its stats via the tracker +func (bc *basicController) Track(jobID string) (job.Tracker, error) { + bt := job.NewBasicTrackerWithID(jobID, bc.context, bc.namespace, bc.pool, bc.callback) + if err := bt.Load(); err != nil { + return nil, err + } + + return bt, nil +} diff --git a/src/jobservice/lcm/controller_test.go b/src/jobservice/lcm/controller_test.go new file mode 100644 index 000000000..b723c3f68 --- /dev/null +++ b/src/jobservice/lcm/controller_test.go @@ -0,0 +1,73 @@ +// Copyright Project Harbor Authors +// +// Licensed under the Apache License, Version 2.0 (the
"License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package lcm + +import ( + "github.com/goharbor/harbor/src/jobservice/common/rds" + "github.com/goharbor/harbor/src/jobservice/job" + "github.com/goharbor/harbor/src/jobservice/tests" + "testing" +) + +var ( + ns = tests.GiveMeTestNamespace() + pool = tests.GiveMeRedisPool() + ctl = NewController(ns, pool) +) + +func TestLifeCycleController(t *testing.T) { + conn := pool.Get() + defer tests.ClearAll(ns, conn) + + // only mock status data + jobID := "fake_job_ID_lcm_ctl" + key := rds.KeyJobStats(ns, jobID) + if err := setStatus(conn, key, job.PendingStatus); err != nil { + t.Fatalf("mock data failed: %s\n", err.Error()) + } + + // Switch status one by one + tk := ctl.Track(jobID) + + current, err := tk.Current() + nilError(t, err) + expect(t, job.PendingStatus, current) + + nilError(t, tk.Run()) + current, err = tk.Current() + nilError(t, err) + expect(t, job.RunningStatus, current) + + nilError(t, tk.Succeed()) + current, err = tk.Current() + nilError(t, err) + expect(t, job.SuccessStatus, current) + + if err := tk.Fail(); err == nil { + t.Fatalf("expect non nil error but got nil when switch status from %s to %s", current, job.ErrorStatus) + } +} + +func expect(t *testing.T, expected job.Status, current job.Status) { + if expected != current { + t.Fatalf("expect status %s but got %s", expected, current) + } +} + +func nilError(t *testing.T, err error) { + if err != nil { + t.Fatal(err) + } +} diff --git a/src/jobservice/logger/getter/file_getter.go 
b/src/jobservice/logger/getter/file_getter.go index 630c35194..cc7faffad 100644 --- a/src/jobservice/logger/getter/file_getter.go +++ b/src/jobservice/logger/getter/file_getter.go @@ -8,7 +8,7 @@ import ( "github.com/goharbor/harbor/src/jobservice/errs" - "github.com/goharbor/harbor/src/jobservice/utils" + "github.com/goharbor/harbor/src/jobservice/common/utils" ) // FileGetter is responsible for retrieving file log data diff --git a/src/jobservice/main.go b/src/jobservice/main.go index 2620eb3f2..6d535de0a 100644 --- a/src/jobservice/main.go +++ b/src/jobservice/main.go @@ -16,19 +16,13 @@ package main import ( "context" - "errors" "flag" "fmt" - "github.com/goharbor/harbor/src/common" - comcfg "github.com/goharbor/harbor/src/common/config" + "github.com/goharbor/harbor/src/jobservice/common/utils" "github.com/goharbor/harbor/src/jobservice/config" - "github.com/goharbor/harbor/src/jobservice/env" - "github.com/goharbor/harbor/src/jobservice/job/impl" "github.com/goharbor/harbor/src/jobservice/logger" "github.com/goharbor/harbor/src/jobservice/runtime" - "github.com/goharbor/harbor/src/jobservice/utils" - "os" ) func main() { @@ -57,7 +51,7 @@ func main() { } // Set job context initializer - runtime.JobService.SetJobContextInitializer(func(ctx *env.Context) (env.JobContext, error) { + /*runtime.JobService.SetJobContextInitializer(func(ctx context.Context) (job.Context, error) { secret := config.GetAuthSecret() if utils.IsEmptyStr(secret) { return nil, errors.New("empty auth secret") @@ -72,8 +66,8 @@ func main() { } return jobCtx, nil - }) + })*/ // Start - runtime.JobService.LoadAndRun(ctx, cancel) + runtime.JobService.LoadAndRun(ctx) } diff --git a/src/jobservice/models/models.go b/src/jobservice/models/models.go deleted file mode 100644 index c5c5f13c5..000000000 --- a/src/jobservice/models/models.go +++ /dev/null @@ -1,99 +0,0 @@ -// Copyright Project Harbor Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use 
this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package models - -// Parameters for job execution. -type Parameters map[string]interface{} - -// JobRequest is the request of launching a job. -type JobRequest struct { - Job *JobData `json:"job"` -} - -// JobData keeps the basic info. -type JobData struct { - Name string `json:"name"` - Parameters Parameters `json:"parameters"` - Metadata *JobMetadata `json:"metadata"` - StatusHook string `json:"status_hook"` -} - -// JobMetadata stores the metadata of job. -type JobMetadata struct { - JobKind string `json:"kind"` - ScheduleDelay uint64 `json:"schedule_delay,omitempty"` - Cron string `json:"cron_spec,omitempty"` - IsUnique bool `json:"unique"` -} - -// JobStats keeps the result of job launching. 
-type JobStats struct { - Stats *JobStatData `json:"job"` -} - -// JobStatData keeps the stats of job -type JobStatData struct { - JobID string `json:"id"` - Status string `json:"status"` - JobName string `json:"name"` - JobKind string `json:"kind"` - IsUnique bool `json:"unique"` - RefLink string `json:"ref_link,omitempty"` - CronSpec string `json:"cron_spec,omitempty"` - EnqueueTime int64 `json:"enqueue_time"` - UpdateTime int64 `json:"update_time"` - RunAt int64 `json:"run_at,omitempty"` - CheckIn string `json:"check_in,omitempty"` - CheckInAt int64 `json:"check_in_at,omitempty"` - DieAt int64 `json:"die_at,omitempty"` - HookStatus string `json:"hook_status,omitempty"` - Executions []string `json:"executions,omitempty"` // For the jobs like periodic jobs, which may execute multiple times - UpstreamJobID string `json:"upstream_job_id,omitempty"` // Ref the upstream job if existing - IsMultipleExecutions bool `json:"multiple_executions"` // Indicate if the job has subsequent executions -} - -// JobPoolStats represents the healthy and status of all the running worker pools. -type JobPoolStats struct { - Pools []*JobPoolStatsData `json:"worker_pools"` -} - -// JobPoolStatsData represent the healthy and status of the worker pool. -type JobPoolStatsData struct { - WorkerPoolID string `json:"worker_pool_id"` - StartedAt int64 `json:"started_at"` - HeartbeatAt int64 `json:"heartbeat_at"` - JobNames []string `json:"job_names"` - Concurrency uint `json:"concurrency"` - Status string `json:"status"` -} - -// JobActionRequest defines for triggering job action like stop/cancel. -type JobActionRequest struct { - Action string `json:"action"` -} - -// JobStatusChange is designed for reporting the status change via hook. 
-type JobStatusChange struct { - JobID string `json:"job_id"` - Status string `json:"status"` - CheckIn string `json:"check_in,omitempty"` - Metadata *JobStatData `json:"metadata,omitempty"` -} - -// Message is designed for sub/pub messages -type Message struct { - Event string - Data interface{} // generic format -} diff --git a/src/jobservice/opm/hook_client.go b/src/jobservice/opm/hook_client.go deleted file mode 100644 index 422e71295..000000000 --- a/src/jobservice/opm/hook_client.go +++ /dev/null @@ -1,107 +0,0 @@ -// Copyright Project Harbor Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package opm - -import ( - "encoding/json" - "errors" - "fmt" - "io/ioutil" - "net/http" - "net/url" - "strings" - "time" - - "github.com/goharbor/harbor/src/jobservice/models" - "github.com/goharbor/harbor/src/jobservice/utils" -) - -const ( - clientTimeout = 10 * time.Second - maxIdleConnections = 20 - idleConnectionTimeout = 30 * time.Second -) - -// DefaultHookClient is for default use. -var DefaultHookClient = NewHookClient() - -// HookClient is used to post the related data to the interested parties. 
-type HookClient struct { - client *http.Client -} - -// NewHookClient return the ptr of the new HookClient -func NewHookClient() *HookClient { - client := &http.Client{ - Timeout: clientTimeout, - Transport: &http.Transport{ - MaxIdleConns: maxIdleConnections, - IdleConnTimeout: idleConnectionTimeout, - }, - } - - return &HookClient{ - client: client, - } -} - -// ReportStatus reports the status change info to the subscribed party. -// The status includes 'checkin' info with format 'check_in:' -func (hc *HookClient) ReportStatus(hookURL string, status models.JobStatusChange) error { - if utils.IsEmptyStr(hookURL) { - return errors.New("empty hook url") // do nothing - } - - // Parse and validate URL - url, err := url.Parse(hookURL) - if err != nil { - return err - } - - // Marshal data - data, err := json.Marshal(&status) - if err != nil { - return err - } - - // New post request - req, err := http.NewRequest(http.MethodPost, url.String(), strings.NewReader(string(data))) - if err != nil { - return err - } - - res, err := hc.client.Do(req) - if err != nil { - return err - } - - defer res.Body.Close() // close connection for reuse - - // Should be 200 - if res.StatusCode != http.StatusOK { - if res.ContentLength > 0 { - // read error content and return - dt, err := ioutil.ReadAll(res.Body) - if err != nil { - return err - } - return errors.New(string(dt)) - } - - return fmt.Errorf("failed to report status change via hook, expect '200' but got '%d'", res.StatusCode) - } - - return nil -} diff --git a/src/jobservice/opm/hook_store.go b/src/jobservice/opm/hook_store.go deleted file mode 100644 index c13c5bb07..000000000 --- a/src/jobservice/opm/hook_store.go +++ /dev/null @@ -1,69 +0,0 @@ -// Copyright Project Harbor Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package opm - -import ( - "sync" - - "github.com/goharbor/harbor/src/jobservice/utils" -) - -// HookStore is used to cache the hooks in memory. -// Use job ID as key to index -type HookStore struct { - lock *sync.RWMutex - data map[string]string -} - -// NewHookStore is to create a ptr of new HookStore. -func NewHookStore() *HookStore { - return &HookStore{ - lock: new(sync.RWMutex), - data: make(map[string]string), - } -} - -// Add new record -func (hs *HookStore) Add(jobID string, hookURL string) { - if utils.IsEmptyStr(jobID) { - return // do nothing - } - - hs.lock.Lock() - defer hs.lock.Unlock() - - hs.data[jobID] = hookURL -} - -// Get one hook url by job ID -func (hs *HookStore) Get(jobID string) (string, bool) { - hs.lock.RLock() - defer hs.lock.RUnlock() - - hookURL, ok := hs.data[jobID] - - return hookURL, ok -} - -// Remove the specified one -func (hs *HookStore) Remove(jobID string) (string, bool) { - hs.lock.Lock() - defer hs.lock.Unlock() - - hookURL, ok := hs.data[jobID] - delete(hs.data, jobID) - - return hookURL, ok -} diff --git a/src/jobservice/opm/job_stats_mgr.go b/src/jobservice/opm/job_stats_mgr.go deleted file mode 100644 index f77da38e7..000000000 --- a/src/jobservice/opm/job_stats_mgr.go +++ /dev/null @@ -1,137 +0,0 @@ -// Copyright Project Harbor Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package opm - -import "github.com/goharbor/harbor/src/jobservice/models" - -// Range for list scope defining -type Range int - -// JobStatsManager defines the methods to handle stats of job. -type JobStatsManager interface { - // Start to serve - Start() - - // Shutdown the manager - Shutdown() - - // Save the job stats - // Async method to retry and improve performance - // - // jobStats models.JobStats : the job stats to be saved - Save(jobStats models.JobStats) - - // Get the job stats from backend store - // Sync method as we need the data - // - // Returns: - // models.JobStats : job stats data - // error : error if meet any problems - Retrieve(jobID string) (models.JobStats, error) - - // Update the properties of the job stats - // - // jobID string : ID of the being retried job - // fieldAndValues ...interface{} : One or more properties being updated - // - // Returns: - // error if update failed - Update(jobID string, fieldAndValues ...interface{}) error - - // SetJobStatus will mark the status of job to the specified one - // Async method to retry - SetJobStatus(jobID string, status string) - - // Send command fro the specified job - // - // jobID string : ID of the being retried job - // command string : the command applied to the job like stop/cancel - // isCached bool : to indicate if only cache the op command - // - // Returns: - // error if it was not successfully sent - SendCommand(jobID string, command string, isCached bool) error - - // CtlCommand checks if control command is fired for the specified job. 
- // - // jobID string : ID of the job - // - // Returns: - // the command if it was fired - // error if it was not fired yet to meet some other problems - CtlCommand(jobID string) (string, error) - - // CheckIn message for the specified job like detailed progress info. - // - // jobID string : ID of the job - // message string : The message being checked in - // - CheckIn(jobID string, message string) - - // DieAt marks the failed jobs with the time they put into dead queue. - // - // jobID string : ID of the job - // message string : The message being checked in - // - DieAt(jobID string, dieAt int64) - - // RegisterHook is used to save the hook url or cache the url in memory. - // - // jobID string : ID of job - // hookURL string : the hook url being registered - // isCached bool : to indicate if only cache the hook url - // - // Returns: - // error if meet any problems - RegisterHook(jobID string, hookURL string, isCached bool) error - - // Get hook returns the web hook url for the specified job if it is registered - // - // jobID string : ID of job - // - // Returns: - // the web hook url if existing - // non-nil error if meet any problems - GetHook(jobID string) (string, error) - - // Mark the periodic job stats expired - // - // jobID string : ID of job - // - // Returns: - // error if meet any problems - ExpirePeriodicJobStats(jobID string) error - - // Persist the links between upstream job and the executions. - // - // upstreamJobID string: ID of the upstream job - // executions ...string: IDs of the execution jobs - // - // Returns: - // error if meet any issues - AttachExecution(upstreamJobID string, executions ...string) error - - // Get all the executions (IDs) fro the specified upstream Job. 
- // - // upstreamJobID string: ID of the upstream job - // ranges ...Range: Define the start and end for the list, e.g: - // 0, 10 means [0:10] - // 10 means [10:] - // empty means [0:-1]==all - // Returns: - // the ID list of the executions if no error occurred - // or a non-nil error is returned - GetExecutions(upstreamJobID string, ranges ...Range) ([]string, error) -} diff --git a/src/jobservice/opm/op_commands.go b/src/jobservice/opm/op_commands.go deleted file mode 100644 index cbc05f9e1..000000000 --- a/src/jobservice/opm/op_commands.go +++ /dev/null @@ -1,178 +0,0 @@ -// Copyright Project Harbor Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package opm - -import ( - "context" - "encoding/json" - "errors" - "fmt" - "sync" - "time" - - "github.com/goharbor/harbor/src/jobservice/logger" - "github.com/goharbor/harbor/src/jobservice/models" - "github.com/goharbor/harbor/src/jobservice/utils" - "github.com/gomodule/redigo/redis" -) - -const ( - commandValidTime = 5 * time.Minute - commandSweepTickerTime = 1 * time.Hour - // EventFireCommand for firing command event - EventFireCommand = "fire_command" -) - -type oPCommand struct { - command string - fireTime int64 -} - -// oPCommands maintain commands list -type oPCommands struct { - lock *sync.RWMutex - commands map[string]*oPCommand - context context.Context - redisPool *redis.Pool - namespace string - stopChan chan struct{} - doneChan chan struct{} -} - -// newOPCommands is constructor of OPCommands -func newOPCommands(ctx context.Context, ns string, redisPool *redis.Pool) *oPCommands { - return &oPCommands{ - lock: new(sync.RWMutex), - commands: make(map[string]*oPCommand), - context: ctx, - redisPool: redisPool, - namespace: ns, - stopChan: make(chan struct{}, 1), - doneChan: make(chan struct{}, 1), - } -} - -// Start the command sweeper -func (opc *oPCommands) Start() { - go opc.loop() - logger.Info("OP commands sweeper is started") -} - -// Stop the command sweeper -func (opc *oPCommands) Stop() { - opc.stopChan <- struct{}{} - <-opc.doneChan -} - -// Fire command -func (opc *oPCommands) Fire(jobID string, command string) error { - if utils.IsEmptyStr(jobID) { - return errors.New("empty job ID") - } - - if command != CtlCommandStop && command != CtlCommandCancel { - return fmt.Errorf("Unsupported command %s", command) - } - - notification := &models.Message{ - Event: EventFireCommand, - Data: []string{jobID, command}, - } - - rawJSON, err := json.Marshal(notification) - if err != nil { - return err - } - - conn := opc.redisPool.Get() - defer conn.Close() - - _, err = conn.Do("PUBLISH", utils.KeyPeriodicNotification(opc.namespace), rawJSON) - - 
return err -} - -// Push command into the list -func (opc *oPCommands) Push(jobID string, command string) error { - if utils.IsEmptyStr(jobID) { - return errors.New("empty job ID") - } - - if command != CtlCommandStop && command != CtlCommandCancel { - return fmt.Errorf("Unsupported command %s", command) - } - - opc.lock.Lock() - defer opc.lock.Unlock() - - opc.commands[jobID] = &oPCommand{ - command: command, - fireTime: time.Now().Unix(), - } - - return nil -} - -// Pop out the command if existing -func (opc *oPCommands) Pop(jobID string) (string, bool) { - if utils.IsEmptyStr(jobID) { - return "", false - } - - opc.lock.RLock() - defer opc.lock.RUnlock() - - c, ok := opc.commands[jobID] - if ok { - if time.Unix(c.fireTime, 0).Add(commandValidTime).After(time.Now()) { - delete(opc.commands, jobID) - return c.command, true - } - } - - return "", false -} - -func (opc *oPCommands) loop() { - defer func() { - logger.Info("OP commands is stopped") - opc.doneChan <- struct{}{} - }() - - tk := time.NewTicker(commandSweepTickerTime) - defer tk.Stop() - - for { - select { - case <-tk.C: - opc.sweepCommands() - case <-opc.context.Done(): - return - case <-opc.stopChan: - return - } - } -} - -func (opc *oPCommands) sweepCommands() { - opc.lock.Lock() - defer opc.lock.Unlock() - - for k, v := range opc.commands { - if time.Unix(v.fireTime, 0).Add(commandValidTime).After(time.Now()) { - delete(opc.commands, k) - } - } -} diff --git a/src/jobservice/opm/redis_job_stats_mgr.go b/src/jobservice/opm/redis_job_stats_mgr.go deleted file mode 100644 index 87c8ca228..000000000 --- a/src/jobservice/opm/redis_job_stats_mgr.go +++ /dev/null @@ -1,826 +0,0 @@ -// Copyright Project Harbor Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package opm - -import ( - "context" - "encoding/json" - "errors" - "fmt" - "math" - "math/rand" - "strconv" - "sync/atomic" - "time" - - "github.com/goharbor/harbor/src/jobservice/errs" - "github.com/goharbor/harbor/src/jobservice/logger" - - "github.com/goharbor/harbor/src/jobservice/job" - "github.com/goharbor/harbor/src/jobservice/models" - "github.com/goharbor/harbor/src/jobservice/utils" - "github.com/gomodule/redigo/redis" -) - -const ( - processBufferSize = 1024 - opSaveStats = "save_job_stats" - opUpdateStatus = "update_job_status" - opCheckIn = "check_in" - opDieAt = "mark_die_at" - opReportStatus = "report_status" - opPersistExecutions = "persist_executions" - opUpdateStats = "update_job_stats" - maxFails = 3 - jobStatsDataExpireTime = 60 * 60 * 24 * 5 // 5 days - - // CtlCommandStop : command stop - CtlCommandStop = "stop" - // CtlCommandCancel : command cancel - CtlCommandCancel = "cancel" - // CtlCommandRetry : command retry - CtlCommandRetry = "retry" - - // EventRegisterStatusHook is event name of registering hook - EventRegisterStatusHook = "register_hook" -) - -type queueItem struct { - Op string - Fails uint - Data interface{} -} - -func (qi *queueItem) string() string { - data, err := json.Marshal(qi) - if err != nil { - return fmt.Sprintf("%v", qi) - } - - return string(data) -} - -// RedisJobStatsManager implements JobStatsManager based on redis. 
-type RedisJobStatsManager struct { - namespace string - redisPool *redis.Pool - context context.Context - stopChan chan struct{} - doneChan chan struct{} - processChan chan *queueItem - isRunning *atomic.Value - hookStore *HookStore // cache the hook here to avoid requesting backend - opCommands *oPCommands // maintain the OP commands -} - -// NewRedisJobStatsManager is constructor of RedisJobStatsManager -func NewRedisJobStatsManager(ctx context.Context, namespace string, redisPool *redis.Pool) JobStatsManager { - isRunning := &atomic.Value{} - isRunning.Store(false) - - return &RedisJobStatsManager{ - namespace: namespace, - context: ctx, - redisPool: redisPool, - stopChan: make(chan struct{}, 1), - doneChan: make(chan struct{}, 1), - processChan: make(chan *queueItem, processBufferSize), - hookStore: NewHookStore(), - isRunning: isRunning, - opCommands: newOPCommands(ctx, namespace, redisPool), - } -} - -// Start is implementation of same method in JobStatsManager interface. -func (rjs *RedisJobStatsManager) Start() { - if rjs.isRunning.Load().(bool) { - return - } - go rjs.loop() - rjs.opCommands.Start() - rjs.isRunning.Store(true) - - logger.Info("Redis job stats manager is started") -} - -// Shutdown is implementation of same method in JobStatsManager interface. -func (rjs *RedisJobStatsManager) Shutdown() { - defer func() { - rjs.isRunning.Store(false) - }() - - if !(rjs.isRunning.Load().(bool)) { - return - } - - rjs.opCommands.Stop() - rjs.stopChan <- struct{}{} - <-rjs.doneChan -} - -// Save is implementation of same method in JobStatsManager interface. -// Async method -func (rjs *RedisJobStatsManager) Save(jobStats models.JobStats) { - item := &queueItem{ - Op: opSaveStats, - Data: jobStats, - } - - rjs.processChan <- item -} - -// Retrieve is implementation of same method in JobStatsManager interface. 
-// Sync method -func (rjs *RedisJobStatsManager) Retrieve(jobID string) (models.JobStats, error) { - if utils.IsEmptyStr(jobID) { - return models.JobStats{}, errors.New("empty job ID") - } - - res, err := rjs.getJobStats(jobID) - if err != nil { - return models.JobStats{}, err - } - - if res.Stats.IsMultipleExecutions { - executions, err := rjs.GetExecutions(jobID) - if err != nil { - return models.JobStats{}, err - } - - res.Stats.Executions = executions - } - - return res, nil -} - -// SetJobStatus is implementation of same method in JobStatsManager interface. -// Async method -func (rjs *RedisJobStatsManager) SetJobStatus(jobID string, status string) { - if utils.IsEmptyStr(jobID) || utils.IsEmptyStr(status) { - return - } - - item := &queueItem{ - Op: opUpdateStatus, - Data: []string{jobID, status}, - } - - rjs.processChan <- item - - // Report status at the same time - rjs.submitStatusReportingItem(jobID, status, "") -} - -func (rjs *RedisJobStatsManager) loop() { - controlChan := make(chan struct{}) - - defer func() { - rjs.isRunning.Store(false) - // Notify other sub goroutines - close(controlChan) - logger.Info("Redis job stats manager is stopped") - }() - - for { - select { - case item := <-rjs.processChan: - go func(item *queueItem) { - clearHookCache := false - if err := rjs.process(item); err != nil { - item.Fails++ - if item.Fails < maxFails { - logger.Warningf("Failed to process '%s' request with error: %s\n", item.Op, err) - - // Retry after a random interval - go func() { - timer := time.NewTimer(time.Duration(backoff(item.Fails)) * time.Second) - defer timer.Stop() - - select { - case <-timer.C: - rjs.processChan <- item - return - case <-controlChan: - } - }() - } else { - logger.Errorf("Failed to process '%s' request with error: %s (%d times tried)\n", item.Op, err, maxFails) - if item.Op == opReportStatus { - clearHookCache = true - } - } - } else { - logger.Debugf("Operation is successfully processed: %s", item.string()) - - if item.Op == 
opReportStatus { - clearHookCache = true - } - } - - if clearHookCache { - // Clear cache to save memory if job status is success or stopped. - data := item.Data.([]string) - status := data[2] - if status == job.JobStatusSuccess || status == job.JobStatusStopped { - rjs.hookStore.Remove(data[0]) - } - } - }(item) - break - case <-rjs.stopChan: - rjs.doneChan <- struct{}{} - return - case <-rjs.context.Done(): - return - } - } -} - -// SendCommand for the specified job -func (rjs *RedisJobStatsManager) SendCommand(jobID string, command string, isCached bool) error { - if utils.IsEmptyStr(jobID) { - return errors.New("empty job ID") - } - - if command != CtlCommandStop && command != CtlCommandCancel { - return errors.New("unknown command") - } - - if !isCached { - // Let other interested parties awareness - if err := rjs.opCommands.Fire(jobID, command); err != nil { - return err - } - } - - // Directly add to op commands maintaining list - return rjs.opCommands.Push(jobID, command) -} - -// CheckIn mesage -func (rjs *RedisJobStatsManager) CheckIn(jobID string, message string) { - if utils.IsEmptyStr(jobID) || utils.IsEmptyStr(message) { - return - } - - item := &queueItem{ - Op: opCheckIn, - Data: []string{jobID, message}, - } - - rjs.processChan <- item - - // Report checkin message at the same time - rjs.submitStatusReportingItem(jobID, job.JobStatusRunning, message) -} - -// CtlCommand checks if control command is fired for the specified job. -func (rjs *RedisJobStatsManager) CtlCommand(jobID string) (string, error) { - if utils.IsEmptyStr(jobID) { - return "", errors.New("empty job ID") - } - - c, ok := rjs.opCommands.Pop(jobID) - if !ok { - return "", fmt.Errorf("no OP command fired to job %s", jobID) - } - - return c, nil -} - -// DieAt marks the failed jobs with the time they put into dead queue. 
-func (rjs *RedisJobStatsManager) DieAt(jobID string, dieAt int64) { - if utils.IsEmptyStr(jobID) || dieAt == 0 { - return - } - - item := &queueItem{ - Op: opDieAt, - Data: []interface{}{jobID, dieAt}, - } - - rjs.processChan <- item -} - -// RegisterHook is used to save the hook url or cache the url in memory. -func (rjs *RedisJobStatsManager) RegisterHook(jobID string, hookURL string, isCached bool) error { - if utils.IsEmptyStr(jobID) { - return errors.New("empty job ID") - } - - if !utils.IsValidURL(hookURL) { - return errors.New("invalid hook url") - } - - if !isCached { - return rjs.saveHook(jobID, hookURL) - } - - rjs.hookStore.Add(jobID, hookURL) - - return nil -} - -// GetHook returns the status web hook url for the specified job if existing -func (rjs *RedisJobStatsManager) GetHook(jobID string) (string, error) { - if utils.IsEmptyStr(jobID) { - return "", errors.New("empty job ID") - } - - // First retrieve from the cache - if hookURL, ok := rjs.hookStore.Get(jobID); ok { - return hookURL, nil - } - - // Not hit in cache! Get it from the backend. - hookURL, err := rjs.getHook(jobID) - if err != nil { - return "", err - } - - // Cache and return - rjs.hookStore.Add(jobID, hookURL) - - return hookURL, nil -} - -// ExpirePeriodicJobStats marks the periodic job stats expired -func (rjs *RedisJobStatsManager) ExpirePeriodicJobStats(jobID string) error { - conn := rjs.redisPool.Get() - defer conn.Close() - - // The periodic job (policy) is stopped/unscheduled and then - // the stats of periodic job now can be expired - key := utils.KeyJobStats(rjs.namespace, jobID) - _, err := conn.Do("EXPIRE", key, jobStatsDataExpireTime) - - return err -} - -// AttachExecution persist the links between upstream jobs and the related executions (jobs). 
-func (rjs *RedisJobStatsManager) AttachExecution(upstreamJobID string, executions ...string) error { - if len(upstreamJobID) == 0 { - return errors.New("empty upstream job ID is not allowed") - } - - if len(executions) == 0 { - return errors.New("no executions existing to persist") - } - - // Send to process channel - item := &queueItem{ - Op: opPersistExecutions, - Data: []interface{}{upstreamJobID, executions}, - } - - rjs.processChan <- item - - return nil -} - -// GetExecutions returns the existing executions (IDs) for the specified job. -func (rjs *RedisJobStatsManager) GetExecutions(upstreamJobID string, ranges ...Range) ([]string, error) { - if len(upstreamJobID) == 0 { - return nil, errors.New("no upstream ID specified") - } - - conn := rjs.redisPool.Get() - defer conn.Close() - - var start, end interface{} = "-inf", "+inf" - if len(ranges) >= 1 { - start = int(ranges[0]) - } - if len(ranges) > 1 { - end = int(ranges[1]) - } - - key := utils.KeyUpstreamJobAndExecutions(rjs.namespace, upstreamJobID) - ids, err := redis.Strings(conn.Do("ZRANGEBYSCORE", key, start, end)) - if err != nil { - if err == redis.ErrNil { - return []string{}, nil - } - - return nil, err - } - - return ids, nil -} - -// Update the properties of job stats -func (rjs *RedisJobStatsManager) Update(jobID string, fieldAndValues ...interface{}) error { - if len(jobID) == 0 { - return errors.New("no updating job") - } - - if len(fieldAndValues) == 0 || len(fieldAndValues)%2 != 0 { - return errors.New("filed and its value should be pair") - } - - data := []interface{}{} - data = append(data, jobID) - data = append(data, fieldAndValues...) 
- - item := &queueItem{ - Op: opUpdateStats, - Data: data, - } - - rjs.processChan <- item - - return nil -} - -func (rjs *RedisJobStatsManager) submitStatusReportingItem(jobID string, status, checkIn string) { - // Let it run in a separate goroutine to avoid waiting more time - go func() { - var ( - hookURL string - ok bool - err error - ) - - hookURL, ok = rjs.hookStore.Get(jobID) - if !ok { - // Retrieve from backend - hookURL, err = rjs.getHook(jobID) - if err != nil || !utils.IsValidURL(hookURL) { - // logged and exit - logger.Warningf("no status hook found for job %s\n, abandon status reporting", jobID) - return - } - } - - item := &queueItem{ - Op: opReportStatus, - Data: []string{jobID, hookURL, status, checkIn}, - } - - rjs.processChan <- item - }() -} - -func (rjs *RedisJobStatsManager) reportStatus(jobID string, hookURL, status, checkIn string) error { - reportingStatus := models.JobStatusChange{ - JobID: jobID, - Status: status, - CheckIn: checkIn, - } - // Return the whole metadata of the job. - // To support forward compatibility, keep the original fields `Status` and `CheckIn`. - // TODO: If querying job stats causes performance issues, a two-level cache should be enabled. - jobStats, err := rjs.getJobStats(jobID) - if err != nil { - // Just logged - logger.Errorf("Retrieving stats of job %s for hook reporting failed with error: %s", jobID, err) - } else { - // Override status/check in message - // Just double confirmation - jobStats.Stats.CheckIn = checkIn - jobStats.Stats.Status = status - reportingStatus.Metadata = jobStats.Stats - } - - return DefaultHookClient.ReportStatus(hookURL, reportingStatus) -} - -func (rjs *RedisJobStatsManager) updateJobStats(jobID string, fieldAndValues ...interface{}) error { - conn := rjs.redisPool.Get() - defer conn.Close() - - key := utils.KeyJobStats(rjs.namespace, jobID) - args := make([]interface{}, 0, len(fieldAndValues)+1) - - args = append(args, key) - args = append(args, fieldAndValues...) 
- args = append(args, "update_time", time.Now().Unix()) - - _, err := conn.Do("HMSET", args...) - - return err -} - -func (rjs *RedisJobStatsManager) updateJobStatus(jobID string, status string) error { - args := make([]interface{}, 0, 4) - args = append(args, "status", status) - if status == job.JobStatusSuccess { - // make sure the 'die_at' is reset in case it's a retrying job - args = append(args, "die_at", 0) - } - - return rjs.updateJobStats(jobID, args...) -} - -func (rjs *RedisJobStatsManager) checkIn(jobID string, message string) error { - - now := time.Now().Unix() - args := make([]interface{}, 0, 4) - args = append(args, "check_in", message, "check_in_at", now) - - return rjs.updateJobStats(jobID, args...) -} - -func (rjs *RedisJobStatsManager) dieAt(jobID string, baseTime int64) error { - conn := rjs.redisPool.Get() - defer conn.Close() - - // Query the dead job in the time scope of [baseTime,baseTime+5] - key := utils.RedisKeyDead(rjs.namespace) - jobWithScores, err := utils.GetZsetByScore(rjs.redisPool, key, []int64{baseTime, baseTime + 5}) - if err != nil { - return err - } - - for _, jws := range jobWithScores { - if j, err := utils.DeSerializeJob(jws.JobBytes); err == nil { - if j.ID == jobID { - // Found - args := make([]interface{}, 0, 6) - args = append(args, "die_at", jws.Score) - return rjs.updateJobStats(jobID, args...) 
- } - } - } - - return fmt.Errorf("seems %s is not a dead job", jobID) -} - -func (rjs *RedisJobStatsManager) getJobStats(jobID string) (models.JobStats, error) { - conn := rjs.redisPool.Get() - defer conn.Close() - - key := utils.KeyJobStats(rjs.namespace, jobID) - vals, err := redis.Strings(conn.Do("HGETALL", key)) - if err != nil { - return models.JobStats{}, err - } - - if vals == nil || len(vals) == 0 { - return models.JobStats{}, errs.NoObjectFoundError(fmt.Sprintf("job '%s'", jobID)) - } - - res := models.JobStats{ - Stats: &models.JobStatData{}, - } - for i, l := 0, len(vals); i < l; i = i + 2 { - prop := vals[i] - value := vals[i+1] - switch prop { - case "id": - res.Stats.JobID = value - break - case "name": - res.Stats.JobName = value - break - case "kind": - res.Stats.JobKind = value - case "unique": - v, err := strconv.ParseBool(value) - if err != nil { - v = false - } - res.Stats.IsUnique = v - case "status": - res.Stats.Status = value - break - case "ref_link": - res.Stats.RefLink = value - break - case "enqueue_time": - v, _ := strconv.ParseInt(value, 10, 64) - res.Stats.EnqueueTime = v - break - case "update_time": - v, _ := strconv.ParseInt(value, 10, 64) - res.Stats.UpdateTime = v - break - case "run_at": - v, _ := strconv.ParseInt(value, 10, 64) - res.Stats.RunAt = v - break - case "check_in_at": - v, _ := strconv.ParseInt(value, 10, 64) - res.Stats.CheckInAt = v - break - case "check_in": - res.Stats.CheckIn = value - break - case "cron_spec": - res.Stats.CronSpec = value - break - case "die_at": - v, _ := strconv.ParseInt(value, 10, 64) - res.Stats.DieAt = v - case "upstream_job_id": - res.Stats.UpstreamJobID = value - break - case "multiple_executions": - v, err := strconv.ParseBool(value) - if err != nil { - v = false - } - res.Stats.IsMultipleExecutions = v - break - default: - break - } - } - - return res, nil -} - -func (rjs *RedisJobStatsManager) saveJobStats(jobStats models.JobStats) error { - if jobStats.Stats == nil { - return 
errors.New("malformed job stats object") - } - - conn := rjs.redisPool.Get() - defer conn.Close() - - key := utils.KeyJobStats(rjs.namespace, jobStats.Stats.JobID) - args := make([]interface{}, 0) - args = append(args, key) - args = append(args, - "id", jobStats.Stats.JobID, - "name", jobStats.Stats.JobName, - "kind", jobStats.Stats.JobKind, - "unique", jobStats.Stats.IsUnique, - "status", jobStats.Stats.Status, - "ref_link", jobStats.Stats.RefLink, - "enqueue_time", jobStats.Stats.EnqueueTime, - "update_time", jobStats.Stats.UpdateTime, - "run_at", jobStats.Stats.RunAt, - "cron_spec", jobStats.Stats.CronSpec, - "multiple_executions", jobStats.Stats.IsMultipleExecutions, - ) - if jobStats.Stats.CheckInAt > 0 && !utils.IsEmptyStr(jobStats.Stats.CheckIn) { - args = append(args, - "check_in", jobStats.Stats.CheckIn, - "check_in_at", jobStats.Stats.CheckInAt, - ) - } - if jobStats.Stats.DieAt > 0 { - args = append(args, "die_at", jobStats.Stats.DieAt) - } - - if len(jobStats.Stats.UpstreamJobID) > 0 { - args = append(args, "upstream_job_id", jobStats.Stats.UpstreamJobID) - } - - conn.Send("HMSET", args...) 
- // If job kind is periodic job, expire time should not be set - // If job kind is scheduled job, expire time should be runAt+1day - if jobStats.Stats.JobKind != job.JobKindPeriodic { - var expireTime int64 = jobStatsDataExpireTime - if jobStats.Stats.JobKind == job.JobKindScheduled { - nowTime := time.Now().Unix() - future := jobStats.Stats.RunAt - nowTime - if future > 0 { - expireTime += future - } - } - expireTime += rand.Int63n(30) // Avoid lots of keys being expired at the same time - conn.Send("EXPIRE", key, expireTime) - } - - return conn.Flush() -} - -func (rjs *RedisJobStatsManager) saveExecutions(upstreamJobID string, executions []string) error { - key := utils.KeyUpstreamJobAndExecutions(rjs.namespace, upstreamJobID) - - conn := rjs.redisPool.Get() - defer conn.Close() - - err := conn.Send("MULTI") - if err != nil { - return err - } - - args := []interface{}{key} - baseScore := time.Now().Unix() - for index, execution := range executions { - args = append(args, baseScore+int64(index), execution) - } - - if err := conn.Send("ZADD", args...); err != nil { - return err - } - - // add expire time - if err := conn.Send("EXPIRE", key, jobStatsDataExpireTime); err != nil { - return err - } - - _, err = conn.Do("EXEC") - - return err -} - -func (rjs *RedisJobStatsManager) process(item *queueItem) error { - switch item.Op { - case opSaveStats: - jobStats := item.Data.(models.JobStats) - return rjs.saveJobStats(jobStats) - case opUpdateStatus: - data := item.Data.([]string) - return rjs.updateJobStatus(data[0], data[1]) - case opCheckIn: - data := item.Data.([]string) - return rjs.checkIn(data[0], data[1]) - case opDieAt: - data := item.Data.([]interface{}) - return rjs.dieAt(data[0].(string), data[1].(int64)) - case opReportStatus: - data := item.Data.([]string) - return rjs.reportStatus(data[0], data[1], data[2], data[3]) - case opPersistExecutions: - data := item.Data.([]interface{}) - return rjs.saveExecutions(data[0].(string), data[1].([]string)) - case 
opUpdateStats: - data := item.Data.([]interface{}) - return rjs.updateJobStats(data[0].(string), data[1:]...) - default: - break - } - - return nil -} - -// HookData keeps the hook url info -type HookData struct { - JobID string `json:"job_id"` - HookURL string `json:"hook_url"` -} - -func (rjs *RedisJobStatsManager) saveHook(jobID string, hookURL string) error { - conn := rjs.redisPool.Get() - defer conn.Close() - - key := utils.KeyJobStats(rjs.namespace, jobID) - args := make([]interface{}, 0, 3) - args = append(args, key, "status_hook", hookURL) - msg := &models.Message{ - Event: EventRegisterStatusHook, - Data: &HookData{ - JobID: jobID, - HookURL: hookURL, - }, - } - rawJSON, err := json.Marshal(msg) - if err != nil { - return err - } - - // hook is saved into the job stats - // We'll not set expire time here, the expire time of the key will be set when saving job stats - if err := conn.Send("MULTI"); err != nil { - return err - } - if err := conn.Send("HMSET", args...); err != nil { - return err - } - if err := conn.Send("PUBLISH", utils.KeyPeriodicNotification(rjs.namespace), rawJSON); err != nil { - return err - } - - _, err = conn.Do("EXEC") - return err -} - -func (rjs *RedisJobStatsManager) getHook(jobID string) (string, error) { - conn := rjs.redisPool.Get() - defer conn.Close() - - key := utils.KeyJobStats(rjs.namespace, jobID) - hookURL, err := redis.String(conn.Do("HGET", key, "status_hook")) - if err != nil { - if err == redis.ErrNil { - return "", fmt.Errorf("no registered web hook found for job '%s'", jobID) - } - return "", err - } - - return hookURL, nil -} - -func backoff(seed uint) int { - if seed < 1 { - seed = 1 - } - - return int(math.Pow(float64(seed+1), float64(seed))) + rand.Intn(5) -} diff --git a/src/jobservice/opm/redis_job_stats_mgr_test.go b/src/jobservice/opm/redis_job_stats_mgr_test.go deleted file mode 100644 index def65f797..000000000 --- a/src/jobservice/opm/redis_job_stats_mgr_test.go +++ /dev/null @@ -1,342 +0,0 @@ -// 
Copyright Project Harbor Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -package opm - -import ( - "context" - "encoding/json" - "errors" - "fmt" - "io/ioutil" - "net/http" - "net/http/httptest" - "os" - "strings" - "testing" - "time" - - "github.com/goharbor/harbor/src/jobservice/job" - "github.com/goharbor/harbor/src/jobservice/models" - "github.com/goharbor/harbor/src/jobservice/utils" - "github.com/gomodule/redigo/redis" -) - -const ( - dialConnectionTimeout = 30 * time.Second - healthCheckPeriod = time.Minute - dialReadTimeout = healthCheckPeriod + 10*time.Second - dialWriteTimeout = 10 * time.Second - testingRedisHost = "REDIS_HOST" - testingNamespace = "testing_job_service_v2" -) - -var redisHost = getRedisHost() -var redisPool = &redis.Pool{ - MaxActive: 2, - MaxIdle: 2, - Wait: true, - Dial: func() (redis.Conn, error) { - return redis.Dial( - "tcp", - fmt.Sprintf("%s:%d", redisHost, 6379), - redis.DialConnectTimeout(dialConnectionTimeout), - redis.DialReadTimeout(dialReadTimeout), - redis.DialWriteTimeout(dialWriteTimeout), - ) - }, -} - -func TestSetJobStatus(t *testing.T) { - mgr := createStatsManager(redisPool) - mgr.Start() - defer mgr.Shutdown() - <-time.After(200 * time.Millisecond) - // make sure data existing - testingStats := createFakeStats() - mgr.Save(testingStats) - <-time.After(200 * time.Millisecond) - - mgr.SetJobStatus("fake_job_ID", "running") - <-time.After(100 * time.Millisecond) - stats, err := 
mgr.Retrieve("fake_job_ID") - if err != nil { - t.Fatal(err) - } - - if stats.Stats.Status != "running" { - t.Fatalf("expect job status 'running' but got '%s'\n", stats.Stats.Status) - } - - key := utils.KeyJobStats(testingNamespace, "fake_job_ID") - if err := clear(key, redisPool.Get()); err != nil { - t.Fatal(err) - } -} - -func TestCommand(t *testing.T) { - mgr := createStatsManager(redisPool) - mgr.Start() - defer mgr.Shutdown() - <-time.After(200 * time.Millisecond) - - if err := mgr.SendCommand("fake_job_ID", CtlCommandStop, true); err != nil { - t.Fatal(err) - } - - if cmd, err := mgr.CtlCommand("fake_job_ID"); err != nil { - t.Fatal(err) - } else { - if cmd != CtlCommandStop { - t.Fatalf("expect '%s' but got '%s'", CtlCommandStop, cmd) - } - } -} - -func TestDieAt(t *testing.T) { - mgr := createStatsManager(redisPool) - mgr.Start() - defer mgr.Shutdown() - <-time.After(200 * time.Millisecond) - - testingStats := createFakeStats() - mgr.Save(testingStats) - - dieAt := time.Now().Unix() - if err := createDeadJob(redisPool.Get(), dieAt); err != nil { - t.Fatal(err) - } - <-time.After(200 * time.Millisecond) - mgr.DieAt("fake_job_ID", dieAt) - <-time.After(300 * time.Millisecond) - - stats, err := mgr.Retrieve("fake_job_ID") - if err != nil { - t.Fatal(err) - } - - if stats.Stats.DieAt != dieAt { - t.Fatalf("expect die at '%d' but got '%d'\n", dieAt, stats.Stats.DieAt) - } - - key := utils.KeyJobStats(testingNamespace, "fake_job_ID") - if err := clear(key, redisPool.Get()); err != nil { - t.Fatal(err) - } - key2 := utils.RedisKeyDead(testingNamespace) - if err := clear(key2, redisPool.Get()); err != nil { - t.Fatal(err) - } -} - -func TestRegisterHook(t *testing.T) { - mgr := createStatsManager(redisPool) - mgr.Start() - defer mgr.Shutdown() - <-time.After(200 * time.Millisecond) - - if err := mgr.RegisterHook("fake_job_ID", "http://localhost:9999", false); err != nil { - t.Fatal(err) - } - - key := utils.KeyJobStats(testingNamespace, "fake_job_ID") - if err := 
clear(key, redisPool.Get()); err != nil { - t.Fatal(err) - } -} - -func TestExpireJobStats(t *testing.T) { - mgr := createStatsManager(redisPool) - mgr.Start() - defer mgr.Shutdown() - <-time.After(200 * time.Millisecond) - - // make sure data existing - testingStats := createFakeStats() - mgr.Save(testingStats) - <-time.After(200 * time.Millisecond) - - if err := mgr.ExpirePeriodicJobStats("fake_job_ID"); err != nil { - t.Fatal(err) - } - - key := utils.KeyJobStats(testingNamespace, "fake_job_ID") - if err := clear(key, redisPool.Get()); err != nil { - t.Fatal(err) - } -} - -func TestCheckIn(t *testing.T) { - mgr := createStatsManager(redisPool) - mgr.Start() - defer mgr.Shutdown() - <-time.After(200 * time.Millisecond) - - // make sure data existing - testingStats := createFakeStats() - mgr.Save(testingStats) - <-time.After(200 * time.Millisecond) - - // Start http server - ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - data, err := ioutil.ReadAll(r.Body) - if err != nil { - w.WriteHeader(http.StatusInternalServerError) - return - } - defer r.Body.Close() - - statusReport := &models.JobStatusChange{} - if err := json.Unmarshal(data, statusReport); err != nil { - w.WriteHeader(http.StatusInternalServerError) - return - } - - if statusReport.Metadata == nil || statusReport.Metadata.JobID != "fake_job_ID" { - w.WriteHeader(http.StatusInternalServerError) - return - } - - fmt.Fprintln(w, "ok") - })) - defer ts.Close() - - if err := mgr.RegisterHook("fake_job_ID", ts.URL, false); err != nil { - t.Fatal(err) - } - - mgr.CheckIn("fake_job_ID", "checkin") - <-time.After(200 * time.Millisecond) - - stats, err := mgr.Retrieve("fake_job_ID") - if err != nil { - t.Fatal(err) - } - - if stats.Stats.CheckIn != "checkin" { - t.Fatalf("expect check in info 'checkin' but got '%s'\n", stats.Stats.CheckIn) - } - - key := utils.KeyJobStats(testingNamespace, "fake_job_ID") - if err := clear(key, redisPool.Get()); err != nil { - t.Fatal(err) 
- } -} - -func TestExecutionRelated(t *testing.T) { - mgr := createStatsManager(redisPool) - mgr.Start() - defer mgr.Shutdown() - <-time.After(200 * time.Millisecond) - - if err := mgr.AttachExecution("upstream_id", "id1", "id2", "id3"); err != nil { - t.Fatal(err) - } - - // Wait for data is stable - <-time.After(200 * time.Millisecond) - ids, err := mgr.GetExecutions("upstream_id") - if err != nil { - t.Fatal(err) - } - - if strings.Join(ids, "/") != "id1/id2/id3" { - t.Fatalf("expect 'id1/id2/id3' but got %s", strings.Join(ids, " / ")) - } -} - -func TestUpdateJobStats(t *testing.T) { - mgr := createStatsManager(redisPool) - mgr.Start() - defer mgr.Shutdown() - <-time.After(200 * time.Millisecond) - - // make sure data existing - testingStats := createFakeStats() - mgr.Save(testingStats) - <-time.After(200 * time.Millisecond) - - mgr.Update("fake_job_ID", "status", "Error") - <-time.After(200 * time.Millisecond) - - updatedStats, err := mgr.Retrieve("fake_job_ID") - if err != nil { - t.Fatal(err) - } - - if updatedStats.Stats.Status != "Error" { - t.Fatalf("expect status to be '%s' but got '%s'", "Error", updatedStats.Stats.Status) - } -} - -func getRedisHost() string { - redisHost := os.Getenv(testingRedisHost) - if redisHost == "" { - redisHost = "localhost" // for local test - } - - return redisHost -} - -func createStatsManager(redisPool *redis.Pool) JobStatsManager { - ctx := context.Background() - return NewRedisJobStatsManager(ctx, testingNamespace, redisPool) -} - -func clear(key string, conn redis.Conn) error { - if conn != nil { - defer conn.Close() - _, err := conn.Do("DEL", key) - return err - } - - return errors.New("failed to clear") -} - -func createFakeStats() models.JobStats { - testingStats := models.JobStats{ - Stats: &models.JobStatData{ - JobID: "fake_job_ID", - JobKind: job.JobKindPeriodic, - JobName: "fake_job", - Status: "Pending", - IsUnique: false, - RefLink: "/api/v1/jobs/fake_job_ID", - CronSpec: "5 * * * * *", - EnqueueTime: 
time.Now().Unix(), - UpdateTime: time.Now().Unix(), - }, - } - - return testingStats -} - -func createDeadJob(conn redis.Conn, dieAt int64) error { - dead := make(map[string]interface{}) - dead["name"] = "fake_job" - dead["id"] = "fake_job_ID" - dead["args"] = make(map[string]interface{}) - dead["fails"] = 3 - dead["err"] = "testing error" - dead["failed_at"] = dieAt - - rawJSON, err := json.Marshal(&dead) - if err != nil { - return err - } - - defer conn.Close() - key := utils.RedisKeyDead(testingNamespace) - _, err = conn.Do("ZADD", key, dieAt, rawJSON) - return err -} diff --git a/src/jobservice/period/basic_scheduler.go b/src/jobservice/period/basic_scheduler.go new file mode 100644 index 000000000..6b1482dfd --- /dev/null +++ b/src/jobservice/period/basic_scheduler.go @@ -0,0 +1,277 @@ +// Copyright Project Harbor Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package period + +import ( + "encoding/json" + "time" + + "context" + "github.com/gocraft/work" + "github.com/goharbor/harbor/src/jobservice/common/rds" + "github.com/goharbor/harbor/src/jobservice/common/utils" + "github.com/goharbor/harbor/src/jobservice/job" + "github.com/goharbor/harbor/src/jobservice/lcm" + "github.com/goharbor/harbor/src/jobservice/logger" + "github.com/gomodule/redigo/redis" + "github.com/pkg/errors" +) + +// basicScheduler manages the periodic scheduling policies. 
+type basicScheduler struct { + context context.Context + pool *redis.Pool + namespace string + enqueuer *enqueuer + client *work.Client + ctl lcm.Controller +} + +// NewScheduler is constructor of basicScheduler +func NewScheduler(ctx context.Context, namespace string, pool *redis.Pool, ctl lcm.Controller) Scheduler { + return &basicScheduler{ + context: ctx, + pool: pool, + namespace: namespace, + enqueuer: newEnqueuer(ctx, namespace, pool, ctl), + client: work.NewClient(namespace, pool), + ctl: ctl, + } +} + +// Start the periodic scheduling process +// Blocking call here +func (bs *basicScheduler) Start() error { + defer func() { + logger.Info("Basic scheduler is stopped") + }() + + // Try best to do + go bs.clearDirtyJobs() + + logger.Info("Basic scheduler is started") + + // start enqueuer + return bs.enqueuer.start() +} + +// Stop the periodic scheduling process +func (bs *basicScheduler) Stop() error { + // stop everything + bs.enqueuer.stopChan <- true + + return nil +} + +// Schedule is implementation of the same method in period.Interface +func (bs *basicScheduler) Schedule(p *Policy) (int64, error) { + if p == nil { + return -1, errors.New("bad policy object: nil") + } + + if err := p.Validate(); err != nil { + return -1, err + } + + conn := bs.pool.Get() + defer conn.Close() + + // Do the 1st round of enqueuing + bs.enqueuer.scheduleNextJobs(p, conn) + + // Serialize data + rawJSON, err := p.Serialize() + if err != nil { + return -1, err + } + + // Prepare publish message + m := &message{ + Event: changeEventSchedule, + Data: p, + } + + msgJSON, err := json.Marshal(m) + if err != nil { + return -1, err + } + + pid := time.Now().Unix() + + // Save to redis db and publish notification via redis transaction + err = conn.Send("MULTI") + err = conn.Send("ZADD", rds.KeyPeriodicPolicy(bs.namespace), pid, rawJSON) + err = conn.Send("PUBLISH", rds.KeyPeriodicNotification(bs.namespace), msgJSON) + if _, err := conn.Do("EXEC"); err != nil { + return -1, err + } + 
+ return pid, nil +} + +// UnSchedule is implementation of the same method in period.Interface +func (bs *basicScheduler) UnSchedule(policyID string) error { + if utils.IsEmptyStr(policyID) { + return errors.New("bad periodic job ID: nil") + } + + tracker, err := bs.ctl.Track(policyID) + if err != nil { + return err + } + + // If errors occurred when getting the numeric ID of periodic job, + // may be because the specified job is not a valid periodic job. + numericID, err := tracker.NumericID() + if err != nil { + return err + } + + conn := bs.pool.Get() + defer conn.Close() + + // Get the un-scheduling policy object; numericID is used as the zset score below, so query by score (not by rank) + bytes, err := redis.Values(conn.Do("ZRANGEBYSCORE", rds.KeyPeriodicPolicy(bs.namespace), numericID, numericID)) + if err != nil { + return err + } + + p := &Policy{} + if len(bytes) > 0 { + if rawPolicy, ok := bytes[0].([]byte); ok { + if err := p.DeSerialize(rawPolicy); err != nil { + return err + } + } + } + + if utils.IsEmptyStr(p.ID) { + // Deserialize failed + return errors.Errorf("no valid periodic job policy found: %s:%d", policyID, numericID) + } + + notification := &message{ + Event: changeEventUnSchedule, + Data: p, + } + + msgJSON, err := json.Marshal(notification) + if err != nil { + return err + } + + // Remove from redis db and publish the notification within one transaction + conn.Send("MULTI") + conn.Send("ZREMRANGEBYSCORE", rds.KeyPeriodicPolicy(bs.namespace), numericID, numericID) // Accurately remove the item with the specified score + conn.Send("PUBLISH", rds.KeyPeriodicNotification(bs.namespace), msgJSON) + _, err = conn.Do("EXEC") + if err != nil { + return err + } + + // Expire periodic job stats + if err := tracker.Expire(); err != nil { + logger.Error(err) + } + + // Get downstream executions of the periodic job + // And clear these executions + // This is a best-effort action; its failure will not fail the unschedule action.
+ // Failure errors will be only logged here + eKey := rds.KeyUpstreamJobAndExecutions(bs.namespace, policyID) + if eIDs, err := getPeriodicExecutions(conn, eKey); err != nil { + logger.Errorf("Get executions for periodic job %s error: %s", policyID, err) + } else { + for _, eID := range eIDs { + eTracker, err := bs.ctl.Track(eID) + if err != nil { + logger.Errorf("Track execution %s error: %s", eID, err) + continue + } + + // Mark job status to stopped to block execution. + // The executions here should not be in the final states, + // double confirmation: only stop the stopped ones. + e := eTracker.Job() + if job.RunningStatus.Compare(job.Status(e.Info.Status)) >= 0 { + if err := eTracker.Stop(); err != nil { + logger.Errorf("Stop execution %s error: %s", eID, err) + } + } + + // Only need to care the pending and running ones + // Do clear + if job.PendingStatus == job.Status(e.Info.Status) { + // Please pay attention here, the job ID used in the scheduled job queue is + // the ID of the periodic job (policy). + if err := bs.client.DeleteScheduledJob(e.Info.RunAt, policyID); err != nil { + logger.Errorf("Delete scheduled job %s error: %s", eID, err) + } + } + } + } + + return nil +} + +// Clear all the dirty jobs +// A scheduled job will be marked as dirty job only if the enqueued timestamp has expired a horizon.
+// This is a best-effort action +func (bs *basicScheduler) clearDirtyJobs() { + conn := bs.pool.Get() + defer conn.Close() + + nowEpoch := time.Now().Unix() + scope := nowEpoch - int64(enqueuerHorizon/time.Minute)*60 + + jobScores, err := rds.GetZsetByScore(conn, rds.RedisKeyScheduled(bs.namespace), []int64{0, scope}) + if err != nil { + logger.Errorf("Get dirty jobs error: %s", err) + return + } + + for _, jobScore := range jobScores { + j, err := utils.DeSerializeJob(jobScore.JobBytes) + if err != nil { + logger.Errorf("Deserialize dirty job error: %s", err) + continue + } + + if err = bs.client.DeleteScheduledJob(jobScore.Score, j.ID); err != nil { + logger.Errorf("Remove dirty scheduled job error: %s", err) + } else { + logger.Debugf("Remove dirty scheduled job: %s run at %#v", j.ID, time.Unix(jobScore.Score, 0).String()) + } + } +} + +// Get relevant executions for the periodic job: the members of the zset at key with score in [0, +inf) +func getPeriodicExecutions(conn redis.Conn, key string) ([]string, error) { + args := []interface{}{key, 0, "+inf"} + + list, err := redis.Values(conn.Do("ZRANGEBYSCORE", args...)) + if err != nil { + return nil, err + } + + results := make([]string, 0) + for _, item := range list { + if eID, ok := item.([]byte); ok { // redigo replies carry bulk strings as []byte, never as string + results = append(results, string(eID)) + } + } + + return results, nil +} diff --git a/src/jobservice/period/redis_scheduler_test.go b/src/jobservice/period/basic_scheduler_test.go similarity index 92% rename from src/jobservice/period/redis_scheduler_test.go rename to src/jobservice/period/basic_scheduler_test.go index 1f20cab4b..7ae348321 100644 --- a/src/jobservice/period/redis_scheduler_test.go +++ b/src/jobservice/period/basic_scheduler_test.go @@ -21,9 +21,9 @@ import ( "github.com/goharbor/harbor/src/jobservice/opm" + "github.com/goharbor/harbor/src/jobservice/common/utils" "github.com/goharbor/harbor/src/jobservice/env" "github.com/goharbor/harbor/src/jobservice/tests" - "github.com/goharbor/harbor/src/jobservice/utils" ) var redisPool = tests.GiveMeRedisPool() @@
-93,7 +93,7 @@ func TestPubFunc(t *testing.T) { } } -func myPeriodicScheduler(statsManager opm.JobStatsManager) *RedisPeriodicScheduler { +func myPeriodicScheduler(statsManager opm.JobStatsManager) *basicScheduler { sysCtx := context.Background() ctx := &env.Context{ SystemContext: sysCtx, @@ -101,5 +101,5 @@ func myPeriodicScheduler(statsManager opm.JobStatsManager) *RedisPeriodicSchedul ErrorChan: make(chan error, 1), } - return NewRedisPeriodicScheduler(ctx, tests.GiveMeTestNamespace(), redisPool, statsManager) + return NewScheduler(ctx, tests.GiveMeTestNamespace(), redisPool, statsManager) } diff --git a/src/jobservice/period/enqueuer.go b/src/jobservice/period/enqueuer.go index 0627d75b8..c1543fc8c 100644 --- a/src/jobservice/period/enqueuer.go +++ b/src/jobservice/period/enqueuer.go @@ -15,272 +15,288 @@ package period import ( - "errors" "fmt" "math/rand" "time" + "context" "github.com/gocraft/work" + "github.com/goharbor/harbor/src/jobservice/common/rds" + "github.com/goharbor/harbor/src/jobservice/common/utils" "github.com/goharbor/harbor/src/jobservice/job" + "github.com/goharbor/harbor/src/jobservice/lcm" "github.com/goharbor/harbor/src/jobservice/logger" - "github.com/goharbor/harbor/src/jobservice/models" - "github.com/goharbor/harbor/src/jobservice/opm" - "github.com/goharbor/harbor/src/jobservice/utils" "github.com/gomodule/redigo/redis" "github.com/robfig/cron" ) const ( - periodicEnqueuerSleep = 2 * time.Minute - periodicEnqueuerHorizon = 4 * time.Minute + enqueuerSleep = 2 * time.Minute + enqueuerHorizon = 4 * time.Minute + neverExecuted = 365 * 24 * time.Hour ) -type periodicEnqueuer struct { - namespace string - pool *redis.Pool - policyStore *periodicJobPolicyStore - stopChan chan struct{} - doneStoppingChan chan struct{} - statsManager opm.JobStatsManager - identity string +type enqueuer struct { + namespace string + context context.Context + pool *redis.Pool + policyStore *policyStore + ctl lcm.Controller + // Diff with other nodes + nodeID 
string + // Track the error of enqueuing + lastEnqueueErr error + // For stop + stopChan chan bool } -func newPeriodicEnqueuer(namespace string, pool *redis.Pool, policyStore *periodicJobPolicyStore, statsManager opm.JobStatsManager) *periodicEnqueuer { - return &periodicEnqueuer{ - namespace: namespace, - pool: pool, - policyStore: policyStore, - statsManager: statsManager, - stopChan: make(chan struct{}), - doneStoppingChan: make(chan struct{}), - identity: utils.MakeIdentifier(), // Currently, use a generated ID +func newEnqueuer(ctx context.Context, namespace string, pool *redis.Pool, ctl lcm.Controller) *enqueuer { + nodeID := ctx.Value(utils.NodeID) + if nodeID == nil { + // Must be failed + panic("missing node ID in the system context of periodic enqueuer") + } + + return &enqueuer{ + context: ctx, + namespace: namespace, + pool: pool, + policyStore: newPolicyStore(ctx, namespace, pool), + ctl: ctl, + stopChan: make(chan bool, 1), + nodeID: nodeID.(string), } } -func (pe *periodicEnqueuer) start() { - go pe.loop() +// Blocking call +func (e *enqueuer) start() error { + // Load policies first when starting + if err := e.policyStore.load(); err != nil { + return err + } + + go e.loop() logger.Info("Periodic enqueuer is started") + + return e.policyStore.serve() } -func (pe *periodicEnqueuer) stop() { - pe.stopChan <- struct{}{} - <-pe.doneStoppingChan -} - -func (pe *periodicEnqueuer) loop() { +func (e *enqueuer) loop() { defer func() { logger.Info("Periodic enqueuer is stopped") }() - // Begin reaping periodically - timer := time.NewTimer(periodicEnqueuerSleep + time.Duration(rand.Intn(30))*time.Second) - defer timer.Stop() - if pe.shouldEnqueue() { - err := pe.enqueue() - if err != nil { - logger.Errorf("periodic_enqueuer.loop.enqueue:%s\n", err) - } - } else { - logger.Debug("Enqueue condition not matched, do nothing.") - } + // Do enqueue immediately when starting + isHit := e.checkAndEnqueue() + + // Begin reaping periodically + timer := 
time.NewTimer(e.nextTurn(isHit, e.lastEnqueueErr != nil)) + defer timer.Stop() for { select { - case <-pe.stopChan: - pe.doneStoppingChan <- struct{}{} + case <-e.stopChan: + // Stop policy store now + e.policyStore.stopChan <- true return case <-timer.C: - timer.Reset(periodicEnqueuerSleep + time.Duration(rand.Intn(30))*time.Second) - if pe.shouldEnqueue() { - err := pe.enqueue() - if err != nil { - logger.Errorf("periodic_enqueuer.loop.enqueue:%s\n", err) - } - } else { - logger.Debug("Enqueue condition not matched, do nothing.") - } + // Pause the timer for completing the processing this time + timer.Reset(neverExecuted) + + // Check and enqueue. + // Set next turn with lower priority to balance workload with long + // round time if it hits. + isHit = e.checkAndEnqueue() + timer.Reset(e.nextTurn(isHit, e.lastEnqueueErr != nil)) } } } -func (pe *periodicEnqueuer) enqueue() error { - now := time.Now().Unix() +// checkAndEnqueue checks if it should do enqueue and +// does enqueue when condition hit. 
+func (e *enqueuer) checkAndEnqueue() (isHit bool) { + if isHit = e.shouldEnqueue(); isHit { + e.enqueue() + } - logger.Debugf("Periodic enqueuing loop by enqueuer %s: %d", pe.identity, now) + return +} - conn := pe.pool.Get() +// nextTurn returns the next check time slot by applying +// priorities to balance the workloads across multiple nodes +func (e *enqueuer) nextTurn(isHit bool, enqErr bool) time.Duration { + base := enqueuerSleep + + if isHit { + // Down the hit priority by adding more waiting time + base = base + time.Duration(3)*time.Second + if enqErr { + // Downgrade the priority if the node has occurred error when enqueuing + base = base + time.Duration(5)*time.Second + } + } else { + // Upgrade the priority of hitting in the next turn + base = base - time.Duration(3)*time.Second + } + + // Add random waiting time of [0,5) seconds + base = base + time.Duration(rand.Intn(5))*time.Second + + return base +} + +func (e *enqueuer) enqueue() { + conn := e.pool.Get() defer conn.Close() - // Set last periodic enqueue timestamp in advance to avoid duplicated enqueue actions - if _, err := conn.Do("SET", utils.RedisKeyLastPeriodicEnqueue(pe.namespace), now); err != nil { - return err - } + // Reset error track + e.lastEnqueueErr = nil - // Avoid schedule in the same time.
- lockKey := fmt.Sprintf("%s:%s", utils.KeyPeriod(pe.namespace), "lock") + e.policyStore.Iterate(func(id string, p *Policy) bool { + e.scheduleNextJobs(p, conn) + return true + }) +} - // Use separate conn for the locker - lockConn := pe.pool.Get() - defer lockConn.Close() +// scheduleNextJobs schedules job for next time slots based on the policy +func (e *enqueuer) scheduleNextJobs(p *Policy, conn redis.Conn) { + nowTime := time.Unix(time.Now().Unix(), 0) + horizon := nowTime.Add(enqueuerHorizon) - // Acquire a locker with 30s expiring time - if err := acquireLock(lockConn, lockKey, pe.identity, 30); err != nil { - return err - } - logger.Debugf("Periodic enqueuer %s acquires lock", pe.identity) - - defer func() { - if err := releaseLock(lockConn, lockKey, pe.identity); err != nil { - logger.Errorf("Periodic enqueuer %s releases lock failed: %s", pe.identity, err) - } else { - logger.Debugf("Periodic enqueuer %s releases lock", pe.identity) + schedule, err := cron.Parse(p.CronSpec) + if err != nil { + // The cron spec should be already checked at upper layers. + // Just in cases, if error occurred, ignore it + e.lastEnqueueErr = err + logger.Errorf("Invalid corn spec in periodic policy %s %s: %s", p.JobName, p.ID, err) + } else { + if p.JobParameters == nil { + p.JobParameters = make(job.Parameters) } - }() + // Add extra argument for job running + // Only for system using + p.JobParameters["_job_kind_periodic_"] = true - nowTime := time.Unix(now, 0) - horizon := nowTime.Add(periodicEnqueuerHorizon) - - for _, pl := range pe.policyStore.list() { - schedule, err := cron.Parse(pl.CronSpec) - if err != nil { - // The cron spec should be already checked at top components. 
- // Just in cases, if error occurred, ignore it - logger.Errorf("[Ignore] Invalid corn spec in periodic policy %s %s: %s", pl.JobName, pl.PolicyID, err) - continue - } - - executions := []string{} for t := schedule.Next(nowTime); t.Before(horizon); t = schedule.Next(t) { epoch := t.Unix() - scheduledExecutionID := utils.MakeIdentifier() - executions = append(executions, scheduledExecutionID) // Create an execution (job) based on the periodic job template (policy) - job := &work.Job{ - Name: pl.JobName, - ID: scheduledExecutionID, + j := &work.Job{ + Name: p.JobName, + ID: p.ID, // Use the ID of policy to avoid scheduling duplicated periodic job executions. // This is technically wrong, but this lets the bytes be identical for the same periodic job instance. // If we don't do this, we'd need to use a different approach -- probably giving each periodic job its own // history of the past 100 periodic jobs, and only scheduling a job if it's not in the history. EnqueuedAt: epoch, - Args: pl.JobParameters, // Pass parameters to scheduled job here + // Pass parameters to scheduled job here + Args: p.JobParameters, } - rawJSON, err := utils.SerializeJob(job) + rawJSON, err := utils.SerializeJob(j) if err != nil { - return err + e.lastEnqueueErr = err + // Actually this error should not happen if the object struct is well defined + logger.Errorf("Serialize job object for periodic job %s error: %s", p.ID, err) + break } - // Place the time slots for the job (policy) - // If the slot is already there, error will be returned. 
- expireTime := (epoch - nowTime.Unix()) + 5 - slot := fmt.Sprintf("%s:%s@%d", utils.KeyPeriodicJobTimeSlots(pe.namespace), pl.PolicyID, epoch) - if err := placeSlot(conn, slot, epoch, expireTime); err != nil { - // Logged and continue - logger.Errorf("Failed to place time slot '%s@%d' in enqueuer %s: %s", pl.PolicyID, epoch, pe.identity, err) - continue - } - - _, err = conn.Do("ZADD", utils.RedisKeyScheduled(pe.namespace), epoch, rawJSON) + // Persistent execution first. + // Please pay attention that the job has not been really scheduled yet. + // If job data is failed to persistent, then job schedule should be abandoned. + execution := e.createExecution(p, epoch) + eTracker, err := e.ctl.New(execution) if err != nil { - return err + e.lastEnqueueErr = err + logger.Errorf("Save stats data of job execution '%s' error: %s", execution.Info.JobID, err) + break } - logger.Infof("Schedule job %s:%s for policy %s at %d by enqueuer %s", job.Name, job.ID, pl.PolicyID, epoch, pe.identity) + // Put job to the scheduled job queue + _, err = conn.Do("ZADD", rds.RedisKeyScheduled(e.namespace), epoch, rawJSON) + if err != nil { + e.lastEnqueueErr = err + logger.Errorf("Put the execution of the periodic job '%s' to the scheduled job queue error: %s", p.ID, err) - // Try to save the stats of new scheduled execution (job). - pe.createExecution(pl.PolicyID, pl.JobName, scheduledExecutionID, epoch) - - // Get web hook from the periodic job (policy) - webHookURL, err := pe.statsManager.GetHook(pl.PolicyID) - if err == nil { - // Register hook for the execution - if err := pe.statsManager.RegisterHook(scheduledExecutionID, webHookURL, false); err != nil { - // Just logged - logger.Errorf("Failed to register web hook '%s' for periodic job (execution) '%s' with error by enqueuer %s: %s", webHookURL, scheduledExecutionID, pe.identity, err) + // Mark job status to be error + // If this happened, the job stats is definitely becoming dirty data at job service side. 
+ // For the consumer side, the retrying of web hook may fix the problem. + if err := eTracker.Fail(); err != nil { + e.lastEnqueueErr = err + logger.Errorf("Mark execution '%s' to failure status error: %s", execution.Info.JobID, err) } - } else { - // Just a warning - logger.Warningf("Failed to retrieve web hook for periodic job (policy) %s by enqueuer %s: %s", pl.PolicyID, pe.identity, err) - } - } - // Link the upstream job (policy) with the created executions - if len(executions) > 0 { - if err := pe.statsManager.AttachExecution(pl.PolicyID, executions...); err != nil { - // Just logged it - logger.Errorf("Link upstream job with executions failed in enqueuer %s: %s", pe.identity, err) - } - } - // Directly use redis conn to update the periodic job (policy) status - // Do not care the result - conn.Do("HMSET", utils.KeyJobStats(pe.namespace, pl.PolicyID), "status", job.JobStatusScheduled, "update_time", time.Now().Unix()) - } - return nil + break // Probably redis connection is broken + } + + logger.Debugf("Scheduled execution for periodic job %s:%s at %d", j.Name, p.ID, epoch) + } + } } -func (pe *periodicEnqueuer) createExecution(upstreamJobID, upstreamJobName, executionID string, runAt int64) { - execution := models.JobStats{ - Stats: &models.JobStatData{ - JobID: executionID, - JobName: upstreamJobName, - Status: job.JobStatusPending, - JobKind: job.JobKindScheduled, - EnqueueTime: time.Now().Unix(), - UpdateTime: time.Now().Unix(), - RefLink: fmt.Sprintf("/api/v1/jobs/%s", executionID), +// createExecution creates execution object +func (e *enqueuer) createExecution(p *Policy, runAt int64) *job.Stats { + eID := fmt.Sprintf("%s@%d", p.ID, runAt) + + return &job.Stats{ + Info: &job.StatsInfo{ + JobID: eID, + JobName: p.JobName, + WebHookURL: p.WebHookURL, + CronSpec: p.CronSpec, + UpstreamJobID: p.ID, RunAt: runAt, - UpstreamJobID: upstreamJobID, + Status: job.PendingStatus.String(), + JobKind: job.KindScheduled, // For periodic job execution, it should be 
set to 'scheduled' + EnqueueTime: time.Now().Unix(), + RefLink: fmt.Sprintf("/api/v1/jobs/%s", eID), }, } - - pe.statsManager.Save(execution) } -func (pe *periodicEnqueuer) shouldEnqueue() bool { - conn := pe.pool.Get() +func (e *enqueuer) shouldEnqueue() bool { + conn := e.pool.Get() defer conn.Close() - lastEnqueue, err := redis.Int64(conn.Do("GET", utils.RedisKeyLastPeriodicEnqueue(pe.namespace))) - if err == redis.ErrNil { - return true - } else if err != nil { - logger.Errorf("periodic_enqueuer.should_enqueue:%s\n", err) - return true + // Acquired a lock before doing checking + // If failed, directly returns false. + lockKey := rds.KeyPeriodicLock(e.namespace) + if err := rds.AcquireLock(conn, lockKey, e.nodeID, 30); err != nil { + logger.Errorf("acquire lock for periodic enqueuing error: %s", err) + return false } + // Acquired lock + // For lock releasing + defer func() { + if err := rds.ReleaseLock(conn, lockKey, e.nodeID); err != nil { + logger.Errorf("release lock for periodic enqueuing error: %s", err) + } + }() - return lastEnqueue < (time.Now().Unix() - int64(periodicEnqueuerSleep/time.Minute)) -} - -func placeSlot(conn redis.Conn, key string, value interface{}, expireTime int64) error { - args := []interface{}{key, value, "NX", "EX", expireTime} - res, err := conn.Do("SET", args...)
+ shouldEnq := false + lastEnqueue, err := redis.Int64(conn.Do("GET", rds.RedisKeyLastPeriodicEnqueue(e.namespace))) if err != nil { - return err - } - // Existing, the value can not be overrid - if res == nil { - return fmt.Errorf("key %s is already set with value %v", key, value) + if err != redis.ErrNil { + // Logged error + logger.Errorf("get timestamp of last enqueue error: %s", err) + } + + // Should enqueue + shouldEnq = true + } else { + // Check further condition + shouldEnq = lastEnqueue < (time.Now().Unix() - int64(enqueuerSleep/time.Minute)*60) } - return nil -} - -func acquireLock(conn redis.Conn, lockerKey string, lockerID string, expireTime int64) error { - return placeSlot(conn, lockerKey, lockerID, expireTime) -} - -func releaseLock(conn redis.Conn, lockerKey string, lockerID string) error { - theID, err := redis.String(conn.Do("GET", lockerKey)) - if err != nil { - return err - } - - if theID == lockerID { - _, err := conn.Do("DEL", lockerKey) - return err - } - - return errors.New("locker ID mismatch") + if shouldEnq { + // Set last periodic enqueue timestamp + if _, err := conn.Do("SET", rds.RedisKeyLastPeriodicEnqueue(e.namespace), time.Now().Unix()); err != nil { + logger.Errorf("set last periodic enqueue timestamp error: %s", err) + } + // Anyway the action should be enforced, whether or not the timestamp was saved + // The negative effect of a failed save is just more re-enqueues by other nodes + return true + } + + return false } diff --git a/src/jobservice/period/enqueuer_test.go b/src/jobservice/period/enqueuer_test.go index 29e1d0689..d46914b51 100644 --- a/src/jobservice/period/enqueuer_test.go +++ b/src/jobservice/period/enqueuer_test.go @@ -21,8 +21,8 @@ import ( "github.com/goharbor/harbor/src/jobservice/opm" + "github.com/goharbor/harbor/src/jobservice/common/utils" "github.com/goharbor/harbor/src/jobservice/tests" - "github.com/goharbor/harbor/src/jobservice/utils" ) func TestPeriodicEnqueuerStartStop(t
*testing.T) { lock: new(sync.RWMutex), policies: make(map[string]*PeriodicJobPolicy), } - enqueuer := newPeriodicEnqueuer(ns, redisPool, ps, nil) + enqueuer := newEnqueuer(ns, redisPool, ps, nil) enqueuer.start() <-time.After(100 * time.Millisecond) enqueuer.stop() @@ -55,7 +55,7 @@ func TestEnqueue(t *testing.T) { statsManager.Start() defer statsManager.Shutdown() - enqueuer := newPeriodicEnqueuer(ns, redisPool, ps, statsManager) + enqueuer := newEnqueuer(ns, redisPool, ps, statsManager) if err := enqueuer.enqueue(); err != nil { t.Error(err) } diff --git a/src/jobservice/period/interface.go b/src/jobservice/period/interface.go deleted file mode 100644 index ee432e333..000000000 --- a/src/jobservice/period/interface.go +++ /dev/null @@ -1,71 +0,0 @@ -// Copyright Project Harbor Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package period - -import "github.com/goharbor/harbor/src/jobservice/models" - -// Interface defines operations the periodic scheduler should have. -type Interface interface { - // Schedule the specified cron job policy. 
- // - // jobName string : The name of periodical job - // params models.Parameters : The parameters required by the periodical job - // cronSpec string : The periodical settings with cron format - // - // Returns: - // The uuid of the cron job policy - // The latest next trigger time - // error if failed to schedule - Schedule(jobName string, params models.Parameters, cronSpec string) (string, int64, error) - - // Unschedule the specified cron job policy. - // - // cronJobPolicyID string: The ID of cron job policy. - // - // Return: - // error if failed to unschedule - UnSchedule(cronJobPolicyID string) error - - // Load and cache data if needed - // - // Return: - // error if failed to do - Load() error - - // Clear all the cron job policies. - // - // Return: - // error if failed to do - Clear() error - - // Start to serve - Start() - - // Accept the pushed policy and cache it - // - // policy *PeriodicJobPolicy : the periodic policy being accept - // - // Return: - // error if failed to do - AcceptPeriodicPolicy(policy *PeriodicJobPolicy) error - - // Remove the specified policy from the cache if it is existing - // - // policyID string : ID of the policy being removed - // - // Return: - // the ptr of the being deletd policy - RemovePeriodicPolicy(policyID string) *PeriodicJobPolicy -} diff --git a/src/jobservice/period/job_policy.go b/src/jobservice/period/job_policy.go deleted file mode 100644 index 81396ddfb..000000000 --- a/src/jobservice/period/job_policy.go +++ /dev/null @@ -1,118 +0,0 @@ -// Copyright Project Harbor Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package period - -import ( - "encoding/json" - "sync" - - "github.com/goharbor/harbor/src/jobservice/utils" -) - -const ( - // periodicJobPolicyChangeEventSchedule : Schedule periodic job policy event - periodicJobPolicyChangeEventSchedule = "Schedule" - // periodicJobPolicyChangeEventUnSchedule : UnSchedule periodic job policy event - periodicJobPolicyChangeEventUnSchedule = "UnSchedule" -) - -// PeriodicJobPolicy ... -type PeriodicJobPolicy struct { - // NOTES: The 'PolicyID' should not be set when serialize this policy struct to the zset - // because each 'Policy ID' is different and it may cause issue of losing zset unique capability. - PolicyID string `json:"policy_id,omitempty"` - JobName string `json:"job_name"` - JobParameters map[string]interface{} `json:"job_params"` - CronSpec string `json:"cron_spec"` -} - -// Serialize the policy to raw data. -func (pjp *PeriodicJobPolicy) Serialize() ([]byte, error) { - return json.Marshal(pjp) -} - -// DeSerialize the raw json to policy. -func (pjp *PeriodicJobPolicy) DeSerialize(rawJSON []byte) error { - return json.Unmarshal(rawJSON, pjp) -} - -// periodicJobPolicyStore is in-memory cache for the periodic job policies. 
-type periodicJobPolicyStore struct { - lock *sync.RWMutex - policies map[string]*PeriodicJobPolicy // k-v pair and key is the policy ID -} - -func (ps *periodicJobPolicyStore) addAll(items []*PeriodicJobPolicy) { - if items == nil || len(items) == 0 { - return - } - - ps.lock.Lock() - defer ps.lock.Unlock() - - for _, item := range items { - // Ignore the item with empty uuid - if !utils.IsEmptyStr(item.PolicyID) { - ps.policies[item.PolicyID] = item - } - } -} - -func (ps *periodicJobPolicyStore) list() []*PeriodicJobPolicy { - allItems := make([]*PeriodicJobPolicy, 0) - - ps.lock.RLock() - defer ps.lock.RUnlock() - - for _, v := range ps.policies { - allItems = append(allItems, v) - } - - return allItems -} - -func (ps *periodicJobPolicyStore) add(jobPolicy *PeriodicJobPolicy) { - if jobPolicy == nil || utils.IsEmptyStr(jobPolicy.PolicyID) { - return - } - - ps.lock.Lock() - defer ps.lock.Unlock() - - ps.policies[jobPolicy.PolicyID] = jobPolicy -} - -func (ps *periodicJobPolicyStore) remove(policyID string) *PeriodicJobPolicy { - if utils.IsEmptyStr(policyID) { - return nil - } - - ps.lock.Lock() - defer ps.lock.Unlock() - - if item, ok := ps.policies[policyID]; ok { - delete(ps.policies, policyID) - return item - } - - return nil -} - -func (ps *periodicJobPolicyStore) size() int { - ps.lock.RLock() - defer ps.lock.RUnlock() - - return len(ps.policies) -} diff --git a/src/jobservice/period/policy_store.go b/src/jobservice/period/policy_store.go new file mode 100644 index 000000000..22175fa6a --- /dev/null +++ b/src/jobservice/period/policy_store.go @@ -0,0 +1,321 @@ +// Copyright Project Harbor Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package period + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "sync" + "time" + + "github.com/goharbor/harbor/src/jobservice/common/rds" + "github.com/goharbor/harbor/src/jobservice/common/utils" + "github.com/goharbor/harbor/src/jobservice/logger" + "github.com/gomodule/redigo/redis" + "github.com/robfig/cron" + "strings" +) + +const ( + // changeEventSchedule : Schedule periodic job policy event + changeEventSchedule = "Schedule" + // changeEventUnSchedule : UnSchedule periodic job policy event + changeEventUnSchedule = "UnSchedule" +) + +// Policy ... +type Policy struct { + // Policy can be treated as job template of periodic job. + // The info of policy will be copied into the scheduled job executions for the periodic job. + ID string `json:"id"` + JobName string `json:"job_name"` + CronSpec string `json:"cron_spec"` + JobParameters map[string]interface{} `json:"job_params,omitempty"` + WebHookURL string `json:"web_hook_url,omitempty"` +} + +// Serialize the policy to raw data. +func (p *Policy) Serialize() ([]byte, error) { + return json.Marshal(p) +} + +// DeSerialize the raw json to policy. 
+func (p *Policy) DeSerialize(rawJSON []byte) error { + return json.Unmarshal(rawJSON, p) +} + +// Validate the policy +func (p *Policy) Validate() error { + if utils.IsEmptyStr(p.ID) { + return errors.New("missing ID in the periodic job policy object") + } + + if utils.IsEmptyStr(p.JobName) { + return errors.New("missing job name in the periodic job policy object") + } + + if !utils.IsEmptyStr(p.WebHookURL) { + if !utils.IsValidURL(p.WebHookURL) { + return fmt.Errorf("bad web hook URL: %s", p.WebHookURL) + } + } + + if _, err := cron.Parse(p.CronSpec); err != nil { + return err + } + + return nil +} + +// policyStore is in-memory cache for the periodic job policies. +type policyStore struct { + // k-v pair and key is the policy ID + hash *sync.Map + namespace string + context context.Context + pool *redis.Pool + // For stop + stopChan chan bool +} + +// message is designed for sub/pub messages +type message struct { + Event string `json:"event"` + Data *Policy `json:"data"` +} + +// newPolicyStore is constructor of policyStore +func newPolicyStore(ctx context.Context, ns string, pool *redis.Pool) *policyStore { + return &policyStore{ + hash: new(sync.Map), + context: ctx, + namespace: ns, + pool: pool, + stopChan: make(chan bool, 1), + } +} + +// Blocking call +func (ps *policyStore) serve() (err error) { + defer func() { + logger.Info("Periodical job policy store is stopped") + }() + + conn := ps.pool.Get() + psc := redis.PubSubConn{ + Conn: conn, + } + defer psc.Close() + + // Subscribe channel + err = psc.Subscribe(redis.Args{}.AddFlat(rds.KeyPeriodicNotification(ps.namespace))...) 
+ if err != nil { + return + } + + // Channels for sub/pub ctl + errChan := make(chan error, 1) + done := make(chan bool, 1) + + go func() { + for { + switch res := psc.Receive().(type) { + case error: + errChan <- fmt.Errorf("redis sub/pub chan error: %s", res.(error).Error()) + break + case redis.Message: + m := &message{} + if err := json.Unmarshal(res.Data, m); err != nil { + // logged + logger.Errorf("Read invalid message: %s\n", res.Data) + break + } + if err := ps.sync(m); err != nil { + logger.Error(err) + } + break + case redis.Subscription: + switch res.Kind { + case "subscribe": + logger.Infof("Subscribe redis channel %s", res.Channel) + break + case "unsubscribe": + // Unsubscribe all, means main goroutine is exiting + logger.Infof("Unsubscribe redis channel %s", res.Channel) + done <- true + return + } + } + } + }() + + logger.Info("Periodical job policy store is serving with policy auto sync enabled") + defer func() { + var unSubErr error + defer func() { + // Merge errors + finalErrs := make([]string, 0) + if unSubErr != nil { + finalErrs = append(finalErrs, unSubErr.Error()) + } + if err != nil { + finalErrs = append(finalErrs, err.Error()) + } + + if len(finalErrs) > 0 { + // Override returned err or do nothing + err = errors.New(strings.Join(finalErrs, ";")) + } + }() + // Unsubscribe all + psc.Unsubscribe() + // Confirm result + // Add timeout in case unsubscribe failed + select { + case unSubErr = <-errChan: + return + case <-done: + return + case <-time.After(30 * time.Second): + unSubErr = errors.New("unsubscribe time out") + return + } + }() + + ticker := time.NewTicker(time.Minute) + defer ticker.Stop() + + // blocking here + for { + select { + case <-ticker.C: + err = psc.Ping("ping!") + if err != nil { + return + } + case <-ps.stopChan: + return nil + case err = <-errChan: + return + } + } +} + +// sync policy with backend list +func (ps *policyStore) sync(m *message) error { + if m == nil { + return errors.New("nil message") + } + + if 
m.Data == nil { + return errors.New("missing data in the policy sync message") + } + + switch m.Event { + case changeEventSchedule: + if err := ps.add(m.Data); err != nil { + return fmt.Errorf("failed to sync scheduled policy %s: %s", m.Data.ID, err) + } + case changeEventUnSchedule: + removed := ps.remove(m.Data.ID) + if removed == nil { + return fmt.Errorf("failed to sync unscheduled policy %s", m.Data.ID) + } + default: + return fmt.Errorf("message %s is not supported", m.Event) + } + + return nil +} + +// Load all the policies from the backend to store +func (ps *policyStore) load() error { + conn := ps.pool.Get() + defer conn.Close() + + bytes, err := redis.Values(conn.Do("ZRANGE", rds.KeyPeriodicPolicy(ps.namespace), 0, -1)) + if err != nil { + return err + } + + count := 0 + for i, l := 0, len(bytes); i < l; i++ { + rawPolicy := bytes[i].([]byte) + p := &Policy{} + + if err := p.DeSerialize(rawPolicy); err != nil { + // Ignore error which means the policy data is not valid + // Only logged + logger.Errorf("malform policy: %s; error: %s\n", rawPolicy, err) + continue + } + + // Add to cache store + if err := ps.add(p); err != nil { + // Only logged + logger.Errorf("cache periodic policies error: %s", err) + continue + } + + count++ + + logger.Debugf("Load periodic job policy: %s", string(rawPolicy)) + } + + logger.Infof("Load %d periodic job policies", count) + + return nil +} + +// Add the given policy to the store; an existing entry with the same ID is kept +func (ps *policyStore) add(item *Policy) error { + if item == nil { + return errors.New("nil policy to add") + } + + if utils.IsEmptyStr(item.ID) { + return errors.New("malform policy to add") + } + + v, _ := ps.hash.LoadOrStore(item.ID, item) + if v == nil { + return fmt.Errorf("failed to add policy: %s", item.ID) + } + + return nil +} + +// Iterate all the policies in the store +func (ps *policyStore) Iterate(f func(id string, p *Policy) bool) { + ps.hash.Range(func(k, v interface{}) bool { + return f(k.(string), v.(*Policy)) + }) +} + +// Remove the
specified policy from the store +func (ps *policyStore) remove(policyID string) *Policy { + if utils.IsEmptyStr(policyID) { + return nil + } + + if v, ok := ps.hash.Load(policyID); ok { + ps.hash.Delete(policyID) + return v.(*Policy) + } + + return nil +} diff --git a/src/jobservice/period/job_policy_test.go b/src/jobservice/period/policy_store_test.go similarity index 100% rename from src/jobservice/period/job_policy_test.go rename to src/jobservice/period/policy_store_test.go diff --git a/src/jobservice/period/redis_scheduler.go b/src/jobservice/period/redis_scheduler.go deleted file mode 100644 index e28a23807..000000000 --- a/src/jobservice/period/redis_scheduler.go +++ /dev/null @@ -1,349 +0,0 @@ -// Copyright Project Harbor Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package period - -import ( - "encoding/json" - "errors" - "strconv" - "sync" - "time" - - "github.com/goharbor/harbor/src/jobservice/errs" - "github.com/goharbor/harbor/src/jobservice/opm" - - "github.com/robfig/cron" - - "github.com/goharbor/harbor/src/jobservice/env" - "github.com/goharbor/harbor/src/jobservice/logger" - "github.com/goharbor/harbor/src/jobservice/models" - "github.com/goharbor/harbor/src/jobservice/utils" - "github.com/gomodule/redigo/redis" -) - -const ( - // EventSchedulePeriodicPolicy is for scheduling periodic policy event - EventSchedulePeriodicPolicy = "schedule" - // EventUnSchedulePeriodicPolicy is for unscheduling periodic policy event - EventUnSchedulePeriodicPolicy = "unschedule" -) - -// RedisPeriodicScheduler manages the periodic scheduling policies. -type RedisPeriodicScheduler struct { - context *env.Context - redisPool *redis.Pool - namespace string - pstore *periodicJobPolicyStore - enqueuer *periodicEnqueuer -} - -// NewRedisPeriodicScheduler is constructor of RedisPeriodicScheduler -func NewRedisPeriodicScheduler(ctx *env.Context, namespace string, redisPool *redis.Pool, statsManager opm.JobStatsManager) *RedisPeriodicScheduler { - pstore := &periodicJobPolicyStore{ - lock: new(sync.RWMutex), - policies: make(map[string]*PeriodicJobPolicy), - } - enqueuer := newPeriodicEnqueuer(namespace, redisPool, pstore, statsManager) - - return &RedisPeriodicScheduler{ - context: ctx, - redisPool: redisPool, - namespace: namespace, - pstore: pstore, - enqueuer: enqueuer, - } -} - -// Start to serve -func (rps *RedisPeriodicScheduler) Start() { - defer func() { - logger.Info("Redis scheduler is stopped") - }() - - // Load existing periodic job policies - if err := rps.Load(); err != nil { - // exit now - rps.context.ErrorChan <- err - return - } - - // start enqueuer - rps.enqueuer.start() - defer rps.enqueuer.stop() - logger.Info("Redis scheduler is started") - - // blocking here - <-rps.context.SystemContext.Done() -} - -// Schedule is 
implementation of the same method in period.Interface -func (rps *RedisPeriodicScheduler) Schedule(jobName string, params models.Parameters, cronSpec string) (string, int64, error) { - if utils.IsEmptyStr(jobName) { - return "", 0, errors.New("empty job name is not allowed") - } - if utils.IsEmptyStr(cronSpec) { - return "", 0, errors.New("cron spec is not set") - } - - // Get next run time - schedule, err := cron.Parse(cronSpec) - if err != nil { - return "", 0, err - } - - // Although the ZSET can guarantee no duplicated items, we still need to check the existing - // of the job policy to avoid publish duplicated ones to other nodes as we - // use transaction commands. - jobPolicy := &PeriodicJobPolicy{ - JobName: jobName, - JobParameters: params, - CronSpec: cronSpec, - } - // Serialize data - rawJSON, err := jobPolicy.Serialize() - if err != nil { - return "", 0, nil - } - - // Check existing - // If existing, treat as a succeed submitting and return the exitsing id - if score, ok := rps.exists(string(rawJSON)); ok { - // Ignore error - id, _ := rps.getIDByScore(score) - return "", 0, errs.ConflictError(id) - } - - uuid, score := utils.MakePeriodicPolicyUUID() - // Set back policy ID - jobPolicy.PolicyID = uuid - notification := &models.Message{ - Event: EventSchedulePeriodicPolicy, - Data: jobPolicy, - } - rawJSON2, err := json.Marshal(notification) - if err != nil { - return "", 0, err - } - - // Save to redis db and publish notification via redis transaction - conn := rps.redisPool.Get() - defer conn.Close() - - err = conn.Send("MULTI") - if err != nil { - return "", 0, err - } - err = conn.Send("ZADD", utils.KeyPeriodicPolicy(rps.namespace), score, rawJSON) - if err != nil { - return "", 0, err - } - err = conn.Send("ZADD", utils.KeyPeriodicPolicyScore(rps.namespace), score, uuid) - if err != nil { - return "", 0, err - } - err = conn.Send("PUBLISH", utils.KeyPeriodicNotification(rps.namespace), rawJSON2) - if err != nil { - return "", 0, err - } - - if _, 
err := conn.Do("EXEC"); err != nil { - return "", 0, err - } - - return uuid, schedule.Next(time.Now()).Unix(), nil -} - -// UnSchedule is implementation of the same method in period.Interface -func (rps *RedisPeriodicScheduler) UnSchedule(cronJobPolicyID string) error { - if utils.IsEmptyStr(cronJobPolicyID) { - return errors.New("cron job policy ID is empty") - } - - score, err := rps.getScoreByID(cronJobPolicyID) - if err == redis.ErrNil { - return errs.NoObjectFoundError(err.Error()) - } - - if err != nil { - return err - } - - notification := &models.Message{ - Event: EventUnSchedulePeriodicPolicy, - Data: &PeriodicJobPolicy{ - PolicyID: cronJobPolicyID, // Only ID required - }, - } - - rawJSON, err := json.Marshal(notification) - if err != nil { - return err - } - - // REM from redis db - conn := rps.redisPool.Get() - defer conn.Close() - - err = conn.Send("MULTI") - if err != nil { - return err - } - err = conn.Send("ZREMRANGEBYSCORE", utils.KeyPeriodicPolicy(rps.namespace), score, score) // Accurately remove the item with the specified score - if err != nil { - return err - } - err = conn.Send("ZREMRANGEBYSCORE", utils.KeyPeriodicPolicyScore(rps.namespace), score, score) // Remove key score mapping - if err != nil { - return err - } - err = conn.Send("PUBLISH", utils.KeyPeriodicNotification(rps.namespace), rawJSON) - if err != nil { - return err - } - - _, err = conn.Do("EXEC") - - return err -} - -// Load data from zset -func (rps *RedisPeriodicScheduler) Load() error { - conn := rps.redisPool.Get() - defer conn.Close() - - // Let's build key score mapping locally first - bytes, err := redis.MultiBulk(conn.Do("ZRANGE", utils.KeyPeriodicPolicyScore(rps.namespace), 0, -1, "WITHSCORES")) - if err != nil { - return err - } - keyScoreMap := make(map[int64]string) - for i, l := 0, len(bytes); i < l; i = i + 2 { - pid := string(bytes[i].([]byte)) - rawScore := bytes[i+1].([]byte) - score, err := strconv.ParseInt(string(rawScore), 10, 64) - if err != nil { - // 
Ignore - continue - } - keyScoreMap[score] = pid - } - - bytes, err = redis.MultiBulk(conn.Do("ZRANGE", utils.KeyPeriodicPolicy(rps.namespace), 0, -1, "WITHSCORES")) - if err != nil { - return err - } - - allPeriodicPolicies := make([]*PeriodicJobPolicy, 0, len(bytes)/2) - for i, l := 0, len(bytes); i < l; i = i + 2 { - rawPolicy := bytes[i].([]byte) - rawScore := bytes[i+1].([]byte) - policy := &PeriodicJobPolicy{} - - if err := policy.DeSerialize(rawPolicy); err != nil { - // Ignore error which means the policy data is not valid - // Only logged - logger.Warningf("failed to deserialize periodic policy with error:%s; raw data: %s\n", err, rawPolicy) - continue - } - score, err := strconv.ParseInt(string(rawScore), 10, 64) - if err != nil { - // Ignore error which means the policy data is not valid - // Only logged - logger.Warningf("failed to parse the score of the periodic policy with error:%s\n", err) - continue - } - - // Set back the policy ID - if pid, ok := keyScoreMap[score]; ok { - policy.PolicyID = pid - } else { - // Something wrong, should not be happened - // ignore here - continue - } - - allPeriodicPolicies = append(allPeriodicPolicies, policy) - - logger.Infof("Load periodic job policy %s for job %s: %s", policy.PolicyID, policy.JobName, policy.CronSpec) - } - - if len(allPeriodicPolicies) > 0 { - rps.pstore.addAll(allPeriodicPolicies) - } - - logger.Infof("Load %d periodic job policies", len(allPeriodicPolicies)) - return nil -} - -// Clear is implementation of the same method in period.Interface -func (rps *RedisPeriodicScheduler) Clear() error { - conn := rps.redisPool.Get() - defer conn.Close() - - _, err := conn.Do("ZREMRANGEBYRANK", utils.KeyPeriodicPolicy(rps.namespace), 0, -1) - - return err -} - -// AcceptPeriodicPolicy is implementation of the same method in period.Interface -func (rps *RedisPeriodicScheduler) AcceptPeriodicPolicy(policy *PeriodicJobPolicy) error { - if policy == nil || utils.IsEmptyStr(policy.PolicyID) { - return 
errors.New("nil periodic policy") - } - - rps.pstore.add(policy) - - return nil -} - -// RemovePeriodicPolicy is implementation of the same method in period.Interface -func (rps *RedisPeriodicScheduler) RemovePeriodicPolicy(policyID string) *PeriodicJobPolicy { - if utils.IsEmptyStr(policyID) { - return nil - } - - return rps.pstore.remove(policyID) -} - -func (rps *RedisPeriodicScheduler) exists(rawPolicy string) (int64, bool) { - if utils.IsEmptyStr(rawPolicy) { - return 0, false - } - - conn := rps.redisPool.Get() - defer conn.Close() - - count, err := redis.Int64(conn.Do("ZSCORE", utils.KeyPeriodicPolicy(rps.namespace), rawPolicy)) - return count, err == nil -} - -func (rps *RedisPeriodicScheduler) getScoreByID(id string) (int64, error) { - conn := rps.redisPool.Get() - defer conn.Close() - - return redis.Int64(conn.Do("ZSCORE", utils.KeyPeriodicPolicyScore(rps.namespace), id)) -} - -func (rps *RedisPeriodicScheduler) getIDByScore(score int64) (string, error) { - conn := rps.redisPool.Get() - defer conn.Close() - - ids, err := redis.Strings(conn.Do("ZRANGEBYSCORE", utils.KeyPeriodicPolicyScore(rps.namespace), score, score)) - if err != nil { - return "", err - } - - return ids[0], nil -} diff --git a/src/jobservice/period/scheduler.go b/src/jobservice/period/scheduler.go new file mode 100644 index 000000000..7704b061c --- /dev/null +++ b/src/jobservice/period/scheduler.go @@ -0,0 +1,47 @@ +// Copyright Project Harbor Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package period + +// Scheduler defines operations the periodic scheduler should have. +type Scheduler interface { + // Start to serve periodic job scheduling process + // + // Returns: + // error if any problems happened + Start() error + + // Stop the working periodic job scheduling process + // + // Returns: + // error if any problems happened + Stop() error + + // Schedule the specified cron job policy. + // + // policy *Policy : The job template of the scheduling periodical jobs + // + // Returns: + // int64 the numeric id of policy + // error if failed to schedule + Schedule(policy *Policy) (int64, error) + + // Unschedule the specified cron job policy. + // + // policyID string: The ID of cron job policy. + // + // Return: + // error if failed to unschedule + UnSchedule(policyID string) error +} diff --git a/src/jobservice/period/sweeper.go b/src/jobservice/period/sweeper.go deleted file mode 100644 index 097851018..000000000 --- a/src/jobservice/period/sweeper.go +++ /dev/null @@ -1,109 +0,0 @@ -// Copyright Project Harbor Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- -package period - -import ( - "fmt" - "time" - - "github.com/gocraft/work" - - "github.com/goharbor/harbor/src/jobservice/logger" - "github.com/goharbor/harbor/src/jobservice/utils" - "github.com/gomodule/redigo/redis" -) - -// Sweeper take charge of clearing the outdated data such as scheduled jobs etc.. -// Currently, only used in redis worker pool. -type Sweeper struct { - redisPool *redis.Pool - client *work.Client - namespace string -} - -// NewSweeper is constructor of Sweeper. -func NewSweeper(namespace string, pool *redis.Pool, client *work.Client) *Sweeper { - return &Sweeper{ - namespace: namespace, - redisPool: pool, - client: client, - } -} - -// ClearOutdatedScheduledJobs clears the outdated scheduled jobs. -// Try best to do -func (s *Sweeper) ClearOutdatedScheduledJobs() error { - // Check if other workpool has done the action - conn := s.redisPool.Get() - defer conn.Close() - - // Lock - r, err := conn.Do("SET", utils.KeyPeriodicLock(s.namespace), time.Now().Unix(), "EX", 30, "NX") - defer func() { - // Make sure it can be unlocked if it is not expired yet - if _, err := conn.Do("DEL", utils.KeyPeriodicLock(s.namespace)); err != nil { - logger.Errorf("Unlock key '%s' failed with error: %s\n", utils.KeyPeriodicLock(s.namespace), err.Error()) - } - }() - if err != nil { - return err - } - - if r == nil { - // Action is already locked by other workerpool - logger.Info("Ignore clear outdated scheduled jobs") - return nil - } - - nowEpoch := time.Now().Unix() - jobScores, err := utils.GetZsetByScore(s.redisPool, utils.RedisKeyScheduled(s.namespace), []int64{0, nowEpoch}) - if err != nil { - return err - } - - allErrors := make([]error, 0) - for _, jobScore := range jobScores { - j, err := utils.DeSerializeJob(jobScore.JobBytes) - if err != nil { - allErrors = append(allErrors, err) - continue - } - - if err = s.client.DeleteScheduledJob(jobScore.Score, j.ID); err != nil { - allErrors = append(allErrors, err) - } - - logger.Infof("Clear outdated 
scheduled job: %s run at %#v\n", j.ID, time.Unix(jobScore.Score, 0).String()) - } - - // Unlock - if len(allErrors) == 0 { - return nil - } - - if len(allErrors) == 1 { - return allErrors[0] - } - - errorSummary := allErrors[0].Error() - for index, e := range allErrors { - if index == 0 { - continue - } - - errorSummary = fmt.Sprintf("%s, %s", errorSummary, e) - } - return fmt.Errorf("%s", errorSummary) -} diff --git a/src/jobservice/period/sweeper_test.go b/src/jobservice/period/sweeper_test.go deleted file mode 100644 index 46e993fb3..000000000 --- a/src/jobservice/period/sweeper_test.go +++ /dev/null @@ -1,60 +0,0 @@ -// Copyright Project Harbor Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-package period - -import ( - "encoding/json" - "testing" - "time" - - "github.com/gocraft/work" - - "github.com/goharbor/harbor/src/jobservice/tests" - "github.com/goharbor/harbor/src/jobservice/utils" -) - -func TestSweeper(t *testing.T) { - epoch := time.Now().Unix() - 1000 - if err := createFakeScheduledJob(epoch); err != nil { - t.Fatal(err) - } - ns := tests.GiveMeTestNamespace() - sweeper := NewSweeper(ns, redisPool, work.NewClient(ns, redisPool)) - if err := sweeper.ClearOutdatedScheduledJobs(); err != nil { - t.Fatal(err) - } - err := tests.Clear(utils.RedisKeyScheduled(ns), redisPool.Get()) - if err != nil { - t.Fatal(err) - } -} - -func createFakeScheduledJob(runAt int64) error { - fakeJob := make(map[string]interface{}) - fakeJob["name"] = "fake_periodic_job" - fakeJob["id"] = "fake_job_id" - fakeJob["t"] = runAt - fakeJob["args"] = make(map[string]interface{}) - - rawJSON, err := json.Marshal(&fakeJob) - if err != nil { - return err - } - - conn := redisPool.Get() - defer conn.Close() - - _, err = conn.Do("ZADD", utils.RedisKeyScheduled(tests.GiveMeTestNamespace()), runAt, rawJSON) - return err -} diff --git a/src/jobservice/pool/interface.go b/src/jobservice/pool/interface.go deleted file mode 100644 index 3427aa690..000000000 --- a/src/jobservice/pool/interface.go +++ /dev/null @@ -1,146 +0,0 @@ -// Copyright Project Harbor Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package pool - -import "github.com/goharbor/harbor/src/jobservice/models" - -// Interface for worker pool. -// More like a driver to transparent the lower queue. -type Interface interface { - // Start to serve - // - // Return: - // error if failed to start - Start() error - - // Register job to the pool. - // - // name string : job name for referring - // job interface{}: job handler which must implement the job.Interface. - // - // Return: - // error if failed to register - RegisterJob(name string, job interface{}) error - - // Register multiple jobs. - // - // jobs map[string]interface{}: job map, key is job name and value is job handler. - // - // Return: - // error if failed to register - RegisterJobs(jobs map[string]interface{}) error - - // Enqueue job - // - // jobName string : the name of enqueuing job - // params models.Parameters : parameters of enqueuing job - // isUnique bool : specify if duplicated job will be discarded - // - // Returns: - // models.JobStats: the stats of enqueuing job if succeed - // error : if failed to enqueue - Enqueue(jobName string, params models.Parameters, isUnique bool) (models.JobStats, error) - - // Schedule job to run after the specified interval (seconds). - // - // jobName string : the name of enqueuing job - // runAfterSeconds uint64 : the waiting interval with seconds - // params models.Parameters : parameters of enqueuing job - // isUnique bool : specify if duplicated job will be discarded - // - // Returns: - // models.JobStats: the stats of enqueuing job if succeed - // error : if failed to enqueue - Schedule(jobName string, params models.Parameters, runAfterSeconds uint64, isUnique bool) (models.JobStats, error) - - // Schedule the job periodically running. 
- // - // jobName string : the name of enqueuing job - // params models.Parameters : parameters of enqueuing job - // cronSetting string : the periodic duration with cron style like '0 * * * * *' - // - // Returns: - // models.JobStats: the stats of enqueuing job if succeed - // error : if failed to enqueue - PeriodicallyEnqueue(jobName string, params models.Parameters, cronSetting string) (models.JobStats, error) - - // Return the status info of the pool. - // - // Returns: - // models.JobPoolStats : the stats info of all running pools - // error : failed to check - Stats() (models.JobPoolStats, error) - - // Check if the job has been already registered. - // - // name string : name of job - // - // Returns: - // interface{} : the job type of the known job if it's existing - // bool : if the known job requires parameters - IsKnownJob(name string) (interface{}, bool) - - // Validate the parameters of the known job - // - // jobType interface{} : type of known job - // params map[string]interface{} : parameters of known job - // - // Return: - // error if parameters are not valid - - ValidateJobParameters(jobType interface{}, params map[string]interface{}) error - - // Get the stats of the specified job - // - // jobID string : ID of the enqueued job - // - // Returns: - // models.JobStats : job stats data - // error : error returned if meet any problems - GetJobStats(jobID string) (models.JobStats, error) - - // Stop the job - // - // jobID string : ID of the enqueued job - // - // Return: - // error : error returned if meet any problems - StopJob(jobID string) error - - // Cancel the job - // - // jobID string : ID of the enqueued job - // - // Return: - // error : error returned if meet any problems - CancelJob(jobID string) error - - // Retry the job - // - // jobID string : ID of the enqueued job - // - // Return: - // error : error returned if meet any problems - RetryJob(jobID string) error - - // Register hook - // - // jobID string : ID of job - // hookURL 
string : the hook url - // - // Return: - // error : error returned if meet any problems - RegisterHook(jobID string, hookURL string) error -} diff --git a/src/jobservice/pool/message_server.go b/src/jobservice/pool/message_server.go deleted file mode 100644 index ffb269cc7..000000000 --- a/src/jobservice/pool/message_server.go +++ /dev/null @@ -1,203 +0,0 @@ -// Copyright Project Harbor Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package pool - -import ( - "context" - "encoding/json" - "errors" - "fmt" - "reflect" - "time" - - "github.com/goharbor/harbor/src/jobservice/logger" - "github.com/goharbor/harbor/src/jobservice/opm" - "github.com/goharbor/harbor/src/jobservice/period" - - "github.com/goharbor/harbor/src/jobservice/models" - "github.com/goharbor/harbor/src/jobservice/utils" - "github.com/gomodule/redigo/redis" -) - -const ( - msgServerRetryTimes = 5 -) - -// MessageServer implements the sub/pub mechanism via redis to do async message exchanging. 
-type MessageServer struct { - context context.Context - redisPool *redis.Pool - namespace string - callbacks map[string]reflect.Value // no need to sync -} - -// NewMessageServer creates a new ptr of MessageServer -func NewMessageServer(ctx context.Context, namespace string, redisPool *redis.Pool) *MessageServer { - return &MessageServer{ - context: ctx, - redisPool: redisPool, - namespace: namespace, - callbacks: make(map[string]reflect.Value), - } -} - -// Start to serve -func (ms *MessageServer) Start() error { - defer func() { - logger.Info("Message server is stopped") - }() - - conn := ms.redisPool.Get() // Get one backend connection! - psc := redis.PubSubConn{ - Conn: conn, - } - defer psc.Close() - - // Subscribe channel - err := psc.Subscribe(redis.Args{}.AddFlat(utils.KeyPeriodicNotification(ms.namespace))...) - if err != nil { - return err - } - - done := make(chan error, 1) - go func() { - for { - switch res := psc.Receive().(type) { - case error: - done <- fmt.Errorf("error occurred when receiving from pub/sub channel of message server: %s", res.(error).Error()) - case redis.Message: - m := &models.Message{} - if err := json.Unmarshal(res.Data, m); err != nil { - // logged - logger.Warningf("Read invalid message: %s\n", res.Data) - } - if callback, ok := ms.callbacks[m.Event]; !ok { - // logged - logger.Warningf("no handler to handle event %s\n", m.Event) - } else { - // logged incoming events - logger.Infof("Receive event '%s' with data(unformatted): %+#v\n", m.Event, m.Data) - // Try to recover the concrete type - var converted interface{} - switch m.Event { - case period.EventSchedulePeriodicPolicy, - period.EventUnSchedulePeriodicPolicy: - // ignore error, actually error should not be happened because we did not change data - // after the last unmarshal try. 
- policyObject := &period.PeriodicJobPolicy{} - dt, _ := json.Marshal(m.Data) - json.Unmarshal(dt, policyObject) - converted = policyObject - case opm.EventRegisterStatusHook: - // ignore error - hookObject := &opm.HookData{} - dt, _ := json.Marshal(m.Data) - json.Unmarshal(dt, hookObject) - converted = hookObject - case opm.EventFireCommand: - // no need to convert []string - converted = m.Data - } - res := callback.Call([]reflect.Value{reflect.ValueOf(converted)}) - e := res[0].Interface() - if e != nil { - err := e.(error) - // logged - logger.Errorf("Failed to fire callback with error: %s\n", err) - } - } - case redis.Subscription: - switch res.Kind { - case "subscribe": - logger.Infof("Subscribe redis channel %s\n", res.Channel) - break - case "unsubscribe": - // Unsubscribe all, means main goroutine is exiting - logger.Infof("Unsubscribe redis channel %s\n", res.Channel) - done <- nil - return - } - } - } - }() - - logger.Info("Message server is started") - - ticker := time.NewTicker(time.Minute) - defer ticker.Stop() - - // blocking here - for err == nil { - select { - case <-ticker.C: - err = psc.Ping("ping!") - case <-ms.context.Done(): - err = errors.New("context exit") - case err = <-done: - } - } - - // Unsubscribe all - psc.Unsubscribe() - - return <-done -} - -// Subscribe event with specified callback -func (ms *MessageServer) Subscribe(event string, callback interface{}) error { - if utils.IsEmptyStr(event) { - return errors.New("empty event is not allowed") - } - - handler, err := validateCallbackFunc(callback) - if err != nil { - return err - } - - ms.callbacks[event] = handler - return nil -} - -func validateCallbackFunc(callback interface{}) (reflect.Value, error) { - if callback == nil { - return reflect.ValueOf(nil), errors.New("nil callback handler") - } - - vFn := reflect.ValueOf(callback) - vFType := vFn.Type() - if vFType.Kind() != reflect.Func { - return reflect.ValueOf(nil), errors.New("callback handler must be a generic func") - } - - 
inNum := vFType.NumIn() - outNum := vFType.NumOut() - if inNum != 1 || outNum != 1 { - return reflect.ValueOf(nil), errors.New("callback handler can only be func(interface{})error format") - } - - inType := vFType.In(0) - var intf *interface{} - if inType != reflect.TypeOf(intf).Elem() { - return reflect.ValueOf(nil), errors.New("callback handler can only be func(interface{})error format") - } - - outType := vFType.Out(0) - var e *error - if outType != reflect.TypeOf(e).Elem() { - return reflect.ValueOf(nil), errors.New("callback handler can only be func(interface{})error format") - } - - return vFn, nil -} diff --git a/src/jobservice/pool/message_server_test.go b/src/jobservice/pool/message_server_test.go deleted file mode 100644 index 9b84e40a2..000000000 --- a/src/jobservice/pool/message_server_test.go +++ /dev/null @@ -1,211 +0,0 @@ -// Copyright Project Harbor Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-package pool - -import ( - "context" - "encoding/json" - "errors" - "fmt" - "testing" - "time" - - "github.com/goharbor/harbor/src/jobservice/opm" - - "github.com/goharbor/harbor/src/jobservice/models" - "github.com/goharbor/harbor/src/jobservice/period" - "github.com/goharbor/harbor/src/jobservice/utils" - - "github.com/goharbor/harbor/src/jobservice/tests" -) - -var redisPool = tests.GiveMeRedisPool() - -func TestPublishPolicy(t *testing.T) { - ms, cancel := createMessageServer() - err := ms.Subscribe(period.EventSchedulePeriodicPolicy, func(data interface{}) error { - if _, ok := data.(*period.PeriodicJobPolicy); !ok { - t.Fatal("expect PeriodicJobPolicy but got other thing") - return errors.New("expect PeriodicJobPolicy but got other thing") - } - return nil - }) - if err != nil { - t.Fatal(err) - } - - err = ms.Subscribe(period.EventUnSchedulePeriodicPolicy, func(data interface{}) error { - if _, ok := data.(*period.PeriodicJobPolicy); !ok { - t.Fatal("expect PeriodicJobPolicy but got other thing") - return errors.New("expect PeriodicJobPolicy but got other thing") - } - - return nil - }) - if err != nil { - t.Fatal(err) - } - - go func() { - defer cancel() - // wait and then publish - <-time.After(200 * time.Millisecond) - - p := &period.PeriodicJobPolicy{ - PolicyID: "fake_ID", - JobName: "fake_job", - CronSpec: "5 * * * *", - } - notification := &models.Message{ - Event: period.EventSchedulePeriodicPolicy, - Data: p, - } - - rawJSON, err := json.Marshal(notification) - if err != nil { - t.Fatal(err) - } - - conn := redisPool.Get() - defer conn.Close() - err = conn.Send("PUBLISH", utils.KeyPeriodicNotification(tests.GiveMeTestNamespace()), rawJSON) - if err != nil { - t.Fatal(err) - } - - notification.Event = period.EventUnSchedulePeriodicPolicy - rawJSON, err = json.Marshal(notification) - if err != nil { - t.Fatal(err) - } - err = conn.Send("PUBLISH", utils.KeyPeriodicNotification(tests.GiveMeTestNamespace()), rawJSON) - if err != nil { - t.Fatal(err) - } 
- - // send quit signal - <-time.After(200 * time.Millisecond) - err = tests.Clear(utils.KeyPeriodicNotification(tests.GiveMeTestNamespace()), conn) - if err != nil { - t.Fatal(err) - } - }() - - ms.Start() -} - -func TestPublishHook(t *testing.T) { - ms, cancel := createMessageServer() - err := ms.Subscribe(opm.EventRegisterStatusHook, func(data interface{}) error { - if _, ok := data.(*opm.HookData); !ok { - t.Fatal("expect HookData but got other thing") - return errors.New("expect HookData but got other thing") - } - return nil - }) - if err != nil { - t.Fatal(err) - } - - go func() { - defer cancel() - - <-time.After(200 * time.Millisecond) - hook := &opm.HookData{ - JobID: "fake_job_ID", - HookURL: "http://localhost:9999/hook", - } - notification := &models.Message{ - Event: opm.EventRegisterStatusHook, - Data: hook, - } - - rawJSON, err := json.Marshal(notification) - if err != nil { - t.Fatal(err) - } - - conn := redisPool.Get() - defer conn.Close() - err = conn.Send("PUBLISH", utils.KeyPeriodicNotification(tests.GiveMeTestNamespace()), rawJSON) - if err != nil { - t.Fatal(err) - } - - // send quit signal - <-time.After(200 * time.Millisecond) - err = tests.Clear(utils.KeyPeriodicNotification(tests.GiveMeTestNamespace()), conn) - if err != nil { - t.Fatal(err) - } - }() - - ms.Start() -} - -func TestPublishCommands(t *testing.T) { - ms, cancel := createMessageServer() - err := ms.Subscribe(opm.EventFireCommand, func(data interface{}) error { - cmds, ok := data.([]string) - if !ok { - t.Fatal("expect fired command but got other thing") - return errors.New("expect fired command but got other thing") - } - if len(cmds) != 2 { - t.Fatalf("expect a array with 2 items but only got '%d' items", len(cmds)) - return fmt.Errorf("expect a array with 2 items but only got '%d' items", len(cmds)) - } - if cmds[1] != "stop" { - t.Fatalf("expect command 'stop' but got '%s'", cmds[1]) - return fmt.Errorf("expect command 'stop' but got '%s'", cmds[1]) - } - return nil - }) - 
if err != nil { - t.Fatal(err) - } - - go func() { - defer cancel() - <-time.After(200 * time.Millisecond) - - notification := &models.Message{ - Event: opm.EventRegisterStatusHook, - Data: []string{"fake_job_ID", "stop"}, - } - - rawJSON, err := json.Marshal(notification) - if err != nil { - t.Fatal(err) - } - - conn := redisPool.Get() - defer conn.Close() - err = conn.Send("PUBLISH", utils.KeyPeriodicNotification(tests.GiveMeTestNamespace()), rawJSON) - if err != nil { - t.Fatal(err) - } - - // hold for a while - <-time.After(200 * time.Millisecond) - }() - - ms.Start() -} - -func createMessageServer() (*MessageServer, context.CancelFunc) { - ns := tests.GiveMeTestNamespace() - ctx, cancel := context.WithCancel(context.Background()) - return NewMessageServer(ctx, ns, redisPool), cancel -} diff --git a/src/jobservice/pool/redis_job_wrapper.go b/src/jobservice/pool/redis_job_wrapper.go deleted file mode 100644 index 2b196848a..000000000 --- a/src/jobservice/pool/redis_job_wrapper.go +++ /dev/null @@ -1,267 +0,0 @@ -// Copyright Project Harbor Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package pool - -import ( - "errors" - "fmt" - "runtime" - "time" - - "github.com/goharbor/harbor/src/jobservice/job/impl" - - "github.com/gocraft/work" - "github.com/goharbor/harbor/src/jobservice/env" - "github.com/goharbor/harbor/src/jobservice/errs" - "github.com/goharbor/harbor/src/jobservice/job" - "github.com/goharbor/harbor/src/jobservice/logger" - "github.com/goharbor/harbor/src/jobservice/models" - "github.com/goharbor/harbor/src/jobservice/opm" - "github.com/goharbor/harbor/src/jobservice/utils" -) - -// RedisJob is a job wrapper to wrap the job.Interface to the style which can be recognized by the redis pool. -type RedisJob struct { - job interface{} // the real job implementation - context *env.Context // context - statsManager opm.JobStatsManager // job stats manager - deDuplicator DeDuplicator // handle unique job -} - -// NewRedisJob is constructor of RedisJob -func NewRedisJob(j interface{}, ctx *env.Context, statsManager opm.JobStatsManager, deDuplicator DeDuplicator) *RedisJob { - return &RedisJob{ - job: j, - context: ctx, - statsManager: statsManager, - deDuplicator: deDuplicator, - } -} - -// Run the job -func (rj *RedisJob) Run(j *work.Job) error { - var ( - cancelled = false - buildContextFailed = false - runningJob job.Interface - err error - execContext env.JobContext - ) - - defer func() { - if err == nil { - logger.Infof("Job '%s:%s' exit with success", j.Name, j.ID) - return // nothing need to do - } - - // log error - logger.Errorf("Job '%s:%s' exit with error: %s\n", j.Name, j.ID, err) - - if buildContextFailed || rj.shouldDisableRetry(runningJob, j, cancelled) { - j.Fails = 10000000000 // Make it big enough to avoid retrying - now := time.Now().Unix() - go func() { - timer := time.NewTimer(2 * time.Second) // make sure the failed job is already put into the dead queue - defer timer.Stop() - - <-timer.C - - rj.statsManager.DieAt(j.ID, now) - }() - } - }() - - defer func() { - if r := recover(); r != nil { - err = fmt.Errorf("Runtime 
error: %s", r) - - // Log the stack - buf := make([]byte, 1<<16) - size := runtime.Stack(buf, false) - logger.Errorf("Runtime error happened when executing job %s:%s: %s", j.Name, j.ID, buf[0:size]) - - // record runtime error status - rj.jobFailed(j.ID) - } - }() - - // Wrap job - runningJob = Wrap(rj.job) - - execContext, err = rj.buildContext(j) - if err != nil { - buildContextFailed = true - goto FAILED // no need to retry - } - - defer func() { - // Close open io stream first - if closer, ok := execContext.GetLogger().(logger.Closer); ok { - err := closer.Close() - if err != nil { - logger.Errorf("Close job logger failed: %s", err) - } - } - }() - - if j.Unique { - defer func() { - if err := rj.deDuplicator.DelUniqueSign(j.Name, j.Args); err != nil { - logger.Errorf("delete job unique sign error: %s", err) - } - }() - } - - // Start to run - rj.jobRunning(j.ID) - - // Inject data - err = runningJob.Run(execContext, j.Args) - - // update the proper status - if err == nil { - rj.jobSucceed(j.ID) - return nil - } - - if errs.IsJobStoppedError(err) { - rj.jobStopped(j.ID) - return nil // no need to put it into the dead queue for resume - } - - if errs.IsJobCancelledError(err) { - rj.jobCancelled(j.ID) - cancelled = true - return err // need to resume - } - -FAILED: - rj.jobFailed(j.ID) - return err -} - -func (rj *RedisJob) jobRunning(jobID string) { - rj.statsManager.SetJobStatus(jobID, job.JobStatusRunning) -} - -func (rj *RedisJob) jobFailed(jobID string) { - rj.statsManager.SetJobStatus(jobID, job.JobStatusError) -} - -func (rj *RedisJob) jobStopped(jobID string) { - rj.statsManager.SetJobStatus(jobID, job.JobStatusStopped) -} - -func (rj *RedisJob) jobCancelled(jobID string) { - rj.statsManager.SetJobStatus(jobID, job.JobStatusCancelled) -} - -func (rj *RedisJob) jobSucceed(jobID string) { - rj.statsManager.SetJobStatus(jobID, job.JobStatusSuccess) -} - -func (rj *RedisJob) buildContext(j *work.Job) (env.JobContext, error) { - // Build job execution context - 
jData := env.JobData{ - ID: j.ID, - Name: j.Name, - Args: j.Args, - ExtraData: make(map[string]interface{}), - } - - checkOPCmdFuncFactory := func(jobID string) job.CheckOPCmdFunc { - return func() (string, bool) { - cmd, err := rj.statsManager.CtlCommand(jobID) - if err != nil { - return "", false - } - return cmd, true - } - } - - jData.ExtraData["opCommandFunc"] = checkOPCmdFuncFactory(j.ID) - - checkInFuncFactory := func(jobID string) job.CheckInFunc { - return func(message string) { - rj.statsManager.CheckIn(jobID, message) - } - } - - jData.ExtraData["checkInFunc"] = checkInFuncFactory(j.ID) - - launchJobFuncFactory := func(jobID string) job.LaunchJobFunc { - funcIntf := rj.context.SystemContext.Value(utils.CtlKeyOfLaunchJobFunc) - return func(jobReq models.JobRequest) (models.JobStats, error) { - launchJobFunc, ok := funcIntf.(job.LaunchJobFunc) - if !ok { - return models.JobStats{}, errors.New("no launch job func provided") - } - - jobName := "" - if jobReq.Job != nil { - jobName = jobReq.Job.Name - } - if j.Name == jobName { - return models.JobStats{}, errors.New("infinite job creating loop may exist") - } - - res, err := launchJobFunc(jobReq) - if err != nil { - return models.JobStats{}, err - } - - if err := rj.statsManager.Update(jobID, "multiple_executions", true); err != nil { - logger.Error(err) - } - - if err := rj.statsManager.Update(res.Stats.JobID, "upstream_job_id", jobID); err != nil { - logger.Error(err) - } - - rj.statsManager.AttachExecution(jobID, res.Stats.JobID) - - logger.Infof("Launch sub job %s:%s for upstream job %s", res.Stats.JobName, res.Stats.JobID, jobID) - return res, nil - } - } - - jData.ExtraData["launchJobFunc"] = launchJobFuncFactory(j.ID) - - // Use default context - if rj.context.JobContext == nil { - rj.context.JobContext = impl.NewDefaultContext(rj.context.SystemContext) - } - - return rj.context.JobContext.Build(jData) -} - -func (rj *RedisJob) shouldDisableRetry(j job.Interface, wj *work.Job, cancelled bool) bool { - 
maxFails := j.MaxFails() - if maxFails == 0 { - maxFails = 4 // Consistent with backend worker pool - } - fails := wj.Fails - fails++ // as the fail is not returned to backend pool yet - - if cancelled && fails < int64(maxFails) { - return true - } - - if !cancelled && fails < int64(maxFails) && !j.ShouldRetry() { - return true - } - - return false -} diff --git a/src/jobservice/pool/redis_pool.go b/src/jobservice/pool/redis_pool.go deleted file mode 100644 index 362af68b6..000000000 --- a/src/jobservice/pool/redis_pool.go +++ /dev/null @@ -1,739 +0,0 @@ -// Copyright Project Harbor Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package pool - -import ( - "errors" - "fmt" - "math" - "reflect" - "strings" - "time" - - "github.com/gocraft/work" - "github.com/goharbor/harbor/src/jobservice/env" - "github.com/goharbor/harbor/src/jobservice/job" - "github.com/goharbor/harbor/src/jobservice/logger" - "github.com/goharbor/harbor/src/jobservice/models" - "github.com/goharbor/harbor/src/jobservice/opm" - "github.com/goharbor/harbor/src/jobservice/period" - "github.com/goharbor/harbor/src/jobservice/utils" - "github.com/gomodule/redigo/redis" -) - -var ( - workerPoolDeadTime = 10 * time.Second -) - -const ( - workerPoolStatusHealthy = "Healthy" - workerPoolStatusDead = "Dead" - - // Copy from period.enqueuer - periodicEnqueuerHorizon = 4 * time.Minute - - pingRedisMaxTimes = 10 -) - -// GoCraftWorkPool is the pool implementation based on gocraft/work powered by redis. -type GoCraftWorkPool struct { - namespace string - redisPool *redis.Pool - pool *work.WorkerPool - enqueuer *work.Enqueuer - sweeper *period.Sweeper - client *work.Client - context *env.Context - scheduler period.Interface - statsManager opm.JobStatsManager - messageServer *MessageServer - deDuplicator DeDuplicator - - // no need to sync as write once and then only read - // key is name of known job - // value is the type of known job - knownJobs map[string]interface{} -} - -// RedisPoolContext ... -// We did not use this context to pass context info so far, just a placeholder. -type RedisPoolContext struct{} - -// NewGoCraftWorkPool is constructor of goCraftWorkPool. 
-func NewGoCraftWorkPool(ctx *env.Context, namespace string, workerCount uint, redisPool *redis.Pool) *GoCraftWorkPool { - pool := work.NewWorkerPool(RedisPoolContext{}, workerCount, namespace, redisPool) - enqueuer := work.NewEnqueuer(namespace, redisPool) - client := work.NewClient(namespace, redisPool) - statsMgr := opm.NewRedisJobStatsManager(ctx.SystemContext, namespace, redisPool) - scheduler := period.NewRedisPeriodicScheduler(ctx, namespace, redisPool, statsMgr) - sweeper := period.NewSweeper(namespace, redisPool, client) - msgServer := NewMessageServer(ctx.SystemContext, namespace, redisPool) - deDepulicator := NewRedisDeDuplicator(namespace, redisPool) - return &GoCraftWorkPool{ - namespace: namespace, - redisPool: redisPool, - pool: pool, - enqueuer: enqueuer, - scheduler: scheduler, - sweeper: sweeper, - client: client, - context: ctx, - statsManager: statsMgr, - knownJobs: make(map[string]interface{}), - messageServer: msgServer, - deDuplicator: deDepulicator, - } -} - -// Start to serve -// Unblock action -func (gcwp *GoCraftWorkPool) Start() error { - if gcwp.redisPool == nil || - gcwp.pool == nil || - gcwp.context.SystemContext == nil { - // report and exit - return errors.New("Redis worker pool can not start as it's not correctly configured") - } - - // Test the redis connection - if err := gcwp.ping(); err != nil { - return err - } - - done := make(chan interface{}, 1) - - gcwp.context.WG.Add(1) - go func() { - var err error - - defer func() { - gcwp.context.WG.Done() - if err != nil { - // report error - gcwp.context.ErrorChan <- err - done <- struct{}{} // exit immediately - } - }() - - // Register callbacks - if err = gcwp.messageServer.Subscribe(period.EventSchedulePeriodicPolicy, - func(data interface{}) error { - return gcwp.handleSchedulePolicy(data) - }); err != nil { - return - } - if err = gcwp.messageServer.Subscribe(period.EventUnSchedulePeriodicPolicy, - func(data interface{}) error { - return gcwp.handleUnSchedulePolicy(data) - }); 
err != nil { - return - } - if err = gcwp.messageServer.Subscribe(opm.EventRegisterStatusHook, - func(data interface{}) error { - return gcwp.handleRegisterStatusHook(data) - }); err != nil { - return - } - if err = gcwp.messageServer.Subscribe(opm.EventFireCommand, - func(data interface{}) error { - return gcwp.handleOPCommandFiring(data) - }); err != nil { - return - } - - startTimes := 0 - START_MSG_SERVER: - // Start message server - if err = gcwp.messageServer.Start(); err != nil { - logger.Errorf("Message server exits with error: %s\n", err.Error()) - if startTimes < msgServerRetryTimes { - startTimes++ - time.Sleep(time.Duration((int)(math.Pow(2, (float64)(startTimes)))+5) * time.Second) - logger.Infof("Restart message server (%d times)\n", startTimes) - goto START_MSG_SERVER - } - - return - } - }() - - gcwp.context.WG.Add(1) - go func() { - defer func() { - gcwp.context.WG.Done() - gcwp.statsManager.Shutdown() - }() - // Start stats manager - // None-blocking - gcwp.statsManager.Start() - - // blocking call - gcwp.scheduler.Start() - }() - - gcwp.context.WG.Add(1) - go func() { - defer func() { - gcwp.context.WG.Done() - logger.Infof("Redis worker pool is stopped") - }() - - // Clear dirty data before pool starting - if err := gcwp.sweeper.ClearOutdatedScheduledJobs(); err != nil { - // Only logged - logger.Errorf("Clear outdated data before pool starting failed with error:%s\n", err) - } - - // Append middlewares - gcwp.pool.Middleware((*RedisPoolContext).logJob) - - gcwp.pool.Start() - logger.Infof("Redis worker pool is started") - - // Block on listening context and done signal - select { - case <-gcwp.context.SystemContext.Done(): - case <-done: - } - - gcwp.pool.Stop() - }() - - return nil -} - -// RegisterJob is used to register the job to the pool. 
-// j is the type of job -func (gcwp *GoCraftWorkPool) RegisterJob(name string, j interface{}) error { - if utils.IsEmptyStr(name) || j == nil { - return errors.New("job can not be registered with empty name or nil interface") - } - - // j must be job.Interface - if _, ok := j.(job.Interface); !ok { - return errors.New("job must implement the job.Interface") - } - - // 1:1 constraint - if jInList, ok := gcwp.knownJobs[name]; ok { - return fmt.Errorf("Job name %s has been already registered with %s", name, reflect.TypeOf(jInList).String()) - } - - // Same job implementation can be only registered with one name - for jName, jInList := range gcwp.knownJobs { - jobImpl := reflect.TypeOf(j).String() - if reflect.TypeOf(jInList).String() == jobImpl { - return fmt.Errorf("Job %s has been already registered with name %s", jobImpl, jName) - } - } - - redisJob := NewRedisJob(j, gcwp.context, gcwp.statsManager, gcwp.deDuplicator) - - // Get more info from j - theJ := Wrap(j) - - gcwp.pool.JobWithOptions(name, - work.JobOptions{MaxFails: theJ.MaxFails()}, - func(job *work.Job) error { - return redisJob.Run(job) - }, // Use generic handler to handle as we do not accept context with this way. - ) - gcwp.knownJobs[name] = j // keep the name of registered jobs as known jobs for future validation - - logger.Infof("Register job %s with name %s", reflect.TypeOf(j).String(), name) - - return nil -} - -// RegisterJobs is used to register multiple jobs to pool. 
-func (gcwp *GoCraftWorkPool) RegisterJobs(jobs map[string]interface{}) error { - if jobs == nil || len(jobs) == 0 { - return nil - } - - for name, j := range jobs { - if err := gcwp.RegisterJob(name, j); err != nil { - return err - } - } - - return nil -} - -// Enqueue job -func (gcwp *GoCraftWorkPool) Enqueue(jobName string, params models.Parameters, isUnique bool) (models.JobStats, error) { - var ( - j *work.Job - err error - ) - - // As the job is declared to be unique, - // check the uniqueness of the job, - // if no duplicated job existing (including the running jobs), - // set the unique flag. - if isUnique { - if err = gcwp.deDuplicator.Unique(jobName, params); err != nil { - return models.JobStats{}, err - } - - if j, err = gcwp.enqueuer.EnqueueUnique(jobName, params); err != nil { - return models.JobStats{}, err - } - } else { - // Enqueue job - if j, err = gcwp.enqueuer.Enqueue(jobName, params); err != nil { - return models.JobStats{}, err - } - } - - // avoid backend pool bug - if j == nil { - return models.JobStats{}, fmt.Errorf("job '%s' can not be enqueued, please check the job metatdata", jobName) - } - - res := generateResult(j, job.JobKindGeneric, isUnique) - // Save data with async way. Once it fails to do, let it escape - // The client method may help if the job is still in progress when get stats of this job - gcwp.statsManager.Save(res) - - return res, nil -} - -// Schedule job -func (gcwp *GoCraftWorkPool) Schedule(jobName string, params models.Parameters, runAfterSeconds uint64, isUnique bool) (models.JobStats, error) { - var ( - j *work.ScheduledJob - err error - ) - - // As the job is declared to be unique, - // check the uniqueness of the job, - // if no duplicated job existing (including the running jobs), - // set the unique flag. 
- if isUnique { - if err = gcwp.deDuplicator.Unique(jobName, params); err != nil { - return models.JobStats{}, err - } - - if j, err = gcwp.enqueuer.EnqueueUniqueIn(jobName, int64(runAfterSeconds), params); err != nil { - return models.JobStats{}, err - } - } else { - // Enqueue job in - if j, err = gcwp.enqueuer.EnqueueIn(jobName, int64(runAfterSeconds), params); err != nil { - return models.JobStats{}, err - } - } - - // avoid backend pool bug - if j == nil { - return models.JobStats{}, fmt.Errorf("job '%s' can not be enqueued, please check the job metatdata", jobName) - } - - res := generateResult(j.Job, job.JobKindScheduled, isUnique) - res.Stats.RunAt = j.RunAt - - // As job is already scheduled, we should not block this call - // Once it fails to do, use client method to help get the status of the escape job - gcwp.statsManager.Save(res) - - return res, nil -} - -// PeriodicallyEnqueue job -func (gcwp *GoCraftWorkPool) PeriodicallyEnqueue(jobName string, params models.Parameters, cronSetting string) (models.JobStats, error) { - id, nextRun, err := gcwp.scheduler.Schedule(jobName, params, cronSetting) - if err != nil { - return models.JobStats{}, err - } - - res := models.JobStats{ - Stats: &models.JobStatData{ - JobID: id, - JobName: jobName, - Status: job.JobStatusPending, - JobKind: job.JobKindPeriodic, - CronSpec: cronSetting, - EnqueueTime: time.Now().Unix(), - UpdateTime: time.Now().Unix(), - RefLink: fmt.Sprintf("/api/v1/jobs/%s", id), - RunAt: nextRun, - IsMultipleExecutions: true, // True for periodic job - }, - } - - gcwp.statsManager.Save(res) - - return res, nil -} - -// GetJobStats return the job stats of the specified enqueued job. 
-func (gcwp *GoCraftWorkPool) GetJobStats(jobID string) (models.JobStats, error) { - if utils.IsEmptyStr(jobID) { - return models.JobStats{}, errors.New("empty job ID") - } - - return gcwp.statsManager.Retrieve(jobID) -} - -// Stats of pool -func (gcwp *GoCraftWorkPool) Stats() (models.JobPoolStats, error) { - // Get the status of workerpool via client - hbs, err := gcwp.client.WorkerPoolHeartbeats() - if err != nil { - return models.JobPoolStats{}, err - } - - // Find the heartbeat of this pool via pid - stats := make([]*models.JobPoolStatsData, 0) - for _, hb := range hbs { - if hb.HeartbeatAt == 0 { - continue // invalid ones - } - - wPoolStatus := workerPoolStatusHealthy - if time.Unix(hb.HeartbeatAt, 0).Add(workerPoolDeadTime).Before(time.Now()) { - wPoolStatus = workerPoolStatusDead - } - stat := &models.JobPoolStatsData{ - WorkerPoolID: hb.WorkerPoolID, - StartedAt: hb.StartedAt, - HeartbeatAt: hb.HeartbeatAt, - JobNames: hb.JobNames, - Concurrency: hb.Concurrency, - Status: wPoolStatus, - } - stats = append(stats, stat) - } - - if len(stats) == 0 { - return models.JobPoolStats{}, errors.New("Failed to get stats of worker pools") - } - - return models.JobPoolStats{ - Pools: stats, - }, nil -} - -// StopJob will stop the job -func (gcwp *GoCraftWorkPool) StopJob(jobID string) error { - if utils.IsEmptyStr(jobID) { - return errors.New("empty job ID") - } - - theJob, err := gcwp.statsManager.Retrieve(jobID) - if err != nil { - return err - } - - switch theJob.Stats.JobKind { - case job.JobKindGeneric: - // Only running job can be stopped - if theJob.Stats.Status != job.JobStatusRunning { - return fmt.Errorf("job '%s' is not a running job", jobID) - } - case job.JobKindScheduled: - // we need to delete the scheduled job in the queue if it is not running yet - // otherwise, stop it. 
- if theJob.Stats.Status == job.JobStatusPending { - if err := gcwp.client.DeleteScheduledJob(theJob.Stats.RunAt, jobID); err != nil { - return err - } - - // Update the job status to 'stopped' - gcwp.statsManager.SetJobStatus(jobID, job.JobStatusStopped) - - logger.Debugf("Scheduled job which plan to run at %d '%s' is stopped", theJob.Stats.RunAt, jobID) - - return nil - } - case job.JobKindPeriodic: - // firstly delete the periodic job policy - if err := gcwp.scheduler.UnSchedule(jobID); err != nil { - return err - } - - logger.Infof("Periodic job policy %s is removed", jobID) - - // secondly we need try to delete the job instances scheduled for this periodic job, a try best action - if err := gcwp.deleteScheduledJobsOfPeriodicPolicy(theJob.Stats.JobID); err != nil { - // only logged - logger.Errorf("Errors happened when deleting jobs of periodic policy %s: %s", theJob.Stats.JobID, err) - } - - // thirdly expire the job stats of this periodic job if exists - if err := gcwp.statsManager.ExpirePeriodicJobStats(theJob.Stats.JobID); err != nil { - // only logged - logger.Errorf("Expire the stats of job %s failed with error: %s\n", theJob.Stats.JobID, err) - } - - return nil - default: - return fmt.Errorf("Job kind %s is not supported", theJob.Stats.JobKind) - } - - // Check if the job has 'running' instance - if theJob.Stats.Status == job.JobStatusRunning { - // Send 'stop' ctl command to the running instance - if err := gcwp.statsManager.SendCommand(jobID, opm.CtlCommandStop, false); err != nil { - return err - } - } - - return nil -} - -// CancelJob will cancel the job -func (gcwp *GoCraftWorkPool) CancelJob(jobID string) error { - if utils.IsEmptyStr(jobID) { - return errors.New("empty job ID") - } - - theJob, err := gcwp.statsManager.Retrieve(jobID) - if err != nil { - return err - } - - switch theJob.Stats.JobKind { - case job.JobKindGeneric: - if theJob.Stats.Status != job.JobStatusRunning { - return fmt.Errorf("only running job can be cancelled, job '%s' seems 
not running now", theJob.Stats.JobID) - } - - // Send 'cancel' ctl command to the running instance - if err := gcwp.statsManager.SendCommand(jobID, opm.CtlCommandCancel, false); err != nil { - return err - } - break - default: - return fmt.Errorf("job kind '%s' does not support 'cancel' operation", theJob.Stats.JobKind) - } - - return nil -} - -// RetryJob retry the job -func (gcwp *GoCraftWorkPool) RetryJob(jobID string) error { - if utils.IsEmptyStr(jobID) { - return errors.New("empty job ID") - } - - theJob, err := gcwp.statsManager.Retrieve(jobID) - if err != nil { - return err - } - - if theJob.Stats.DieAt == 0 { - return fmt.Errorf("job '%s' is not a retryable job", jobID) - } - - return gcwp.client.RetryDeadJob(theJob.Stats.DieAt, jobID) -} - -// IsKnownJob ... -func (gcwp *GoCraftWorkPool) IsKnownJob(name string) (interface{}, bool) { - v, ok := gcwp.knownJobs[name] - return v, ok -} - -// ValidateJobParameters ... -func (gcwp *GoCraftWorkPool) ValidateJobParameters(jobType interface{}, params map[string]interface{}) error { - if jobType == nil { - return errors.New("nil job type") - } - - theJ := Wrap(jobType) - return theJ.Validate(params) -} - -// RegisterHook registers status hook url -// sync method -func (gcwp *GoCraftWorkPool) RegisterHook(jobID string, hookURL string) error { - if utils.IsEmptyStr(jobID) { - return errors.New("empty job ID") - } - - if !utils.IsValidURL(hookURL) { - return errors.New("invalid hook url") - } - - return gcwp.statsManager.RegisterHook(jobID, hookURL, false) -} - -// A try best method to delete the scheduled jobs of one periodic job -func (gcwp *GoCraftWorkPool) deleteScheduledJobsOfPeriodicPolicy(policyID string) error { - // Check the scope of [-periodicEnqueuerHorizon, -1] - // If the job is still not completed after a 'periodicEnqueuerHorizon', just ignore it - now := time.Now().Unix() // Baseline - startTime := now - (int64)(periodicEnqueuerHorizon/time.Minute)*60 - - // Try to delete more - // Get the range scope 
- start := (opm.Range)(startTime) - ids, err := gcwp.statsManager.GetExecutions(policyID, start) - if err != nil { - return err - } - - logger.Debugf("Found scheduled jobs '%v' in scope [%d,+inf] for periodic job policy %s", ids, start, policyID) - - if len(ids) == 0 { - // Treat as a normal case, nothing need to do - return nil - } - - multiErrs := []string{} - for _, id := range ids { - subJob, err := gcwp.statsManager.Retrieve(id) - if err != nil { - multiErrs = append(multiErrs, err.Error()) - continue // going on - } - - if subJob.Stats.Status == job.JobStatusRunning { - // Send 'stop' ctl command to the running instance - if err := gcwp.statsManager.SendCommand(subJob.Stats.JobID, opm.CtlCommandStop, false); err != nil { - multiErrs = append(multiErrs, err.Error()) - continue - } - - logger.Debugf("Stop running job %s for periodic job policy %s", subJob.Stats.JobID, policyID) - } else { - if subJob.Stats.JobKind == job.JobKindScheduled && - subJob.Stats.Status == job.JobStatusPending { - // The pending scheduled job - if err := gcwp.client.DeleteScheduledJob(subJob.Stats.RunAt, subJob.Stats.JobID); err != nil { - multiErrs = append(multiErrs, err.Error()) - continue // going on - } - - // Log action - logger.Debugf("Delete scheduled job for periodic job policy %s: runat = %d", policyID, subJob.Stats.RunAt) - } - } - } - - if len(multiErrs) > 0 { - return errors.New(strings.Join(multiErrs, "\n")) - } - - return nil -} - -func (gcwp *GoCraftWorkPool) handleSchedulePolicy(data interface{}) error { - if data == nil { - return errors.New("nil data interface") - } - - pl, ok := data.(*period.PeriodicJobPolicy) - if !ok { - return errors.New("malformed policy object") - } - - return gcwp.scheduler.AcceptPeriodicPolicy(pl) -} - -func (gcwp *GoCraftWorkPool) handleUnSchedulePolicy(data interface{}) error { - if data == nil { - return errors.New("nil data interface") - } - - pl, ok := data.(*period.PeriodicJobPolicy) - if !ok { - return errors.New("malformed policy 
object") - } - - removed := gcwp.scheduler.RemovePeriodicPolicy(pl.PolicyID) - if removed == nil { - return errors.New("nothing removed") - } - - return nil -} - -func (gcwp *GoCraftWorkPool) handleRegisterStatusHook(data interface{}) error { - if data == nil { - return errors.New("nil data interface") - } - - hook, ok := data.(*opm.HookData) - if !ok { - return errors.New("malformed hook object") - } - - return gcwp.statsManager.RegisterHook(hook.JobID, hook.HookURL, true) -} - -func (gcwp *GoCraftWorkPool) handleOPCommandFiring(data interface{}) error { - if data == nil { - return errors.New("nil data interface") - } - - commands, ok := data.([]interface{}) - if !ok || len(commands) != 2 { - return errors.New("malformed op commands object") - } - jobID, ok := commands[0].(string) - command, ok := commands[1].(string) - if !ok { - return errors.New("malformed op command info") - } - - // Put the command into the maintaining list - return gcwp.statsManager.SendCommand(jobID, command, true) -} - -// log the job -func (rpc *RedisPoolContext) logJob(job *work.Job, next work.NextMiddlewareFunc) error { - logger.Infof("Job incoming: %s:%s", job.Name, job.ID) - return next() -} - -// Ping the redis server -func (gcwp *GoCraftWorkPool) ping() error { - conn := gcwp.redisPool.Get() - defer conn.Close() - - var err error - for count := 1; count <= pingRedisMaxTimes; count++ { - if _, err = conn.Do("ping"); err == nil { - return nil - } - - time.Sleep(time.Duration(count+4) * time.Second) - } - - return fmt.Errorf("connect to redis server timeout: %s", err.Error()) -} - -// generate the job stats data -func generateResult(j *work.Job, jobKind string, isUnique bool) models.JobStats { - if j == nil { - return models.JobStats{} - } - - return models.JobStats{ - Stats: &models.JobStatData{ - JobID: j.ID, - JobName: j.Name, - JobKind: jobKind, - IsUnique: isUnique, - Status: job.JobStatusPending, - EnqueueTime: j.EnqueuedAt, - UpdateTime: time.Now().Unix(), - RefLink: 
fmt.Sprintf("/api/v1/jobs/%s", j.ID), - }, - } -} diff --git a/src/jobservice/runner/redis.go b/src/jobservice/runner/redis.go new file mode 100644 index 000000000..4a48c103a --- /dev/null +++ b/src/jobservice/runner/redis.go @@ -0,0 +1,190 @@ +// Copyright Project Harbor Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package runner + +import ( + "github.com/goharbor/harbor/src/jobservice/job/impl" + "runtime" + + "fmt" + "github.com/gocraft/work" + "github.com/goharbor/harbor/src/jobservice/env" + "github.com/goharbor/harbor/src/jobservice/errs" + "github.com/goharbor/harbor/src/jobservice/job" + "github.com/goharbor/harbor/src/jobservice/lcm" + "github.com/goharbor/harbor/src/jobservice/logger" + "github.com/pkg/errors" + "time" +) + +// RedisJob is a job wrapper to wrap the job.Interface to the style which can be recognized by the redis worker. 
+type RedisJob struct { + job interface{} // the real job implementation + context *env.Context // context + ctl lcm.Controller // life cycle controller +} + +// NewRedisJob is constructor of RedisJob +func NewRedisJob(job interface{}, ctx *env.Context, ctl lcm.Controller) *RedisJob { + return &RedisJob{ + job: job, + context: ctx, + ctl: ctl, + } +} + +// Run the job +func (rj *RedisJob) Run(j *work.Job) (err error) { + var ( + runningJob job.Interface + execContext job.Context + tracker lcm.Tracker + markStopped *bool = bp(false) + ) + + // Defer to log the exit result + defer func() { + if !*markStopped { + if err == nil { + logger.Infof("Job '%s:%s' exit with success", j.Name, j.ID) + } else { + // log error + logger.Errorf("Job '%s:%s' exit with error: %s\n", j.Name, j.ID, err) + } + } + }() + + // Track the running job now + jID := j.ID + if isPeriodicJobExecution(j) { + jID = fmt.Sprintf("%s@%d", j.ID, j.EnqueuedAt) + } + + if tracker, err = rj.ctl.Track(jID); err != nil { + // As tracker creation failed, there is no way to mark the job status change. + // Also a non nil error return consumes a fail. If all retries are failed here, + // it will cause the job to be zombie one (pending forever). + // Here we will avoid the job to consume a fail and let it retry again and again. + // However, to avoid a forever retry, we will check the FailedAt timestamp. + now := time.Now().Unix() + if j.FailedAt == 0 || now-j.FailedAt < 2*24*3600 { + j.Fails-- + } + + return + } + + if job.RunningStatus.Compare(job.Status(tracker.Job().Info.Status)) >= 0 { + // Probably jobs has been stopped by directly mark status to stopped. + // Directly exit and no retry + markStopped = bp(true) + return nil + } + + // Defer to switch status + defer func() { + // Switch job status based on the returned error. + // The err happened here should not override the job run error, just log it. 
+ if err != nil { + if errs.IsJobStoppedError(err) { + if er := tracker.Stop(); er != nil { + logger.Errorf("Mark job status to stopped error: %s", err) + } + } else { + if er := tracker.Fail(); er != nil { + logger.Errorf("Mark job status to failure error: %s", err) + } + } + + return + } + + // Mark job status to success. + if er := tracker.Succeed(); er != nil { + logger.Errorf("Mark job status to success error: %s", err) + } + }() + + // Defer to handle runtime error + defer func() { + if r := recover(); r != nil { + // Log the stack + buf := make([]byte, 1<<16) + size := runtime.Stack(buf, false) + err = errors.Errorf("runtime error: %s; stack: %s", r, buf[0:size]) + logger.Errorf("Run job %s:%s error: %s", j.Name, j.ID, err) + } + }() + + // Build job context + if rj.context.JobContext == nil { + rj.context.JobContext = impl.NewDefaultContext(rj.context.SystemContext) + if execContext, err = rj.context.JobContext.Build(tracker); err != nil { + return + } + } + // Defer to close logger stream + defer func() { + // Close open io stream first + if closer, ok := execContext.GetLogger().(logger.Closer); ok { + if er := closer.Close(); er != nil { + logger.Errorf("Close job logger failed: %s", er) + } + } + }() + + // Wrap job + runningJob = Wrap(rj.job) + // Set status to run + if err = tracker.Run(); err != nil { + return + } + // Run the job + err = runningJob.Run(execContext, j.Args) + // Handle retry + rj.retry(runningJob, j, (err != nil && errs.IsJobStoppedError(err))) + // Handle periodic job execution + if isPeriodicJobExecution(j) { + if er := tracker.PeriodicExecutionDone(); er != nil { + // Just log it + logger.Error(er) + } + } + + return +} + +func (rj *RedisJob) retry(j job.Interface, wj *work.Job, stopped bool) { + if stopped || !j.ShouldRetry() { + // Cancel retry immediately + // Make it big enough to avoid retrying + wj.Fails = 10000000000 + return + } +} + +func isPeriodicJobExecution(j *work.Job) bool { + if isPeriodic, ok := 
j.Args["_job_kind_periodic_"]; ok { + if isPeriodicV, yes := isPeriodic.(bool); yes && isPeriodicV { + return true + } + } + + return false +} + +func bp(b bool) *bool { + return &b +} diff --git a/src/jobservice/pool/redis_job_wrapper_test.go b/src/jobservice/runner/redis_test.go similarity index 96% rename from src/jobservice/pool/redis_job_wrapper_test.go rename to src/jobservice/runner/redis_test.go index c5d4122e3..cac9e4f42 100644 --- a/src/jobservice/pool/redis_job_wrapper_test.go +++ b/src/jobservice/runner/redis_test.go @@ -11,7 +11,7 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. -package pool +package runner import ( "context" @@ -20,10 +20,10 @@ import ( "testing" "time" + "github.com/goharbor/harbor/src/jobservice/common/utils" "github.com/goharbor/harbor/src/jobservice/job" "github.com/goharbor/harbor/src/jobservice/logger/backend" "github.com/goharbor/harbor/src/jobservice/models" - "github.com/goharbor/harbor/src/jobservice/utils" "github.com/gocraft/work" @@ -112,7 +112,7 @@ func (j *fakeParentJob) Run(ctx env.JobContext, params map[string]interface{}) e Job: &models.JobData{ Name: "SUB_JOB", Metadata: &models.JobMetadata{ - JobKind: job.JobKindGeneric, + JobKind: job.KindGeneric, }, }, }) diff --git a/src/jobservice/pool/runner.go b/src/jobservice/runner/wrapper.go similarity index 98% rename from src/jobservice/pool/runner.go rename to src/jobservice/runner/wrapper.go index 4a3f648a6..a076fbffe 100644 --- a/src/jobservice/pool/runner.go +++ b/src/jobservice/runner/wrapper.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-package pool +package runner import ( "reflect" diff --git a/src/jobservice/runtime/bootstrap.go b/src/jobservice/runtime/bootstrap.go index d59a6ecaf..abbf391a9 100644 --- a/src/jobservice/runtime/bootstrap.go +++ b/src/jobservice/runtime/bootstrap.go @@ -23,20 +23,21 @@ import ( "syscall" "time" - "github.com/goharbor/harbor/src/common/job" "github.com/goharbor/harbor/src/jobservice/api" + "github.com/goharbor/harbor/src/jobservice/common/utils" "github.com/goharbor/harbor/src/jobservice/config" "github.com/goharbor/harbor/src/jobservice/core" "github.com/goharbor/harbor/src/jobservice/env" - jsjob "github.com/goharbor/harbor/src/jobservice/job" - "github.com/goharbor/harbor/src/jobservice/job/impl" + "github.com/goharbor/harbor/src/jobservice/hook" + "github.com/goharbor/harbor/src/jobservice/job" "github.com/goharbor/harbor/src/jobservice/job/impl/gc" "github.com/goharbor/harbor/src/jobservice/job/impl/replication" + "github.com/goharbor/harbor/src/jobservice/job/impl/sample" "github.com/goharbor/harbor/src/jobservice/job/impl/scan" + "github.com/goharbor/harbor/src/jobservice/lcm" "github.com/goharbor/harbor/src/jobservice/logger" - "github.com/goharbor/harbor/src/jobservice/models" - "github.com/goharbor/harbor/src/jobservice/pool" - "github.com/goharbor/harbor/src/jobservice/utils" + "github.com/goharbor/harbor/src/jobservice/worker" + "github.com/goharbor/harbor/src/jobservice/worker/cworker" "github.com/gomodule/redigo/redis" ) @@ -52,11 +53,11 @@ var JobService = &Bootstrap{} // Bootstrap is coordinating process to help load and start the other components to serve. 
type Bootstrap struct { - jobConextInitializer env.JobContextInitializer + jobConextInitializer job.JobContextInitializer } // SetJobContextInitializer set the job context initializer -func (bs *Bootstrap) SetJobContextInitializer(initializer env.JobContextInitializer) { +func (bs *Bootstrap) SetJobContextInitializer(initializer job.JobContextInitializer) { if initializer != nil { bs.jobConextInitializer = initializer } @@ -64,92 +65,107 @@ func (bs *Bootstrap) SetJobContextInitializer(initializer env.JobContextInitiali // LoadAndRun will load configurations, initialize components and then start the related process to serve requests. // Return error if meet any problems. -func (bs *Bootstrap) LoadAndRun(ctx context.Context, cancel context.CancelFunc) { +func (bs *Bootstrap) LoadAndRun(ctx context.Context) { rootContext := &env.Context{ SystemContext: ctx, WG: &sync.WaitGroup{}, - ErrorChan: make(chan error, 1), // with 1 buffer + ErrorChan: make(chan error, 3), // with 3 buffers } // Build specified job context if bs.jobConextInitializer != nil { - if jobCtx, err := bs.jobConextInitializer(rootContext); err == nil { + if jobCtx, err := bs.jobConextInitializer(ctx); err == nil { rootContext.JobContext = jobCtx } else { logger.Fatalf("Failed to initialize job context: %s\n", err) } } - // Start the pool + // Alliance to config + cfg := config.DefaultConfig + var ( - backendPool pool.Interface - wpErr error + backendWorker worker.Interface + lcmCtl lcm.Controller + wErr error ) - if config.DefaultConfig.PoolConfig.Backend == config.JobServicePoolBackendRedis { - backendPool, wpErr = bs.loadAndRunRedisWorkerPool(rootContext, config.DefaultConfig) - if wpErr != nil { - logger.Fatalf("Failed to load and run worker pool: %s\n", wpErr.Error()) + if cfg.PoolConfig.Backend == config.JobServicePoolBackendRedis { + // Number of workers + workerNum := cfg.PoolConfig.WorkerCount + // Add {} to namespace to void slot issue + namespace := fmt.Sprintf("{%s}", 
cfg.PoolConfig.RedisPoolCfg.Namespace) + // Get redis connection pool + redisPool := bs.getRedisPool(cfg.PoolConfig.RedisPoolCfg.RedisURL) + // Create hook agent, it's a singleton object + hookAgent := hook.NewAgent(ctx, namespace, redisPool) + hookCallback := func(URL string, change *job.StatusChange) error { + msg := fmt.Sprintf("status change: job=%s, status=%s", change.JobID, change.Status) + if !utils.IsEmptyStr(change.CheckIn) { + msg = fmt.Sprintf("%s, check_in=%s", msg, change.CheckIn) + } + + evt := &hook.Event{ + URL: URL, + Timestamp: time.Now().Unix(), + Data: change, + Message: msg, + } + + return hookAgent.Trigger(evt) } + + // Create job life cycle management controller + lcmCtl = lcm.NewController(ctx, namespace, redisPool, hookCallback) + + // Start the backend worker + backendWorker, wErr = bs.loadAndRunRedisWorkerPool(rootContext, namespace, workerNum, redisPool, lcmCtl) + if wErr != nil { + logger.Fatalf("Failed to load and run worker worker: %s\n", wErr.Error()) + } + + // Start agent + // Non blocking call + hookAgent.Serve() } else { - logger.Fatalf("Worker pool backend '%s' is not supported", config.DefaultConfig.PoolConfig.Backend) + logger.Fatalf("Worker worker backend '%s' is not supported", cfg.PoolConfig.Backend) } // Initialize controller - ctl := core.NewController(backendPool) - // Keep the job launch func in the system context - var launchJobFunc jsjob.LaunchJobFunc = func(req models.JobRequest) (models.JobStats, error) { - return ctl.LaunchJob(req) - } - rootContext.SystemContext = context.WithValue(rootContext.SystemContext, utils.CtlKeyOfLaunchJobFunc, launchJobFunc) - + ctl := core.NewController(backendWorker, lcmCtl) // Start the API server - apiServer := bs.loadAndRunAPIServer(rootContext, config.DefaultConfig, ctl) - logger.Infof("Server is started at %s:%d with %s", "", config.DefaultConfig.Port, config.DefaultConfig.Protocol) + apiServer := bs.createAPIServer(ctx, cfg, ctl) - // To indicate if any errors occurred - var err 
error - // Block here - sig := make(chan os.Signal, 1) - signal.Notify(sig, os.Interrupt, syscall.SIGTERM, os.Kill) - select { - case <-sig: - case err = <-rootContext.ErrorChan: - } - - // Call cancel to send termination signal to other interested parts. - cancel() - - // Gracefully shutdown - apiServer.Stop() - - // In case stop is called before the server is ready - closeChan := make(chan bool, 1) - go func() { - timer := time.NewTimer(10 * time.Second) - defer timer.Stop() + // Listen to the system signals + go func(errChan chan error) { + defer func() { + // Gracefully shutdown + if err := apiServer.Stop(); err != nil { + logger.Error(err) + } + }() + sig := make(chan os.Signal, 1) + signal.Notify(sig, os.Interrupt, syscall.SIGTERM, os.Kill) select { - case <-timer.C: - // Try again - apiServer.Stop() - case <-closeChan: + case <-sig: + return + case err := <-errChan: + logger.Errorf("error received from error chan: %s", err) return } + }(rootContext.ErrorChan) - }() - - rootContext.WG.Wait() - closeChan <- true - - if err != nil { - logger.Fatalf("Server exit with error: %s\n", err) + // Blocking here + logger.Infof("API server is serving at %d with %s mode", cfg.Port, cfg.Protocol) + if err := apiServer.Start(); err != nil { + logger.Errorf("API server error: %s", err) + } else { + logger.Info("API server is gracefully shut down") } - - logger.Infof("Server gracefully exit") } // Load and run the API server. 
-func (bs *Bootstrap) loadAndRunAPIServer(ctx *env.Context, cfg *config.Configuration, ctl *core.Controller) *api.Server { +func (bs *Bootstrap) createAPIServer(ctx context.Context, cfg *config.Configuration, ctl core.Interface) *api.Server { // Initialized API server authProvider := &api.SecretAuthenticator{} handler := api.NewDefaultHandler(ctl) @@ -163,22 +179,51 @@ func (bs *Bootstrap) loadAndRunAPIServer(ctx *env.Context, cfg *config.Configura serverConfig.Key = cfg.HTTPSConfig.Key } - server := api.NewServer(ctx, router, serverConfig) - // Start processes - server.Start() - - return server + return api.NewServer(ctx, router, serverConfig) } -// Load and run the worker pool -func (bs *Bootstrap) loadAndRunRedisWorkerPool(ctx *env.Context, cfg *config.Configuration) (pool.Interface, error) { - redisPool := &redis.Pool{ +// Load and run the worker worker +func (bs *Bootstrap) loadAndRunRedisWorkerPool( + ctx *env.Context, + ns string, + workers uint, + redisPool *redis.Pool, + lcmCtl lcm.Controller, +) (worker.Interface, error) { + redisWorker := cworker.NewWorker(ctx, ns, workers, redisPool, lcmCtl) + // Register jobs here + if err := redisWorker.RegisterJobs( + map[string]interface{}{ + // Only for debugging and testing purpose + job.SampleJob: (*sample.Job)(nil), + // Functional jobs + job.ImageScanJob: (*scan.ClairJob)(nil), + job.ImageScanAllJob: (*scan.All)(nil), + job.ImageTransfer: (*replication.Transfer)(nil), + job.ImageDelete: (*replication.Deleter)(nil), + job.ImageReplicate: (*replication.Replicator)(nil), + job.ImageGC: (*gc.GarbageCollector)(nil), + }); err != nil { + // exit + return nil, err + } + + if err := redisWorker.Start(); err != nil { + return nil, err + } + + return redisWorker, nil +} + +// Get a redis connection pool +func (bs *Bootstrap) getRedisPool(redisURL string) *redis.Pool { + return &redis.Pool{ MaxActive: 6, MaxIdle: 6, Wait: true, Dial: func() (redis.Conn, error) { return redis.DialURL( - 
cfg.PoolConfig.RedisPoolCfg.RedisURL, + redisURL, redis.DialConnectTimeout(dialConnectionTimeout), redis.DialReadTimeout(dialReadTimeout), redis.DialWriteTimeout(dialWriteTimeout), @@ -193,32 +238,4 @@ func (bs *Bootstrap) loadAndRunRedisWorkerPool(ctx *env.Context, cfg *config.Con return err }, } - - redisWorkerPool := pool.NewGoCraftWorkPool(ctx, - fmt.Sprintf("{%s}", cfg.PoolConfig.RedisPoolCfg.Namespace), - cfg.PoolConfig.WorkerCount, - redisPool) - // Register jobs here - if err := redisWorkerPool.RegisterJob(impl.KnownJobDemo, (*impl.DemoJob)(nil)); err != nil { - // exit - return nil, err - } - if err := redisWorkerPool.RegisterJobs( - map[string]interface{}{ - job.ImageScanJob: (*scan.ClairJob)(nil), - job.ImageScanAllJob: (*scan.All)(nil), - job.ImageTransfer: (*replication.Transfer)(nil), - job.ImageDelete: (*replication.Deleter)(nil), - job.ImageReplicate: (*replication.Replicator)(nil), - job.ImageGC: (*gc.GarbageCollector)(nil), - }); err != nil { - // exit - return nil, err - } - - if err := redisWorkerPool.Start(); err != nil { - return nil, err - } - - return redisWorkerPool, nil } diff --git a/src/jobservice/utils/gocarft_work.go b/src/jobservice/utils/gocarft_work.go deleted file mode 100644 index 65b8fb767..000000000 --- a/src/jobservice/utils/gocarft_work.go +++ /dev/null @@ -1,75 +0,0 @@ -// Copyright Project Harbor Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package utils - -import ( - "crypto/rand" - "encoding/json" - "fmt" - "io" - - "github.com/gocraft/work" -) - -// Functions defined here are mainly from dep lib "github.com/gocraft/work". -// Only for compatible - -// MakeIdentifier creates uuid for job. -func MakeIdentifier() string { - b := make([]byte, 12) - _, err := io.ReadFull(rand.Reader, b) - if err != nil { - return "" - } - return fmt.Sprintf("%x", b) -} - -// MakeUniquePeriodicID creates id for the periodic job. -func MakeUniquePeriodicID(name, spec string, epoch int64) string { - return fmt.Sprintf("periodic:job:%s:%s:%d", name, spec, epoch) -} - -// RedisNamespacePrefix ... Same with 'KeyNamespacePrefix', only for compatibility. -func RedisNamespacePrefix(namespace string) string { - return KeyNamespacePrefix(namespace) -} - -// RedisKeyScheduled returns key of scheduled job. -func RedisKeyScheduled(namespace string) string { - return RedisNamespacePrefix(namespace) + "scheduled" -} - -// RedisKeyLastPeriodicEnqueue returns key of timestamp if last periodic enqueue. -func RedisKeyLastPeriodicEnqueue(namespace string) string { - return RedisNamespacePrefix(namespace) + "last_periodic_enqueue" -} - -// RedisKeyDead returns key of the dead jobs. -func RedisKeyDead(namespace string) string { - return RedisNamespacePrefix(namespace) + "dead" -} - -// SerializeJob encodes work.Job to json data. -func SerializeJob(job *work.Job) ([]byte, error) { - return json.Marshal(job) -} - -// DeSerializeJob decodes bytes to ptr of work.Job. 
-func DeSerializeJob(jobBytes []byte) (*work.Job, error) { - var j work.Job - err := json.Unmarshal(jobBytes, &j) - - return &j, err -} diff --git a/src/jobservice/pool/de_duplicator.go b/src/jobservice/worker/cworker/de_duplicator.go similarity index 70% rename from src/jobservice/pool/de_duplicator.go rename to src/jobservice/worker/cworker/de_duplicator.go index 91fe2d98d..9264367b0 100644 --- a/src/jobservice/pool/de_duplicator.go +++ b/src/jobservice/worker/cworker/de_duplicator.go @@ -1,4 +1,4 @@ -package pool +package cworker import ( "bytes" @@ -7,13 +7,13 @@ import ( "fmt" "strings" + "github.com/goharbor/harbor/src/jobservice/common/rds" "github.com/goharbor/harbor/src/jobservice/errs" - "github.com/goharbor/harbor/src/jobservice/models" - "github.com/goharbor/harbor/src/jobservice/utils" + "github.com/goharbor/harbor/src/jobservice/job" "github.com/gomodule/redigo/redis" ) -// DeDuplicator is designed to handle the uniqueness of the job. +// redisDeDuplicator is designed to handle the uniqueness of the job. // Once a job is declared to be unique, the job can be enqueued only if // no same job (same job name and parameters) in the queue or running in progress. // Adopt the same unique mechanism with the upstream framework. @@ -27,7 +27,7 @@ type DeDuplicator interface { // Returns: // If no unique flag and successfully set it, a nil error is returned; // otherwise, a non nil error is returned. - Unique(jobName string, params models.Parameters) error + MustUnique(jobName string, params job.Parameters) error // Remove the unique flag after job exiting // Parameters: @@ -37,30 +37,30 @@ type DeDuplicator interface { // Returns: // If unique flag is successfully removed, a nil error is returned; // otherwise, a non nil error is returned. - DelUniqueSign(jobName string, params models.Parameters) error + DelUniqueSign(jobName string, params job.Parameters) error } -// RedisDeDuplicator implement the DeDuplicator interface based on redis. 
-type RedisDeDuplicator struct { +// redisDeDuplicator implements the redisDeDuplicator interface based on redis. +type redisDeDuplicator struct { // Redis namespace namespace string - // Redis conn pool + // Redis conn worker pool *redis.Pool } -// NewRedisDeDuplicator is constructor of RedisDeDuplicator -func NewRedisDeDuplicator(ns string, pool *redis.Pool) *RedisDeDuplicator { - return &RedisDeDuplicator{ +// NewDeDuplicator is constructor of redisDeDuplicator +func NewDeDuplicator(ns string, pool *redis.Pool) DeDuplicator { + return &redisDeDuplicator{ namespace: ns, pool: pool, } } -// Unique checks if the job is unique and set unique flag if it is not set yet. -func (rdd *RedisDeDuplicator) Unique(jobName string, params models.Parameters) error { +// MustUnique checks if the job is unique and set unique flag if it is not set yet. +func (rdd *redisDeDuplicator) MustUnique(jobName string, params job.Parameters) error { uniqueKey, err := redisKeyUniqueJob(rdd.namespace, jobName, params) if err != nil { - return fmt.Errorf("unique job error: %s", err) + return fmt.Errorf("job unique key generated error: %s", err) } conn := rdd.pool.Get() @@ -91,7 +91,7 @@ func (rdd *RedisDeDuplicator) Unique(jobName string, params models.Parameters) e } // DelUniqueSign delete the job unique sign -func (rdd *RedisDeDuplicator) DelUniqueSign(jobName string, params models.Parameters) error { +func (rdd *redisDeDuplicator) DelUniqueSign(jobName string, params job.Parameters) error { uniqueKey, err := redisKeyUniqueJob(rdd.namespace, jobName, params) if err != nil { return fmt.Errorf("delete unique job error: %s", err) @@ -111,7 +111,7 @@ func (rdd *RedisDeDuplicator) DelUniqueSign(jobName string, params models.Parame func redisKeyUniqueJob(namespace, jobName string, args map[string]interface{}) (string, error) { var buf bytes.Buffer - buf.WriteString(utils.KeyNamespacePrefix(namespace)) + buf.WriteString(rds.KeyNamespacePrefix(namespace)) buf.WriteString("unique:running:") 
buf.WriteString(jobName) buf.WriteRune(':') diff --git a/src/jobservice/pool/de_duplicator_test.go b/src/jobservice/worker/cworker/de_duplicator_test.go similarity index 65% rename from src/jobservice/pool/de_duplicator_test.go rename to src/jobservice/worker/cworker/de_duplicator_test.go index e1607ae0f..761443b41 100644 --- a/src/jobservice/pool/de_duplicator_test.go +++ b/src/jobservice/worker/cworker/de_duplicator_test.go @@ -1,4 +1,4 @@ -package pool +package cworker import ( "testing" @@ -12,13 +12,13 @@ func TestDeDuplicator(t *testing.T) { "image": "ubuntu:latest", } - rdd := NewRedisDeDuplicator(tests.GiveMeTestNamespace(), rPool) + rdd := NewDeDuplicator(tests.GiveMeTestNamespace(), rPool) - if err := rdd.Unique(jobName, jobParams); err != nil { + if err := rdd.MustUnique(jobName, jobParams); err != nil { t.Error(err) } - if err := rdd.Unique(jobName, jobParams); err == nil { + if err := rdd.DelUniqueSign(jobName, jobParams); err == nil { t.Errorf("expect duplicated error but got nil error") } diff --git a/src/jobservice/worker/cworker/redis_pool.go b/src/jobservice/worker/cworker/redis_pool.go new file mode 100644 index 000000000..3b6294bfb --- /dev/null +++ b/src/jobservice/worker/cworker/redis_pool.go @@ -0,0 +1,475 @@ +// Copyright Project Harbor Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package cworker + +import ( + "fmt" + "reflect" + "time" + + "github.com/gocraft/work" + "github.com/goharbor/harbor/src/jobservice/common/query" + "github.com/goharbor/harbor/src/jobservice/common/utils" + "github.com/goharbor/harbor/src/jobservice/env" + "github.com/goharbor/harbor/src/jobservice/job" + "github.com/goharbor/harbor/src/jobservice/lcm" + "github.com/goharbor/harbor/src/jobservice/logger" + "github.com/goharbor/harbor/src/jobservice/period" + "github.com/goharbor/harbor/src/jobservice/runner" + "github.com/goharbor/harbor/src/jobservice/worker" + "github.com/gomodule/redigo/redis" + "github.com/pkg/errors" + "sync" +) + +var ( + workerPoolDeadTime = 10 * time.Second +) + +const ( + workerPoolStatusHealthy = "Healthy" + workerPoolStatusDead = "Dead" + pingRedisMaxTimes = 10 + defaultWorkerCount uint = 10 +) + +// basicWorker is the worker implementation based on gocraft/work powered by redis. +type basicWorker struct { + namespace string + redisPool *redis.Pool + pool *work.WorkerPool + enqueuer *work.Enqueuer + client *work.Client + context *env.Context + scheduler period.Scheduler + ctl lcm.Controller + + // key is name of known job + // value is the type of known job + knownJobs *sync.Map +} + +// workerContext ... +// We did not use this context to pass context info so far, just a placeholder. 
+type workerContext struct{} + +// log the job +func (rpc *workerContext) logJob(job *work.Job, next work.NextMiddlewareFunc) error { + jobInfo, _ := utils.SerializeJob(job) + logger.Infof("Job incoming: %s", jobInfo) + + return next() +} + +// NewWorker is constructor of worker +func NewWorker(ctx *env.Context, namespace string, workerCount uint, redisPool *redis.Pool, ctl lcm.Controller) worker.Interface { + wc := defaultWorkerCount + if workerCount > 0 { + wc = workerCount + } + + return &basicWorker{ + namespace: namespace, + redisPool: redisPool, + pool: work.NewWorkerPool(workerContext{}, wc, namespace, redisPool), + enqueuer: work.NewEnqueuer(namespace, redisPool), + client: work.NewClient(namespace, redisPool), + scheduler: period.NewScheduler(ctx.SystemContext, namespace, redisPool, ctl), + ctl: ctl, + context: ctx, + knownJobs: new(sync.Map), + } +} + +// Start to serve +// Unblock action +func (w *basicWorker) Start() error { + if w.redisPool == nil { + return errors.New("missing redis pool") + } + + if utils.IsEmptyStr(w.namespace) { + return errors.New("missing namespace") + } + + if w.context == nil || w.context.SystemContext == nil { + // report and exit + return errors.New("missing context") + } + + if w.ctl == nil { + return errors.New("missing job life cycle controller") + } + + // Test the redis connection + if err := w.ping(); err != nil { + return err + } + + // Start the periodic scheduler + w.context.WG.Add(1) + go func() { + defer func() { + w.context.WG.Done() + }() + // Blocking call + if err := w.scheduler.Start(); err != nil { + w.context.ErrorChan <- err + } + }() + + // Listen to the system signal + w.context.WG.Add(1) + go func() { + defer func() { + w.context.WG.Done() + logger.Infof("Basic worker is stopped") + }() + <-w.context.SystemContext.Done() + w.scheduler.Stop() + w.pool.Stop() + }() + + // Start the backend worker pool + // Add middleware + w.pool.Middleware((*workerContext).logJob) + // Non blocking call + w.pool.Start() + 
logger.Infof("Redis worker is started") + + return nil +} + +// RegisterJobs is used to register multiple jobs to worker. +func (w *basicWorker) RegisterJobs(jobs map[string]interface{}) error { + if jobs == nil || len(jobs) == 0 { + // Do nothing + return nil + } + + for name, j := range jobs { + if err := w.registerJob(name, j); err != nil { + return err + } + } + + return nil +} + +// Enqueue job +func (w *basicWorker) Enqueue(jobName string, params job.Parameters, isUnique bool, webHook string) (*job.Stats, error) { + var ( + j *work.Job + err error + ) + + // As the job is declared to be unique, + // check the uniqueness of the job, + // Here we only need to make sure only 1 job with the same type and parameters in the queue + // For the uniqueness of executing, it can be checked in the running stage + if isUnique { + if j, err = w.enqueuer.EnqueueUnique(jobName, params); err != nil { + return nil, err + } + } else { + // Enqueue job + if j, err = w.enqueuer.Enqueue(jobName, params); err != nil { + return nil, err + } + } + + // avoid backend worker bug + if j == nil { + return nil, fmt.Errorf("job '%s' can not be enqueued, please check the job metatdata", jobName) + } + + return generateResult(j, job.KindGeneric, isUnique), nil +} + +// Schedule job +func (w *basicWorker) Schedule(jobName string, params job.Parameters, runAfterSeconds uint64, isUnique bool, webHook string) (*job.Stats, error) { + var ( + j *work.ScheduledJob + err error + ) + + // As the job is declared to be unique, + // check the uniqueness of the job, + // Here we only need to make sure only 1 job with the same type and parameters in the queue + // For the uniqueness of executing, it can be checked in the running stage + if isUnique { + if j, err = w.enqueuer.EnqueueUniqueIn(jobName, int64(runAfterSeconds), params); err != nil { + return nil, err + } + } else { + // Enqueue job in + if j, err = w.enqueuer.EnqueueIn(jobName, int64(runAfterSeconds), params); err != nil { + return nil, err + 
} + } + + // avoid backend worker bug + if j == nil { + return nil, fmt.Errorf("job '%s' can not be enqueued, please check the job metatdata", jobName) + } + + res := generateResult(j.Job, job.KindScheduled, isUnique) + res.Info.RunAt = j.RunAt + + return res, nil +} + +// PeriodicallyEnqueue job +func (w *basicWorker) PeriodicallyEnqueue(jobName string, params job.Parameters, cronSetting string, isUnique bool, webHook string) (*job.Stats, error) { + p := &period.Policy{ + ID: utils.MakeIdentifier(), + JobName: jobName, + CronSpec: cronSetting, + JobParameters: params, + WebHookURL: webHook, + } + + id, err := w.scheduler.Schedule(p) + if err != nil { + return nil, err + } + + res := &job.Stats{ + Info: &job.StatsInfo{ + JobID: p.ID, + JobName: jobName, + Status: job.ScheduledStatus.String(), + JobKind: job.KindPeriodic, + CronSpec: cronSetting, + WebHookURL: webHook, + NumericPID: id, + EnqueueTime: time.Now().Unix(), + UpdateTime: time.Now().Unix(), + RefLink: fmt.Sprintf("/api/v1/jobs/%s", p.ID), + }, + } + + return res, nil +} + +// Info of worker +func (w *basicWorker) Stats() (*worker.Stats, error) { + // Get the status of worker pool via client + hbs, err := w.client.WorkerPoolHeartbeats() + if err != nil { + return nil, err + } + + // Find the heartbeat of this worker via pid + stats := make([]*worker.StatsData, 0) + for _, hb := range hbs { + if hb.HeartbeatAt == 0 { + continue // invalid ones + } + + wPoolStatus := workerPoolStatusHealthy + if time.Unix(hb.HeartbeatAt, 0).Add(workerPoolDeadTime).Before(time.Now()) { + wPoolStatus = workerPoolStatusDead + } + stat := &worker.StatsData{ + WorkerPoolID: hb.WorkerPoolID, + StartedAt: hb.StartedAt, + HeartbeatAt: hb.HeartbeatAt, + JobNames: hb.JobNames, + Concurrency: hb.Concurrency, + Status: wPoolStatus, + } + stats = append(stats, stat) + } + + if len(stats) == 0 { + return nil, errors.New("failed to get stats of worker pools") + } + + return &worker.Stats{ + Pools: stats, + }, nil +} + +// StopJob will 
stop the job +func (w *basicWorker) StopJob(jobID string) error { + if utils.IsEmptyStr(jobID) { + return errors.New("empty job ID to stop") + } + + t, err := w.ctl.Track(jobID) + if err != nil { + return err + } + + if job.RunningStatus.Compare(job.Status(t.Job().Info.Status)) < 0 { + // Job has been in the final states + return errors.Errorf("mismatch job status %s for stopping job %s", t.Job().Info.Status, jobID) + } + + switch t.Job().Info.JobKind { + case job.KindGeneric: + return t.Stop() + case job.KindScheduled: + // we need to delete the scheduled job in the queue if it is not running yet + // otherwise, stop it. + if err := w.client.DeleteScheduledJob(t.Job().Info.RunAt, jobID); err != nil { + // Job is already running? + logger.Errorf("scheduled job %s (run at = %d) is not found in the queue to stop, is it already running?", jobID, t.Job().Info.RunAt) + } + // Anyway, mark jon stopped + return t.Stop() + case job.KindPeriodic: + return w.scheduler.UnSchedule(jobID) + default: + return errors.Errorf("job kind %s is not supported", t.Job().Info.JobKind) + } +} + +// RetryJob retry the job +func (w *basicWorker) RetryJob(jobID string) error { + return errors.New("not implemented") +} + +// IsKnownJob ... +func (w *basicWorker) IsKnownJob(name string) (interface{}, bool) { + return w.knownJobs.Load(name) +} + +// ValidateJobParameters ... 
+func (w *basicWorker) ValidateJobParameters(jobType interface{}, params job.Parameters) error { + if jobType == nil { + return errors.New("nil job type") + } + + theJ := runner.Wrap(jobType) + return theJ.Validate(params) +} + +// ScheduledJobs returns the scheduled jobs by page +func (w *basicWorker) ScheduledJobs(query *query.Parameter) ([]*job.Stats, int64, error) { + var page uint = 1 + if query != nil && query.PageSize > 1 { + page = query.PageSize + } + + sJobs, total, err := w.client.ScheduledJobs(page) + if err != nil { + return nil, 0, err + } + + res := make([]*job.Stats, 0) + for _, sJob := range sJobs { + t, err := w.ctl.Track(sJob.ID) + if err != nil { + // Just log it + logger.Errorf("cworker: query scheduled jobs error: %s", err) + continue + } + + res = append(res, t.Job()) + } + + return res, total, nil +} + +// RegisterJob is used to register the job to the worker. +// j is the type of job +func (w *basicWorker) registerJob(name string, j interface{}) (err error) { + if utils.IsEmptyStr(name) || j == nil { + return errors.New("job can not be registered with empty name or nil interface") + } + + // j must be job.Interface + if _, ok := j.(job.Interface); !ok { + return errors.New("job must implement the job.Interface") + } + + // 1:1 constraint + if jInList, ok := w.knownJobs.Load(name); ok { + return fmt.Errorf("job name %s has been already registered with %s", name, reflect.TypeOf(jInList).String()) + } + + // Same job implementation can be only registered with one name + w.knownJobs.Range(func(jName interface{}, jInList interface{}) bool { + jobImpl := reflect.TypeOf(j).String() + if reflect.TypeOf(jInList).String() == jobImpl { + err = errors.Errorf("job %s has been already registered with name %s", jobImpl, jName) + return false + } + + return true + }) + + // Something happened in the range + if err != nil { + return + } + + // Wrap job + redisJob := runner.NewRedisJob(j, w.context, w.ctl) + // Get more info from j + theJ := runner.Wrap(j) + 
// Put into the pool + w.pool.JobWithOptions( + name, + work.JobOptions{ + MaxFails: theJ.MaxFails(), + }, + // Use generic handler to handle as we do not accept context with this way. + func(job *work.Job) error { + return redisJob.Run(job) + }, + ) + // Keep the name of registered jobs as known jobs for future validation + w.knownJobs.Store(name, j) + + logger.Infof("Register job %s with name %s", reflect.TypeOf(j).String(), name) + + return nil +} + +// Ping the redis server +func (w *basicWorker) ping() error { + conn := w.redisPool.Get() + defer conn.Close() + + var err error + for count := 1; count <= pingRedisMaxTimes; count++ { + if _, err = conn.Do("ping"); err == nil { + return nil + } + + time.Sleep(time.Duration(count+4) * time.Second) + } + + return fmt.Errorf("connect to redis server timeout: %s", err.Error()) +} + +// generate the job stats data +func generateResult(j *work.Job, jobKind string, isUnique bool) *job.Stats { + return &job.Stats{ + Info: &job.StatsInfo{ + JobID: j.ID, + JobName: j.Name, + JobKind: jobKind, + IsUnique: isUnique, + Status: job.PendingStatus.String(), + EnqueueTime: j.EnqueuedAt, + UpdateTime: time.Now().Unix(), + RefLink: fmt.Sprintf("/api/v1/jobs/%s", j.ID), + }, + } +} diff --git a/src/jobservice/pool/redis_pool_test.go b/src/jobservice/worker/cworker/redis_pool_test.go similarity index 92% rename from src/jobservice/pool/redis_pool_test.go rename to src/jobservice/worker/cworker/redis_pool_test.go index e4b9146da..d2b4e1bed 100644 --- a/src/jobservice/pool/redis_pool_test.go +++ b/src/jobservice/worker/cworker/redis_pool_test.go @@ -11,7 +11,7 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
-package pool +package cworker import ( "context" @@ -204,7 +204,7 @@ func TestStopJob(t *testing.T) { if err != nil { t.Fatal(err) } - if stats.Stats.Status != job.JobStatusRunning { + if stats.Stats.Status != job.RunningStatus { t.Fatalf("expect job running but got %s", stats.Stats.Status) } if err := wp.StopJob(genericJob.Stats.JobID); err != nil { @@ -251,7 +251,7 @@ func TestCancelJob(t *testing.T) { if err != nil { t.Fatal(err) } - if stats.Stats.Status != job.JobStatusRunning { + if stats.Stats.Status != job.RunningStatus { t.Fatalf("expect job running but got %s", stats.Stats.Status) } @@ -300,7 +300,7 @@ func TestCancelJob(t *testing.T) { if err != nil { t.Fatal(err) } - if err := wp.RegisterHook(res.Stats.JobID, ts.URL); err != nil { + if err := wp.RegisterHook(res.Info.JobID, ts.URL); err != nil { t.Fatal(err) } // make sure it's running @@ -309,38 +309,38 @@ func TestCancelJob(t *testing.T) { CHECK: <-timer.C - if check, err := wp.GetJobStats(res.Stats.JobID); err != nil { + if check, err := wp.GetJobStats(res.Info.JobID); err != nil { t.Fatal(err) } else { - if check.Stats.Status != job.JobStatusRunning { + if check.Info.Status != job.RunningStatus { timer.Reset(1 * time.Second) goto CHECK } } // cancel - if err := wp.CancelJob(res.Stats.JobID); err != nil { + if err := wp.CancelJob(res.Info.JobID); err != nil { t.Fatal(err) } <-time.After(5 * time.Second) - updatedRes, err := wp.GetJobStats(res.Stats.JobID) + updatedRes, err := wp.GetJobStats(res.Info.JobID) if err != nil { t.Fatal(err) } - if updatedRes.Stats.Status != job.JobStatusCancelled { - t.Fatalf("expect job staus '%s' but got '%s'\n", job.JobStatusCancelled, updatedRes.Stats.Status) + if updatedRes.Info.Status != job.JobStatusCancelled { + t.Fatalf("expect job staus '%s' but got '%s'\n", job.JobStatusCancelled, updatedRes.Info.Status) } - if updatedRes.Stats.DieAt == 0 { + if updatedRes.Info.DieAt == 0 { t.Fatalf("expect none zero 'DieAt' but got 0 value") } // retry - if err := 
wp.RetryJob(updatedRes.Stats.JobID); err != nil { + if err := wp.RetryJob(updatedRes.Info.JobID); err != nil { t.Fatal(err) } }*/ -func createRedisWorkerPool() (*GoCraftWorkPool, *env.Context, context.CancelFunc) { +func createRedisWorkerPool() (*worker, *env.Context, context.CancelFunc) { ctx := context.Background() ctx, cancel := context.WithCancel(ctx) envCtx := &env.Context{ @@ -350,7 +350,7 @@ func createRedisWorkerPool() (*GoCraftWorkPool, *env.Context, context.CancelFunc JobContext: newContext(ctx), } - return NewGoCraftWorkPool(envCtx, tests.GiveMeTestNamespace(), 3, rPool), envCtx, cancel + return NewWorker(envCtx, tests.GiveMeTestNamespace(), 3, rPool), envCtx, cancel } type fakeJob struct{} @@ -468,7 +468,7 @@ func newContext(sysCtx context.Context) *fakeContext { } } -// Build implements the same method in env.JobContext interface +// Build implements the same method in env.Context interface // This func will build the job execution context before running func (c *fakeContext) Build(dep env.JobData) (env.JobContext, error) { jContext := &fakeContext{ @@ -521,13 +521,13 @@ func (c *fakeContext) Build(dep env.JobData) (env.JobContext, error) { return jContext, nil } -// Get implements the same method in env.JobContext interface +// Get implements the same method in env.Context interface func (c *fakeContext) Get(prop string) (interface{}, bool) { v, ok := c.properties[prop] return v, ok } -// SystemContext implements the same method in env.JobContext interface +// SystemContext implements the same method in env.Context interface func (c *fakeContext) SystemContext() context.Context { return c.sysContext } diff --git a/src/jobservice/worker/interface.go b/src/jobservice/worker/interface.go new file mode 100644 index 000000000..7c63926f1 --- /dev/null +++ b/src/jobservice/worker/interface.go @@ -0,0 +1,127 @@ +// Copyright Project Harbor Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in 
// compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package worker

import (
	"github.com/goharbor/harbor/src/jobservice/common/query"
	"github.com/goharbor/harbor/src/jobservice/job"
)

// Interface for worker.
// More like a driver to transparent the lower queue.
type Interface interface {
	// Start to serve
	Start() error

	// Register multiple jobs.
	//
	// jobs	map[string]interface{}: job map, key is job name and value is job handler.
	//
	// Return:
	//   error if failed to register
	RegisterJobs(jobs map[string]interface{}) error

	// Enqueue job
	//
	// jobName string        : the name of enqueuing job
	// params job.Parameters : parameters of enqueuing job
	// isUnique bool         : specify if duplicated job will be discarded
	// webHook string        : the server URL to receive hook events
	//
	// Returns:
	//  *job.Stats : the stats of enqueuing job if succeed
	//  error      : if failed to enqueue
	Enqueue(jobName string, params job.Parameters, isUnique bool, webHook string) (*job.Stats, error)

	// Schedule job to run after the specified interval (seconds).
	//
	// jobName string         : the name of enqueuing job
	// runAfterSeconds uint64 : the waiting interval with seconds
	// params job.Parameters  : parameters of enqueuing job
	// isUnique bool          : specify if duplicated job will be discarded
	// webHook string         : the server URL to receive hook events
	//
	// Returns:
	//  *job.Stats : the stats of enqueuing job if succeed
	//  error      : if failed to enqueue
	Schedule(jobName string, params job.Parameters, runAfterSeconds uint64, isUnique bool, webHook string) (*job.Stats, error)

	// Schedule the job periodically running.
	//
	// jobName string        : the name of enqueuing job
	// params job.Parameters : parameters of enqueuing job
	// cronSetting string    : the periodic duration with cron style like '0 * * * * *'
	// isUnique bool         : specify if duplicated job will be discarded
	// webHook string        : the server URL to receive hook events
	//
	// Returns:
	//  *job.Stats : the stats of enqueuing job if succeed
	//  error      : if failed to enqueue
	PeriodicallyEnqueue(jobName string, params job.Parameters, cronSetting string, isUnique bool, webHook string) (*job.Stats, error)

	// Return the status info of the worker.
	//
	// Returns:
	//  *Stats : the stats info of all running pools
	//  error  : failed to check
	Stats() (*Stats, error)

	// Check if the job has been already registered.
	//
	// name string : name of job
	//
	// Returns:
	//  interface{} : the job type of the known job if it's existing
	//  bool        : true if a job is registered with the name
	IsKnownJob(name string) (interface{}, bool)

	// Validate the parameters of the known job
	//
	// jobType interface{}   : type of known job
	// params job.Parameters : parameters of known job
	//
	// Return:
	//  error if parameters are not valid
	ValidateJobParameters(jobType interface{}, params job.Parameters) error

	// Stop the job
	//
	// jobID string : ID of the enqueued job
	//
	// Return:
	//  error       : error returned if meet any problems
	StopJob(jobID string) error

	// Retry the job
	//
	// jobID string : ID of the enqueued job
	//
	// Return:
	//  error       : error returned if meet any problems
	RetryJob(jobID string) error

	// Get the scheduled jobs by page
	// The page number in the query will be ignored, default 20 is used. This is the limitation of backend lib.
	// NOTE(review): 20 looks like the fixed page size of the backend lib, so
	// "page size" is presumably what is ignored here - confirm.
	// The total number is also returned.
	//
	// query *query.Parameter : query parameters
	//
	// Return:
	//   []*job.Stats : list of scheduled jobs
	//   int64        : the total number of scheduled jobs
	//   error        : non nil error if meet any issues
	ScheduledJobs(query *query.Parameter) ([]*job.Stats, int64, error)
}
package worker

// Stats represents the health and status of all the running worker pools.
type Stats struct {
	// Pools holds one entry per reported worker pool.
	Pools []*StatsData `json:"worker_pools"`
}

// StatsData represents the health and status of a single worker pool.
type StatsData struct {
	// WorkerPoolID is the identifier of the worker pool.
	WorkerPoolID string `json:"worker_pool_id"`
	// StartedAt is when the pool was started (presumably unix seconds, like HeartbeatAt - confirm).
	StartedAt int64 `json:"started_at"`
	// HeartbeatAt is the unix time (seconds) of the latest heartbeat of the pool.
	HeartbeatAt int64 `json:"heartbeat_at"`
	// JobNames lists the names of the jobs the pool reports.
	JobNames []string `json:"job_names"`
	// Concurrency is the concurrency reported by the pool.
	Concurrency uint `json:"concurrency"`
	// Status is the health status of the pool, e.g. "Healthy" or "Dead".
	Status string `json:"status"`
}