Merge pull request #14542 from ninjadq/add_task_info_in_exporter

Add task info in exporter
This commit is contained in:
Qian Deng 2021-04-07 18:17:26 +08:00 committed by GitHub
commit c5d12ce8ee
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 296 additions and 49 deletions

View File

@ -1,12 +1,16 @@
LOG_LEVEL={{log_level}}
HARBOR_EXPORTER_PORT=8080
HARBOR_EXPORTER_METRICS_PATH=/metrics
HARBOR_EXPORTER_METRICS_ENABLED=true
HARBOR_EXPORTER_MAX_REQUESTS=30
HARBOR_EXPORTER_CACHE_TIME=30
HARBOR_EXPORTER_CACHE_TIME=23
HARBOR_EXPORTER_CACHE_CLEAN_INTERVAL=14400
HARBOR_METRIC_NAMESPACE=harbor
HARBOR_METRIC_SUBSYSTEM=exporter
HARBOR_SERVICE_HOST=core
HARBOR_REDIS_URL={{redis_url_js}}
HARBOR_REDIS_NAMESPACE=harbor_job_service_namespace
HARBOR_REDIS_TIMEOUT=3600
{%if internal_tls.enabled %}
HARBOR_SERVICE_PORT=8443
HARBOR_SERVICE_SCHEME=https

View File

@ -47,6 +47,12 @@ func main() {
},
})
exporter.InitBackendWorker(&exporter.RedisPoolConfig{
URL: viper.GetString("redis.url"),
Namespace: viper.GetString("redis.namespace"),
IdleTimeoutSecond: viper.GetInt("redis.timeout"),
})
exporterOpt := &exporter.Opt{
Port: viper.GetInt("exporter.port"),
MetricsPath: viper.GetString("exporter.metrics_path"),

View File

@ -24,7 +24,7 @@ var (
Name: "info",
Help: "the information of jobservice",
},
[]string{"node", "pool_id", "workers"},
[]string{"node", "pool", "workers"},
)
// JobserviceTotalTask used for collect data
JobserviceTotalTask = prometheus.NewCounterVec(
@ -32,7 +32,7 @@ var (
Namespace: os.Getenv(NamespaceEnvKey),
Subsystem: os.Getenv(SubsystemEnvKey),
Name: "task_total",
Help: "The total number of requests",
Help: "The number of processed tasks",
},
[]string{"type", "status"},
)
@ -42,7 +42,7 @@ var (
Namespace: os.Getenv(NamespaceEnvKey),
Subsystem: os.Getenv(SubsystemEnvKey),
Name: "task_process_time_seconds",
Help: "The time duration of the task process time",
Help: "The time duration of the task processing time",
Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001},
},
[]string{"type", "status"})

View File

@ -86,11 +86,8 @@ func CacheInit(opt *Opt) {
cacheCleanInterval = defaultCacheCleanInterval
}
ticker := time.NewTicker(time.Duration(cacheCleanInterval) * time.Second)
for {
select {
case <-ticker.C:
StartCacheCleaner()
}
for range ticker.C {
StartCacheCleaner()
}
}()
}

View File

@ -6,18 +6,6 @@ import (
var (
namespace = "harbor"
subsystem = "exporter"
)
var (
scrapeDuration = typedDesc{
desc: newDescWithLables(subsystem, "collector_duration_seconds", "Duration of a collector scrape", "collector"),
valueType: prometheus.GaugeValue,
}
scrapeSuccess = typedDesc{
desc: newDescWithLables(subsystem, "collector_success", " Whether a collector succeeded.", "collector"),
valueType: prometheus.GaugeValue,
}
)
func newDesc(subsystem, name, help string) *prometheus.Desc {
@ -47,9 +35,8 @@ func (d *typedDesc) Desc() *prometheus.Desc {
return d.desc
}
// // ErrNoData indicates the collector found no data to collect, but had no other error.
// var ErrNoData = errors.New("collector returned no data")
// func IsNoDataError(err error) bool {
// return err == ErrNoData
// }
type collector interface {
prometheus.Collector
// Return the name of the collector
GetName() string
}

View File

@ -33,9 +33,11 @@ func NewExporter(opt *Opt) *Exporter {
if opt.CacheDuration > 0 {
CacheInit(opt)
}
exporter.RegisterCollector(healthCollectorName, NewHealthCollect(hbrCli))
exporter.RegisterCollector(systemInfoCollectorName, NewSystemInfoCollector(hbrCli))
exporter.RegisterCollector(ProjectCollectorName, NewProjectCollector())
exporter.RegisterCollector(NewHealthCollect(hbrCli),
NewSystemInfoCollector(hbrCli),
NewProjectCollector(),
NewJobServiceCollector())
r := prometheus.NewRegistry()
r.MustRegister(exporter)
exporter.Server = newServer(opt, r)
@ -51,12 +53,15 @@ type Exporter struct {
}
// RegisterCollector register a collector to expoter
func (e *Exporter) RegisterCollector(name string, c prometheus.Collector) error {
if _, ok := e.collectors[name]; ok {
return errors.New("Collector name is already registered")
func (e *Exporter) RegisterCollector(collectors ...collector) error {
for _, c := range collectors {
name := c.GetName()
if _, ok := e.collectors[name]; ok {
return errors.New("collector name is already registered")
}
e.collectors[name] = c
log.Infof("collector %s registered ...", name)
}
e.collectors[name] = c
log.Infof("collector %s registered ...", name)
return nil
}
@ -92,3 +97,11 @@ func (e *Exporter) Collect(c chan<- prometheus.Metric) {
v.Collect(c)
}
}
func checkErr(err error, arg string) {
if err == nil {
return
}
log.Errorf("%s: %v", arg, err)
}

View File

@ -48,6 +48,12 @@ func (hc *HealthCollector) Collect(c chan<- prometheus.Metric) {
c <- m
}
}
// GetName returns the name of the health collector
func (hc *HealthCollector) GetName() string {
return healthCollectorName
}
func (hc *HealthCollector) getHealthStatus() []prometheus.Metric {
if CacheEnabled() {
value, ok := CacheGet(healthCollectorName)

View File

@ -0,0 +1,117 @@
package exporter
import (
"github.com/gomodule/redigo/redis"
"github.com/prometheus/client_golang/prometheus"
)
// JobServiceCollectorName ...
const JobServiceCollectorName = "JobServiceCollector"
var (
jobServiceTaskQueueSize = typedDesc{
desc: newDescWithLables("", "task_queue_size", "Total number of tasks", "type"),
valueType: prometheus.GaugeValue,
}
jobServiceTaskQueueLatency = typedDesc{
desc: newDescWithLables("", "task_queue_latency", "how long ago the next job to be processed was enqueued", "type"),
valueType: prometheus.GaugeValue,
}
jobServiceConcurrency = typedDesc{
desc: newDescWithLables("", "task_concurrency", "Total number of concurrency on a pool", "type", "pool"),
valueType: prometheus.GaugeValue,
}
jobServiceScheduledJobTotal = typedDesc{
desc: newDesc("", "task_scheduled_total", "total number of scheduled job"),
valueType: prometheus.GaugeValue,
}
)
// NewJobServiceCollector ...
func NewJobServiceCollector() *JobServiceCollector {
return &JobServiceCollector{Namespace: namespace}
}
// JobServiceCollector ...
type JobServiceCollector struct {
Namespace string
}
// Describe implements prometheus.Collector
func (hc *JobServiceCollector) Describe(c chan<- *prometheus.Desc) {
for _, jd := range hc.getDescribeInfo() {
c <- jd
}
}
// Collect implements prometheus.Collector
func (hc *JobServiceCollector) Collect(c chan<- prometheus.Metric) {
for _, m := range hc.getJobserviceInfo() {
c <- m
}
}
// GetName returns the name of the job service collector
func (hc *JobServiceCollector) GetName() string {
return JobServiceCollectorName
}
func (hc *JobServiceCollector) getDescribeInfo() []*prometheus.Desc {
return []*prometheus.Desc{
jobServiceTaskQueueSize.Desc(),
jobServiceTaskQueueLatency.Desc(),
jobServiceConcurrency.Desc(),
jobServiceScheduledJobTotal.Desc(),
}
}
func (hc *JobServiceCollector) getJobserviceInfo() []prometheus.Metric {
if CacheEnabled() {
value, ok := CacheGet(JobServiceCollectorName)
if ok {
return value.([]prometheus.Metric)
}
}
// Get concurrency info via raw redis client
result := getConccurrentInfo()
// get info via jobservice client
cli := GetBackendWorker()
// get queue info
qs, err := cli.Queues()
checkErr(err, "error when get work task queues info")
for _, q := range qs {
result = append(result, jobServiceTaskQueueSize.MustNewConstMetric(float64(q.Count), q.JobName))
result = append(result, jobServiceTaskQueueLatency.MustNewConstMetric(float64(q.Latency), q.JobName))
}
// get scheduled job info
_, total, err := cli.ScheduledJobs(0)
checkErr(err, "error when get scheduled job number")
result = append(result, jobServiceScheduledJobTotal.MustNewConstMetric(float64(total)))
if CacheEnabled() {
CachePut(JobServiceCollectorName, result)
}
return result
}
func getConccurrentInfo() []prometheus.Metric {
rdsConn := GetRedisPool().Get()
defer rdsConn.Close()
result := []prometheus.Metric{}
knownJobvalues, err := redis.Values(rdsConn.Do("SMEMBERS", redisKeyKnownJobs(jsNamespace)))
checkErr(err, "err when get known jobs")
for _, v := range knownJobvalues {
job := string(v.([]byte))
lockInfovalues, err := redis.Values(rdsConn.Do("HGETALL", redisKeyJobsLockInfo(jsNamespace, job)))
checkErr(err, "err when get job lock info")
for i := 0; i < len(lockInfovalues); i += 2 {
key, _ := redis.String(lockInfovalues[i], nil)
value, _ := redis.Float64(lockInfovalues[i+1], nil)
result = append(result, jobServiceConcurrency.MustNewConstMetric(value, job, key))
}
}
return result
}

View File

@ -0,0 +1,75 @@
package exporter
import (
"fmt"
"time"
"github.com/gocraft/work"
redislib "github.com/goharbor/harbor/src/lib/redis"
"github.com/gomodule/redigo/redis"
)
const (
dialConnectionTimeout = 30 * time.Second
dialReadTimeout = 10 * time.Second
dialWriteTimeout = 10 * time.Second
pageItemNum = 20.0
)
var (
redisPool *redis.Pool
jsClient *work.Client
jsNamespace string
)
// RedisPoolConfig ...
type RedisPoolConfig struct {
URL string
Namespace string
IdleTimeoutSecond int
}
// InitBackendWorker initiate backend worker
func InitBackendWorker(redisPoolConfig *RedisPoolConfig) {
pool, err := redislib.GetRedisPool("JobService", redisPoolConfig.URL, &redislib.PoolParam{
PoolMaxIdle: 6,
PoolIdleTimeout: time.Duration(redisPoolConfig.IdleTimeoutSecond) * time.Second,
DialConnectionTimeout: dialConnectionTimeout,
DialReadTimeout: dialReadTimeout,
DialWriteTimeout: dialWriteTimeout,
})
if err != nil {
panic(err)
}
redisPool = pool
jsNamespace = fmt.Sprintf("{%s}", redisPoolConfig.Namespace)
// Start the backend worker
jsClient = work.NewClient(jsNamespace, pool)
}
// GetBackendWorker ...
func GetBackendWorker() *work.Client {
return jsClient
}
// GetRedisPool ...
func GetRedisPool() *redis.Pool {
return redisPool
}
func redisNamespacePrefix(namespace string) string {
l := len(namespace)
if (l > 0) && (namespace[l-1] != ':') {
namespace = namespace + ":"
}
return namespace
}
func redisKeyJobsLockInfo(namespace, jobName string) string {
return redisNamespacePrefix(namespace) + "jobs:" + jobName + ":lock_info"
}
func redisKeyKnownJobs(namespace string) string {
return redisNamespacePrefix(namespace) + "known_jobs"
}

View File

@ -106,6 +106,11 @@ func (hc *ProjectCollector) Collect(c chan<- prometheus.Metric) {
}
}
// GetName returns the name of the project info collector
func (hc *ProjectCollector) GetName() string {
return ProjectCollectorName
}
type projectOverviewInfo struct {
projectTotals []projectCount
ProjectMap map[int64]*projectInfo
@ -227,11 +232,3 @@ func updateProjectArtifactInfo(projectMap map[int64]*projectInfo) {
}
}
}
func checkErr(err error, arg string) {
if err == nil {
return
}
log.Errorf("%s: %v", arg, err)
}

View File

@ -41,7 +41,13 @@ func setupTest(t *testing.T) {
// register projAdmin and assign project admin role
aliceID, err := dao.Register(alice)
if err != nil {
t.Errorf("register user error %v", err)
}
bobID, err := dao.Register(bob)
if err != nil {
t.Errorf("register user error %v", err)
}
eveID, err := dao.Register(eve)
if err != nil {
t.Errorf("register user error %v", err)
@ -50,6 +56,9 @@ func setupTest(t *testing.T) {
// Create Project
ctx := orm.NewContext(context.Background(), dao.GetOrmer())
proID1, err := proctl.Ctl.Create(ctx, &testPro1)
if err != nil {
t.Errorf("project creating %v", err)
}
proID2, err := proctl.Ctl.Create(ctx, &testPro2)
if err != nil {
t.Errorf("project creating %v", err)
@ -67,6 +76,9 @@ func setupTest(t *testing.T) {
// Add repo to project
repo1.ProjectID = testPro1.ProjectID
repo1ID, err := repository.Mgr.Create(ctx, &repo1)
if err != nil {
t.Errorf("add repo error %v", err)
}
repo1.RepositoryID = repo1ID
repo2.ProjectID = testPro2.ProjectID
repo2ID, err := repository.Mgr.Create(ctx, &repo2)
@ -79,6 +91,9 @@ func setupTest(t *testing.T) {
art1.RepositoryID = repo1ID
art1.PushTime = time.Now()
_, err = artifact.Mgr.Create(ctx, &art1)
if err != nil {
t.Errorf("add repo error %v", err)
}
art2.ProjectID = testPro2.ProjectID
art2.RepositoryID = repo2ID
@ -91,7 +106,13 @@ func setupTest(t *testing.T) {
pmIDs = make([]int, 0)
alice.UserID, bob.UserID, eve.UserID = int(aliceID), int(bobID), int(eveID)
p1m1ID, err := project.AddProjectMember(models.Member{ProjectID: proID1, Role: common.RoleDeveloper, EntityID: int(aliceID), EntityType: common.UserMember})
if err != nil {
t.Errorf("add project member error %v", err)
}
p2m1ID, err := project.AddProjectMember(models.Member{ProjectID: proID2, Role: common.RoleMaintainer, EntityID: int(bobID), EntityType: common.UserMember})
if err != nil {
t.Errorf("add project member error %v", err)
}
p2m2ID, err := project.AddProjectMember(models.Member{ProjectID: proID2, Role: common.RoleMaintainer, EntityID: int(eveID), EntityType: common.UserMember})
if err != nil {
t.Errorf("add project member error %v", err)

View File

@ -48,6 +48,11 @@ func (hc *SystemInfoCollector) Collect(c chan<- prometheus.Metric) {
}
}
// GetName returns the name of the system info collector
func (hc *SystemInfoCollector) GetName() string {
return systemInfoCollectorName
}
func (hc *SystemInfoCollector) getSysInfo() []prometheus.Metric {
if CacheEnabled() {
value, ok := CacheGet(systemInfoCollectorName)

View File

@ -10,12 +10,27 @@ class TestMetricsExist(unittest.TestCase):
golang_basic_metrics = ["go_gc_duration_seconds", "go_goroutines", "go_info", "go_memstats_alloc_bytes"]
metrics = {
'core': golang_basic_metrics + ["harbor_core_http_request_total", "harbor_core_http_request_duration_seconds",
"harbor_core_http_inflight_requests"],
'core': golang_basic_metrics + [
"harbor_core_http_request_total",
"harbor_core_http_request_duration_seconds",
"harbor_core_http_inflight_requests"],
'registry': golang_basic_metrics + ["registry_http_in_flight_requests"],
'exporter': golang_basic_metrics + ["artifact_pulled",
"harbor_project_artifact_total", "harbor_project_member_total", "harbor_project_quota_byte",
"harbor_project_repo_total", "harbor_project_total", "project_quota_usage_byte"]
'exporter': golang_basic_metrics + [
"artifact_pulled",
"harbor_project_artifact_total",
"harbor_project_member_total",
"harbor_project_quota_byte",
"harbor_project_repo_total",
"harbor_project_total",
"project_quota_usage_byte",
"harbor_task_concurrency",
"harbor_task_queue_latency",
"harbor_task_queue_size",
"harbor_task_scheduled_total"],
'jobservice': golang_basic_metrics + [
"harbor_jobservice_info",
"harbor_jobservice_task_process_time_seconds",
"harbor_jobservice_task_total"]
}
def get_metrics(self):
@ -23,10 +38,14 @@ class TestMetricsExist(unittest.TestCase):
exporter_res = requests.get(metrics_url)
core_res = requests.get(metrics_url, params={'comp': 'core'})
reg_res = requests.get(metrics_url, params={'comp': 'registry'})
return [('exporter', exporter_res.text), ('core', core_res.text), ('registry', reg_res.text)]
js_res = requests.get(metrics_url, params={'comp': 'jobservice'})
return [('exporter', exporter_res.text), ('core', core_res.text), ('registry', reg_res.text), ('jobservice', js_res.text)]
def testMetricsExist(self):
for k, metric_text in self.get_metrics():
for metric_name in self.metrics[k]:
print("Metric {} should exist in {} ".format(metric_name, k))
self.assertTrue(metric_name in metric_text)