mirror of
https://github.com/goharbor/harbor.git
synced 2024-12-19 15:17:43 +01:00
fix(jobservice):fix job stats NOT_FOUND issue
Signed-off-by: Steven Zou <szou@vmware.com>
This commit is contained in:
parent
f2d5f4e256
commit
0ccea49c18
@ -5,13 +5,13 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"net/http"
|
"net/http"
|
||||||
"os"
|
"os"
|
||||||
|
"reflect"
|
||||||
"strconv"
|
"strconv"
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/goharbor/harbor/src/jobservice/job"
|
"github.com/goharbor/harbor/src/jobservice/job"
|
||||||
"github.com/goharbor/harbor/src/jobservice/logger"
|
"github.com/goharbor/harbor/src/jobservice/logger"
|
||||||
"github.com/pkg/errors"
|
"github.com/goharbor/harbor/src/lib/errors"
|
||||||
"reflect"
|
|
||||||
"time"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
// SlackJob implements the job interface; it sends notifications to Slack via Slack incoming webhooks.
|
// SlackJob implements the job interface; it sends notifications to Slack via Slack incoming webhooks.
|
||||||
|
@ -20,13 +20,10 @@ import (
|
|||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/goharbor/harbor/src/jobservice/errs"
|
|
||||||
|
|
||||||
"github.com/goharbor/harbor/src/jobservice/common/rds"
|
|
||||||
|
|
||||||
"github.com/goharbor/harbor/src/jobservice/common/list"
|
"github.com/goharbor/harbor/src/jobservice/common/list"
|
||||||
|
"github.com/goharbor/harbor/src/jobservice/common/rds"
|
||||||
"github.com/goharbor/harbor/src/jobservice/env"
|
"github.com/goharbor/harbor/src/jobservice/env"
|
||||||
|
"github.com/goharbor/harbor/src/jobservice/errs"
|
||||||
"github.com/goharbor/harbor/src/jobservice/job"
|
"github.com/goharbor/harbor/src/jobservice/job"
|
||||||
"github.com/goharbor/harbor/src/jobservice/logger"
|
"github.com/goharbor/harbor/src/jobservice/logger"
|
||||||
"github.com/goharbor/harbor/src/lib/errors"
|
"github.com/goharbor/harbor/src/lib/errors"
|
||||||
|
@ -17,6 +17,7 @@ package runner
|
|||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"runtime"
|
"runtime"
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/gocraft/work"
|
"github.com/gocraft/work"
|
||||||
"github.com/goharbor/harbor/src/jobservice/env"
|
"github.com/goharbor/harbor/src/jobservice/env"
|
||||||
@ -28,6 +29,10 @@ import (
|
|||||||
"github.com/goharbor/harbor/src/lib/errors"
|
"github.com/goharbor/harbor/src/lib/errors"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// maxTrackRetries caps how many additional times the job tracker lookup is
// attempted after the first failed call.
const maxTrackRetries = 6
|
||||||
|
|
||||||
// RedisJob is a job wrapper to wrap the job.Interface to the style which can be recognized by the redis worker.
|
// RedisJob is a job wrapper to wrap the job.Interface to the style which can be recognized by the redis worker.
|
||||||
type RedisJob struct {
|
type RedisJob struct {
|
||||||
job interface{} // the real job implementation
|
job interface{} // the real job implementation
|
||||||
@ -60,15 +65,32 @@ func (rj *RedisJob) Run(j *work.Job) (err error) {
|
|||||||
jID = eID
|
jID = eID
|
||||||
}
|
}
|
||||||
|
|
||||||
if tracker, err = rj.ctl.Track(jID); err != nil {
|
// As the job stats may not be ready yet when the job starts executing (corner case),
|
||||||
// log error
|
// the track call here may get a NOT_FOUND error. In that case, retry in order to recover.
|
||||||
|
for retried := 0; retried <= maxTrackRetries; retried++ {
|
||||||
|
tracker, err = rj.ctl.Track(jID)
|
||||||
|
if err == nil {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
if errs.IsObjectNotFoundError(err) {
|
||||||
|
if retried < maxTrackRetries {
|
||||||
|
// Still have chance to re-track the given job.
|
||||||
|
// Hold for a while and retry
|
||||||
|
b := backoff(retried)
|
||||||
|
logger.Errorf("Track job %s: stats may not have been ready yet, hold for %d ms and retry again", jID, b)
|
||||||
|
<-time.After(time.Duration(b) * time.Millisecond)
|
||||||
|
continue
|
||||||
|
} else {
|
||||||
|
// Exit and never retry.
|
||||||
|
// Return directly without retrying, as there is no way to restore the stats.
|
||||||
|
j.Fails = 10000000000 // never retry
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Log error and exit
|
||||||
logger.Errorf("Job '%s:%s' exit with error: failed to get job tracker: %s", j.Name, j.ID, err)
|
logger.Errorf("Job '%s:%s' exit with error: failed to get job tracker: %s", j.Name, j.ID, err)
|
||||||
|
|
||||||
// Pay attentions here, if the job stats is lost (NOTFOUND error returned),
|
|
||||||
// directly return without retry again as we have no way to restore the stats again.
|
|
||||||
if errs.IsObjectNotFoundError(err) {
|
|
||||||
j.Fails = 10000000000 // never retry
|
|
||||||
}
|
|
||||||
// ELSE:
|
// ELSE:
|
||||||
// As tracker creation failed, there is no way to mark the job status change.
|
// As tracker creation failed, there is no way to mark the job status change.
|
||||||
// Also a non nil error return consumes a fail. If all retries are failed here,
|
// Also a non nil error return consumes a fail. If all retries are failed here,
|
||||||
@ -215,3 +237,15 @@ func isPeriodicJobExecution(j *work.Job) (string, bool) {
|
|||||||
// bp returns a pointer to a bool holding the same value as b.
// Each call yields a distinct pointer.
func bp(b bool) *bool {
	v := b

	return &v
}
|
||||||
|
|
||||||
|
// backoff maps a retry counter x to a hold time.
//
// It evaluates the quadratic y = -111x^2 + 666x + 500 and returns its
// absolute value, so the result is never negative for ordinary inputs.
// The curve peaks at x = 3 (1499) and falls off symmetrically on both sides.
func backoff(x int) int {
	const (
		quadratic = -111
		linear    = 666
		offset    = 500
	)

	// Horner form of quadratic*x*x + linear*x + offset.
	delay := (quadratic*x+linear)*x + offset
	if delay >= 0 {
		return delay
	}

	return -delay
}
|
||||||
|
Loading…
Reference in New Issue
Block a user