diff --git a/docs/swagger.yaml b/docs/swagger.yaml index b33e9ebc1..eef1cce21 100644 --- a/docs/swagger.yaml +++ b/docs/swagger.yaml @@ -19,6 +19,18 @@ securityDefinitions: security: - basicAuth: [] paths: + /health: + get: + summary: 'Health check API' + description: | + The endpoint returns the health status of the system. + tags: + - Products + responses: + '200': + description: The system health status. + schema: + $ref: '#/definitions/OverallHealthStatus' /search: get: summary: 'Search for projects, repositories and helm charts' @@ -4514,3 +4526,27 @@ definitions: description: A list of label items: $ref: '#/definitions/Label' + OverallHealthStatus: + type: object + description: The system health status + properties: + status: + type: string + description: The overall health status. It is "healthy" only when all the components' status are "healthy" + components: + type: array + items: + $ref: '#/definitions/ComponentHealthStatus' + ComponentHealthStatus: + type: object + description: The health status of component + properties: + name: + type: string + description: The component name + status: + type: string + description: The health status of component + error: + type: string + description: (optional) The error message when the status is "unhealthy" diff --git a/src/common/const.go b/src/common/const.go index 4cb2d1c84..5dc0325e1 100644 --- a/src/common/const.go +++ b/src/common/const.go @@ -115,6 +115,9 @@ const ( WithChartMuseum = "with_chartmuseum" ChartRepoURL = "chart_repository_url" DefaultChartRepoURL = "http://chartmuseum:9999" + DefaultPortalURL = "http://portal" + DefaultRegistryCtlURL = "http://registryctl:8080" + DefaultClairHealthCheckServerURL = "http://clair:6061" ) // Shared variable, not allowed to modify diff --git a/src/core/api/base.go b/src/core/api/base.go index 4e8a8ad57..22bc2c059 100644 --- a/src/core/api/base.go +++ b/src/core/api/base.go @@ -125,6 +125,7 @@ func (b *BaseController) WriteYamlData(object interface{}) { // Init related
objects/configurations for the API controllers func Init() error { + registerHealthCheckers() // If chart repository is not enabled then directly return if !config.WithChartMuseum() { return nil diff --git a/src/core/api/harborapi_test.go b/src/core/api/harborapi_test.go index 17e1a1e58..9b501f108 100644 --- a/src/core/api/harborapi_test.go +++ b/src/core/api/harborapi_test.go @@ -96,6 +96,7 @@ func init() { filter.Init() beego.InsertFilter("/*", beego.BeforeRouter, filter.SecurityFilter) + beego.Router("/api/health", &HealthAPI{}, "get:CheckHealth") beego.Router("/api/search/", &SearchAPI{}) beego.Router("/api/projects/", &ProjectAPI{}, "get:List;post:Post;head:Head") beego.Router("/api/projects/:id", &ProjectAPI{}, "delete:Delete;get:Get;put:Put") diff --git a/src/core/api/health.go b/src/core/api/health.go new file mode 100644 index 000000000..1a43ab68e --- /dev/null +++ b/src/core/api/health.go @@ -0,0 +1,323 @@ +// Copyright 2019 Project Harbor Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+package api
+
+import (
+	"errors"
+	"fmt"
+	"io/ioutil"
+	"net/http"
+	"sync"
+	"time"
+
+	"github.com/goharbor/harbor/src/common/utils"
+
+	"github.com/goharbor/harbor/src/common/dao"
+	httputil "github.com/goharbor/harbor/src/common/http"
+	"github.com/goharbor/harbor/src/common/utils/log"
+	"github.com/goharbor/harbor/src/core/config"
+
+	"github.com/docker/distribution/health"
+	"github.com/gomodule/redigo/redis"
+)
+
+var (
+	// timeout is the longest CheckHealth waits for a single component
+	// before reporting it as unhealthy.
+	timeout = 60 * time.Second
+	// healthCheckerRegistry maps a component name to the checker used for it.
+	// It is populated by registerHealthCheckers.
+	healthCheckerRegistry = map[string]health.Checker{}
+)
+
+// overallHealthStatus is the JSON document returned by "/api/health".
+type overallHealthStatus struct {
+	Status     string                   `json:"status"`
+	Components []*componentHealthStatus `json:"components"`
+}
+
+// componentHealthStatus is the health report of one component. Error is
+// omitted from the JSON output when it is empty.
+type componentHealthStatus struct {
+	Name   string `json:"name"`
+	Status string `json:"status"`
+	Error  string `json:"error,omitempty"`
+}
+
+// healthy is a health flag that renders itself as "healthy" or "unhealthy".
+type healthy bool
+
+// String returns "healthy" when h is true and "unhealthy" otherwise.
+func (h healthy) String() string {
+	if h {
+		return "healthy"
+	}
+	return "unhealthy"
+}
+
+// HealthAPI handles the request for "/api/health"
+type HealthAPI struct {
+	BaseController
+}
+
+// CheckHealth checks the health of system. Every registered checker is run in
+// its own goroutine; the overall status is "healthy" only when every component
+// reports "healthy". Components are listed in the order their results arrive.
+func (h *HealthAPI) CheckHealth() {
+	var isHealthy healthy = true
+	// Non-nil even when empty so the JSON output is "[]" rather than "null".
+	components := make([]*componentHealthStatus, 0, len(healthCheckerRegistry))
+	// Buffered with one slot per checker so no check goroutine blocks on send.
+	c := make(chan *componentHealthStatus, len(healthCheckerRegistry))
+	for name, checker := range healthCheckerRegistry {
+		go check(name, checker, timeout, c)
+	}
+	for i := 0; i < len(healthCheckerRegistry); i++ {
+		componentStatus := <-c
+		// Decide on the reported status, not on the error text: a failing
+		// checker whose error message is empty must still count as unhealthy.
+		if componentStatus.Status != healthy(true).String() {
+			isHealthy = false
+		}
+		components = append(components, componentStatus)
+	}
+	status := &overallHealthStatus{
+		Status:     isHealthy.String(),
+		Components: components,
+	}
+	if !isHealthy {
+		log.Debugf("unhealthy system status: %v", status)
+	}
+	h.WriteJSONData(status)
+}
+
+// check runs checker and sends exactly one result for the component "name" on
+// c. If the checker has not returned within timeout, a timeout error is
+// reported instead.
+func check(name string, checker health.Checker,
+	timeout time.Duration, c chan *componentHealthStatus) {
+	// Capacity 1 lets the checking goroutine always hand over its result and
+	// exit, even when the timeout branch below has already been taken and
+	// nobody receives any more. With an unbuffered channel it would block
+	// forever and leak.
+	statusChan := make(chan *componentHealthStatus, 1)
+	go func() {
+		err := checker.Check()
+		var ok healthy = err == nil
+		status := &componentHealthStatus{
+			Name:   name,
+			Status: ok.String(),
+		}
+		if !ok {
+			status.Error = err.Error()
+		}
+		statusChan <- status
+	}()
+
+	// An explicit timer (instead of time.After) can be released as soon as
+	// the check completes.
+	timer := time.NewTimer(timeout)
+	defer timer.Stop()
+
+	select {
+	case status := <-statusChan:
+		c <- status
+	case <-timer.C:
+		c <- &componentHealthStatus{
+			Name:   name,
+			Status: healthy(false).String(),
+			Error:  "failed to check the health status: timeout",
+		}
+	}
+}
+
+// HTTPStatusCodeHealthChecker implements a Checker to check that the HTTP status code
+// returned matches the expected one. Each Check sends one request built from
+// method, url and header, using a client limited by timeout; on a mismatch the
+// response body is included in the returned error.
+func HTTPStatusCodeHealthChecker(method string, url string, header http.Header,
+	timeout time.Duration, statusCode int) health.Checker {
+	return health.CheckFunc(func() error {
+		req, err := http.NewRequest(method, url, nil)
+		if err != nil {
+			return fmt.Errorf("failed to create request: %v", err)
+		}
+		for key, values := range header {
+			for _, value := range values {
+				req.Header.Add(key, value)
+			}
+		}
+
+		client := httputil.NewClient(&http.Client{
+			Timeout: timeout,
+		})
+		resp, err := client.Do(req)
+		if err != nil {
+			return fmt.Errorf("failed to check health: %v", err)
+		}
+		defer resp.Body.Close()
+		if resp.StatusCode != statusCode {
+			data, err := ioutil.ReadAll(resp.Body)
+			if err != nil {
+				// Best effort: the status code mismatch is still reported below.
+				log.Debugf("failed to read response body: %v", err)
+			}
+			return fmt.Errorf("received unexpected status code: %d %s", resp.StatusCode, string(data))
+		}
+
+		return nil
+	})
+}
+
+// updater is a Checker that returns the most recently stored status. The
+// embedded mutex guards status.
+type updater struct {
+	sync.Mutex
+	status error
+}
+
+// Check returns the last status stored with update; nil means healthy.
+func (u *updater) Check() error {
+	u.Lock()
+	defer u.Unlock()
+
+	return u.status
+}
+
+// update stores status as the value returned by subsequent Check calls.
+func (u *updater) update(status error) {
+	u.Lock()
+	defer u.Unlock()
+
+	u.status = status
+}
+
+// PeriodicHealthChecker implements a Checker to check status periodically.
+// It starts a goroutine that runs checker immediately and then again on every
+// tick of period; the returned Checker only reports the latest stored result,
+// so its Check never waits for the underlying check. The goroutine is never
+// stopped. period must be positive because time.NewTicker panics otherwise.
+func PeriodicHealthChecker(checker health.Checker, period time.Duration) health.Checker {
+	u := &updater{
+		// init the "status" as "unknown status" error to avoid returning nil error(which means healthy)
+		// before the first health check request finished
+		status: errors.New("unknown status"),
+	}
+
+	go func() {
+		ticker := time.NewTicker(period)
+		for {
+			u.update(checker.Check())
+			<-ticker.C
+		}
+	}()
+
+	return u
+}
+
+// coreHealthChecker returns a checker that always reports healthy.
+func coreHealthChecker() health.Checker {
+	return health.CheckFunc(func() error {
+		return nil
+	})
+}
+
+// portalHealthChecker expects HTTP 200 from the portal URL, polled every 10s.
+func portalHealthChecker() health.Checker {
+	url := config.GetPortalURL()
+	timeout := 60 * time.Second
+	period := 10 * time.Second
+	checker := HTTPStatusCodeHealthChecker(http.MethodGet, url, nil, timeout, http.StatusOK)
+	return PeriodicHealthChecker(checker, period)
+}
+
+// jobserviceHealthChecker expects HTTP 200 from the job service
+// "/api/v1/stats" endpoint, polled every 10s.
+func jobserviceHealthChecker() health.Checker {
+	url := config.InternalJobServiceURL() + "/api/v1/stats"
+	timeout := 60 * time.Second
+	period := 10 * time.Second
+	checker := HTTPStatusCodeHealthChecker(http.MethodGet, url, nil, timeout, http.StatusOK)
+	return PeriodicHealthChecker(checker, period)
+}
+
+// registryHealthChecker expects HTTP 401 from the registry "/v2" endpoint,
+// polled every 10s. The request carries no credentials.
+func registryHealthChecker() health.Checker {
+	url := getRegistryURL() + "/v2"
+	timeout := 60 * time.Second
+	period := 10 * time.Second
+	checker := HTTPStatusCodeHealthChecker(http.MethodGet, url, nil, timeout, http.StatusUnauthorized)
+	return PeriodicHealthChecker(checker, period)
+}
+
+// registryCtlHealthChecker expects HTTP 200 from the registryctl
+// "/api/health" endpoint, polled every 10s.
+func registryCtlHealthChecker() health.Checker {
+	url := config.GetRegistryCtlURL() + "/api/health"
+	timeout := 60 * time.Second
+	period := 10 * time.Second
+	checker := HTTPStatusCodeHealthChecker(http.MethodGet, url, nil, timeout, http.StatusOK)
+	return PeriodicHealthChecker(checker, period)
+}
+
+// chartmuseumHealthChecker expects HTTP 200 from the chartmuseum "/health"
+// endpoint, polled every 10s. A failure to resolve the endpoint is only
+// logged; the checker is still created from whatever URL was returned.
+func chartmuseumHealthChecker() health.Checker {
+	url, err := config.GetChartMuseumEndpoint()
+	if err != nil {
+		log.Errorf("failed to get the URL of chartmuseum: %v", err)
+	}
+	url = url + "/health"
+	timeout := 60 * time.Second
+	period := 10 * time.Second
+	checker := HTTPStatusCodeHealthChecker(http.MethodGet, url, nil, timeout, http.StatusOK)
+	return PeriodicHealthChecker(checker, period)
+}
+
+// clairHealthChecker expects HTTP 200 from the "/health" endpoint of the
+// Clair health check server, polled every 10s.
+func clairHealthChecker() health.Checker {
+	url := config.GetClairHealthCheckServerURL() + "/health"
+	timeout := 60 * time.Second
+	period := 10 * time.Second
+	checker := HTTPStatusCodeHealthChecker(http.MethodGet, url, nil, timeout, http.StatusOK)
+	return PeriodicHealthChecker(checker, period)
+}
+
+// notaryHealthChecker expects HTTP 200 from the notary
+// "/_notary_server/health" endpoint, polled every 10s.
+func notaryHealthChecker() health.Checker {
+	url := config.InternalNotaryEndpoint() + "/_notary_server/health"
+	timeout := 60 * time.Second
+	period := 10 * time.Second
+	checker := HTTPStatusCodeHealthChecker(http.MethodGet, url, nil, timeout, http.StatusOK)
+	return PeriodicHealthChecker(checker, period)
+}
+
+// databaseHealthChecker runs "SELECT 1" against the database every 10s.
+func databaseHealthChecker() health.Checker {
+	period := 10 * time.Second
+	checker := health.CheckFunc(func() error {
+		_, err := dao.GetOrmer().Raw("SELECT 1").Exec()
+		if err != nil {
+			return fmt.Errorf("failed to run SQL \"SELECT 1\": %v", err)
+		}
+		return nil
+	})
+	return PeriodicHealthChecker(checker, period)
+}
+
+// redisHealthChecker dials the Redis instance used by the registry and sends
+// "PING" every 10s. A new connection is opened and closed for each check.
+func redisHealthChecker() health.Checker {
+	url := config.GetRedisOfRegURL()
+	timeout := 60 * time.Second
+	period := 10 * time.Second
+	checker := health.CheckFunc(func() error {
+		// timeout is already a time.Duration. Multiplying it by time.Second
+		// again overflows int64 and yields a meaningless timeout, so it is
+		// passed through unchanged.
+		conn, err := redis.DialURL(url,
+			redis.DialConnectTimeout(timeout),
+			redis.DialReadTimeout(timeout),
+			redis.DialWriteTimeout(timeout))
+		if err != nil {
+			return fmt.Errorf("failed to establish connection with Redis: %v", err)
+		}
+		defer conn.Close()
+		_, err = conn.Do("PING")
+		if err != nil {
+			return fmt.Errorf("failed to run \"PING\": %v", err)
+		}
+		return nil
+	})
+	return PeriodicHealthChecker(checker, period)
+}
+
+// registerHealthCheckers fills healthCheckerRegistry. The chartmuseum, clair
+// and notary checkers are registered only when the component is enabled.
+func registerHealthCheckers() {
+	healthCheckerRegistry["core"] = coreHealthChecker()
+	healthCheckerRegistry["portal"] = portalHealthChecker()
+	healthCheckerRegistry["jobservice"] = jobserviceHealthChecker()
+	healthCheckerRegistry["registry"] = registryHealthChecker()
+	healthCheckerRegistry["registryctl"] = registryCtlHealthChecker()
+	healthCheckerRegistry["database"] = databaseHealthChecker()
+	healthCheckerRegistry["redis"] = redisHealthChecker()
+	if config.WithChartMuseum() {
+		healthCheckerRegistry["chartmuseum"] = chartmuseumHealthChecker()
+	}
+	if config.WithClair() {
+		healthCheckerRegistry["clair"] = clairHealthChecker()
+	}
+	if config.WithNotary() {
+		healthCheckerRegistry["notary"] = notaryHealthChecker()
+	}
+}
+
+// getRegistryURL returns the parsed registry endpoint as a string. Failures
+// are logged and reported as an empty string.
+func getRegistryURL() string {
+	endpoint, err := config.RegistryURL()
+	if err != nil {
+		log.Errorf("failed to get the URL of registry: %v", err)
+		return ""
+	}
+	url, err := utils.ParseEndpoint(endpoint)
+	if err != nil {
+		log.Errorf("failed to parse the URL of registry: %v", err)
+		return ""
+	}
+	return url.String()
+}
diff --git a/src/core/api/health_test.go b/src/core/api/health_test.go
new file mode 100644
index 000000000..8426a74b1
--- /dev/null
+++ b/src/core/api/health_test.go
@@ -0,0 +1,134 @@
+// Copyright 2019 Project Harbor Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package api
+
+import (
+	"errors"
+	"net/http"
+	"testing"
+	"time"
+
+	"github.com/docker/distribution/health"
+	"github.com/goharbor/harbor/src/common/utils/test"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+// TestStringOfHealthy verifies the two string forms of the healthy flag.
+func TestStringOfHealthy(t *testing.T) {
+	var isHealthy healthy = true
+	assert.Equal(t, "healthy", isHealthy.String())
+	isHealthy = false
+	assert.Equal(t, "unhealthy", isHealthy.String())
+}
+
+// TestUpdater verifies that a zero-value updater reports nil and that Check
+// returns whatever error is stored in status.
+func TestUpdater(t *testing.T) {
+	updater := &updater{}
+	assert.Equal(t, nil, updater.Check())
+	updater.status = errors.New("unhealthy")
+	assert.Equal(t, "unhealthy", updater.Check().Error())
+}
+
+// TestHTTPStatusCodeHealthChecker runs the checker against a test server that
+// answers GET /health with 200: expecting 200 succeeds, expecting 401 fails.
+func TestHTTPStatusCodeHealthChecker(t *testing.T) {
+	handler := &test.RequestHandlerMapping{
+		Method:  http.MethodGet,
+		Pattern: "/health",
+		Handler: func(w http.ResponseWriter, r *http.Request) {
+			w.WriteHeader(http.StatusOK)
+		},
+	}
+	server := test.NewServer(handler)
+	defer server.Close()
+
+	url := server.URL + "/health"
+	// The extra header exercises the header-copying loop of the checker.
+	checker := HTTPStatusCodeHealthChecker(
+		http.MethodGet, url, map[string][]string{
+			"key": {"value"},
+		}, 5*time.Second, http.StatusOK)
+	assert.Equal(t, nil, checker.Check())
+
+	checker = HTTPStatusCodeHealthChecker(
+		http.MethodGet, url, nil, 5*time.Second, http.StatusUnauthorized)
+	assert.NotEqual(t, nil, checker.Check())
+}
+
+// TestPeriodicHealthChecker verifies the three states of a periodic checker:
+// the initial "unknown status", the result of the first check and the result
+// of a later check. The assertions depend on the sleeps below.
+func TestPeriodicHealthChecker(t *testing.T) {
+	firstCheck := true
+	// Each call takes 2s; only the first call reports healthy.
+	checkFunc := func() error {
+		time.Sleep(2 * time.Second)
+		if firstCheck {
+			firstCheck = false
+			return nil
+		}
+		return errors.New("unhealthy")
+	}
+
+	checker := PeriodicHealthChecker(health.CheckFunc(checkFunc), 1*time.Second)
+	// The first check is still sleeping, so the initial status is reported.
+	assert.Equal(t, "unknown status", checker.Check().Error())
+	// After 3s the first check (2s) has finished with nil.
+	time.Sleep(3 * time.Second)
+	assert.Equal(t, nil, checker.Check())
+	// After another 3s a later check has finished with the error.
+	time.Sleep(3 * time.Second)
+	assert.Equal(t, "unhealthy", checker.Check().Error())
+}
+
+// fakeHealthChecker returns a checker that reports nil when healthy is true
+// and an "unhealthy" error otherwise.
+func fakeHealthChecker(healthy bool) health.Checker {
+	return health.CheckFunc(func() error {
+		if healthy {
+			return nil
+		}
+		return errors.New("unhealthy")
+	})
+}
+// TestCheckHealth replaces the package-level registry with fake checkers and
+// verifies the overall status returned by GET /api/health.
+func TestCheckHealth(t *testing.T) {
+	// component01: healthy, component02: healthy => status: healthy
+	healthCheckerRegistry = map[string]health.Checker{}
+	healthCheckerRegistry["component01"] = fakeHealthChecker(true)
+	healthCheckerRegistry["component02"] = fakeHealthChecker(true)
+	status := map[string]interface{}{}
+	err := handleAndParse(&testingRequest{
+		method: http.MethodGet,
+		url:    "/api/health",
+	}, &status)
+	require.Nil(t, err)
+	assert.Equal(t, "healthy", status["status"].(string))
+
+	// component01: healthy, component02: unhealthy => status: unhealthy
+	healthCheckerRegistry = map[string]health.Checker{}
+	healthCheckerRegistry["component01"] = fakeHealthChecker(true)
+	healthCheckerRegistry["component02"] = fakeHealthChecker(false)
+	status = map[string]interface{}{}
+	err = handleAndParse(&testingRequest{
+		method: http.MethodGet,
+		url:    "/api/health",
+	}, &status)
+	require.Nil(t, err)
+	assert.Equal(t, "unhealthy", status["status"].(string))
+}
+
+// TestCoreHealthChecker verifies that the core checker reports healthy.
+func TestCoreHealthChecker(t *testing.T) {
+	checker := coreHealthChecker()
+	assert.Equal(t, nil, checker.Check())
+}
+
+// TestDatabaseHealthChecker verifies that the database checker reports
+// healthy; this needs a reachable database.
+func TestDatabaseHealthChecker(t *testing.T) {
+	checker := databaseHealthChecker()
+	// Presumably gives the background goroutine time to finish its first
+	// check; before that the checker still reports "unknown status".
+	time.Sleep(1 * time.Second)
+	assert.Equal(t, nil, checker.Check())
+}
+
+// TestRegisterHealthCheckers verifies that registration adds at least the
+// "core" checker to a freshly reset registry.
+func TestRegisterHealthCheckers(t *testing.T) {
+	healthCheckerRegistry = map[string]health.Checker{}
+	registerHealthCheckers()
+	assert.NotNil(t, healthCheckerRegistry["core"])
+}
diff --git a/src/core/config/config.go b/src/core/config/config.go
index 0d503b71a..c71ce1574 100644
--- a/src/core/config/config.go
+++ b/src/core/config/config.go
@@ -572,3 +572,36 @@ func GetChartMuseumEndpoint() (string, error) {
 
 	return chartEndpoint, nil
 }
+
+// GetRedisOfRegURL returns the URL of Redis used by registry.
+// It is read from the environment variable "_REDIS_URL_REG"; the result is
+// empty when the variable is not set.
+func GetRedisOfRegURL() string {
+	return os.Getenv("_REDIS_URL_REG")
+}
+
+// GetPortalURL returns the URL of portal.
+// It is read from the environment variable "PORTAL_URL" and falls back to
+// common.DefaultPortalURL when the variable is empty or not set.
+func GetPortalURL() string {
+	url := os.Getenv("PORTAL_URL")
+	if len(url) == 0 {
+		return common.DefaultPortalURL
+	}
+	return url
+}
+
+// GetRegistryCtlURL returns the URL of registryctl.
+// It is read from the environment variable "REGISTRYCTL_URL" and falls back
+// to common.DefaultRegistryCtlURL when the variable is empty or not set.
+func GetRegistryCtlURL() string {
+	url := os.Getenv("REGISTRYCTL_URL")
+	if len(url) == 0 {
+		return common.DefaultRegistryCtlURL
+	}
+	return url
+}
+
+// GetClairHealthCheckServerURL returns the URL of
+// the health check server of Clair.
+// It is read from the environment variable "CLAIR_HEALTH_CHECK_SERVER_URL"
+// and falls back to common.DefaultClairHealthCheckServerURL when the variable
+// is empty or not set.
+func GetClairHealthCheckServerURL() string {
+	url := os.Getenv("CLAIR_HEALTH_CHECK_SERVER_URL")
+	if len(url) == 0 {
+		return common.DefaultClairHealthCheckServerURL
+	}
+	return url
+}
diff --git a/src/core/router.go b/src/core/router.go
index 2c629ba01..734fd84b6 100644
--- a/src/core/router.go
+++ b/src/core/router.go
@@ -56,6 +56,7 @@ func initRouters() {
 	}
 
 	// API
+	beego.Router("/api/health", &api.HealthAPI{}, "get:CheckHealth")
 	beego.Router("/api/ping", &api.SystemInfoAPI{}, "get:Ping")
 	beego.Router("/api/search", &api.SearchAPI{})
 	beego.Router("/api/projects/", &api.ProjectAPI{}, "get:List;post:Post")