Provide simple storage driver health check
To ensure the web application is operating properly, we've added a periodic health check for the storage driver. If the health check fails three times in a row, the registry will serve a 503 response status for any request until the condition is resolved. The condition is reported in the response body and via the /debug/health endpoint. To ensure that all drivers will properly operate with this health check, a function has been added to the driver test suite. Signed-off-by: Stephen J Day <stephen.day@docker.com> master
							parent
							
								
									1d5b311fc4
								
							
						
					
					
						commit
						6ba799b69e
					
				|  | @ -17,7 +17,7 @@ import ( | ||||||
| 	"github.com/bugsnag/bugsnag-go" | 	"github.com/bugsnag/bugsnag-go" | ||||||
| 	"github.com/docker/distribution/configuration" | 	"github.com/docker/distribution/configuration" | ||||||
| 	"github.com/docker/distribution/context" | 	"github.com/docker/distribution/context" | ||||||
| 	_ "github.com/docker/distribution/health" | 	"github.com/docker/distribution/health" | ||||||
| 	_ "github.com/docker/distribution/registry/auth/htpasswd" | 	_ "github.com/docker/distribution/registry/auth/htpasswd" | ||||||
| 	_ "github.com/docker/distribution/registry/auth/silly" | 	_ "github.com/docker/distribution/registry/auth/silly" | ||||||
| 	_ "github.com/docker/distribution/registry/auth/token" | 	_ "github.com/docker/distribution/registry/auth/token" | ||||||
|  | @ -70,8 +70,10 @@ func main() { | ||||||
| 	uuid.Loggerf = context.GetLogger(ctx).Warnf | 	uuid.Loggerf = context.GetLogger(ctx).Warnf | ||||||
| 
 | 
 | ||||||
| 	app := handlers.NewApp(ctx, *config) | 	app := handlers.NewApp(ctx, *config) | ||||||
|  | 	app.RegisterHealthChecks() | ||||||
| 	handler := configureReporting(app) | 	handler := configureReporting(app) | ||||||
| 	handler = panicHandler(handler) | 	handler = panicHandler(handler) | ||||||
|  | 	handler = health.Handler(handler) | ||||||
| 	handler = gorhandlers.CombinedLoggingHandler(os.Stdout, handler) | 	handler = gorhandlers.CombinedLoggingHandler(os.Stdout, handler) | ||||||
| 
 | 
 | ||||||
| 	if config.HTTP.Debug.Addr != "" { | 	if config.HTTP.Debug.Addr != "" { | ||||||
|  |  | ||||||
|  | @ -2,9 +2,12 @@ package health | ||||||
| 
 | 
 | ||||||
| import ( | import ( | ||||||
| 	"encoding/json" | 	"encoding/json" | ||||||
|  | 	"fmt" | ||||||
| 	"net/http" | 	"net/http" | ||||||
| 	"sync" | 	"sync" | ||||||
| 	"time" | 	"time" | ||||||
|  | 
 | ||||||
|  | 	"github.com/docker/distribution/context" | ||||||
| ) | ) | ||||||
| 
 | 
 | ||||||
| var ( | var ( | ||||||
|  | @ -140,7 +143,7 @@ func PeriodicThresholdChecker(check Checker, period time.Duration, threshold int | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| // CheckStatus returns a map with all the current health check errors
 | // CheckStatus returns a map with all the current health check errors
 | ||||||
| func CheckStatus() map[string]string { | func CheckStatus() map[string]string { // TODO(stevvooe) this needs a proper type
 | ||||||
| 	mutex.RLock() | 	mutex.RLock() | ||||||
| 	defer mutex.RUnlock() | 	defer mutex.RUnlock() | ||||||
| 	statusKeys := make(map[string]string) | 	statusKeys := make(map[string]string) | ||||||
|  | @ -174,13 +177,13 @@ func RegisterFunc(name string, check func() error) { | ||||||
| 
 | 
 | ||||||
| // RegisterPeriodicFunc allows the convenience of registering a PeriodicChecker
 | // RegisterPeriodicFunc allows the convenience of registering a PeriodicChecker
 | ||||||
| // from an arbitrary func() error
 | // from an arbitrary func() error
 | ||||||
| func RegisterPeriodicFunc(name string, check func() error, period time.Duration) { | func RegisterPeriodicFunc(name string, period time.Duration, check CheckFunc) { | ||||||
| 	Register(name, PeriodicChecker(CheckFunc(check), period)) | 	Register(name, PeriodicChecker(CheckFunc(check), period)) | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| // RegisterPeriodicThresholdFunc allows the convenience of registering a
 | // RegisterPeriodicThresholdFunc allows the convenience of registering a
 | ||||||
| // PeriodicChecker from an arbitrary func() error
 | // PeriodicChecker from an arbitrary func() error
 | ||||||
| func RegisterPeriodicThresholdFunc(name string, check func() error, period time.Duration, threshold int) { | func RegisterPeriodicThresholdFunc(name string, period time.Duration, threshold int, check CheckFunc) { | ||||||
| 	Register(name, PeriodicThresholdChecker(CheckFunc(check), period, threshold)) | 	Register(name, PeriodicThresholdChecker(CheckFunc(check), period, threshold)) | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | @ -189,25 +192,60 @@ func RegisterPeriodicThresholdFunc(name string, check func() error, period time. | ||||||
| // Returns 503 if any Error status exists, 200 otherwise
 | // Returns 503 if any Error status exists, 200 otherwise
 | ||||||
| func StatusHandler(w http.ResponseWriter, r *http.Request) { | func StatusHandler(w http.ResponseWriter, r *http.Request) { | ||||||
| 	if r.Method == "GET" { | 	if r.Method == "GET" { | ||||||
| 		w.Header().Set("Content-Type", "application/json; charset=utf-8") | 		checks := CheckStatus() | ||||||
| 		checksStatus := CheckStatus() | 		status := http.StatusOK | ||||||
| 		// If there is an error, return 503
 |  | ||||||
| 		if len(checksStatus) != 0 { |  | ||||||
| 			w.WriteHeader(http.StatusServiceUnavailable) |  | ||||||
| 		} |  | ||||||
| 		encoder := json.NewEncoder(w) |  | ||||||
| 		err := encoder.Encode(checksStatus) |  | ||||||
| 
 | 
 | ||||||
| 		// Parsing of the JSON failed. Returning generic error message
 | 		// If there is an error, return 503
 | ||||||
| 		if err != nil { | 		if len(checks) != 0 { | ||||||
| 			encoder.Encode(struct { | 			status = http.StatusServiceUnavailable | ||||||
| 				ServerError string `json:"server_error"` |  | ||||||
| 			}{ |  | ||||||
| 				ServerError: "Could not parse error message", |  | ||||||
| 			}) |  | ||||||
| 		} | 		} | ||||||
|  | 
 | ||||||
|  | 		statusResponse(w, r, status, checks) | ||||||
| 	} else { | 	} else { | ||||||
| 		w.WriteHeader(http.StatusNotFound) | 		http.NotFound(w, r) | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | // Handler returns a handler that will return 503 response code if the health
 | ||||||
|  | // checks have failed. If everything is okay with the health checks, the
 | ||||||
|  | // handler will pass through to the provided handler. Use this handler to
 | ||||||
|  | // disable a web application when the health checks fail.
 | ||||||
|  | func Handler(handler http.Handler) http.Handler { | ||||||
|  | 	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { | ||||||
|  | 		checks := CheckStatus() | ||||||
|  | 		if len(checks) != 0 { | ||||||
|  | 			statusResponse(w, r, http.StatusServiceUnavailable, checks) | ||||||
|  | 			return | ||||||
|  | 		} | ||||||
|  | 
 | ||||||
|  | 		handler.ServeHTTP(w, r) // pass through
 | ||||||
|  | 	}) | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | // statusResponse completes the request with a response describing the health
 | ||||||
|  | // of the service.
 | ||||||
|  | func statusResponse(w http.ResponseWriter, r *http.Request, status int, checks map[string]string) { | ||||||
|  | 	p, err := json.Marshal(checks) | ||||||
|  | 	if err != nil { | ||||||
|  | 		context.GetLogger(context.Background()).Errorf("error serializing health status: %v", err) | ||||||
|  | 		p, err = json.Marshal(struct { | ||||||
|  | 			ServerError string `json:"server_error"` | ||||||
|  | 		}{ | ||||||
|  | 			ServerError: "Could not parse error message", | ||||||
|  | 		}) | ||||||
|  | 		status = http.StatusInternalServerError | ||||||
|  | 
 | ||||||
|  | 		if err != nil { | ||||||
|  | 			context.GetLogger(context.Background()).Errorf("error serializing health status failure message: %v", err) | ||||||
|  | 			return | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	w.Header().Set("Content-Type", "application/json; charset=utf-8") | ||||||
|  | 	w.Header().Set("Content-Length", fmt.Sprint(len(p))) | ||||||
|  | 	w.WriteHeader(status) | ||||||
|  | 	if _, err := w.Write(p); err != nil { | ||||||
|  | 		context.GetLogger(context.Background()).Errorf("error writing health status response body: %v", err) | ||||||
| 	} | 	} | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -2,6 +2,7 @@ package health | ||||||
| 
 | 
 | ||||||
| import ( | import ( | ||||||
| 	"errors" | 	"errors" | ||||||
|  | 	"fmt" | ||||||
| 	"net/http" | 	"net/http" | ||||||
| 	"net/http/httptest" | 	"net/http/httptest" | ||||||
| 	"testing" | 	"testing" | ||||||
|  | @ -45,3 +46,62 @@ func TestReturns503IfThereAreErrorChecks(t *testing.T) { | ||||||
| 		t.Errorf("Did not get a 503.") | 		t.Errorf("Did not get a 503.") | ||||||
| 	} | 	} | ||||||
| } | } | ||||||
|  | 
 | ||||||
|  | // TestHealthHandler ensures that our handler implementation correctly protects
 | ||||||
|  | // the web application when things aren't so healthy.
 | ||||||
|  | func TestHealthHandler(t *testing.T) { | ||||||
|  | 	// clear out existing checks.
 | ||||||
|  | 	registeredChecks = make(map[string]Checker) | ||||||
|  | 
 | ||||||
|  | 	// protect an http server
 | ||||||
|  | 	handler := http.Handler(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { | ||||||
|  | 		w.WriteHeader(http.StatusNoContent) | ||||||
|  | 	})) | ||||||
|  | 
 | ||||||
|  | 	// wrap it in our health handler
 | ||||||
|  | 	handler = Handler(handler) | ||||||
|  | 
 | ||||||
|  | 	// use this to swap the check status
 | ||||||
|  | 	updater := NewStatusUpdater() | ||||||
|  | 	Register("test_check", updater) | ||||||
|  | 
 | ||||||
|  | 	// now, create a test server
 | ||||||
|  | 	server := httptest.NewServer(handler) | ||||||
|  | 
 | ||||||
|  | 	checkUp := func(t *testing.T, message string) { | ||||||
|  | 		resp, err := http.Get(server.URL) | ||||||
|  | 		if err != nil { | ||||||
|  | 			t.Fatalf("error getting success status: %v", err) | ||||||
|  | 		} | ||||||
|  | 		defer resp.Body.Close() | ||||||
|  | 
 | ||||||
|  | 		if resp.StatusCode != http.StatusNoContent { | ||||||
|  | 			t.Fatalf("unexpected response code from server when %s: %d != %d", message, resp.StatusCode, http.StatusNoContent) | ||||||
|  | 		} | ||||||
|  | 		// NOTE(stevvooe): we really don't care about the body -- the format is
 | ||||||
|  | 		// not standardized or supported, yet.
 | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	checkDown := func(t *testing.T, message string) { | ||||||
|  | 		resp, err := http.Get(server.URL) | ||||||
|  | 		if err != nil { | ||||||
|  | 			t.Fatalf("error getting down status: %v", err) | ||||||
|  | 		} | ||||||
|  | 		defer resp.Body.Close() | ||||||
|  | 
 | ||||||
|  | 		if resp.StatusCode != http.StatusServiceUnavailable { | ||||||
|  | 			t.Fatalf("unexpected response code from server when %s: %d != %d", message, resp.StatusCode, http.StatusServiceUnavailable) | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	// server should be up
 | ||||||
|  | 	checkUp(t, "initial health check") | ||||||
|  | 
 | ||||||
|  | 	// now, we fail the health check
 | ||||||
|  | 	updater.Update(fmt.Errorf("the server is now out of commission")) | ||||||
|  | 	checkDown(t, "server should be down") // should be down
 | ||||||
|  | 
 | ||||||
|  | 	// bring server back up
 | ||||||
|  | 	updater.Update(nil) | ||||||
|  | 	checkUp(t, "when server is back up") // now we should be back up.
 | ||||||
|  | } | ||||||
|  |  | ||||||
|  | @ -14,6 +14,7 @@ import ( | ||||||
| 	"github.com/docker/distribution" | 	"github.com/docker/distribution" | ||||||
| 	"github.com/docker/distribution/configuration" | 	"github.com/docker/distribution/configuration" | ||||||
| 	ctxu "github.com/docker/distribution/context" | 	ctxu "github.com/docker/distribution/context" | ||||||
|  | 	"github.com/docker/distribution/health" | ||||||
| 	"github.com/docker/distribution/notifications" | 	"github.com/docker/distribution/notifications" | ||||||
| 	"github.com/docker/distribution/registry/api/errcode" | 	"github.com/docker/distribution/registry/api/errcode" | ||||||
| 	"github.com/docker/distribution/registry/api/v2" | 	"github.com/docker/distribution/registry/api/v2" | ||||||
|  | @ -203,6 +204,20 @@ func NewApp(ctx context.Context, configuration configuration.Configuration) *App | ||||||
| 	return app | 	return app | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | // RegisterHealthChecks is an awful hack to defer health check registration
 | ||||||
|  | // control to callers. This should only ever be called once per registry
 | ||||||
|  | // process, typically in a main function. The correct way would be to register
 | ||||||
|  | // health checks outside of app, since multiple apps may exist in the same
 | ||||||
|  | // process. Because the configuration and app are tightly coupled,
 | ||||||
|  | // implementing this properly will require a refactor. This method may panic
 | ||||||
|  | // if called twice in the same process.
 | ||||||
|  | func (app *App) RegisterHealthChecks() { | ||||||
|  | 	health.RegisterPeriodicThresholdFunc("storagedriver_"+app.Config.Storage.Type(), 10*time.Second, 3, func() error { | ||||||
|  | 		_, err := app.driver.List(app, "/") // "/" should always exist
 | ||||||
|  | 		return err                          // any error will be treated as failure
 | ||||||
|  | 	}) | ||||||
|  | } | ||||||
|  | 
 | ||||||
| // register a handler with the application, by route name. The handler will be
 | // register a handler with the application, by route name. The handler will be
 | ||||||
| // passed through the application filters and context will be constructed at
 | // passed through the application filters and context will be constructed at
 | ||||||
| // request time.
 | // request time.
 | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue