Merge pull request #901 from aaronlehmann/configurable-health-checks
Add configurable file-existence and HTTP health checksmaster
						commit
						37d4ad081f
					
				|  | @ -48,3 +48,8 @@ proxy: | |||
|   remoteurl: https://registry-1.docker.io | ||||
|   username: username | ||||
|   password: password | ||||
| health: | ||||
|   storagedriver: | ||||
|     enabled: true | ||||
|     interval: 10s | ||||
|     threshold: 3 | ||||
|  |  | |||
|  | @ -59,4 +59,8 @@ notifications: | |||
|           threshold: 10 | ||||
|           backoff: 1s | ||||
|           disabled: true  | ||||
| 
 | ||||
| health: | ||||
|   storagedriver: | ||||
|     enabled: true | ||||
|     interval: 10s | ||||
|     threshold: 3 | ||||
|  |  | |||
|  | @ -11,3 +11,8 @@ http: | |||
|     addr: :5000 | ||||
|     headers: | ||||
|         X-Content-Type-Options: [nosniff] | ||||
| health: | ||||
|   storagedriver: | ||||
|     enabled: true | ||||
|     interval: 10s | ||||
|     threshold: 3 | ||||
|  |  | |||
|  | @ -135,6 +135,8 @@ type Configuration struct { | |||
| 		} `yaml:"pool,omitempty"` | ||||
| 	} `yaml:"redis,omitempty"` | ||||
| 
 | ||||
| 	Health Health `yaml:"health,omitempty"` | ||||
| 
 | ||||
| 	Proxy Proxy `yaml:"proxy,omitempty"` | ||||
| } | ||||
| 
 | ||||
|  | @ -179,6 +181,68 @@ type MailOptions struct { | |||
| 	To []string `yaml:"to,omitempty"` | ||||
| } | ||||
| 
 | ||||
| // FileChecker is a type of entry in the health section for checking files.
 | ||||
| type FileChecker struct { | ||||
| 	// Interval is the duration in between checks
 | ||||
| 	Interval time.Duration `yaml:"interval,omitempty"` | ||||
| 	// File is the path to check
 | ||||
| 	File string `yaml:"file,omitempty"` | ||||
| 	// Threshold is the number of times a check must fail to trigger an
 | ||||
| 	// unhealthy state
 | ||||
| 	Threshold int `yaml:"threshold,omitempty"` | ||||
| } | ||||
| 
 | ||||
// HTTPChecker is a type of entry in the health section for checking HTTP URIs.
type HTTPChecker struct {
	// Timeout is the duration to wait before timing out the HTTP request.
	// It is read from the "timeout" key; it must not share the "interval"
	// key with Interval, otherwise the two fields collide when decoding.
	Timeout time.Duration `yaml:"timeout,omitempty"`
	// StatusCode is the expected status code
	StatusCode int
	// Interval is the duration in between checks
	Interval time.Duration `yaml:"interval,omitempty"`
	// URI is the HTTP URI to check
	URI string `yaml:"uri,omitempty"`
	// Headers lists static headers that should be added to all requests
	Headers http.Header `yaml:"headers"`
	// Threshold is the number of times a check must fail to trigger an
	// unhealthy state
	Threshold int `yaml:"threshold,omitempty"`
}
| 
 | ||||
// TCPChecker is a type of entry in the health section for checking TCP servers.
type TCPChecker struct {
	// Timeout is the duration to wait before timing out the TCP connection.
	// It is read from the "timeout" key; it must not share the "interval"
	// key with Interval, otherwise the two fields collide when decoding.
	Timeout time.Duration `yaml:"timeout,omitempty"`
	// Interval is the duration in between checks
	Interval time.Duration `yaml:"interval,omitempty"`
	// Addr is the TCP address to check
	Addr string `yaml:"addr,omitempty"`
	// Threshold is the number of times a check must fail to trigger an
	// unhealthy state
	Threshold int `yaml:"threshold,omitempty"`
}
| 
 | ||||
| // Health provides the configuration section for health checks.
 | ||||
| type Health struct { | ||||
| 	// FileCheckers is a list of paths to check
 | ||||
| 	FileCheckers []FileChecker `yaml:"file,omitempty"` | ||||
| 	// HTTPCheckers is a list of URIs to check
 | ||||
| 	HTTPCheckers []HTTPChecker `yaml:"http,omitempty"` | ||||
| 	// TCPCheckers is a list of TCP addresses to check
 | ||||
| 	TCPCheckers []TCPChecker `yaml:"tcp,omitempty"` | ||||
| 	// StorageDriver configures a health check on the configured storage
 | ||||
| 	// driver
 | ||||
| 	StorageDriver struct { | ||||
| 		// Enabled turns on the health check for the storage driver
 | ||||
| 		Enabled bool `yaml:"enabled,omitempty"` | ||||
| 		// Interval is the duration in between checks
 | ||||
| 		Interval time.Duration `yaml:"interval,omitempty"` | ||||
| 		// Threshold is the number of times a check must fail to trigger an
 | ||||
| 		// unhealthy state
 | ||||
| 		Threshold int `yaml:"threshold,omitempty"` | ||||
| 	} `yaml:"storagedriver,omitempty"` | ||||
| } | ||||
| 
 | ||||
| // v0_1Configuration is a Version 0.1 Configuration struct
 | ||||
| // This is currently aliased to Configuration, as it is the current version
 | ||||
| type v0_1Configuration Configuration | ||||
|  |  | |||
|  | @ -195,6 +195,27 @@ information about each option that appears later in this page. | |||
|         maxidle: 16 | ||||
|         maxactive: 64 | ||||
|         idletimeout: 300s | ||||
|     health: | ||||
|       storagedriver: | ||||
|         enabled: true | ||||
|         interval: 10s | ||||
|         threshold: 3 | ||||
|       file: | ||||
|         - file: /path/to/checked/file | ||||
|           interval: 10s | ||||
|       http: | ||||
|         - uri: http://server.to.check/must/return/200 | ||||
|           headers: | ||||
|             Authorization: [Basic QWxhZGRpbjpvcGVuIHNlc2FtZQ==] | ||||
|           statuscode: 200 | ||||
|           timeout: 3s | ||||
|           interval: 10s | ||||
|           threshold: 3 | ||||
|       tcp: | ||||
|         - addr: redis-server.domain.com:6379 | ||||
|           timeout: 3s | ||||
|           interval: 10s | ||||
|           threshold: 3 | ||||
| 
 | ||||
| In some instances a configuration option is **optional** but it contains child | ||||
| options marked as **required**. This indicates that you can omit the parent with | ||||
|  | @ -1381,7 +1402,9 @@ The URL to which events should be published. | |||
|       yes | ||||
|     </td> | ||||
|     <td> | ||||
|       Static headers to add to each request. | ||||
|       Static headers to add to each request. Each header's name should be a key | ||||
|       underneath headers, and each value is a list of payloads for that | ||||
|       header name. Note that values must always be lists. | ||||
|     </td> | ||||
|   </tr> | ||||
|   <tr> | ||||
|  | @ -1588,6 +1611,334 @@ Configure the behavior of the Redis connection pool. | |||
|   </tr> | ||||
| </table> | ||||
| 
 | ||||
| ## health | ||||
| 
 | ||||
|     health: | ||||
|       storagedriver: | ||||
|         enabled: true | ||||
|         interval: 10s | ||||
|         threshold: 3 | ||||
|       file: | ||||
|         - file: /path/to/checked/file | ||||
|           interval: 10s | ||||
|       http: | ||||
|         - uri: http://server.to.check/must/return/200 | ||||
|           headers: | ||||
|             Authorization: [Basic QWxhZGRpbjpvcGVuIHNlc2FtZQ==] | ||||
|           statuscode: 200 | ||||
|           timeout: 3s | ||||
|           interval: 10s | ||||
|           threshold: 3 | ||||
|       tcp: | ||||
|         - addr: redis-server.domain.com:6379 | ||||
|           timeout: 3s | ||||
|           interval: 10s | ||||
|           threshold: 3 | ||||
| 
 | ||||
| The health option is **optional**. It may contain preferences for a periodic | ||||
| health check on the storage driver's backend storage, and optional periodic | ||||
| checks on local files, HTTP URIs, and/or TCP servers. The results of the health | ||||
| checks are available at /debug/health on the debug HTTP server if the debug | ||||
| HTTP server is enabled (see http section). | ||||
| 
 | ||||
| ### storagedriver | ||||
| 
 | ||||
| storagedriver contains options for a health check on the configured storage | ||||
| driver's backend storage. enabled must be set to true for this health check to | ||||
| be active. | ||||
| 
 | ||||
| <table> | ||||
|   <tr> | ||||
|     <th>Parameter</th> | ||||
|     <th>Required</th> | ||||
|     <th>Description</th> | ||||
|   </tr> | ||||
|   <tr> | ||||
|     <td> | ||||
|       <code>enabled</code> | ||||
|     </td> | ||||
|     <td> | ||||
|       yes | ||||
|     </td> | ||||
|     <td> | ||||
| "true" to enable the storage driver health check or "false" to disable it. | ||||
| </td> | ||||
|   </tr> | ||||
|   <tr> | ||||
|     <td> | ||||
|       <code>interval</code> | ||||
|     </td> | ||||
|     <td> | ||||
|       no | ||||
|     </td> | ||||
|     <td> | ||||
|       The length of time to wait between repetitions of the check. This field | ||||
|       takes a positive integer and an optional suffix indicating the unit of | ||||
|       time. Possible units are: | ||||
|       <ul> | ||||
|         <li><code>ns</code> (nanoseconds)</li> | ||||
|         <li><code>us</code> (microseconds)</li> | ||||
|         <li><code>ms</code> (milliseconds)</li> | ||||
|         <li><code>s</code> (seconds)</li> | ||||
|         <li><code>m</code> (minutes)</li> | ||||
|         <li><code>h</code> (hours)</li> | ||||
|       </ul> | ||||
|     If you omit the suffix, the system interprets the value as nanoseconds. | ||||
|     The default value is 10 seconds if this field is omitted. | ||||
|     </td> | ||||
|   </tr> | ||||
|   <tr> | ||||
|     <td> | ||||
|       <code>threshold</code> | ||||
|     </td> | ||||
|     <td> | ||||
|       no | ||||
|     </td> | ||||
|     <td> | ||||
|       An integer specifying the number of times the check must fail before the | ||||
|       check triggers an unhealthy state. If this field is not specified, a | ||||
|       single failure will trigger an unhealthy state. | ||||
|     </td> | ||||
|   </tr> | ||||
| </table> | ||||
| 
 | ||||
| ### file | ||||
| 
 | ||||
| file is a list of paths to be periodically checked for the existence of a file. | ||||
| If a file exists at the given path, the health check will fail. This can be | ||||
| used as a way of bringing a registry out of rotation by creating a file. | ||||
| 
 | ||||
| <table> | ||||
|   <tr> | ||||
|     <th>Parameter</th> | ||||
|     <th>Required</th> | ||||
|     <th>Description</th> | ||||
|   </tr> | ||||
|   <tr> | ||||
|     <td> | ||||
|       <code>file</code> | ||||
|     </td> | ||||
|     <td> | ||||
|       yes | ||||
|     </td> | ||||
|     <td> | ||||
| The path to check for the existence of a file. | ||||
| </td> | ||||
|   </tr> | ||||
|   <tr> | ||||
|     <td> | ||||
|       <code>interval</code> | ||||
|     </td> | ||||
|     <td> | ||||
|       no | ||||
|     </td> | ||||
|     <td> | ||||
|       The length of time to wait between repetitions of the check. This field | ||||
|       takes a positive integer and an optional suffix indicating the unit of | ||||
|       time. Possible units are: | ||||
|       <ul> | ||||
|         <li><code>ns</code> (nanoseconds)</li> | ||||
|         <li><code>us</code> (microseconds)</li> | ||||
|         <li><code>ms</code> (milliseconds)</li> | ||||
|         <li><code>s</code> (seconds)</li> | ||||
|         <li><code>m</code> (minutes)</li> | ||||
|         <li><code>h</code> (hours)</li> | ||||
|       </ul> | ||||
|     If you omit the suffix, the system interprets the value as nanoseconds. | ||||
|     The default value is 10 seconds if this field is omitted. | ||||
|     </td> | ||||
|   </tr> | ||||
| </table> | ||||
| 
 | ||||
| ### http | ||||
| 
 | ||||
| http is a list of HTTP URIs to be periodically checked with HEAD requests. If | ||||
| a HEAD request doesn't complete or returns an unexpected status code, the | ||||
| health check will fail. | ||||
| 
 | ||||
| <table> | ||||
|   <tr> | ||||
|     <th>Parameter</th> | ||||
|     <th>Required</th> | ||||
|     <th>Description</th> | ||||
|   </tr> | ||||
|   <tr> | ||||
|     <td> | ||||
|       <code>uri</code> | ||||
|     </td> | ||||
|     <td> | ||||
|       yes | ||||
|     </td> | ||||
|     <td> | ||||
| The URI to check. | ||||
| </td> | ||||
|   </tr> | ||||
|    <tr> | ||||
|     <td> | ||||
|       <code>headers</code> | ||||
|     </td> | ||||
|     <td> | ||||
|       no | ||||
|     </td> | ||||
|     <td> | ||||
|       Static headers to add to each request. Each header's name should be a key | ||||
|       underneath headers, and each value is a list of payloads for that | ||||
|       header name. Note that values must always be lists. | ||||
|     </td> | ||||
|   </tr> | ||||
|   <tr> | ||||
|     <td> | ||||
|       <code>statuscode</code> | ||||
|     </td> | ||||
|     <td> | ||||
|       no | ||||
|     </td> | ||||
|     <td> | ||||
| Expected status code from the HTTP URI. Defaults to 200. | ||||
| </td> | ||||
|   </tr> | ||||
|   <tr> | ||||
|     <td> | ||||
|       <code>timeout</code> | ||||
|     </td> | ||||
|     <td> | ||||
|       no | ||||
|     </td> | ||||
|     <td> | ||||
|       The length of time to wait before timing out the HTTP request. This field | ||||
|       takes a positive integer and an optional suffix indicating the unit of | ||||
|       time. Possible units are: | ||||
|       <ul> | ||||
|         <li><code>ns</code> (nanoseconds)</li> | ||||
|         <li><code>us</code> (microseconds)</li> | ||||
|         <li><code>ms</code> (milliseconds)</li> | ||||
|         <li><code>s</code> (seconds)</li> | ||||
|         <li><code>m</code> (minutes)</li> | ||||
|         <li><code>h</code> (hours)</li> | ||||
|       </ul> | ||||
|     If you omit the suffix, the system interprets the value as nanoseconds. | ||||
|     </td> | ||||
|   </tr> | ||||
|   <tr> | ||||
|     <td> | ||||
|       <code>interval</code> | ||||
|     </td> | ||||
|     <td> | ||||
|       no | ||||
|     </td> | ||||
|     <td> | ||||
|       The length of time to wait between repetitions of the check. This field | ||||
|       takes a positive integer and an optional suffix indicating the unit of | ||||
|       time. Possible units are: | ||||
|       <ul> | ||||
|         <li><code>ns</code> (nanoseconds)</li> | ||||
|         <li><code>us</code> (microseconds)</li> | ||||
|         <li><code>ms</code> (milliseconds)</li> | ||||
|         <li><code>s</code> (seconds)</li> | ||||
|         <li><code>m</code> (minutes)</li> | ||||
|         <li><code>h</code> (hours)</li> | ||||
|       </ul> | ||||
|     If you omit the suffix, the system interprets the value as nanoseconds. | ||||
|     The default value is 10 seconds if this field is omitted. | ||||
|     </td> | ||||
|   </tr> | ||||
|   <tr> | ||||
|     <td> | ||||
|       <code>threshold</code> | ||||
|     </td> | ||||
|     <td> | ||||
|       no | ||||
|     </td> | ||||
|     <td> | ||||
|       An integer specifying the number of times the check must fail before the | ||||
|       check triggers an unhealthy state. If this field is not specified, a | ||||
|       single failure will trigger an unhealthy state. | ||||
|     </td> | ||||
|   </tr> | ||||
| </table> | ||||
| 
 | ||||
| ### tcp | ||||
| 
 | ||||
| tcp is a list of TCP addresses to be periodically checked with connection | ||||
| attempts. The addresses must include port numbers. If a connection attempt | ||||
| fails, the health check will fail. | ||||
| 
 | ||||
| <table> | ||||
|   <tr> | ||||
|     <th>Parameter</th> | ||||
|     <th>Required</th> | ||||
|     <th>Description</th> | ||||
|   </tr> | ||||
|   <tr> | ||||
|     <td> | ||||
|       <code>addr</code> | ||||
|     </td> | ||||
|     <td> | ||||
|       yes | ||||
|     </td> | ||||
|     <td> | ||||
| The TCP address to connect to, including a port number. | ||||
| </td> | ||||
|   </tr> | ||||
|   <tr> | ||||
|     <td> | ||||
|       <code>timeout</code> | ||||
|     </td> | ||||
|     <td> | ||||
|       no | ||||
|     </td> | ||||
|     <td> | ||||
|       The length of time to wait before timing out the TCP connection. This | ||||
|       field takes a positive integer and an optional suffix indicating the unit | ||||
|       of time. Possible units are: | ||||
|       <ul> | ||||
|         <li><code>ns</code> (nanoseconds)</li> | ||||
|         <li><code>us</code> (microseconds)</li> | ||||
|         <li><code>ms</code> (milliseconds)</li> | ||||
|         <li><code>s</code> (seconds)</li> | ||||
|         <li><code>m</code> (minutes)</li> | ||||
|         <li><code>h</code> (hours)</li> | ||||
|       </ul> | ||||
|     If you omit the suffix, the system interprets the value as nanoseconds. | ||||
|     </td> | ||||
|   </tr> | ||||
|   <tr> | ||||
|     <td> | ||||
|       <code>interval</code> | ||||
|     </td> | ||||
|     <td> | ||||
|       no | ||||
|     </td> | ||||
|     <td> | ||||
|       The length of time to wait between repetitions of the check. This field | ||||
|       takes a positive integer and an optional suffix indicating the unit of | ||||
|       time. Possible units are: | ||||
|       <ul> | ||||
|         <li><code>ns</code> (nanoseconds)</li> | ||||
|         <li><code>us</code> (microseconds)</li> | ||||
|         <li><code>ms</code> (milliseconds)</li> | ||||
|         <li><code>s</code> (seconds)</li> | ||||
|         <li><code>m</code> (minutes)</li> | ||||
|         <li><code>h</code> (hours)</li> | ||||
|       </ul> | ||||
|     If you omit the suffix, the system interprets the value as nanoseconds. | ||||
|     The default value is 10 seconds if this field is omitted. | ||||
|     </td> | ||||
|   </tr> | ||||
|   <tr> | ||||
|     <td> | ||||
|       <code>threshold</code> | ||||
|     </td> | ||||
|     <td> | ||||
|       no | ||||
|     </td> | ||||
|     <td> | ||||
|       An integer specifying the number of times the check must fail before the | ||||
|       check triggers an unhealthy state. If this field is not specified, a | ||||
|       single failure will trigger an unhealthy state. | ||||
|     </td> | ||||
|   </tr> | ||||
| </table> | ||||
| 
 | ||||
| ## Example: Development configuration | ||||
| 
 | ||||
|  |  | |||
|  | @ -2,13 +2,17 @@ package checks | |||
| 
 | ||||
| import ( | ||||
| 	"errors" | ||||
| 	"github.com/docker/distribution/health" | ||||
| 	"net" | ||||
| 	"net/http" | ||||
| 	"os" | ||||
| 	"strconv" | ||||
| 	"time" | ||||
| 
 | ||||
| 	"github.com/docker/distribution/health" | ||||
| ) | ||||
| 
 | ||||
| // FileChecker checks the existence of a file and returns and error
 | ||||
| // if the file exists, taking the application out of rotation
 | ||||
| // FileChecker checks the existence of a file and returns an error
 | ||||
| // if the file exists.
 | ||||
| func FileChecker(f string) health.Checker { | ||||
| 	return health.CheckFunc(func() error { | ||||
| 		if _, err := os.Stat(f); err == nil { | ||||
|  | @ -18,18 +22,41 @@ func FileChecker(f string) health.Checker { | |||
| 	}) | ||||
| } | ||||
| 
 | ||||
| // HTTPChecker does a HEAD request and verifies if the HTTP status
 | ||||
| // code return is a 200, taking the application out of rotation if
 | ||||
| // otherwise
 | ||||
| func HTTPChecker(r string) health.Checker { | ||||
| // HTTPChecker does a HEAD request and verifies that the HTTP status code
 | ||||
| // returned matches statusCode.
 | ||||
| func HTTPChecker(r string, statusCode int, timeout time.Duration, headers http.Header) health.Checker { | ||||
| 	return health.CheckFunc(func() error { | ||||
| 		response, err := http.Head(r) | ||||
| 		client := http.Client{ | ||||
| 			Timeout: timeout, | ||||
| 		} | ||||
| 		req, err := http.NewRequest("HEAD", r, nil) | ||||
| 		if err != nil { | ||||
| 			return errors.New("error creating request: " + r) | ||||
| 		} | ||||
| 		for headerName, headerValues := range headers { | ||||
| 			for _, headerValue := range headerValues { | ||||
| 				req.Header.Add(headerName, headerValue) | ||||
| 			} | ||||
| 		} | ||||
| 		response, err := client.Do(req) | ||||
| 		if err != nil { | ||||
| 			return errors.New("error while checking: " + r) | ||||
| 		} | ||||
| 		if response.StatusCode != http.StatusOK { | ||||
| 			return errors.New("downstream service returned unexpected status: " + string(response.StatusCode)) | ||||
| 		if response.StatusCode != statusCode { | ||||
| 			return errors.New("downstream service returned unexpected status: " + strconv.Itoa(response.StatusCode)) | ||||
| 		} | ||||
| 		return nil | ||||
| 	}) | ||||
| } | ||||
| 
 | ||||
| // TCPChecker attempts to open a TCP connection.
 | ||||
| func TCPChecker(addr string, timeout time.Duration) health.Checker { | ||||
| 	return health.CheckFunc(func() error { | ||||
| 		conn, err := net.DialTimeout("tcp", addr, timeout) | ||||
| 		if err != nil { | ||||
| 			return errors.New("connection to " + addr + " failed") | ||||
| 		} | ||||
| 		conn.Close() | ||||
| 		return nil | ||||
| 	}) | ||||
| } | ||||
|  |  | |||
|  | @ -15,11 +15,11 @@ func TestFileChecker(t *testing.T) { | |||
| } | ||||
| 
 | ||||
| func TestHTTPChecker(t *testing.T) { | ||||
| 	if err := HTTPChecker("https://www.google.cybertron").Check(); err == nil { | ||||
| 	if err := HTTPChecker("https://www.google.cybertron", 200, 0, nil).Check(); err == nil { | ||||
| 		t.Errorf("Google on Cybertron was expected as not exists") | ||||
| 	} | ||||
| 
 | ||||
| 	if err := HTTPChecker("https://www.google.pt").Check(); err != nil { | ||||
| 	if err := HTTPChecker("https://www.google.pt", 200, 0, nil).Check(); err != nil { | ||||
| 		t.Errorf("Google at Portugal was expected as exists, error:%v", err) | ||||
| 	} | ||||
| } | ||||
|  |  | |||
|  | @ -39,7 +39,7 @@ | |||
| //
 | ||||
| // The recommended way of registering checks is using a periodic Check.
 | ||||
| // PeriodicChecks run on a certain schedule and asynchronously update the
 | ||||
| // status of the check. This allows "CheckStatus()" to return without blocking
 | ||||
| // status of the check. This allows CheckStatus to return without blocking
 | ||||
| // on an expensive check.
 | ||||
| //
 | ||||
| // A trivial example of a check that runs every 5 seconds and shuts down our
 | ||||
|  |  | |||
|  | @ -11,10 +11,26 @@ import ( | |||
| 	"github.com/docker/distribution/registry/api/errcode" | ||||
| ) | ||||
| 
 | ||||
| var ( | ||||
| 	mutex            sync.RWMutex | ||||
| 	registeredChecks = make(map[string]Checker) | ||||
| ) | ||||
| // A Registry is a collection of checks. Most applications will use the global
 | ||||
| // registry defined in DefaultRegistry. However, unit tests may need to create
 | ||||
| // separate registries to isolate themselves from other tests.
 | ||||
| type Registry struct { | ||||
| 	mu               sync.RWMutex | ||||
| 	registeredChecks map[string]Checker | ||||
| } | ||||
| 
 | ||||
| // NewRegistry creates a new registry. This isn't necessary for normal use of
 | ||||
| // the package, but may be useful for unit tests so individual tests have their
 | ||||
| // own set of checks.
 | ||||
| func NewRegistry() *Registry { | ||||
| 	return &Registry{ | ||||
| 		registeredChecks: make(map[string]Checker), | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| // DefaultRegistry is the default registry where checks are registered. It is
 | ||||
| // the registry used by the HTTP handler.
 | ||||
| var DefaultRegistry *Registry | ||||
| 
 | ||||
| // Checker is the interface for a Health Checker
 | ||||
| type Checker interface { | ||||
|  | @ -144,11 +160,11 @@ func PeriodicThresholdChecker(check Checker, period time.Duration, threshold int | |||
| } | ||||
| 
 | ||||
| // CheckStatus returns a map with all the current health check errors
 | ||||
| func CheckStatus() map[string]string { // TODO(stevvooe) this needs a proper type
 | ||||
| 	mutex.RLock() | ||||
| 	defer mutex.RUnlock() | ||||
| func (registry *Registry) CheckStatus() map[string]string { // TODO(stevvooe) this needs a proper type
 | ||||
| 	registry.mu.RLock() | ||||
| 	defer registry.mu.RUnlock() | ||||
| 	statusKeys := make(map[string]string) | ||||
| 	for k, v := range registeredChecks { | ||||
| 	for k, v := range registry.registeredChecks { | ||||
| 		err := v.Check() | ||||
| 		if err != nil { | ||||
| 			statusKeys[k] = err.Error() | ||||
|  | @ -158,34 +174,66 @@ func CheckStatus() map[string]string { // TODO(stevvooe) this needs a proper typ | |||
| 	return statusKeys | ||||
| } | ||||
| 
 | ||||
| // Register associates the checker with the provided name. We allow
 | ||||
| // overwrites to a specific check status.
 | ||||
| func Register(name string, check Checker) { | ||||
| 	mutex.Lock() | ||||
| 	defer mutex.Unlock() | ||||
| 	_, ok := registeredChecks[name] | ||||
| // CheckStatus returns a map with all the current health check errors from the
 | ||||
| // default registry.
 | ||||
| func CheckStatus() map[string]string { | ||||
| 	return DefaultRegistry.CheckStatus() | ||||
| } | ||||
| 
 | ||||
| // Register associates the checker with the provided name.
 | ||||
| func (registry *Registry) Register(name string, check Checker) { | ||||
| 	if registry == nil { | ||||
| 		registry = DefaultRegistry | ||||
| 	} | ||||
| 	registry.mu.Lock() | ||||
| 	defer registry.mu.Unlock() | ||||
| 	_, ok := registry.registeredChecks[name] | ||||
| 	if ok { | ||||
| 		panic("Check already exists: " + name) | ||||
| 	} | ||||
| 	registeredChecks[name] = check | ||||
| 	registry.registeredChecks[name] = check | ||||
| } | ||||
| 
 | ||||
| // RegisterFunc allows the convenience of registering a checker directly
 | ||||
| // from an arbitrary func() error
 | ||||
| // Register associates the checker with the provided name in the default
 | ||||
| // registry.
 | ||||
| func Register(name string, check Checker) { | ||||
| 	DefaultRegistry.Register(name, check) | ||||
| } | ||||
| 
 | ||||
| // RegisterFunc allows the convenience of registering a checker directly from
 | ||||
| // an arbitrary func() error.
 | ||||
| func (registry *Registry) RegisterFunc(name string, check func() error) { | ||||
| 	registry.Register(name, CheckFunc(check)) | ||||
| } | ||||
| 
 | ||||
| // RegisterFunc allows the convenience of registering a checker in the default
 | ||||
| // registry directly from an arbitrary func() error.
 | ||||
| func RegisterFunc(name string, check func() error) { | ||||
| 	Register(name, CheckFunc(check)) | ||||
| 	DefaultRegistry.RegisterFunc(name, check) | ||||
| } | ||||
| 
 | ||||
| // RegisterPeriodicFunc allows the convenience of registering a PeriodicChecker
 | ||||
| // from an arbitrary func() error
 | ||||
| // from an arbitrary func() error.
 | ||||
| func (registry *Registry) RegisterPeriodicFunc(name string, period time.Duration, check CheckFunc) { | ||||
| 	registry.Register(name, PeriodicChecker(CheckFunc(check), period)) | ||||
| } | ||||
| 
 | ||||
| // RegisterPeriodicFunc allows the convenience of registering a PeriodicChecker
 | ||||
| // in the default registry from an arbitrary func() error.
 | ||||
| func RegisterPeriodicFunc(name string, period time.Duration, check CheckFunc) { | ||||
| 	Register(name, PeriodicChecker(CheckFunc(check), period)) | ||||
| 	DefaultRegistry.RegisterPeriodicFunc(name, period, check) | ||||
| } | ||||
| 
 | ||||
| // RegisterPeriodicThresholdFunc allows the convenience of registering a
 | ||||
| // PeriodicChecker from an arbitrary func() error
 | ||||
| // PeriodicChecker from an arbitrary func() error.
 | ||||
| func (registry *Registry) RegisterPeriodicThresholdFunc(name string, period time.Duration, threshold int, check CheckFunc) { | ||||
| 	registry.Register(name, PeriodicThresholdChecker(CheckFunc(check), period, threshold)) | ||||
| } | ||||
| 
 | ||||
| // RegisterPeriodicThresholdFunc allows the convenience of registering a
 | ||||
| // PeriodicChecker in the default registry from an arbitrary func() error.
 | ||||
| func RegisterPeriodicThresholdFunc(name string, period time.Duration, threshold int, check CheckFunc) { | ||||
| 	Register(name, PeriodicThresholdChecker(CheckFunc(check), period, threshold)) | ||||
| 	DefaultRegistry.RegisterPeriodicThresholdFunc(name, period, threshold, check) | ||||
| } | ||||
| 
 | ||||
| // StatusHandler returns a JSON blob with all the currently registered Health Checks
 | ||||
|  | @ -251,7 +299,8 @@ func statusResponse(w http.ResponseWriter, r *http.Request, status int, checks m | |||
| 	} | ||||
| } | ||||
| 
 | ||||
| // Registers global /debug/health api endpoint
 | ||||
| // Registers global /debug/health api endpoint, creates default registry
 | ||||
| func init() { | ||||
| 	DefaultRegistry = NewRegistry() | ||||
| 	http.HandleFunc("/debug/health", StatusHandler) | ||||
| } | ||||
|  |  | |||
|  | @ -51,7 +51,7 @@ func TestReturns503IfThereAreErrorChecks(t *testing.T) { | |||
| // the web application when things aren't so healthy.
 | ||||
| func TestHealthHandler(t *testing.T) { | ||||
| 	// clear out existing checks.
 | ||||
| 	registeredChecks = make(map[string]Checker) | ||||
| 	DefaultRegistry = NewRegistry() | ||||
| 
 | ||||
| 	// protect an http server
 | ||||
| 	handler := http.Handler(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { | ||||
|  |  | |||
|  | @ -5,8 +5,8 @@ import ( | |||
| 	"net/http/httptest" | ||||
| 	"testing" | ||||
| 
 | ||||
| 	"github.com/docker/distribution/context" | ||||
| 	"github.com/docker/distribution/registry/auth" | ||||
| 	"golang.org/x/net/context" | ||||
| ) | ||||
| 
 | ||||
| func TestSillyAccessController(t *testing.T) { | ||||
|  |  | |||
|  | @ -15,9 +15,9 @@ import ( | |||
| 	"testing" | ||||
| 	"time" | ||||
| 
 | ||||
| 	"github.com/docker/distribution/context" | ||||
| 	"github.com/docker/distribution/registry/auth" | ||||
| 	"github.com/docker/libtrust" | ||||
| 	"golang.org/x/net/context" | ||||
| ) | ||||
| 
 | ||||
| func makeRootKeys(numKeys int) ([]libtrust.PrivateKey, error) { | ||||
|  |  | |||
|  | @ -19,6 +19,7 @@ import ( | |||
| 	"testing" | ||||
| 
 | ||||
| 	"github.com/docker/distribution/configuration" | ||||
| 	"github.com/docker/distribution/context" | ||||
| 	"github.com/docker/distribution/digest" | ||||
| 	"github.com/docker/distribution/manifest" | ||||
| 	"github.com/docker/distribution/registry/api/errcode" | ||||
|  | @ -27,7 +28,6 @@ import ( | |||
| 	"github.com/docker/distribution/testutil" | ||||
| 	"github.com/docker/libtrust" | ||||
| 	"github.com/gorilla/handlers" | ||||
| 	"golang.org/x/net/context" | ||||
| ) | ||||
| 
 | ||||
| var headerConfig = http.Header{ | ||||
|  |  | |||
|  | @ -15,6 +15,7 @@ import ( | |||
| 	"github.com/docker/distribution/configuration" | ||||
| 	ctxu "github.com/docker/distribution/context" | ||||
| 	"github.com/docker/distribution/health" | ||||
| 	"github.com/docker/distribution/health/checks" | ||||
| 	"github.com/docker/distribution/notifications" | ||||
| 	"github.com/docker/distribution/registry/api/errcode" | ||||
| 	"github.com/docker/distribution/registry/api/v2" | ||||
|  | @ -37,6 +38,9 @@ import ( | |||
| // was specified.
 | ||||
| const randomSecretSize = 32 | ||||
| 
 | ||||
| // defaultCheckInterval is the default time in between health checks
 | ||||
| const defaultCheckInterval = 10 * time.Second | ||||
| 
 | ||||
| // App is a global registry application object. Shared resources can be placed
 | ||||
| // on this object that will be accessible from all requests. Any writable
 | ||||
| // fields should be protected.
 | ||||
|  | @ -230,11 +234,80 @@ func NewApp(ctx context.Context, configuration configuration.Configuration) *App | |||
| // process. Because the configuration and app are tightly coupled,
 | ||||
| // implementing this properly will require a refactor. This method may panic
 | ||||
| // if called twice in the same process.
 | ||||
| func (app *App) RegisterHealthChecks() { | ||||
| 	health.RegisterPeriodicThresholdFunc("storagedriver_"+app.Config.Storage.Type(), 10*time.Second, 3, func() error { | ||||
| 		_, err := app.driver.List(app, "/") // "/" should always exist
 | ||||
| 		return err                          // any error will be treated as failure
 | ||||
| 	}) | ||||
| func (app *App) RegisterHealthChecks(healthRegistries ...*health.Registry) { | ||||
| 	if len(healthRegistries) > 1 { | ||||
| 		panic("RegisterHealthChecks called with more than one registry") | ||||
| 	} | ||||
| 	healthRegistry := health.DefaultRegistry | ||||
| 	if len(healthRegistries) == 1 { | ||||
| 		healthRegistry = healthRegistries[0] | ||||
| 	} | ||||
| 
 | ||||
| 	if app.Config.Health.StorageDriver.Enabled { | ||||
| 		interval := app.Config.Health.StorageDriver.Interval | ||||
| 		if interval == 0 { | ||||
| 			interval = defaultCheckInterval | ||||
| 		} | ||||
| 
 | ||||
| 		storageDriverCheck := func() error { | ||||
| 			_, err := app.driver.List(app, "/") // "/" should always exist
 | ||||
| 			return err                          // any error will be treated as failure
 | ||||
| 		} | ||||
| 
 | ||||
| 		if app.Config.Health.StorageDriver.Threshold != 0 { | ||||
| 			healthRegistry.RegisterPeriodicThresholdFunc("storagedriver_"+app.Config.Storage.Type(), interval, app.Config.Health.StorageDriver.Threshold, storageDriverCheck) | ||||
| 		} else { | ||||
| 			healthRegistry.RegisterPeriodicFunc("storagedriver_"+app.Config.Storage.Type(), interval, storageDriverCheck) | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	for _, fileChecker := range app.Config.Health.FileCheckers { | ||||
| 		interval := fileChecker.Interval | ||||
| 		if interval == 0 { | ||||
| 			interval = defaultCheckInterval | ||||
| 		} | ||||
| 		ctxu.GetLogger(app).Infof("configuring file health check path=%s, interval=%d", fileChecker.File, interval/time.Second) | ||||
| 		healthRegistry.Register(fileChecker.File, health.PeriodicChecker(checks.FileChecker(fileChecker.File), interval)) | ||||
| 	} | ||||
| 
 | ||||
| 	for _, httpChecker := range app.Config.Health.HTTPCheckers { | ||||
| 		interval := httpChecker.Interval | ||||
| 		if interval == 0 { | ||||
| 			interval = defaultCheckInterval | ||||
| 		} | ||||
| 
 | ||||
| 		statusCode := httpChecker.StatusCode | ||||
| 		if statusCode == 0 { | ||||
| 			statusCode = 200 | ||||
| 		} | ||||
| 
 | ||||
| 		checker := checks.HTTPChecker(httpChecker.URI, statusCode, httpChecker.Timeout, httpChecker.Headers) | ||||
| 
 | ||||
| 		if httpChecker.Threshold != 0 { | ||||
| 			ctxu.GetLogger(app).Infof("configuring HTTP health check uri=%s, interval=%d, threshold=%d", httpChecker.URI, interval/time.Second, httpChecker.Threshold) | ||||
| 			healthRegistry.Register(httpChecker.URI, health.PeriodicThresholdChecker(checker, interval, httpChecker.Threshold)) | ||||
| 		} else { | ||||
| 			ctxu.GetLogger(app).Infof("configuring HTTP health check uri=%s, interval=%d", httpChecker.URI, interval/time.Second) | ||||
| 			healthRegistry.Register(httpChecker.URI, health.PeriodicChecker(checker, interval)) | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	for _, tcpChecker := range app.Config.Health.TCPCheckers { | ||||
| 		interval := tcpChecker.Interval | ||||
| 		if interval == 0 { | ||||
| 			interval = defaultCheckInterval | ||||
| 		} | ||||
| 
 | ||||
| 		checker := checks.TCPChecker(tcpChecker.Addr, tcpChecker.Timeout) | ||||
| 
 | ||||
| 		if tcpChecker.Threshold != 0 { | ||||
| 			ctxu.GetLogger(app).Infof("configuring TCP health check addr=%s, interval=%d, threshold=%d", tcpChecker.Addr, interval/time.Second, tcpChecker.Threshold) | ||||
| 			healthRegistry.Register(tcpChecker.Addr, health.PeriodicThresholdChecker(checker, interval, tcpChecker.Threshold)) | ||||
| 		} else { | ||||
| 			ctxu.GetLogger(app).Infof("configuring TCP health check addr=%s, interval=%d", tcpChecker.Addr, interval/time.Second) | ||||
| 			healthRegistry.Register(tcpChecker.Addr, health.PeriodicChecker(checker, interval)) | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| // register a handler with the application, by route name. The handler will be
 | ||||
|  |  | |||
|  | @ -9,6 +9,7 @@ import ( | |||
| 	"testing" | ||||
| 
 | ||||
| 	"github.com/docker/distribution/configuration" | ||||
| 	"github.com/docker/distribution/context" | ||||
| 	"github.com/docker/distribution/registry/api/errcode" | ||||
| 	"github.com/docker/distribution/registry/api/v2" | ||||
| 	"github.com/docker/distribution/registry/auth" | ||||
|  | @ -16,7 +17,6 @@ import ( | |||
| 	"github.com/docker/distribution/registry/storage" | ||||
| 	memorycache "github.com/docker/distribution/registry/storage/cache/memory" | ||||
| 	"github.com/docker/distribution/registry/storage/driver/inmemory" | ||||
| 	"golang.org/x/net/context" | ||||
| ) | ||||
| 
 | ||||
| // TestAppDispatcher builds an application with a test dispatcher and ensures
 | ||||
|  |  | |||
|  | @ -0,0 +1,201 @@ | |||
| package handlers | ||||
| 
 | ||||
| import ( | ||||
| 	"io/ioutil" | ||||
| 	"net" | ||||
| 	"net/http" | ||||
| 	"net/http/httptest" | ||||
| 	"os" | ||||
| 	"testing" | ||||
| 	"time" | ||||
| 
 | ||||
| 	"github.com/docker/distribution/configuration" | ||||
| 	"github.com/docker/distribution/context" | ||||
| 	"github.com/docker/distribution/health" | ||||
| ) | ||||
| 
 | ||||
| func TestFileHealthCheck(t *testing.T) { | ||||
| 	interval := time.Second | ||||
| 
 | ||||
| 	tmpfile, err := ioutil.TempFile(os.TempDir(), "healthcheck") | ||||
| 	if err != nil { | ||||
| 		t.Fatalf("could not create temporary file: %v", err) | ||||
| 	} | ||||
| 	defer tmpfile.Close() | ||||
| 
 | ||||
| 	config := configuration.Configuration{ | ||||
| 		Storage: configuration.Storage{ | ||||
| 			"inmemory": configuration.Parameters{}, | ||||
| 		}, | ||||
| 		Health: configuration.Health{ | ||||
| 			FileCheckers: []configuration.FileChecker{ | ||||
| 				{ | ||||
| 					Interval: interval, | ||||
| 					File:     tmpfile.Name(), | ||||
| 				}, | ||||
| 			}, | ||||
| 		}, | ||||
| 	} | ||||
| 
 | ||||
| 	ctx := context.Background() | ||||
| 
 | ||||
| 	app := NewApp(ctx, config) | ||||
| 	healthRegistry := health.NewRegistry() | ||||
| 	app.RegisterHealthChecks(healthRegistry) | ||||
| 
 | ||||
| 	// Wait for health check to happen
 | ||||
| 	<-time.After(2 * interval) | ||||
| 
 | ||||
| 	status := healthRegistry.CheckStatus() | ||||
| 	if len(status) != 1 { | ||||
| 		t.Fatal("expected 1 item in health check results") | ||||
| 	} | ||||
| 	if status[tmpfile.Name()] != "file exists" { | ||||
| 		t.Fatal(`did not get "file exists" result for health check`) | ||||
| 	} | ||||
| 
 | ||||
| 	os.Remove(tmpfile.Name()) | ||||
| 
 | ||||
| 	<-time.After(2 * interval) | ||||
| 	if len(healthRegistry.CheckStatus()) != 0 { | ||||
| 		t.Fatal("expected 0 items in health check results") | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| func TestTCPHealthCheck(t *testing.T) { | ||||
| 	interval := time.Second | ||||
| 
 | ||||
| 	ln, err := net.Listen("tcp", "127.0.0.1:0") | ||||
| 	if err != nil { | ||||
| 		t.Fatalf("could not create listener: %v", err) | ||||
| 	} | ||||
| 	addrStr := ln.Addr().String() | ||||
| 
 | ||||
| 	// Start accepting
 | ||||
| 	go func() { | ||||
| 		for { | ||||
| 			conn, err := ln.Accept() | ||||
| 			if err != nil { | ||||
| 				// listener was closed
 | ||||
| 				return | ||||
| 			} | ||||
| 			defer conn.Close() | ||||
| 		} | ||||
| 	}() | ||||
| 
 | ||||
| 	config := configuration.Configuration{ | ||||
| 		Storage: configuration.Storage{ | ||||
| 			"inmemory": configuration.Parameters{}, | ||||
| 		}, | ||||
| 		Health: configuration.Health{ | ||||
| 			TCPCheckers: []configuration.TCPChecker{ | ||||
| 				{ | ||||
| 					Interval: interval, | ||||
| 					Addr:     addrStr, | ||||
| 					Timeout:  500 * time.Millisecond, | ||||
| 				}, | ||||
| 			}, | ||||
| 		}, | ||||
| 	} | ||||
| 
 | ||||
| 	ctx := context.Background() | ||||
| 
 | ||||
| 	app := NewApp(ctx, config) | ||||
| 	healthRegistry := health.NewRegistry() | ||||
| 	app.RegisterHealthChecks(healthRegistry) | ||||
| 
 | ||||
| 	// Wait for health check to happen
 | ||||
| 	<-time.After(2 * interval) | ||||
| 
 | ||||
| 	if len(healthRegistry.CheckStatus()) != 0 { | ||||
| 		t.Fatal("expected 0 items in health check results") | ||||
| 	} | ||||
| 
 | ||||
| 	ln.Close() | ||||
| 	<-time.After(2 * interval) | ||||
| 
 | ||||
| 	// Health check should now fail
 | ||||
| 	status := healthRegistry.CheckStatus() | ||||
| 	if len(status) != 1 { | ||||
| 		t.Fatal("expected 1 item in health check results") | ||||
| 	} | ||||
| 	if status[addrStr] != "connection to "+addrStr+" failed" { | ||||
| 		t.Fatal(`did not get "connection failed" result for health check`) | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| func TestHTTPHealthCheck(t *testing.T) { | ||||
| 	interval := time.Second | ||||
| 	threshold := 3 | ||||
| 
 | ||||
| 	stopFailing := make(chan struct{}) | ||||
| 
 | ||||
| 	checkedServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { | ||||
| 		if r.Method != "HEAD" { | ||||
| 			t.Fatalf("expected HEAD request, got %s", r.Method) | ||||
| 		} | ||||
| 		select { | ||||
| 		case <-stopFailing: | ||||
| 			w.WriteHeader(http.StatusOK) | ||||
| 		default: | ||||
| 			w.WriteHeader(http.StatusInternalServerError) | ||||
| 		} | ||||
| 	})) | ||||
| 
 | ||||
| 	config := configuration.Configuration{ | ||||
| 		Storage: configuration.Storage{ | ||||
| 			"inmemory": configuration.Parameters{}, | ||||
| 		}, | ||||
| 		Health: configuration.Health{ | ||||
| 			HTTPCheckers: []configuration.HTTPChecker{ | ||||
| 				{ | ||||
| 					Interval:  interval, | ||||
| 					URI:       checkedServer.URL, | ||||
| 					Threshold: threshold, | ||||
| 				}, | ||||
| 			}, | ||||
| 		}, | ||||
| 	} | ||||
| 
 | ||||
| 	ctx := context.Background() | ||||
| 
 | ||||
| 	app := NewApp(ctx, config) | ||||
| 	healthRegistry := health.NewRegistry() | ||||
| 	app.RegisterHealthChecks(healthRegistry) | ||||
| 
 | ||||
| 	for i := 0; ; i++ { | ||||
| 		<-time.After(interval) | ||||
| 
 | ||||
| 		status := healthRegistry.CheckStatus() | ||||
| 
 | ||||
| 		if i < threshold-1 { | ||||
| 			// definitely shouldn't have hit the threshold yet
 | ||||
| 			if len(status) != 0 { | ||||
| 				t.Fatal("expected 1 item in health check results") | ||||
| 			} | ||||
| 			continue | ||||
| 		} | ||||
| 		if i < threshold+1 { | ||||
| 			// right on the threshold - don't expect a failure yet
 | ||||
| 			continue | ||||
| 		} | ||||
| 
 | ||||
| 		if len(status) != 1 { | ||||
| 			t.Fatal("expected 1 item in health check results") | ||||
| 		} | ||||
| 		if status[checkedServer.URL] != "downstream service returned unexpected status: 500" { | ||||
| 			t.Fatal("did not get expected result for health check") | ||||
| 		} | ||||
| 
 | ||||
| 		break | ||||
| 	} | ||||
| 
 | ||||
| 	// Signal HTTP handler to start returning 200
 | ||||
| 	close(stopFailing) | ||||
| 
 | ||||
| 	<-time.After(2 * interval) | ||||
| 
 | ||||
| 	if len(healthRegistry.CheckStatus()) != 0 { | ||||
| 		t.Fatal("expected 0 items in health check results") | ||||
| 	} | ||||
| } | ||||
		Loading…
	
		Reference in New Issue