Improve how reference regexps are built
Previous implementation was doing a lot of string -> regexp -> string conversions Signed-off-by: Paul Cacheux <paul.cacheux@datadoghq.com>master
							parent
							
								
									bb1fb61445
								
							
						
					
					
						commit
						1c89ce5fc1
					
				|  | @ -3,145 +3,157 @@ package reference | |||
| import "regexp" | ||||
| 
 | ||||
| var ( | ||||
| 	// alphaNumericRegexp defines the alpha numeric atom, typically a
 | ||||
| 	// alphaNumeric defines the alpha numeric atom, typically a
 | ||||
| 	// component of names. This only allows lower case characters and digits.
 | ||||
| 	alphaNumericRegexp = match(`[a-z0-9]+`) | ||||
| 	alphaNumeric = `[a-z0-9]+` | ||||
| 
 | ||||
| 	// separatorRegexp defines the separators allowed to be embedded in name
 | ||||
| 	// separator defines the separators allowed to be embedded in name
 | ||||
| 	// components. This allow one period, one or two underscore and multiple
 | ||||
| 	// dashes. Repeated dashes and underscores are intentionally treated
 | ||||
| 	// differently. In order to support valid hostnames as name components,
 | ||||
| 	// supporting repeated dash was added. Additionally double underscore is
 | ||||
| 	// now allowed as a separator to loosen the restriction for previously
 | ||||
| 	// supported names.
 | ||||
| 	separatorRegexp = match(`(?:[._]|__|[-]*)`) | ||||
| 	separator = `(?:[._]|__|[-]*)` | ||||
| 
 | ||||
| 	// nameComponentRegexp restricts registry path component names to start
 | ||||
| 	// nameComponent restricts registry path component names to start
 | ||||
| 	// with at least one letter or number, with following parts able to be
 | ||||
| 	// separated by one period, one or two underscore and multiple dashes.
 | ||||
| 	nameComponentRegexp = expression( | ||||
| 		alphaNumericRegexp, | ||||
| 		optional(repeated(separatorRegexp, alphaNumericRegexp))) | ||||
| 	nameComponent = expression( | ||||
| 		alphaNumeric, | ||||
| 		optional(repeated(separator, alphaNumeric))) | ||||
| 
 | ||||
| 	// domainComponentRegexp restricts the registry domain component of a
 | ||||
| 	// domainComponent restricts the registry domain component of a
 | ||||
| 	// repository name to start with a component as defined by DomainRegexp
 | ||||
| 	// and followed by an optional port.
 | ||||
| 	domainComponentRegexp = match(`(?:[a-zA-Z0-9]|[a-zA-Z0-9][a-zA-Z0-9-]*[a-zA-Z0-9])`) | ||||
| 	domainComponent = `(?:[a-zA-Z0-9]|[a-zA-Z0-9][a-zA-Z0-9-]*[a-zA-Z0-9])` | ||||
| 
 | ||||
| 	domain = expression( | ||||
| 		domainComponent, | ||||
| 		optional(repeated(literal(`.`), domainComponent)), | ||||
| 		optional(literal(`:`), `[0-9]+`)) | ||||
| 	// DomainRegexp defines the structure of potential domain components
 | ||||
| 	// that may be part of image names. This is purposely a subset of what is
 | ||||
| 	// allowed by DNS to ensure backwards compatibility with Docker image
 | ||||
| 	// names.
 | ||||
| 	DomainRegexp = expression( | ||||
| 		domainComponentRegexp, | ||||
| 		optional(repeated(literal(`.`), domainComponentRegexp)), | ||||
| 		optional(literal(`:`), match(`[0-9]+`))) | ||||
| 	DomainRegexp = re(domain) | ||||
| 
 | ||||
| 	tag = `[\w][\w.-]{0,127}` | ||||
| 	// TagRegexp matches valid tag names. From docker/docker:graph/tags.go.
 | ||||
| 	TagRegexp = match(`[\w][\w.-]{0,127}`) | ||||
| 	TagRegexp = re(tag) | ||||
| 
 | ||||
| 	anchoredTag = anchored(tag) | ||||
| 	// anchoredTagRegexp matches valid tag names, anchored at the start and
 | ||||
| 	// end of the matched string.
 | ||||
| 	anchoredTagRegexp = anchored(TagRegexp) | ||||
| 	anchoredTagRegexp = re(anchoredTag) | ||||
| 
 | ||||
| 	digestPat = `[A-Za-z][A-Za-z0-9]*(?:[-_+.][A-Za-z][A-Za-z0-9]*)*[:][[:xdigit:]]{32,}` | ||||
| 	// DigestRegexp matches valid digests.
 | ||||
| 	DigestRegexp = match(`[A-Za-z][A-Za-z0-9]*(?:[-_+.][A-Za-z][A-Za-z0-9]*)*[:][[:xdigit:]]{32,}`) | ||||
| 	DigestRegexp = re(digestPat) | ||||
| 
 | ||||
| 	anchoredDigest = anchored(digestPat) | ||||
| 	// anchoredDigestRegexp matches valid digests, anchored at the start and
 | ||||
| 	// end of the matched string.
 | ||||
| 	anchoredDigestRegexp = anchored(DigestRegexp) | ||||
| 	anchoredDigestRegexp = re(anchoredDigest) | ||||
| 
 | ||||
| 	namePat = expression( | ||||
| 		optional(domain, literal(`/`)), | ||||
| 		nameComponent, | ||||
| 		optional(repeated(literal(`/`), nameComponent))) | ||||
| 	// NameRegexp is the format for the name component of references. The
 | ||||
| 	// regexp has capturing groups for the domain and name part omitting
 | ||||
| 	// the separating forward slash from either.
 | ||||
| 	NameRegexp = expression( | ||||
| 		optional(DomainRegexp, literal(`/`)), | ||||
| 		nameComponentRegexp, | ||||
| 		optional(repeated(literal(`/`), nameComponentRegexp))) | ||||
| 	NameRegexp = re(namePat) | ||||
| 
 | ||||
| 	anchoredName = anchored( | ||||
| 		optional(capture(domain), literal(`/`)), | ||||
| 		capture(nameComponent, | ||||
| 			optional(repeated(literal(`/`), nameComponent)))) | ||||
| 	// anchoredNameRegexp is used to parse a name value, capturing the
 | ||||
| 	// domain and trailing components.
 | ||||
| 	anchoredNameRegexp = anchored( | ||||
| 		optional(capture(DomainRegexp), literal(`/`)), | ||||
| 		capture(nameComponentRegexp, | ||||
| 			optional(repeated(literal(`/`), nameComponentRegexp)))) | ||||
| 	anchoredNameRegexp = re(anchoredName) | ||||
| 
 | ||||
| 	referencePat = anchored(capture(namePat), | ||||
| 		optional(literal(":"), capture(tag)), | ||||
| 		optional(literal("@"), capture(digestPat))) | ||||
| 	// ReferenceRegexp is the full supported format of a reference. The regexp
 | ||||
| 	// is anchored and has capturing groups for name, tag, and digest
 | ||||
| 	// components.
 | ||||
| 	ReferenceRegexp = anchored(capture(NameRegexp), | ||||
| 		optional(literal(":"), capture(TagRegexp)), | ||||
| 		optional(literal("@"), capture(DigestRegexp))) | ||||
| 	ReferenceRegexp = re(referencePat) | ||||
| 
 | ||||
| 	identifier = `([a-f0-9]{64})` | ||||
| 	// IdentifierRegexp is the format for string identifier used as a
 | ||||
| 	// content addressable identifier using sha256. These identifiers
 | ||||
| 	// are like digests without the algorithm, since sha256 is used.
 | ||||
| 	IdentifierRegexp = match(`([a-f0-9]{64})`) | ||||
| 	IdentifierRegexp = re(identifier) | ||||
| 
 | ||||
| 	shortIdentifier = `([a-f0-9]{6,64})` | ||||
| 	// ShortIdentifierRegexp is the format used to represent a prefix
 | ||||
| 	// of an identifier. A prefix may be used to match a sha256 identifier
 | ||||
| 	// within a list of trusted identifiers.
 | ||||
| 	ShortIdentifierRegexp = match(`([a-f0-9]{6,64})`) | ||||
| 	ShortIdentifierRegexp = re(shortIdentifier) | ||||
| 
 | ||||
| 	anchoredIdentifier = anchored(identifier) | ||||
| 	// anchoredIdentifierRegexp is used to check or match an
 | ||||
| 	// identifier value, anchored at start and end of string.
 | ||||
| 	anchoredIdentifierRegexp = anchored(IdentifierRegexp) | ||||
| 	anchoredIdentifierRegexp = re(anchoredIdentifier) | ||||
| 
 | ||||
| 	anchoredShortIdentifier = anchored(shortIdentifier) | ||||
| 	// anchoredShortIdentifierRegexp is used to check if a value
 | ||||
| 	// is a possible identifier prefix, anchored at start and end
 | ||||
| 	// of string.
 | ||||
| 	anchoredShortIdentifierRegexp = anchored(ShortIdentifierRegexp) | ||||
| 	anchoredShortIdentifierRegexp = re(anchoredShortIdentifier) | ||||
| ) | ||||
| 
 | ||||
| // match compiles the string to a regular expression.
 | ||||
| var match = regexp.MustCompile | ||||
| // re compiles the string to a regular expression.
 | ||||
| var re = regexp.MustCompile | ||||
| 
 | ||||
| // literal compiles s into a literal regular expression, escaping any regexp
 | ||||
| // reserved characters.
 | ||||
| func literal(s string) *regexp.Regexp { | ||||
| 	re := match(regexp.QuoteMeta(s)) | ||||
| func literal(s string) string { | ||||
| 	re := re(regexp.QuoteMeta(s)) | ||||
| 
 | ||||
| 	if _, complete := re.LiteralPrefix(); !complete { | ||||
| 		panic("must be a literal") | ||||
| 	} | ||||
| 
 | ||||
| 	return re | ||||
| 	return re.String() | ||||
| } | ||||
| 
 | ||||
| // expression defines a full expression, where each regular expression must
 | ||||
| // follow the previous.
 | ||||
| func expression(res ...*regexp.Regexp) *regexp.Regexp { | ||||
| func expression(res ...string) string { | ||||
| 	var s string | ||||
| 	for _, re := range res { | ||||
| 		s += re.String() | ||||
| 		s += re | ||||
| 	} | ||||
| 
 | ||||
| 	return match(s) | ||||
| 	return s | ||||
| } | ||||
| 
 | ||||
| // optional wraps the expression in a non-capturing group and makes the
 | ||||
| // production optional.
 | ||||
| func optional(res ...*regexp.Regexp) *regexp.Regexp { | ||||
| 	return match(group(expression(res...)).String() + `?`) | ||||
| func optional(res ...string) string { | ||||
| 	return group(expression(res...)) + `?` | ||||
| } | ||||
| 
 | ||||
| // repeated wraps the regexp in a non-capturing group to get one or more
 | ||||
| // matches.
 | ||||
| func repeated(res ...*regexp.Regexp) *regexp.Regexp { | ||||
| 	return match(group(expression(res...)).String() + `+`) | ||||
| func repeated(res ...string) string { | ||||
| 	return group(expression(res...)) + `+` | ||||
| } | ||||
| 
 | ||||
| // group wraps the regexp in a non-capturing group.
 | ||||
| func group(res ...*regexp.Regexp) *regexp.Regexp { | ||||
| 	return match(`(?:` + expression(res...).String() + `)`) | ||||
| func group(res ...string) string { | ||||
| 	return `(?:` + expression(res...) + `)` | ||||
| } | ||||
| 
 | ||||
| // capture wraps the expression in a capturing group.
 | ||||
| func capture(res ...*regexp.Regexp) *regexp.Regexp { | ||||
| 	return match(`(` + expression(res...).String() + `)`) | ||||
| func capture(res ...string) string { | ||||
| 	return `(` + expression(res...) + `)` | ||||
| } | ||||
| 
 | ||||
| // anchored anchors the regular expression by adding start and end delimiters.
 | ||||
| func anchored(res ...*regexp.Regexp) *regexp.Regexp { | ||||
| 	return match(`^` + expression(res...).String() + `$`) | ||||
| func anchored(res ...string) string { | ||||
| 	return `^` + expression(res...) + `$` | ||||
| } | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue