Merge pull request #3566 from paulcacheux/paulcacheux/improve-reference-regexps
Improve how reference regexps are built
master
						commit
						91f33cb5c0
					
				|  | @ -3,145 +3,154 @@ package reference | ||||||
| import "regexp" | import "regexp" | ||||||
| 
 | 
 | ||||||
| var ( | var ( | ||||||
| 	// alphaNumericRegexp defines the alpha numeric atom, typically a
 | 	// alphaNumeric defines the alpha numeric atom, typically a
 | ||||||
| 	// component of names. This only allows lower case characters and digits.
 | 	// component of names. This only allows lower case characters and digits.
 | ||||||
| 	alphaNumericRegexp = match(`[a-z0-9]+`) | 	alphaNumeric = `[a-z0-9]+` | ||||||
| 
 | 
 | ||||||
| 	// separatorRegexp defines the separators allowed to be embedded in name
 | 	// separator defines the separators allowed to be embedded in name
 | ||||||
| 	// components. This allows one period, one or two underscores and multiple
 | 	// components. This allows one period, one or two underscores and multiple
 | ||||||
| 	// dashes. Repeated dashes and underscores are intentionally treated
 | 	// dashes. Repeated dashes and underscores are intentionally treated
 | ||||||
| 	// differently. In order to support valid hostnames as name components,
 | 	// differently. In order to support valid hostnames as name components,
 | ||||||
| 	// supporting repeated dash was added. Additionally double underscore is
 | 	// supporting repeated dash was added. Additionally double underscore is
 | ||||||
| 	// now allowed as a separator to loosen the restriction for previously
 | 	// now allowed as a separator to loosen the restriction for previously
 | ||||||
| 	// supported names.
 | 	// supported names.
 | ||||||
| 	separatorRegexp = match(`(?:[._]|__|[-]*)`) | 	separator = `(?:[._]|__|[-]*)` | ||||||
| 
 | 
 | ||||||
| 	// nameComponentRegexp restricts registry path component names to start
 | 	// nameComponent restricts registry path component names to start
 | ||||||
| 	// with at least one letter or number, with following parts able to be
 | 	// with at least one letter or number, with following parts able to be
 | ||||||
| 	// separated by one period, one or two underscores and multiple dashes.
 | 	// separated by one period, one or two underscores and multiple dashes.
 | ||||||
| 	nameComponentRegexp = expression( | 	nameComponent = expression( | ||||||
| 		alphaNumericRegexp, | 		alphaNumeric, | ||||||
| 		optional(repeated(separatorRegexp, alphaNumericRegexp))) | 		optional(repeated(separator, alphaNumeric))) | ||||||
| 
 | 
 | ||||||
| 	// domainComponentRegexp restricts the registry domain component of a
 | 	// domainComponent restricts the registry domain component of a
 | ||||||
| 	// repository name to start with a component as defined by DomainRegexp
 | 	// repository name to start with a component as defined by DomainRegexp
 | ||||||
| 	// and followed by an optional port.
 | 	// and followed by an optional port.
 | ||||||
| 	domainComponentRegexp = match(`(?:[a-zA-Z0-9]|[a-zA-Z0-9][a-zA-Z0-9-]*[a-zA-Z0-9])`) | 	domainComponent = `(?:[a-zA-Z0-9]|[a-zA-Z0-9][a-zA-Z0-9-]*[a-zA-Z0-9])` | ||||||
| 
 | 
 | ||||||
|  | 	domain = expression( | ||||||
|  | 		domainComponent, | ||||||
|  | 		optional(repeated(literal(`.`), domainComponent)), | ||||||
|  | 		optional(literal(`:`), `[0-9]+`)) | ||||||
| 	// DomainRegexp defines the structure of potential domain components
 | 	// DomainRegexp defines the structure of potential domain components
 | ||||||
| 	// that may be part of image names. This is purposely a subset of what is
 | 	// that may be part of image names. This is purposely a subset of what is
 | ||||||
| 	// allowed by DNS to ensure backwards compatibility with Docker image
 | 	// allowed by DNS to ensure backwards compatibility with Docker image
 | ||||||
| 	// names.
 | 	// names.
 | ||||||
| 	DomainRegexp = expression( | 	DomainRegexp = regexp.MustCompile(domain) | ||||||
| 		domainComponentRegexp, |  | ||||||
| 		optional(repeated(literal(`.`), domainComponentRegexp)), |  | ||||||
| 		optional(literal(`:`), match(`[0-9]+`))) |  | ||||||
| 
 | 
 | ||||||
|  | 	tag = `[\w][\w.-]{0,127}` | ||||||
| 	// TagRegexp matches valid tag names. From docker/docker:graph/tags.go.
 | 	// TagRegexp matches valid tag names. From docker/docker:graph/tags.go.
 | ||||||
| 	TagRegexp = match(`[\w][\w.-]{0,127}`) | 	TagRegexp = regexp.MustCompile(tag) | ||||||
| 
 | 
 | ||||||
|  | 	anchoredTag = anchored(tag) | ||||||
| 	// anchoredTagRegexp matches valid tag names, anchored at the start and
 | 	// anchoredTagRegexp matches valid tag names, anchored at the start and
 | ||||||
| 	// end of the matched string.
 | 	// end of the matched string.
 | ||||||
| 	anchoredTagRegexp = anchored(TagRegexp) | 	anchoredTagRegexp = regexp.MustCompile(anchoredTag) | ||||||
| 
 | 
 | ||||||
|  | 	digestPat = `[A-Za-z][A-Za-z0-9]*(?:[-_+.][A-Za-z][A-Za-z0-9]*)*[:][[:xdigit:]]{32,}` | ||||||
| 	// DigestRegexp matches valid digests.
 | 	// DigestRegexp matches valid digests.
 | ||||||
| 	DigestRegexp = match(`[A-Za-z][A-Za-z0-9]*(?:[-_+.][A-Za-z][A-Za-z0-9]*)*[:][[:xdigit:]]{32,}`) | 	DigestRegexp = regexp.MustCompile(digestPat) | ||||||
| 
 | 
 | ||||||
|  | 	anchoredDigest = anchored(digestPat) | ||||||
| 	// anchoredDigestRegexp matches valid digests, anchored at the start and
 | 	// anchoredDigestRegexp matches valid digests, anchored at the start and
 | ||||||
| 	// end of the matched string.
 | 	// end of the matched string.
 | ||||||
| 	anchoredDigestRegexp = anchored(DigestRegexp) | 	anchoredDigestRegexp = regexp.MustCompile(anchoredDigest) | ||||||
| 
 | 
 | ||||||
|  | 	namePat = expression( | ||||||
|  | 		optional(domain, literal(`/`)), | ||||||
|  | 		nameComponent, | ||||||
|  | 		optional(repeated(literal(`/`), nameComponent))) | ||||||
| 	// NameRegexp is the format for the name component of references. The
 | 	// NameRegexp is the format for the name component of references. The
 | ||||||
| 	// regexp has capturing groups for the domain and name part omitting
 | 	// regexp has capturing groups for the domain and name part omitting
 | ||||||
| 	// the separating forward slash from either.
 | 	// the separating forward slash from either.
 | ||||||
| 	NameRegexp = expression( | 	NameRegexp = regexp.MustCompile(namePat) | ||||||
| 		optional(DomainRegexp, literal(`/`)), |  | ||||||
| 		nameComponentRegexp, |  | ||||||
| 		optional(repeated(literal(`/`), nameComponentRegexp))) |  | ||||||
| 
 | 
 | ||||||
|  | 	anchoredName = anchored( | ||||||
|  | 		optional(capture(domain), literal(`/`)), | ||||||
|  | 		capture(nameComponent, | ||||||
|  | 			optional(repeated(literal(`/`), nameComponent)))) | ||||||
| 	// anchoredNameRegexp is used to parse a name value, capturing the
 | 	// anchoredNameRegexp is used to parse a name value, capturing the
 | ||||||
| 	// domain and trailing components.
 | 	// domain and trailing components.
 | ||||||
| 	anchoredNameRegexp = anchored( | 	anchoredNameRegexp = regexp.MustCompile(anchoredName) | ||||||
| 		optional(capture(DomainRegexp), literal(`/`)), |  | ||||||
| 		capture(nameComponentRegexp, |  | ||||||
| 			optional(repeated(literal(`/`), nameComponentRegexp)))) |  | ||||||
| 
 | 
 | ||||||
|  | 	referencePat = anchored(capture(namePat), | ||||||
|  | 		optional(literal(":"), capture(tag)), | ||||||
|  | 		optional(literal("@"), capture(digestPat))) | ||||||
| 	// ReferenceRegexp is the full supported format of a reference. The regexp
 | 	// ReferenceRegexp is the full supported format of a reference. The regexp
 | ||||||
| 	// is anchored and has capturing groups for name, tag, and digest
 | 	// is anchored and has capturing groups for name, tag, and digest
 | ||||||
| 	// components.
 | 	// components.
 | ||||||
| 	ReferenceRegexp = anchored(capture(NameRegexp), | 	ReferenceRegexp = regexp.MustCompile(referencePat) | ||||||
| 		optional(literal(":"), capture(TagRegexp)), |  | ||||||
| 		optional(literal("@"), capture(DigestRegexp))) |  | ||||||
| 
 | 
 | ||||||
|  | 	identifier = `([a-f0-9]{64})` | ||||||
| 	// IdentifierRegexp is the format for string identifier used as a
 | 	// IdentifierRegexp is the format for string identifier used as a
 | ||||||
| 	// content addressable identifier using sha256. These identifiers
 | 	// content addressable identifier using sha256. These identifiers
 | ||||||
| 	// are like digests without the algorithm, since sha256 is used.
 | 	// are like digests without the algorithm, since sha256 is used.
 | ||||||
| 	IdentifierRegexp = match(`([a-f0-9]{64})`) | 	IdentifierRegexp = regexp.MustCompile(identifier) | ||||||
| 
 | 
 | ||||||
|  | 	shortIdentifier = `([a-f0-9]{6,64})` | ||||||
| 	// ShortIdentifierRegexp is the format used to represent a prefix
 | 	// ShortIdentifierRegexp is the format used to represent a prefix
 | ||||||
| 	// of an identifier. A prefix may be used to match a sha256 identifier
 | 	// of an identifier. A prefix may be used to match a sha256 identifier
 | ||||||
| 	// within a list of trusted identifiers.
 | 	// within a list of trusted identifiers.
 | ||||||
| 	ShortIdentifierRegexp = match(`([a-f0-9]{6,64})`) | 	ShortIdentifierRegexp = regexp.MustCompile(shortIdentifier) | ||||||
| 
 | 
 | ||||||
|  | 	anchoredIdentifier = anchored(identifier) | ||||||
| 	// anchoredIdentifierRegexp is used to check or match an
 | 	// anchoredIdentifierRegexp is used to check or match an
 | ||||||
| 	// identifier value, anchored at start and end of string.
 | 	// identifier value, anchored at start and end of string.
 | ||||||
| 	anchoredIdentifierRegexp = anchored(IdentifierRegexp) | 	anchoredIdentifierRegexp = regexp.MustCompile(anchoredIdentifier) | ||||||
| 
 | 
 | ||||||
|  | 	anchoredShortIdentifier = anchored(shortIdentifier) | ||||||
| 	// anchoredShortIdentifierRegexp is used to check if a value
 | 	// anchoredShortIdentifierRegexp is used to check if a value
 | ||||||
| 	// is a possible identifier prefix, anchored at start and end
 | 	// is a possible identifier prefix, anchored at start and end
 | ||||||
| 	// of string.
 | 	// of string.
 | ||||||
| 	anchoredShortIdentifierRegexp = anchored(ShortIdentifierRegexp) | 	anchoredShortIdentifierRegexp = regexp.MustCompile(anchoredShortIdentifier) | ||||||
| ) | ) | ||||||
| 
 | 
 | ||||||
| // match compiles the string to a regular expression.
 |  | ||||||
| var match = regexp.MustCompile |  | ||||||
| 
 |  | ||||||
| // literal compiles s into a literal regular expression, escaping any regexp
 | // literal compiles s into a literal regular expression, escaping any regexp
 | ||||||
| // reserved characters.
 | // reserved characters.
 | ||||||
| func literal(s string) *regexp.Regexp { | func literal(s string) string { | ||||||
| 	re := match(regexp.QuoteMeta(s)) | 	re := regexp.MustCompile(regexp.QuoteMeta(s)) | ||||||
| 
 | 
 | ||||||
| 	if _, complete := re.LiteralPrefix(); !complete { | 	if _, complete := re.LiteralPrefix(); !complete { | ||||||
| 		panic("must be a literal") | 		panic("must be a literal") | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	return re | 	return re.String() | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| // expression defines a full expression, where each regular expression must
 | // expression defines a full expression, where each regular expression must
 | ||||||
| // follow the previous.
 | // follow the previous.
 | ||||||
| func expression(res ...*regexp.Regexp) *regexp.Regexp { | func expression(res ...string) string { | ||||||
| 	var s string | 	var s string | ||||||
| 	for _, re := range res { | 	for _, re := range res { | ||||||
| 		s += re.String() | 		s += re | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	return match(s) | 	return s | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| // optional wraps the expression in a non-capturing group and makes the
 | // optional wraps the expression in a non-capturing group and makes the
 | ||||||
| // production optional.
 | // production optional.
 | ||||||
| func optional(res ...*regexp.Regexp) *regexp.Regexp { | func optional(res ...string) string { | ||||||
| 	return match(group(expression(res...)).String() + `?`) | 	return group(expression(res...)) + `?` | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| // repeated wraps the regexp in a non-capturing group to get one or more
 | // repeated wraps the regexp in a non-capturing group to get one or more
 | ||||||
| // matches.
 | // matches.
 | ||||||
| func repeated(res ...*regexp.Regexp) *regexp.Regexp { | func repeated(res ...string) string { | ||||||
| 	return match(group(expression(res...)).String() + `+`) | 	return group(expression(res...)) + `+` | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| // group wraps the regexp in a non-capturing group.
 | // group wraps the regexp in a non-capturing group.
 | ||||||
| func group(res ...*regexp.Regexp) *regexp.Regexp { | func group(res ...string) string { | ||||||
| 	return match(`(?:` + expression(res...).String() + `)`) | 	return `(?:` + expression(res...) + `)` | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| // capture wraps the expression in a capturing group.
 | // capture wraps the expression in a capturing group.
 | ||||||
| func capture(res ...*regexp.Regexp) *regexp.Regexp { | func capture(res ...string) string { | ||||||
| 	return match(`(` + expression(res...).String() + `)`) | 	return `(` + expression(res...) + `)` | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| // anchored anchors the regular expression by adding start and end delimiters.
 | // anchored anchors the regular expression by adding start and end delimiters.
 | ||||||
| func anchored(res ...*regexp.Regexp) *regexp.Regexp { | func anchored(res ...string) string { | ||||||
| 	return match(`^` + expression(res...).String() + `$`) | 	return `^` + expression(res...) + `$` | ||||||
| } | } | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue