/* Copyright The containerd Authors. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ package filters import ( "errors" "unicode/utf8" ) // NOTE(stevvooe): Most of this code in this file is copied from the stdlib // strconv package and modified to be able to handle quoting with `/` and `|` // as delimiters. The copyright is held by the Go authors. var errQuoteSyntax = errors.New("quote syntax error") // UnquoteChar decodes the first character or byte in the escaped string // or character literal represented by the string s. // It returns four values: // // 1) value, the decoded Unicode code point or byte value; // 2) multibyte, a boolean indicating whether the decoded character requires a multibyte UTF-8 representation; // 3) tail, the remainder of the string after the character; and // 4) an error that will be nil if the character is syntactically valid. // // The second argument, quote, specifies the type of literal being parsed // and therefore which escaped quote character is permitted. // If set to a single quote, it permits the sequence \' and disallows unescaped '. // If set to a double quote, it permits \" and disallows unescaped ". // If set to zero, it does not permit either escape and allows both quote characters to appear unescaped. // // This is from Go strconv package, modified to support `|` and `/` as double // quotes for use with regular expressions. func unquoteChar(s string, quote byte) (value rune, multibyte bool, tail string, err error) { // easy cases switch c := s[0]; { case c == quote && (quote == '\'' || quote == '"' || quote == '/' || quote == '|'): err = errQuoteSyntax return case c >= utf8.RuneSelf: r, size := utf8.DecodeRuneInString(s) return r, true, s[size:], nil case c != '\\': return rune(s[0]), false, s[1:], nil } // hard case: c is backslash if len(s) <= 1 { err = errQuoteSyntax return } c := s[1] s = s[2:] switch c { case 'a': value = '\a' case 'b': value = '\b' case 'f': value = '\f' case 'n': value = '\n' case 'r': value = '\r' case 't': value = '\t' case 'v': value = '\v' case 'x', 'u', 'U': n := 0 switch c { case 'x': n = 2 case 'u': n = 4 case 'U': n = 8 } var v rune if len(s) < n { err = errQuoteSyntax return } for j := 0; j < n; j++ { x, ok := unhex(s[j]) if !ok { err = errQuoteSyntax return } v = v<<4 | x } s = s[n:] if c == 'x' { // single-byte string, possibly not UTF-8 value = v break } if v > utf8.MaxRune { err = errQuoteSyntax return } value = v multibyte = true case '0', '1', '2', '3', '4', '5', '6', '7': v := rune(c) - '0' if len(s) < 2 { err = errQuoteSyntax return } for j := 0; j < 2; j++ { // one digit already; two more x := rune(s[j]) - '0' if x < 0 || x > 7 { err = errQuoteSyntax return } v = (v << 3) | x } s = s[2:] if v > 255 { err = errQuoteSyntax return } value = v case '\\': value = '\\' case '\'', '"', '|', '/': if c != quote { err = errQuoteSyntax return } value = rune(c) default: err = errQuoteSyntax return } tail = s return } // unquote interprets s as a single-quoted, double-quoted, // or backquoted Go string literal, returning the string value // that s quotes. (If s is single-quoted, it would be a Go // character literal; Unquote returns the corresponding // one-character string.) // // This is modified from the standard library to support `|` and `/` as quote // characters for use with regular expressions. func unquote(s string) (string, error) { n := len(s) if n < 2 { return "", errQuoteSyntax } quote := s[0] if quote != s[n-1] { return "", errQuoteSyntax } s = s[1 : n-1] if quote == '`' { if contains(s, '`') { return "", errQuoteSyntax } if contains(s, '\r') { // -1 because we know there is at least one \r to remove. buf := make([]byte, 0, len(s)-1) for i := 0; i < len(s); i++ { if s[i] != '\r' { buf = append(buf, s[i]) } } return string(buf), nil } return s, nil } if quote != '"' && quote != '\'' && quote != '|' && quote != '/' { return "", errQuoteSyntax } if contains(s, '\n') { return "", errQuoteSyntax } // Is it trivial? Avoid allocation. if !contains(s, '\\') && !contains(s, quote) { switch quote { case '"', '/', '|': // pipe and slash are treated like double quote return s, nil case '\'': r, size := utf8.DecodeRuneInString(s) if size == len(s) && (r != utf8.RuneError || size != 1) { return s, nil } } } var runeTmp [utf8.UTFMax]byte buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations. for len(s) > 0 { c, multibyte, ss, err := unquoteChar(s, quote) if err != nil { return "", err } s = ss if c < utf8.RuneSelf || !multibyte { buf = append(buf, byte(c)) } else { n := utf8.EncodeRune(runeTmp[:], c) buf = append(buf, runeTmp[:n]...) } if quote == '\'' && len(s) != 0 { // single-quoted must be single character return "", errQuoteSyntax } } return string(buf), nil } // contains reports whether the string contains the byte c. func contains(s string, c byte) bool { for i := 0; i < len(s); i++ { if s[i] == c { return true } } return false } func unhex(b byte) (v rune, ok bool) { c := rune(b) switch { case '0' <= c && c <= '9': return c - '0', true case 'a' <= c && c <= 'f': return c - 'a' + 10, true case 'A' <= c && c <= 'F': return c - 'A' + 10, true } return }