2022-11-14 22:56:28 +01:00
|
|
|
package shparse
|
|
|
|
|
|
|
|
import (
|
|
|
|
"fmt"
|
2022-11-15 04:57:29 +01:00
|
|
|
"unicode"
|
2022-11-14 22:56:28 +01:00
|
|
|
)
|
|
|
|
|
|
|
|
//
|
|
|
|
// cmds := cmd (sep cmd)*
|
|
|
|
// sep := ';' | '&' | '&&' | '||' | '|' | '\n'
|
|
|
|
// cmd := simple-command | compound-command redirect-list?
|
|
|
|
// compound-command := brace-group | subshell | for-clause | case-clause | if-clause | while-clause | until-clause
|
|
|
|
// brace-group := '{' cmds '}'
|
|
|
|
// subshell := '(' cmds ')'
|
|
|
|
// simple-command := cmd-prefix cmd-word (io-redirect)*
|
|
|
|
// cmd-prefix := (io-redirect | assignment)*
|
|
|
|
// cmd-suffix := (io-redirect | word)*
|
|
|
|
// cmd-name := word
|
|
|
|
// cmd-word := word
|
|
|
|
// io-redirect := (io-number? io-file) | (io-number? io-here)
|
|
|
|
// io-file := ('<' | '<&' | '>' | '>&' | '>>' | '>|' ) filename
|
|
|
|
// io-here := ('<<' | '<<-') here-end
|
|
|
|
// here-end := word
|
|
|
|
// if-clause := 'if' compound-list 'then' compound-list else-part 'fi'
|
|
|
|
// else-part := 'elif' compound-list 'then' compound-list
|
|
|
|
// | 'elif' compound-list 'then' compound-list else-part
|
|
|
|
// | 'else' compound-list
|
|
|
|
// compound-list := linebreak term sep?
|
|
|
|
//
|
|
|
|
//
|
|
|
|
//
|
|
|
|
// $var
|
|
|
|
// ${var}
|
|
|
|
// ${var op word?}
|
|
|
|
// op := '-' | '=' | '?' | '+' | ':-' | ':=' | ':?' | ':+' | '%' | '%%' | '#' | '##'
|
|
|
|
// ${ '#' var }
|
|
|
|
//
|
|
|
|
// $(command)
|
|
|
|
// `command`
|
|
|
|
// $(( arith ))
|
|
|
|
//
|
|
|
|
// " ... "
|
|
|
|
// ' ... '
|
|
|
|
// $' ... '
|
|
|
|
// $" ... "
|
|
|
|
|
|
|
|
// " => $, ", `, \
|
|
|
|
// ' => '
|
|
|
|
// (process quotes)
|
|
|
|
// mark as escaped
|
|
|
|
// split into commands (use ';' as separator)
|
|
|
|
// parse special operators
|
|
|
|
// perform expansions (vars, globs, commands)
|
|
|
|
// split command into name and arguments
|
|
|
|
|
|
|
|
// A correctly-formed brace expansion must contain unquoted opening and closing braces, and at least one unquoted comma or a valid sequence expression
|
|
|
|
// Any incorrectly formed brace expansion is left unchanged.
|
|
|
|
|
|
|
|
// word: char *word; flags
|
|
|
|
// bash aliases are lexical
|
|
|
|
|
|
|
|
// [[, ((, $(( <- DQ
|
|
|
|
|
|
|
|
// $ -> expansion
|
|
|
|
// $(...)
|
|
|
|
// (...)
|
|
|
|
// $((...))
|
|
|
|
// ((...))
|
|
|
|
// ${...}
|
|
|
|
// {...}
|
|
|
|
// X=(...)
|
|
|
|
|
|
|
|
// ambiguity between $((...)) and $((ls); ls)
|
|
|
|
// ambiguity between foo=([0]=hell) and foo=([abc)
|
|
|
|
|
2022-11-15 04:57:29 +01:00
|
|
|
// tokenization https://pubs.opengroup.org/onlinepubs/7908799/xcu/chap2.html#tag_001_003
|
|
|
|
|
2022-11-14 22:56:28 +01:00
|
|
|
// Word type tags. Each value is stored in wordType.Type; the tags marked
// "(quote-context)" are also the values pushed onto a quoteContext stack when
// the tokenizer descends into that construct.
const (
	WordTypeRaw       = "raw"
	WordTypeLit       = "lit"
	WordTypeOp        = "op"   // single: & ; | ( ) < > \n multi(2): && || ;; << >> <& >& <> >| (( multi(3): <<- <<< ('((' requires special processing)
	WordTypeKey       = "key"  // if then else elif fi do done case esac while until for in { } ! (( [[
	WordTypeSimpleVar = "svar" // simplevar $
	WordTypeGroup     = "grp"  // contains other words e.g. "hello"foo'bar'$x

	// Constructs whose contents are tokenized in a nested parse context.
	WordTypeDQ       = "dq"   // " (quote-context)
	WordTypeDDQ      = "ddq"  // $" (quote-context)
	WordTypeVarBrace = "varb" // ${ (quote-context)
	WordTypeDP       = "dp"   // $( (quote-context)
	WordTypeBQ       = "bq"   // ` (quote-context)

	// Constructs that are skipped over as a single opaque span.
	WordTypeSQ  = "sq"  // '
	WordTypeDSQ = "dsq" // $'
	WordTypeDPP = "dpp" // $(( (internals not parsed)
	WordTypePP  = "pp"  // (( (internals not parsed)
	WordTypeDB  = "db"  // $[ (internals not parsed)
)
|
|
|
|
|
2022-11-15 09:36:30 +01:00
|
|
|
// quoteContext is a stack of quote-context tags, innermost last.
type quoteContext []string

// push returns a new stack with q appended. The receiver is never modified
// and the result never shares a backing array with it, so sibling contexts
// pushed from the same parent cannot overwrite each other.
func (qc quoteContext) push(q string) quoteContext {
	depth := len(qc)
	next := make(quoteContext, depth+1)
	copy(next, qc)
	next[depth] = q
	return next
}

// cur returns the innermost quote-context tag, or "" when the stack is empty.
func (qc quoteContext) cur() string {
	if n := len(qc); n > 0 {
		return qc[n-1]
	}
	return ""
}
|
|
|
|
|
2022-11-14 22:56:28 +01:00
|
|
|
// parseContext is the tokenizer's cursor: a rune buffer, a read position
// inside that buffer, and the stack of quote contexts enclosing it.
type parseContext struct {
	Input []rune       // runes being scanned; a context made by clone() holds a tail slice of its parent's Input
	Pos   int          // index into Input of the next rune to examine; advanced by makeWord
	QC    quoteContext // quote-context stack in effect for words created from this context
}
|
|
|
|
|
|
|
|
// wordType is one token produced by the tokenizer, optionally containing
// nested tokens in Subs.
type wordType struct {
	Type     string       // one of the WordType* tags
	Offset   int          // value of parseContext.Pos when the word was created (relative to that context's Input)
	QC       quoteContext // quote-context stack of the parseContext that created the word
	Raw      []rune       // slice of the context's Input covered by this word
	Complete bool         // false when the input ended before the construct was closed
	Val      string       // only for Op and Key (does *not* store string values of quoted expressions or expansions)
	Prefix   []rune       // NOTE(review): only its length is used here (indentation in String()); presumably leading text before the word — confirm
	Subs     []*wordType  // nested words for constructs tokenized in a sub-context (e.g. dq, ddq, bq, dp)
}
|
|
|
|
|
2022-11-15 09:36:30 +01:00
|
|
|
func (c *parseContext) clone(pos int, newQuote string) *parseContext {
|
2022-11-16 09:37:22 +01:00
|
|
|
rtn := parseContext{Input: c.Input[pos:], QC: c.QC}
|
2022-11-15 09:36:30 +01:00
|
|
|
if newQuote != "" {
|
2022-11-16 09:37:22 +01:00
|
|
|
rtn.QC = rtn.QC.push(newQuote)
|
2022-11-15 09:36:30 +01:00
|
|
|
}
|
|
|
|
return &rtn
|
|
|
|
}
|
|
|
|
|
2022-11-14 22:56:28 +01:00
|
|
|
func (c *parseContext) at(offset int) rune {
|
|
|
|
pos := c.Pos + offset
|
|
|
|
if pos < 0 || pos >= len(c.Input) {
|
|
|
|
return 0
|
|
|
|
}
|
|
|
|
return c.Input[pos]
|
|
|
|
}
|
|
|
|
|
|
|
|
func (c *parseContext) eof() bool {
|
|
|
|
return c.Pos >= len(c.Input)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (c *parseContext) cur() rune {
|
|
|
|
return c.at(0)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (c *parseContext) match(ch rune) bool {
|
|
|
|
return c.at(0) == ch
|
|
|
|
}
|
|
|
|
|
|
|
|
func (c *parseContext) match2(ch rune, ch2 rune) bool {
|
|
|
|
return c.at(0) == ch && c.at(1) == ch2
|
|
|
|
}
|
|
|
|
|
2022-11-15 04:57:29 +01:00
|
|
|
func (c *parseContext) match3(ch rune, ch2 rune, ch3 rune) bool {
|
|
|
|
return c.at(0) == ch && c.at(1) == ch2 && c.at(2) == ch3
|
|
|
|
}
|
|
|
|
|
2022-11-16 20:13:15 +01:00
|
|
|
func (c *parseContext) makeWord(t string, length int, complete bool) *wordType {
|
|
|
|
rtn := &wordType{Type: t}
|
2022-11-15 04:57:29 +01:00
|
|
|
rtn.Offset = c.Pos
|
2022-11-16 20:13:15 +01:00
|
|
|
rtn.QC = c.QC
|
2022-11-15 04:57:29 +01:00
|
|
|
rtn.Raw = c.Input[c.Pos : c.Pos+length]
|
2022-11-16 20:13:15 +01:00
|
|
|
rtn.Complete = complete
|
2022-11-14 22:56:28 +01:00
|
|
|
c.Pos += length
|
|
|
|
return rtn
|
|
|
|
}
|
|
|
|
|
2022-11-15 04:57:29 +01:00
|
|
|
// returns (found, newOffset)
|
2022-11-15 09:36:30 +01:00
|
|
|
// shell_meta_chars "()<>;&|"
|
|
|
|
// possible to maybe add ;;& &>> &> |& ;&
|
2022-11-15 04:57:29 +01:00
|
|
|
func (c *parseContext) parseOp(offset int) (bool, int) {
|
|
|
|
ch := c.at(offset)
|
2022-11-15 09:36:30 +01:00
|
|
|
if ch == '(' || ch == ')' || ch == '<' || ch == '>' || ch == ';' || ch == '&' || ch == '|' {
|
2022-11-15 04:57:29 +01:00
|
|
|
ch2 := c.at(offset + 1)
|
2022-11-14 22:56:28 +01:00
|
|
|
if ch2 == 0 {
|
2022-11-15 04:57:29 +01:00
|
|
|
return true, offset + 1
|
2022-11-14 22:56:28 +01:00
|
|
|
}
|
|
|
|
r2 := string([]rune{ch, ch2})
|
|
|
|
if r2 == "<<" {
|
2022-11-15 04:57:29 +01:00
|
|
|
ch3 := c.at(offset + 2)
|
2022-11-15 09:36:30 +01:00
|
|
|
if ch3 == '-' || ch3 == '<' {
|
|
|
|
return true, offset + 3 // "<<-" or "<<<"
|
2022-11-14 22:56:28 +01:00
|
|
|
}
|
2022-11-15 04:57:29 +01:00
|
|
|
return true, offset + 2 // "<<"
|
2022-11-14 22:56:28 +01:00
|
|
|
}
|
2022-11-15 09:36:30 +01:00
|
|
|
if r2 == ">>" || r2 == "&&" || r2 == "||" || r2 == ";;" || r2 == "<<" || r2 == "<&" || r2 == ">&" || r2 == "<>" || r2 == ">|" {
|
2022-11-15 04:57:29 +01:00
|
|
|
// we don't return '((' here (requires special processing)
|
2022-11-15 09:36:30 +01:00
|
|
|
return true, offset + 2
|
2022-11-14 22:56:28 +01:00
|
|
|
}
|
2022-11-15 09:36:30 +01:00
|
|
|
return true, offset + 1
|
2022-11-14 22:56:28 +01:00
|
|
|
}
|
2022-11-15 04:57:29 +01:00
|
|
|
return false, 0
|
2022-11-14 22:56:28 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
// returns (new-offset, complete)
|
|
|
|
func (c *parseContext) skipToChar(offset int, endCh rune, allowEsc bool) (int, bool) {
|
|
|
|
for {
|
|
|
|
ch := c.at(offset)
|
|
|
|
if ch == 0 {
|
|
|
|
return offset, false
|
|
|
|
}
|
|
|
|
if allowEsc && ch == '\\' {
|
|
|
|
if c.at(offset+1) == 0 {
|
|
|
|
return offset + 1, false
|
|
|
|
}
|
|
|
|
offset += 2
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
if ch == endCh {
|
|
|
|
return offset + 1, true
|
|
|
|
}
|
|
|
|
offset++
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-11-15 19:27:36 +01:00
|
|
|
// returns (new-offset, complete)
|
|
|
|
func (c *parseContext) skipToChar2(offset int, endCh rune, endCh2 rune, allowEsc bool) (int, bool) {
|
|
|
|
for {
|
|
|
|
ch := c.at(offset)
|
|
|
|
ch2 := c.at(offset + 1)
|
|
|
|
if ch == 0 {
|
|
|
|
return offset, false
|
|
|
|
}
|
|
|
|
if ch2 == 0 {
|
|
|
|
return offset + 1, false
|
|
|
|
}
|
|
|
|
if allowEsc && ch == '\\' {
|
|
|
|
offset += 2
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
if ch == endCh && ch2 == endCh2 {
|
|
|
|
return offset + 2, true
|
|
|
|
}
|
|
|
|
offset++
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-11-14 22:56:28 +01:00
|
|
|
func (c *parseContext) parseStrSQ() *wordType {
|
|
|
|
if !c.match('\'') {
|
|
|
|
return nil
|
|
|
|
}
|
2022-11-15 04:57:29 +01:00
|
|
|
newOffset, complete := c.skipToChar(1, '\'', false)
|
2022-11-16 20:13:15 +01:00
|
|
|
w := c.makeWord(WordTypeSQ, newOffset, complete)
|
2022-11-14 22:56:28 +01:00
|
|
|
return w
|
|
|
|
}
|
|
|
|
|
|
|
|
func (c *parseContext) parseStrDQ() *wordType {
|
|
|
|
if !c.match('"') {
|
|
|
|
return nil
|
|
|
|
}
|
2022-11-16 09:37:22 +01:00
|
|
|
newContext := c.clone(c.Pos+1, WordTypeDQ)
|
|
|
|
subWords, eofExit := newContext.tokenizeDQ()
|
|
|
|
newOffset := newContext.Pos + 1
|
2022-11-16 20:13:15 +01:00
|
|
|
w := c.makeWord(WordTypeDQ, newOffset, !eofExit)
|
|
|
|
w.Subs = subWords
|
2022-11-14 22:56:28 +01:00
|
|
|
return w
|
|
|
|
}
|
|
|
|
|
2022-11-16 21:00:44 +01:00
|
|
|
func (c *parseContext) parseStrDDQ() *wordType {
|
|
|
|
if !c.match2('$', '"') {
|
2022-11-14 22:56:28 +01:00
|
|
|
return nil
|
|
|
|
}
|
2022-11-16 21:00:44 +01:00
|
|
|
newContext := c.clone(c.Pos+2, WordTypeDDQ)
|
|
|
|
subWords, eofExit := newContext.tokenizeDQ()
|
|
|
|
newOffset := newContext.Pos + 2
|
|
|
|
w := c.makeWord(WordTypeDDQ, newOffset, !eofExit)
|
|
|
|
w.Subs = subWords
|
2022-11-14 22:56:28 +01:00
|
|
|
return w
|
|
|
|
}
|
|
|
|
|
2022-11-16 21:00:44 +01:00
|
|
|
func (c *parseContext) parseStrBQ() *wordType {
|
|
|
|
if !c.match('`') {
|
2022-11-14 22:56:28 +01:00
|
|
|
return nil
|
|
|
|
}
|
2022-11-16 21:00:44 +01:00
|
|
|
newContext := c.clone(c.Pos+1, WordTypeBQ)
|
|
|
|
subWords, eofExit := newContext.tokenizeRaw()
|
|
|
|
newOffset := newContext.Pos + 1
|
|
|
|
w := c.makeWord(WordTypeBQ, newOffset, !eofExit)
|
|
|
|
w.Subs = subWords
|
2022-11-14 22:56:28 +01:00
|
|
|
return w
|
|
|
|
}
|
|
|
|
|
2022-11-16 21:00:44 +01:00
|
|
|
func (c *parseContext) parseStrANSI() *wordType {
|
|
|
|
if !c.match2('$', '\'') {
|
2022-11-14 22:56:28 +01:00
|
|
|
return nil
|
|
|
|
}
|
2022-11-16 21:00:44 +01:00
|
|
|
newOffset, complete := c.skipToChar(2, '\'', true)
|
|
|
|
w := c.makeWord(WordTypeDSQ, newOffset, complete)
|
2022-11-14 22:56:28 +01:00
|
|
|
return w
|
|
|
|
}
|
|
|
|
|
2022-11-16 09:37:22 +01:00
|
|
|
func (c *parseContext) parseArith(mustComplete bool) *wordType {
|
|
|
|
if !c.match2('(', '(') {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
newOffset, complete := c.skipToChar2(2, ')', ')', false)
|
|
|
|
if mustComplete && !complete {
|
|
|
|
return nil
|
|
|
|
}
|
2022-11-16 21:00:44 +01:00
|
|
|
w := c.makeWord(WordTypePP, newOffset, complete)
|
2022-11-16 09:37:22 +01:00
|
|
|
return w
|
|
|
|
}
|
|
|
|
|
2022-11-15 04:57:29 +01:00
|
|
|
func (c *parseContext) parseExpansion() *wordType {
|
2022-11-14 22:56:28 +01:00
|
|
|
if !c.match('$') {
|
|
|
|
return nil
|
|
|
|
}
|
2022-11-15 04:57:29 +01:00
|
|
|
if c.match3('$', '(', '(') {
|
2022-11-15 19:27:36 +01:00
|
|
|
newOffset, complete := c.skipToChar2(3, ')', ')', false)
|
2022-11-16 20:13:15 +01:00
|
|
|
w := c.makeWord(WordTypeDPP, newOffset, complete)
|
2022-11-15 19:27:36 +01:00
|
|
|
return w
|
2022-11-15 04:57:29 +01:00
|
|
|
}
|
|
|
|
if c.match2('$', '(') {
|
|
|
|
// subshell
|
2022-11-16 09:37:22 +01:00
|
|
|
newContext := c.clone(c.Pos+2, WordTypeDP)
|
|
|
|
subWords, eofExit := newContext.tokenizeRaw()
|
|
|
|
newOffset := newContext.Pos + 2
|
2022-11-16 20:13:15 +01:00
|
|
|
w := c.makeWord(WordTypeDP, newOffset, !eofExit)
|
2022-11-16 09:37:22 +01:00
|
|
|
w.Subs = subWords
|
2022-11-15 19:27:36 +01:00
|
|
|
return w
|
2022-11-15 04:57:29 +01:00
|
|
|
}
|
|
|
|
if c.match2('$', '[') {
|
|
|
|
// deprecated arith expansion
|
2022-11-15 19:27:36 +01:00
|
|
|
newOffset, complete := c.skipToChar(2, ']', false)
|
2022-11-16 20:13:15 +01:00
|
|
|
w := c.makeWord(WordTypeDB, newOffset, complete)
|
2022-11-15 19:27:36 +01:00
|
|
|
return w
|
2022-11-15 04:57:29 +01:00
|
|
|
}
|
|
|
|
if c.match2('$', '{') {
|
|
|
|
// variable expansion
|
2022-11-16 09:37:22 +01:00
|
|
|
newContext := c.clone(c.Pos+2, WordTypeVarBrace)
|
|
|
|
_, eofExit := newContext.tokenizeVarBrace()
|
|
|
|
newOffset := newContext.Pos + 2
|
2022-11-16 20:13:15 +01:00
|
|
|
w := c.makeWord(WordTypeVarBrace, newOffset, !eofExit)
|
2022-11-15 19:27:36 +01:00
|
|
|
return w
|
2022-11-15 04:57:29 +01:00
|
|
|
}
|
|
|
|
ch2 := c.at(1)
|
|
|
|
if ch2 == 0 || unicode.IsSpace(ch2) {
|
|
|
|
// no expansion
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
newOffset := c.parseSimpleVarName(1)
|
|
|
|
if newOffset > 1 {
|
|
|
|
// simple variable name
|
2022-11-16 20:13:15 +01:00
|
|
|
w := c.makeWord(WordTypeSimpleVar, newOffset, true)
|
|
|
|
return w
|
2022-11-15 04:57:29 +01:00
|
|
|
}
|
2022-11-16 20:13:15 +01:00
|
|
|
if ch2 == '*' || ch2 == '@' || ch2 == '#' || ch2 == '?' || ch2 == '-' || ch2 == '$' || ch2 == '!' || (ch2 >= '0' && ch2 <= '9') {
|
|
|
|
// single character variable name, e.g. $@, $_, $1, etc.
|
|
|
|
w := c.makeWord(WordTypeSimpleVar, 2, true)
|
|
|
|
return w
|
2022-11-15 04:57:29 +01:00
|
|
|
}
|
2022-11-16 20:13:15 +01:00
|
|
|
return nil
|
2022-11-15 04:57:29 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
// returns newOffset
|
|
|
|
func (c *parseContext) parseSimpleVarName(offset int) int {
|
|
|
|
first := true
|
|
|
|
for {
|
|
|
|
ch := c.at(offset)
|
|
|
|
if ch == 0 {
|
|
|
|
return offset
|
|
|
|
}
|
|
|
|
if (ch == '_' || (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) || (!first && ch >= '0' && ch <= '9') {
|
|
|
|
first = false
|
|
|
|
offset++
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
return offset
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-11-15 09:36:30 +01:00
|
|
|
// makeSpaceStr returns a string made of slen space characters.
// A zero or negative slen yields the empty string (a negative length would
// otherwise panic inside make).
func makeSpaceStr(slen int) string {
	if slen <= 0 {
		return ""
	}
	buf := make([]byte, slen)
	for i := range buf {
		buf[i] = ' '
	}
	return string(buf)
}
|
|
|
|
|
2022-11-15 04:57:29 +01:00
|
|
|
func (w *wordType) String() string {
|
2022-11-15 09:36:30 +01:00
|
|
|
notCompleteFlag := " "
|
2022-11-15 04:57:29 +01:00
|
|
|
if !w.Complete {
|
|
|
|
notCompleteFlag = "*"
|
|
|
|
}
|
2022-11-16 09:37:22 +01:00
|
|
|
return fmt.Sprintf("%4s[%3d]%s %s%q", w.Type, w.Offset, notCompleteFlag, makeSpaceStr(len(w.Prefix)), string(w.FullRawString()))
|
2022-11-14 22:56:28 +01:00
|
|
|
}
|
|
|
|
|
2022-11-15 09:36:30 +01:00
|
|
|
func dumpWords(words []*wordType, indentStr string) {
|
2022-11-14 22:56:28 +01:00
|
|
|
for _, word := range words {
|
2022-11-15 09:36:30 +01:00
|
|
|
fmt.Printf("%s%s\n", indentStr, word.String())
|
|
|
|
if len(word.Subs) > 0 {
|
|
|
|
dumpWords(word.Subs, indentStr+" ")
|
|
|
|
}
|
2022-11-14 22:56:28 +01:00
|
|
|
}
|
|
|
|
}
|