recursive parsing for double quotes, subshells, and variable expansions

This commit is contained in:
sawka 2022-11-16 00:37:22 -08:00
parent d44242fe71
commit f1958eaac7
3 changed files with 274 additions and 38 deletions

View File

@ -81,6 +81,8 @@ const (
WordTypeOp = "op" // single: & ; | ( ) < > \n multi(2): && || ;; << >> <& >& <> >| (( multi(3): <<- ('((' requires special processing)
WordTypeKey = "key" // if then else elif fi do done case esac while until for in { } ! (( [[
WordTypeSimpleVar = "svar" // simplevar $
WordTypeGroup = "grp" // contains other words e.g. "hello"foo'bar'$x
WordTypeArith = "ath"
// each of these can also be used as an entry in quoteContext
WordTypeDQ = "dq" // "
@ -129,11 +131,10 @@ type wordType struct {
}
func (c *parseContext) clone(pos int, newQuote string) *parseContext {
rtn := *c
rtn := parseContext{Input: c.Input[pos:], QC: c.QC}
if newQuote != "" {
rtn.QC = append(rtn.QC, newQuote)
rtn.QC = rtn.QC.push(newQuote)
}
rtn.Input = rtn.Input[pos:]
return &rtn
}
@ -264,12 +265,15 @@ func (c *parseContext) parseStrDQ() *wordType {
if !c.match('"') {
return nil
}
newOffset, complete := c.skipToChar(1, '"', false)
newContext := c.clone(c.Pos+1, WordTypeDQ)
subWords, eofExit := newContext.tokenizeDQ()
newOffset := newContext.Pos + 1
w := &wordType{
Type: WordTypeDQ,
Offset: c.Pos,
Raw: c.Input[c.Pos : c.Pos+newOffset],
Complete: complete,
Complete: !eofExit,
Subs: subWords,
}
c.Pos = c.Pos + newOffset
return w
@ -320,6 +324,19 @@ func (c *parseContext) parseStrDDQ() *wordType {
return w
}
// parseArith tries to read an arithmetic "((" ... "))" word at the current
// position.
//
// It returns nil when the input at the current position is not "((", or when
// mustComplete is set and skipToChar2 reports the construct as not complete.
// Otherwise it returns a WordTypeArith word whose Raw covers the span reported
// by skipToChar2 and advances c.Pos past that span.
func (c *parseContext) parseArith(mustComplete bool) *wordType {
	if !c.match2('(', '(') {
		return nil
	}
	// presumably scans forward from offset 2 for the closing "))" -- the helper
	// is defined elsewhere
	length, closed := c.skipToChar2(2, ')', ')', false)
	if !closed && mustComplete {
		return nil
	}
	end := c.Pos + length
	arith := &wordType{
		Type:     WordTypeArith,
		Offset:   c.Pos,
		Raw:      c.Input[c.Pos:end],
		Complete: closed,
	}
	c.Pos = end
	return arith
}
func (c *parseContext) parseExpansion() *wordType {
if !c.match('$') {
return nil
@ -332,8 +349,12 @@ func (c *parseContext) parseExpansion() *wordType {
}
if c.match2('$', '(') {
// subshell
newOffset, complete := c.skipToChar(2, ')', false)
w := &wordType{Type: WordTypeDP, Offset: c.Pos, Raw: c.Input[c.Pos : c.Pos+newOffset], Complete: complete}
newContext := c.clone(c.Pos+2, WordTypeDP)
subWords, eofExit := newContext.tokenizeRaw()
newOffset := newContext.Pos + 2
// newOffset, complete := c.skipToChar(2, ')', false)
w := &wordType{Type: WordTypeDP, Offset: c.Pos, Raw: c.Input[c.Pos : c.Pos+newOffset], Complete: !eofExit}
w.Subs = subWords
c.Pos = c.Pos + newOffset
return w
}
@ -346,8 +367,10 @@ func (c *parseContext) parseExpansion() *wordType {
}
if c.match2('$', '{') {
// variable expansion
newOffset, complete := c.skipToChar(2, '}', false)
w := &wordType{Type: WordTypeVarBrace, Offset: c.Pos, Raw: c.Input[c.Pos : c.Pos+newOffset], Complete: complete}
newContext := c.clone(c.Pos+2, WordTypeVarBrace)
_, eofExit := newContext.tokenizeVarBrace()
newOffset := newContext.Pos + 2
w := &wordType{Type: WordTypeVarBrace, Offset: c.Pos, Raw: c.Input[c.Pos : c.Pos+newOffset], Complete: !eofExit}
c.Pos = c.Pos + newOffset
return w
}
@ -520,7 +543,7 @@ func (w *wordType) String() string {
if !w.Complete {
notCompleteFlag = "*"
}
return fmt.Sprintf("%4s[%3d]%s %s%q", w.Type, w.Offset, notCompleteFlag, makeSpaceStr(len(w.Prefix)), string(w.Raw))
return fmt.Sprintf("%4s[%3d]%s %s%q", w.Type, w.Offset, notCompleteFlag, makeSpaceStr(len(w.Prefix)), string(w.FullRawString()))
}
func dumpWords(words []*wordType, indentStr string) {

View File

@ -35,4 +35,12 @@ func Test1(t *testing.T) {
testParse(t, `ls ${x:"hello"} $[2+2] $((5 * 10)) $(ls; ls&)`)
testParse(t, `ls;ls&./foo > out 2> "out2"`)
testParse(t, `(( x = 5)); ls& cd ~/work/"hello again"`)
testParse(t, `echo "hello"abc$(ls)$x${y:foo}`)
testParse(t, `echo $(ls; ./x "foo")`)
testParse(t, `echo $(ls; (cd foo; ls); (cd bar; ls))xyz`)
testParse(t, `echo "$x ${y:-foo}"`)
testParse(t, `command="$(echo "$input" | sed -e "s/^[ \t]*\([^ \t]*\)[ \t]*.*$/\1/g")"`)
testParse(t, `echo $(ls $)`)
testParse(t, `echo ${x:-hello\}"}"} 2nd`)
testParse(t, `echo "$(ls "foo") more $x"`)
}

View File

@ -2,6 +2,7 @@ package shparse
import (
"bytes"
"fmt"
"unicode"
)
@ -16,7 +17,8 @@ type tokenizeOutputState struct {
SavedPrefix []rune
}
func (state *tokenizeOutputState) appendWord(word *wordType) {
// does not set CurWord
func (state *tokenizeOutputState) appendStandaloneWord(word *wordType) {
state.delimitCurWord()
if len(state.SavedPrefix) > 0 {
word.Prefix = state.SavedPrefix
@ -25,12 +27,72 @@ func (state *tokenizeOutputState) appendWord(word *wordType) {
state.Rtn = append(state.Rtn, word)
}
func (state *tokenizeOutputState) ensureCurWord(pc *parseContext) {
if state.CurWord != nil {
func (state *tokenizeOutputState) appendWord(word *wordType) {
if len(state.SavedPrefix) > 0 {
word.Prefix = state.SavedPrefix
state.SavedPrefix = nil
}
if state.CurWord == nil {
state.CurWord = word
return
}
state.CurWord = &wordType{Type: WordTypeLit, Offset: pc.Pos, Complete: true, Prefix: state.SavedPrefix}
state.SavedPrefix = nil
state.ensureGroupWord()
state.CurWord.Subs = append(state.CurWord.Subs, word)
}
// ensureGroupWord guarantees that CurWord is a WordTypeGroup word.
//
// A CurWord that is already a group is left untouched. Any other CurWord is
// wrapped in a new, complete group word that takes over its Offset and Prefix;
// the wrapped word becomes the group's only sub-word and its own Prefix is
// cleared. Panics when CurWord is nil.
func (state *tokenizeOutputState) ensureGroupWord() {
	cur := state.CurWord
	if cur == nil {
		panic("invalid state, cannot make group word when CurWord is nil")
	}
	if cur.Type == WordTypeGroup {
		return
	}
	// the prefix is carried by the outermost word, so it moves up to the group
	wrapper := &wordType{
		Type:     WordTypeGroup,
		Offset:   cur.Offset,
		Complete: true,
		Prefix:   cur.Prefix,
		Subs:     []*wordType{cur},
	}
	cur.Prefix = nil
	state.CurWord = wrapper
}
// ungroupWord flattens a group word into its sub-words.
//
// A non-group word is returned as a one-element slice. For a group word the
// Subs slice itself is returned (not a copy). When the group has a non-empty
// Prefix and at least one sub-word, the first sub-word's Prefix is replaced
// with the group's prefix followed by its own, so the leading text is not lost.
func ungroupWord(w *wordType) []*wordType {
	if w.Type != WordTypeGroup {
		return []*wordType{w}
	}
	subs := w.Subs
	if len(subs) == 0 || len(w.Prefix) == 0 {
		return subs
	}
	first := subs[0]
	merged := make([]rune, 0, len(w.Prefix)+len(first.Prefix))
	merged = append(merged, w.Prefix...)
	merged = append(merged, first.Prefix...)
	first.Prefix = merged
	return subs
}
// ensureLitCurWord makes sure there is a literal word ready to receive
// characters.
//
//   - With no CurWord, a new literal word is started at pc.Pos; it takes over
//     SavedPrefix, which is then cleared.
//   - A literal CurWord is left as is.
//   - Any other CurWord is turned into a group (see ensureGroupWord); if the
//     group's last sub-word is not a literal, a new empty literal sub-word
//     starting at pc.Pos is appended.
//
// Panics (after dumping the current state to stdout) if a literal sub-word
// has to be appended while a SavedPrefix is still pending.
func (state *tokenizeOutputState) ensureLitCurWord(pc *parseContext) {
	switch {
	case state.CurWord == nil:
		state.CurWord = &wordType{
			Type:     WordTypeLit,
			Offset:   pc.Pos,
			Complete: true,
			Prefix:   state.SavedPrefix,
		}
		state.SavedPrefix = nil
		return
	case state.CurWord.Type == WordTypeLit:
		return
	}

	state.ensureGroupWord()
	group := state.CurWord
	if tail := group.Subs[len(group.Subs)-1]; tail.Type == WordTypeLit {
		return
	}
	// a sub-word in the middle of a group has nowhere to carry a prefix
	if len(state.SavedPrefix) > 0 {
		dumpWords(state.Rtn, "**")
		dumpWords([]*wordType{group}, ">>")
		fmt.Printf("sp: %q\n", state.SavedPrefix)
		panic("invalid state, there can be no saved prefix")
	}
	group.Subs = append(group.Subs, &wordType{Type: WordTypeLit, Offset: pc.Pos, Complete: true})
}
func (state *tokenizeOutputState) delimitCurWord() {
@ -45,40 +107,51 @@ func (state *tokenizeOutputState) delimitWithSpace(spaceCh rune) {
state.SavedPrefix = append(state.SavedPrefix, spaceCh)
}
// appendLiteral appends the rune ch to the literal text currently being built.
//
// It first calls ensureLitCurWord so that a literal target exists, then
// appends ch to CurWord.Raw when CurWord is a literal, or to the Raw of the
// last sub-word when CurWord is a group.
//
// Panics if CurWord is neither a literal nor a group, or if the last sub-word
// of a group is not a literal. The group panic reports the type of the
// offending sub-word (the group's own type is always WordTypeGroup in that
// branch and would carry no information).
func (state *tokenizeOutputState) appendLiteral(pc *parseContext, ch rune) {
	state.ensureLitCurWord(pc)
	switch cur := state.CurWord; cur.Type {
	case WordTypeLit:
		cur.Raw = append(cur.Raw, ch)
	case WordTypeGroup:
		lastWord := cur.Subs[len(cur.Subs)-1]
		if lastWord.Type != WordTypeLit {
			panic(fmt.Sprintf("invalid curword type (group) %q", lastWord.Type))
		}
		lastWord.Raw = append(lastWord.Raw, ch)
	default:
		panic(fmt.Sprintf("invalid curword type %q", cur.Type))
	}
}
func (state *tokenizeOutputState) finish(pc *parseContext) {
state.delimitCurWord()
if len(state.SavedPrefix) > 0 {
state.ensureCurWord(pc)
state.ensureLitCurWord(pc)
state.delimitCurWord()
}
}
func Tokenize(cmd string) []*wordType {
c := &parseContext{Input: []rune(cmd)}
func (c *parseContext) tokenizeVarBrace() ([]*wordType, bool) {
state := &tokenizeOutputState{}
eofExit := false
for {
ch := c.cur()
if ch == 0 {
eofExit = true
break
}
// fmt.Printf("ch %d %q\n", c.Pos, string([]rune{ch}))
foundOp, newOffset := c.parseOp(0)
if foundOp {
opWord := &wordType{Type: WordTypeOp, Offset: c.Pos, Raw: c.Input[c.Pos : c.Pos+newOffset], Complete: true}
opWord.Val = string(opWord.Raw)
c.Pos = c.Pos + newOffset
state.appendWord(opWord)
continue
if ch == '}' {
c.Pos++
break
}
var quoteWord *wordType
switch ch {
case '\'':
if ch == '\'' {
quoteWord = c.parseStrSQ()
case '"':
}
if quoteWord == nil && ch == '"' {
quoteWord = c.parseStrDQ()
case '$':
}
isNextBrace := c.at(1) == '}'
if quoteWord == nil && ch == '$' && !isNextBrace {
quoteWord = c.parseStrANSI()
if quoteWord == nil {
quoteWord = c.parseStrDDQ()
@ -92,8 +165,124 @@ func Tokenize(cmd string) []*wordType {
continue
}
if ch == '\\' && c.at(1) != 0 {
state.ensureCurWord(c)
state.CurWord.Raw = append(state.CurWord.Raw, ch, c.at(1))
state.appendLiteral(c, ch)
state.appendLiteral(c, c.at(1))
c.Pos += 2
continue
}
state.appendLiteral(c, ch)
c.Pos++
}
return state.Rtn, eofExit
}
// tokenizeDQ tokenizes the inside of a double-quoted string. The context is
// expected to start just after the opening quote (parseStrDQ clones the
// context at that position before calling this).
//
// Scanning stops at the closing '"', which is consumed, or when c.cur()
// returns 0 (end of input). A '$' followed by more input is tried, in order,
// as an ANSI string, a $"..." string and an expansion; the first match is
// added as a sub-word. A backslash followed by another character is kept
// together with that character as literal text. Everything else is literal.
//
// Returns (words, eofExit). eofExit is true when the input ended before a
// closing quote was seen. words is nil when nothing was produced; when the
// only result is a single group word, its sub-words are returned instead.
func (c *parseContext) tokenizeDQ() ([]*wordType, bool) {
	out := &tokenizeOutputState{}
	hitEOF := false
scan:
	for {
		ch := c.cur()
		switch ch {
		case 0:
			hitEOF = true
			break scan
		case '"':
			c.Pos++
			break scan
		}
		if ch == '$' && c.at(1) != 0 {
			sub := c.parseStrANSI()
			if sub == nil {
				sub = c.parseStrDDQ()
			}
			if sub == nil {
				sub = c.parseExpansion()
			}
			if sub != nil {
				out.appendWord(sub)
				continue
			}
			// not a recognized '$' construct: fall through and keep it literal
		}
		step := 1
		if ch == '\\' && c.at(1) != 0 {
			// keep the backslash and the escaped character together
			out.appendLiteral(c, ch)
			out.appendLiteral(c, c.at(1))
			step = 2
		} else {
			out.appendLiteral(c, ch)
		}
		c.Pos += step
	}
	out.finish(c)

	words := out.Rtn
	switch {
	case len(words) == 0:
		return nil, hitEOF
	case len(words) == 1 && words[0].Type == WordTypeGroup:
		return ungroupWord(words[0]), hitEOF
	}
	return words, hitEOF
}
// returns (words, eofexit)
func (c *parseContext) tokenizeRaw() ([]*wordType, bool) {
state := &tokenizeOutputState{}
isExpSubShell := c.QC.cur() == WordTypeDP
parenLevel := 0
eofExit := false
for {
ch := c.cur()
if ch == 0 {
eofExit = true
break
}
if isExpSubShell && ch == ')' && parenLevel == 0 {
c.Pos++
break
}
// fmt.Printf("ch %d %q\n", c.Pos, string([]rune{ch}))
foundOp, newOffset := c.parseOp(0)
if foundOp {
rawOp := c.Input[c.Pos : c.Pos+newOffset]
opVal := string(rawOp)
opWord := &wordType{Type: WordTypeOp, Offset: c.Pos, Raw: rawOp, Val: opVal, Complete: true}
if opWord.Val == "(" {
arithWord := c.parseArith(true)
if arithWord != nil {
state.appendStandaloneWord(arithWord)
continue
} else {
parenLevel++
}
}
if opWord.Val == ")" {
parenLevel--
}
c.Pos = c.Pos + newOffset
state.appendStandaloneWord(opWord)
continue
}
var quoteWord *wordType
if ch == '\'' {
quoteWord = c.parseStrSQ()
}
if quoteWord == nil && ch == '"' {
quoteWord = c.parseStrDQ()
}
isNextParen := isExpSubShell && c.at(1) == ')'
if quoteWord == nil && ch == '$' && !isNextParen {
quoteWord = c.parseStrANSI()
if quoteWord == nil {
quoteWord = c.parseStrDDQ()
}
if quoteWord == nil {
quoteWord = c.parseExpansion()
}
}
if quoteWord != nil {
state.appendWord(quoteWord)
continue
}
if ch == '\\' && c.at(1) != 0 {
state.appendLiteral(c, ch)
state.appendLiteral(c, c.at(1))
c.Pos += 2
continue
}
@ -102,12 +291,28 @@ func Tokenize(cmd string) []*wordType {
c.Pos++
continue
}
state.ensureCurWord(c)
state.CurWord.Raw = append(state.CurWord.Raw, ch)
state.appendLiteral(c, ch)
c.Pos++
}
state.finish(c)
return state.Rtn
return state.Rtn, eofExit
}
// Tokenize converts cmd to runes, runs tokenizeRaw over a fresh parse context
// and returns the resulting words. The end-of-input flag reported by
// tokenizeRaw is discarded.
func Tokenize(cmd string) []*wordType {
	ctx := parseContext{Input: []rune(cmd)}
	words, _ := ctx.tokenizeRaw()
	return words
}
// FullRawString returns the raw runes that make up the word.
//
// For a group word this is the concatenation, in order, of the FullRawString
// of every sub-word (prefixes are not included). For any other word it is
// w.Raw itself, not a copy.
func (w *wordType) FullRawString() []rune {
	if w.Type != WordTypeGroup {
		return w.Raw
	}
	var joined []rune
	for i := range w.Subs {
		joined = append(joined, w.Subs[i].FullRawString()...)
	}
	return joined
}
func wordsToStr(words []*wordType) string {
@ -116,7 +321,7 @@ func wordsToStr(words []*wordType) string {
if len(word.Prefix) > 0 {
buf.WriteString(string(word.Prefix))
}
buf.WriteString(string(word.Raw))
buf.WriteString(string(word.FullRawString()))
}
return buf.String()
}