mirror of
https://github.com/wavetermdev/waveterm.git
synced 2024-12-23 16:58:27 +01:00
recursive parsing for double quotes, subshells, and variable expansions
This commit is contained in:
parent
d44242fe71
commit
f1958eaac7
@ -81,6 +81,8 @@ const (
|
||||
WordTypeOp = "op" // single: & ; | ( ) < > \n multi(2): && || ;; << >> <& >& <> >| (( multi(3): <<- ('((' requires special processing)
|
||||
WordTypeKey = "key" // if then else elif fi do done case esac while until for in { } ! (( [[
|
||||
WordTypeSimpleVar = "svar" // simplevar $
|
||||
WordTypeGroup = "grp" // contains other words e.g. "hello"foo'bar'$x
|
||||
WordTypeArith = "ath"
|
||||
|
||||
// each of these can also be used as an entry in quoteContext
|
||||
WordTypeDQ = "dq" // "
|
||||
@ -129,11 +131,10 @@ type wordType struct {
|
||||
}
|
||||
|
||||
func (c *parseContext) clone(pos int, newQuote string) *parseContext {
|
||||
rtn := *c
|
||||
rtn := parseContext{Input: c.Input[pos:], QC: c.QC}
|
||||
if newQuote != "" {
|
||||
rtn.QC = append(rtn.QC, newQuote)
|
||||
rtn.QC = rtn.QC.push(newQuote)
|
||||
}
|
||||
rtn.Input = rtn.Input[pos:]
|
||||
return &rtn
|
||||
}
|
||||
|
||||
@ -264,12 +265,15 @@ func (c *parseContext) parseStrDQ() *wordType {
|
||||
if !c.match('"') {
|
||||
return nil
|
||||
}
|
||||
newOffset, complete := c.skipToChar(1, '"', false)
|
||||
newContext := c.clone(c.Pos+1, WordTypeDQ)
|
||||
subWords, eofExit := newContext.tokenizeDQ()
|
||||
newOffset := newContext.Pos + 1
|
||||
w := &wordType{
|
||||
Type: WordTypeDQ,
|
||||
Offset: c.Pos,
|
||||
Raw: c.Input[c.Pos : c.Pos+newOffset],
|
||||
Complete: complete,
|
||||
Complete: !eofExit,
|
||||
Subs: subWords,
|
||||
}
|
||||
c.Pos = c.Pos + newOffset
|
||||
return w
|
||||
@ -320,6 +324,19 @@ func (c *parseContext) parseStrDDQ() *wordType {
|
||||
return w
|
||||
}
|
||||
|
||||
func (c *parseContext) parseArith(mustComplete bool) *wordType {
|
||||
if !c.match2('(', '(') {
|
||||
return nil
|
||||
}
|
||||
newOffset, complete := c.skipToChar2(2, ')', ')', false)
|
||||
if mustComplete && !complete {
|
||||
return nil
|
||||
}
|
||||
w := &wordType{Type: WordTypeArith, Offset: c.Pos, Raw: c.Input[c.Pos : c.Pos+newOffset], Complete: complete}
|
||||
c.Pos = c.Pos + newOffset
|
||||
return w
|
||||
}
|
||||
|
||||
func (c *parseContext) parseExpansion() *wordType {
|
||||
if !c.match('$') {
|
||||
return nil
|
||||
@ -332,8 +349,12 @@ func (c *parseContext) parseExpansion() *wordType {
|
||||
}
|
||||
if c.match2('$', '(') {
|
||||
// subshell
|
||||
newOffset, complete := c.skipToChar(2, ')', false)
|
||||
w := &wordType{Type: WordTypeDP, Offset: c.Pos, Raw: c.Input[c.Pos : c.Pos+newOffset], Complete: complete}
|
||||
newContext := c.clone(c.Pos+2, WordTypeDP)
|
||||
subWords, eofExit := newContext.tokenizeRaw()
|
||||
newOffset := newContext.Pos + 2
|
||||
// newOffset, complete := c.skipToChar(2, ')', false)
|
||||
w := &wordType{Type: WordTypeDP, Offset: c.Pos, Raw: c.Input[c.Pos : c.Pos+newOffset], Complete: !eofExit}
|
||||
w.Subs = subWords
|
||||
c.Pos = c.Pos + newOffset
|
||||
return w
|
||||
}
|
||||
@ -346,8 +367,10 @@ func (c *parseContext) parseExpansion() *wordType {
|
||||
}
|
||||
if c.match2('$', '{') {
|
||||
// variable expansion
|
||||
newOffset, complete := c.skipToChar(2, '}', false)
|
||||
w := &wordType{Type: WordTypeVarBrace, Offset: c.Pos, Raw: c.Input[c.Pos : c.Pos+newOffset], Complete: complete}
|
||||
newContext := c.clone(c.Pos+2, WordTypeVarBrace)
|
||||
_, eofExit := newContext.tokenizeVarBrace()
|
||||
newOffset := newContext.Pos + 2
|
||||
w := &wordType{Type: WordTypeVarBrace, Offset: c.Pos, Raw: c.Input[c.Pos : c.Pos+newOffset], Complete: !eofExit}
|
||||
c.Pos = c.Pos + newOffset
|
||||
return w
|
||||
}
|
||||
@ -520,7 +543,7 @@ func (w *wordType) String() string {
|
||||
if !w.Complete {
|
||||
notCompleteFlag = "*"
|
||||
}
|
||||
return fmt.Sprintf("%4s[%3d]%s %s%q", w.Type, w.Offset, notCompleteFlag, makeSpaceStr(len(w.Prefix)), string(w.Raw))
|
||||
return fmt.Sprintf("%4s[%3d]%s %s%q", w.Type, w.Offset, notCompleteFlag, makeSpaceStr(len(w.Prefix)), string(w.FullRawString()))
|
||||
}
|
||||
|
||||
func dumpWords(words []*wordType, indentStr string) {
|
||||
|
@ -35,4 +35,12 @@ func Test1(t *testing.T) {
|
||||
testParse(t, `ls ${x:"hello"} $[2+2] $((5 * 10)) $(ls; ls&)`)
|
||||
testParse(t, `ls;ls&./foo > out 2> "out2"`)
|
||||
testParse(t, `(( x = 5)); ls& cd ~/work/"hello again"`)
|
||||
testParse(t, `echo "hello"abc$(ls)$x${y:foo}`)
|
||||
testParse(t, `echo $(ls; ./x "foo")`)
|
||||
testParse(t, `echo $(ls; (cd foo; ls); (cd bar; ls))xyz`)
|
||||
testParse(t, `echo "$x ${y:-foo}"`)
|
||||
testParse(t, `command="$(echo "$input" | sed -e "s/^[ \t]*\([^ \t]*\)[ \t]*.*$/\1/g")"`)
|
||||
testParse(t, `echo $(ls $)`)
|
||||
testParse(t, `echo ${x:-hello\}"}"} 2nd`)
|
||||
testParse(t, `echo "$(ls "foo") more $x"`)
|
||||
}
|
||||
|
@ -2,6 +2,7 @@ package shparse
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"unicode"
|
||||
)
|
||||
|
||||
@ -16,7 +17,8 @@ type tokenizeOutputState struct {
|
||||
SavedPrefix []rune
|
||||
}
|
||||
|
||||
func (state *tokenizeOutputState) appendWord(word *wordType) {
|
||||
// does not set CurWord
|
||||
func (state *tokenizeOutputState) appendStandaloneWord(word *wordType) {
|
||||
state.delimitCurWord()
|
||||
if len(state.SavedPrefix) > 0 {
|
||||
word.Prefix = state.SavedPrefix
|
||||
@ -25,12 +27,72 @@ func (state *tokenizeOutputState) appendWord(word *wordType) {
|
||||
state.Rtn = append(state.Rtn, word)
|
||||
}
|
||||
|
||||
func (state *tokenizeOutputState) ensureCurWord(pc *parseContext) {
|
||||
if state.CurWord != nil {
|
||||
func (state *tokenizeOutputState) appendWord(word *wordType) {
|
||||
if len(state.SavedPrefix) > 0 {
|
||||
word.Prefix = state.SavedPrefix
|
||||
state.SavedPrefix = nil
|
||||
}
|
||||
if state.CurWord == nil {
|
||||
state.CurWord = word
|
||||
return
|
||||
}
|
||||
state.CurWord = &wordType{Type: WordTypeLit, Offset: pc.Pos, Complete: true, Prefix: state.SavedPrefix}
|
||||
state.SavedPrefix = nil
|
||||
state.ensureGroupWord()
|
||||
state.CurWord.Subs = append(state.CurWord.Subs, word)
|
||||
}
|
||||
|
||||
func (state *tokenizeOutputState) ensureGroupWord() {
|
||||
if state.CurWord == nil {
|
||||
panic("invalid state, cannot make group word when CurWord is nil")
|
||||
}
|
||||
if state.CurWord.Type == WordTypeGroup {
|
||||
return
|
||||
}
|
||||
// moves the prefix from CurWord to the new group word
|
||||
groupWord := &wordType{
|
||||
Type: WordTypeGroup,
|
||||
Offset: state.CurWord.Offset,
|
||||
Complete: true,
|
||||
Prefix: state.CurWord.Prefix,
|
||||
}
|
||||
state.CurWord.Prefix = nil
|
||||
groupWord.Subs = []*wordType{state.CurWord}
|
||||
state.CurWord = groupWord
|
||||
}
|
||||
|
||||
func ungroupWord(w *wordType) []*wordType {
|
||||
if w.Type != WordTypeGroup {
|
||||
return []*wordType{w}
|
||||
}
|
||||
rtn := w.Subs
|
||||
if len(w.Prefix) > 0 && len(rtn) > 0 {
|
||||
newPrefix := append([]rune{}, w.Prefix...)
|
||||
newPrefix = append(newPrefix, rtn[0].Prefix...)
|
||||
rtn[0].Prefix = newPrefix
|
||||
}
|
||||
return rtn
|
||||
}
|
||||
|
||||
func (state *tokenizeOutputState) ensureLitCurWord(pc *parseContext) {
|
||||
if state.CurWord == nil {
|
||||
state.CurWord = &wordType{Type: WordTypeLit, Offset: pc.Pos, Complete: true, Prefix: state.SavedPrefix}
|
||||
state.SavedPrefix = nil
|
||||
return
|
||||
}
|
||||
if state.CurWord.Type == WordTypeLit {
|
||||
return
|
||||
}
|
||||
state.ensureGroupWord()
|
||||
lastWord := state.CurWord.Subs[len(state.CurWord.Subs)-1]
|
||||
if lastWord.Type != WordTypeLit {
|
||||
if len(state.SavedPrefix) > 0 {
|
||||
dumpWords(state.Rtn, "**")
|
||||
dumpWords([]*wordType{state.CurWord}, ">>")
|
||||
fmt.Printf("sp: %q\n", state.SavedPrefix)
|
||||
panic("invalid state, there can be no saved prefix")
|
||||
}
|
||||
litWord := &wordType{Type: WordTypeLit, Offset: pc.Pos, Complete: true}
|
||||
state.CurWord.Subs = append(state.CurWord.Subs, litWord)
|
||||
}
|
||||
}
|
||||
|
||||
func (state *tokenizeOutputState) delimitCurWord() {
|
||||
@ -45,40 +107,51 @@ func (state *tokenizeOutputState) delimitWithSpace(spaceCh rune) {
|
||||
state.SavedPrefix = append(state.SavedPrefix, spaceCh)
|
||||
}
|
||||
|
||||
func (state *tokenizeOutputState) appendLiteral(pc *parseContext, ch rune) {
|
||||
state.ensureLitCurWord(pc)
|
||||
if state.CurWord.Type == WordTypeLit {
|
||||
state.CurWord.Raw = append(state.CurWord.Raw, ch)
|
||||
} else if state.CurWord.Type == WordTypeGroup {
|
||||
lastWord := state.CurWord.Subs[len(state.CurWord.Subs)-1]
|
||||
if lastWord.Type != WordTypeLit {
|
||||
panic(fmt.Sprintf("invalid curword type (group) %q", state.CurWord.Type))
|
||||
}
|
||||
lastWord.Raw = append(lastWord.Raw, ch)
|
||||
} else {
|
||||
panic(fmt.Sprintf("invalid curword type %q", state.CurWord.Type))
|
||||
}
|
||||
}
|
||||
|
||||
func (state *tokenizeOutputState) finish(pc *parseContext) {
|
||||
state.delimitCurWord()
|
||||
if len(state.SavedPrefix) > 0 {
|
||||
state.ensureCurWord(pc)
|
||||
state.ensureLitCurWord(pc)
|
||||
state.delimitCurWord()
|
||||
}
|
||||
}
|
||||
|
||||
func Tokenize(cmd string) []*wordType {
|
||||
c := &parseContext{Input: []rune(cmd)}
|
||||
func (c *parseContext) tokenizeVarBrace() ([]*wordType, bool) {
|
||||
state := &tokenizeOutputState{}
|
||||
eofExit := false
|
||||
for {
|
||||
ch := c.cur()
|
||||
if ch == 0 {
|
||||
eofExit = true
|
||||
break
|
||||
}
|
||||
// fmt.Printf("ch %d %q\n", c.Pos, string([]rune{ch}))
|
||||
foundOp, newOffset := c.parseOp(0)
|
||||
if foundOp {
|
||||
opWord := &wordType{Type: WordTypeOp, Offset: c.Pos, Raw: c.Input[c.Pos : c.Pos+newOffset], Complete: true}
|
||||
opWord.Val = string(opWord.Raw)
|
||||
c.Pos = c.Pos + newOffset
|
||||
state.appendWord(opWord)
|
||||
continue
|
||||
if ch == '}' {
|
||||
c.Pos++
|
||||
break
|
||||
}
|
||||
var quoteWord *wordType
|
||||
switch ch {
|
||||
case '\'':
|
||||
if ch == '\'' {
|
||||
quoteWord = c.parseStrSQ()
|
||||
|
||||
case '"':
|
||||
}
|
||||
if quoteWord == nil && ch == '"' {
|
||||
quoteWord = c.parseStrDQ()
|
||||
|
||||
case '$':
|
||||
}
|
||||
isNextBrace := c.at(1) == '}'
|
||||
if quoteWord == nil && ch == '$' && !isNextBrace {
|
||||
quoteWord = c.parseStrANSI()
|
||||
if quoteWord == nil {
|
||||
quoteWord = c.parseStrDDQ()
|
||||
@ -92,8 +165,124 @@ func Tokenize(cmd string) []*wordType {
|
||||
continue
|
||||
}
|
||||
if ch == '\\' && c.at(1) != 0 {
|
||||
state.ensureCurWord(c)
|
||||
state.CurWord.Raw = append(state.CurWord.Raw, ch, c.at(1))
|
||||
state.appendLiteral(c, ch)
|
||||
state.appendLiteral(c, c.at(1))
|
||||
c.Pos += 2
|
||||
continue
|
||||
}
|
||||
state.appendLiteral(c, ch)
|
||||
c.Pos++
|
||||
}
|
||||
return state.Rtn, eofExit
|
||||
}
|
||||
|
||||
func (c *parseContext) tokenizeDQ() ([]*wordType, bool) {
|
||||
state := &tokenizeOutputState{}
|
||||
eofExit := false
|
||||
for {
|
||||
ch := c.cur()
|
||||
if ch == 0 {
|
||||
eofExit = true
|
||||
break
|
||||
}
|
||||
if ch == '"' {
|
||||
c.Pos++
|
||||
break
|
||||
}
|
||||
if ch == '$' && c.at(1) != 0 {
|
||||
quoteWord := c.parseStrANSI()
|
||||
if quoteWord == nil {
|
||||
quoteWord = c.parseStrDDQ()
|
||||
}
|
||||
if quoteWord == nil {
|
||||
quoteWord = c.parseExpansion()
|
||||
}
|
||||
if quoteWord != nil {
|
||||
state.appendWord(quoteWord)
|
||||
continue
|
||||
}
|
||||
}
|
||||
if ch == '\\' && c.at(1) != 0 {
|
||||
state.appendLiteral(c, ch)
|
||||
state.appendLiteral(c, c.at(1))
|
||||
c.Pos += 2
|
||||
continue
|
||||
}
|
||||
state.appendLiteral(c, ch)
|
||||
c.Pos++
|
||||
}
|
||||
state.finish(c)
|
||||
if len(state.Rtn) == 0 {
|
||||
return nil, eofExit
|
||||
}
|
||||
if len(state.Rtn) == 1 && state.Rtn[0].Type == WordTypeGroup {
|
||||
return ungroupWord(state.Rtn[0]), eofExit
|
||||
}
|
||||
return state.Rtn, eofExit
|
||||
}
|
||||
|
||||
// returns (words, eofexit)
|
||||
func (c *parseContext) tokenizeRaw() ([]*wordType, bool) {
|
||||
state := &tokenizeOutputState{}
|
||||
isExpSubShell := c.QC.cur() == WordTypeDP
|
||||
parenLevel := 0
|
||||
eofExit := false
|
||||
for {
|
||||
ch := c.cur()
|
||||
if ch == 0 {
|
||||
eofExit = true
|
||||
break
|
||||
}
|
||||
if isExpSubShell && ch == ')' && parenLevel == 0 {
|
||||
c.Pos++
|
||||
break
|
||||
}
|
||||
// fmt.Printf("ch %d %q\n", c.Pos, string([]rune{ch}))
|
||||
foundOp, newOffset := c.parseOp(0)
|
||||
if foundOp {
|
||||
rawOp := c.Input[c.Pos : c.Pos+newOffset]
|
||||
opVal := string(rawOp)
|
||||
opWord := &wordType{Type: WordTypeOp, Offset: c.Pos, Raw: rawOp, Val: opVal, Complete: true}
|
||||
if opWord.Val == "(" {
|
||||
arithWord := c.parseArith(true)
|
||||
if arithWord != nil {
|
||||
state.appendStandaloneWord(arithWord)
|
||||
continue
|
||||
} else {
|
||||
parenLevel++
|
||||
}
|
||||
}
|
||||
if opWord.Val == ")" {
|
||||
parenLevel--
|
||||
}
|
||||
c.Pos = c.Pos + newOffset
|
||||
state.appendStandaloneWord(opWord)
|
||||
continue
|
||||
}
|
||||
var quoteWord *wordType
|
||||
if ch == '\'' {
|
||||
quoteWord = c.parseStrSQ()
|
||||
}
|
||||
if quoteWord == nil && ch == '"' {
|
||||
quoteWord = c.parseStrDQ()
|
||||
}
|
||||
isNextParen := isExpSubShell && c.at(1) == ')'
|
||||
if quoteWord == nil && ch == '$' && !isNextParen {
|
||||
quoteWord = c.parseStrANSI()
|
||||
if quoteWord == nil {
|
||||
quoteWord = c.parseStrDDQ()
|
||||
}
|
||||
if quoteWord == nil {
|
||||
quoteWord = c.parseExpansion()
|
||||
}
|
||||
}
|
||||
if quoteWord != nil {
|
||||
state.appendWord(quoteWord)
|
||||
continue
|
||||
}
|
||||
if ch == '\\' && c.at(1) != 0 {
|
||||
state.appendLiteral(c, ch)
|
||||
state.appendLiteral(c, c.at(1))
|
||||
c.Pos += 2
|
||||
continue
|
||||
}
|
||||
@ -102,12 +291,28 @@ func Tokenize(cmd string) []*wordType {
|
||||
c.Pos++
|
||||
continue
|
||||
}
|
||||
state.ensureCurWord(c)
|
||||
state.CurWord.Raw = append(state.CurWord.Raw, ch)
|
||||
state.appendLiteral(c, ch)
|
||||
c.Pos++
|
||||
}
|
||||
state.finish(c)
|
||||
return state.Rtn
|
||||
return state.Rtn, eofExit
|
||||
}
|
||||
|
||||
func Tokenize(cmd string) []*wordType {
|
||||
c := &parseContext{Input: []rune(cmd)}
|
||||
rtn, _ := c.tokenizeRaw()
|
||||
return rtn
|
||||
}
|
||||
|
||||
func (w *wordType) FullRawString() []rune {
|
||||
if w.Type == WordTypeGroup {
|
||||
var rtn []rune
|
||||
for _, sw := range w.Subs {
|
||||
rtn = append(rtn, sw.FullRawString()...)
|
||||
}
|
||||
return rtn
|
||||
}
|
||||
return w.Raw
|
||||
}
|
||||
|
||||
func wordsToStr(words []*wordType) string {
|
||||
@ -116,7 +321,7 @@ func wordsToStr(words []*wordType) string {
|
||||
if len(word.Prefix) > 0 {
|
||||
buf.WriteString(string(word.Prefix))
|
||||
}
|
||||
buf.WriteString(string(word.Raw))
|
||||
buf.WriteString(string(word.FullRawString()))
|
||||
}
|
||||
return buf.String()
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user