2022-11-15 09:36:30 +01:00
|
|
|
package shparse
|
|
|
|
|
|
|
|
import (
|
2022-11-15 09:39:53 +01:00
|
|
|
"bytes"
|
2022-11-16 09:37:22 +01:00
|
|
|
"fmt"
|
2022-11-15 09:36:30 +01:00
|
|
|
"unicode"
|
|
|
|
)
|
|
|
|
|
|
|
|
// from bash source
|
|
|
|
//
|
|
|
|
// shell_meta_chars "()<>;&|"
|
|
|
|
//
|
|
|
|
|
|
|
|
type tokenizeOutputState struct {
|
|
|
|
Rtn []*wordType
|
|
|
|
CurWord *wordType
|
|
|
|
SavedPrefix []rune
|
|
|
|
}
|
|
|
|
|
2022-11-16 09:37:22 +01:00
|
|
|
// does not set CurWord
|
|
|
|
func (state *tokenizeOutputState) appendStandaloneWord(word *wordType) {
|
2022-11-15 09:36:30 +01:00
|
|
|
state.delimitCurWord()
|
|
|
|
if len(state.SavedPrefix) > 0 {
|
|
|
|
word.Prefix = state.SavedPrefix
|
|
|
|
state.SavedPrefix = nil
|
|
|
|
}
|
|
|
|
state.Rtn = append(state.Rtn, word)
|
|
|
|
}
|
|
|
|
|
2022-11-16 09:37:22 +01:00
|
|
|
func (state *tokenizeOutputState) appendWord(word *wordType) {
|
|
|
|
if len(state.SavedPrefix) > 0 {
|
|
|
|
word.Prefix = state.SavedPrefix
|
|
|
|
state.SavedPrefix = nil
|
|
|
|
}
|
|
|
|
if state.CurWord == nil {
|
|
|
|
state.CurWord = word
|
2022-11-15 09:36:30 +01:00
|
|
|
return
|
|
|
|
}
|
2022-11-16 09:37:22 +01:00
|
|
|
state.ensureGroupWord()
|
|
|
|
state.CurWord.Subs = append(state.CurWord.Subs, word)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (state *tokenizeOutputState) ensureGroupWord() {
|
|
|
|
if state.CurWord == nil {
|
|
|
|
panic("invalid state, cannot make group word when CurWord is nil")
|
|
|
|
}
|
|
|
|
if state.CurWord.Type == WordTypeGroup {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
// moves the prefix from CurWord to the new group word
|
|
|
|
groupWord := &wordType{
|
|
|
|
Type: WordTypeGroup,
|
|
|
|
Offset: state.CurWord.Offset,
|
2022-11-16 20:13:15 +01:00
|
|
|
QC: state.CurWord.QC,
|
2022-11-16 09:37:22 +01:00
|
|
|
Complete: true,
|
|
|
|
Prefix: state.CurWord.Prefix,
|
|
|
|
}
|
|
|
|
state.CurWord.Prefix = nil
|
|
|
|
groupWord.Subs = []*wordType{state.CurWord}
|
|
|
|
state.CurWord = groupWord
|
|
|
|
}
|
|
|
|
|
|
|
|
func ungroupWord(w *wordType) []*wordType {
|
|
|
|
if w.Type != WordTypeGroup {
|
|
|
|
return []*wordType{w}
|
|
|
|
}
|
|
|
|
rtn := w.Subs
|
|
|
|
if len(w.Prefix) > 0 && len(rtn) > 0 {
|
|
|
|
newPrefix := append([]rune{}, w.Prefix...)
|
|
|
|
newPrefix = append(newPrefix, rtn[0].Prefix...)
|
|
|
|
rtn[0].Prefix = newPrefix
|
|
|
|
}
|
|
|
|
return rtn
|
|
|
|
}
|
|
|
|
|
|
|
|
func (state *tokenizeOutputState) ensureLitCurWord(pc *parseContext) {
|
|
|
|
if state.CurWord == nil {
|
2022-11-16 20:13:15 +01:00
|
|
|
state.CurWord = pc.makeWord(WordTypeLit, 0, true)
|
|
|
|
state.CurWord.Prefix = state.SavedPrefix
|
2022-11-16 09:37:22 +01:00
|
|
|
state.SavedPrefix = nil
|
|
|
|
return
|
|
|
|
}
|
|
|
|
if state.CurWord.Type == WordTypeLit {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
state.ensureGroupWord()
|
|
|
|
lastWord := state.CurWord.Subs[len(state.CurWord.Subs)-1]
|
|
|
|
if lastWord.Type != WordTypeLit {
|
|
|
|
if len(state.SavedPrefix) > 0 {
|
|
|
|
panic("invalid state, there can be no saved prefix")
|
|
|
|
}
|
2022-11-16 20:13:15 +01:00
|
|
|
litWord := pc.makeWord(WordTypeLit, 0, true)
|
2022-11-16 09:37:22 +01:00
|
|
|
state.CurWord.Subs = append(state.CurWord.Subs, litWord)
|
|
|
|
}
|
2022-11-15 09:36:30 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
func (state *tokenizeOutputState) delimitCurWord() {
|
|
|
|
if state.CurWord != nil {
|
|
|
|
state.Rtn = append(state.Rtn, state.CurWord)
|
|
|
|
state.CurWord = nil
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (state *tokenizeOutputState) delimitWithSpace(spaceCh rune) {
|
|
|
|
state.delimitCurWord()
|
|
|
|
state.SavedPrefix = append(state.SavedPrefix, spaceCh)
|
|
|
|
}
|
|
|
|
|
2022-11-16 09:37:22 +01:00
|
|
|
func (state *tokenizeOutputState) appendLiteral(pc *parseContext, ch rune) {
|
|
|
|
state.ensureLitCurWord(pc)
|
|
|
|
if state.CurWord.Type == WordTypeLit {
|
|
|
|
state.CurWord.Raw = append(state.CurWord.Raw, ch)
|
|
|
|
} else if state.CurWord.Type == WordTypeGroup {
|
|
|
|
lastWord := state.CurWord.Subs[len(state.CurWord.Subs)-1]
|
|
|
|
if lastWord.Type != WordTypeLit {
|
|
|
|
panic(fmt.Sprintf("invalid curword type (group) %q", state.CurWord.Type))
|
|
|
|
}
|
|
|
|
lastWord.Raw = append(lastWord.Raw, ch)
|
|
|
|
} else {
|
|
|
|
panic(fmt.Sprintf("invalid curword type %q", state.CurWord.Type))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-11-15 09:36:30 +01:00
|
|
|
func (state *tokenizeOutputState) finish(pc *parseContext) {
|
|
|
|
state.delimitCurWord()
|
|
|
|
if len(state.SavedPrefix) > 0 {
|
2022-11-16 09:37:22 +01:00
|
|
|
state.ensureLitCurWord(pc)
|
2022-11-15 09:36:30 +01:00
|
|
|
state.delimitCurWord()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-11-16 09:37:22 +01:00
|
|
|
func (c *parseContext) tokenizeVarBrace() ([]*wordType, bool) {
|
2022-11-15 09:36:30 +01:00
|
|
|
state := &tokenizeOutputState{}
|
2022-11-16 09:37:22 +01:00
|
|
|
eofExit := false
|
2022-11-15 09:36:30 +01:00
|
|
|
for {
|
|
|
|
ch := c.cur()
|
|
|
|
if ch == 0 {
|
2022-11-16 09:37:22 +01:00
|
|
|
eofExit = true
|
|
|
|
break
|
|
|
|
}
|
|
|
|
if ch == '}' {
|
|
|
|
c.Pos++
|
|
|
|
break
|
|
|
|
}
|
|
|
|
var quoteWord *wordType
|
|
|
|
if ch == '\'' {
|
|
|
|
quoteWord = c.parseStrSQ()
|
|
|
|
}
|
|
|
|
if quoteWord == nil && ch == '"' {
|
|
|
|
quoteWord = c.parseStrDQ()
|
|
|
|
}
|
|
|
|
isNextBrace := c.at(1) == '}'
|
|
|
|
if quoteWord == nil && ch == '$' && !isNextBrace {
|
|
|
|
quoteWord = c.parseStrANSI()
|
|
|
|
if quoteWord == nil {
|
|
|
|
quoteWord = c.parseStrDDQ()
|
|
|
|
}
|
|
|
|
if quoteWord == nil {
|
|
|
|
quoteWord = c.parseExpansion()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if quoteWord != nil {
|
|
|
|
state.appendWord(quoteWord)
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
if ch == '\\' && c.at(1) != 0 {
|
|
|
|
state.appendLiteral(c, ch)
|
|
|
|
state.appendLiteral(c, c.at(1))
|
|
|
|
c.Pos += 2
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
state.appendLiteral(c, ch)
|
|
|
|
c.Pos++
|
|
|
|
}
|
|
|
|
return state.Rtn, eofExit
|
|
|
|
}
|
|
|
|
|
|
|
|
func (c *parseContext) tokenizeDQ() ([]*wordType, bool) {
|
|
|
|
state := &tokenizeOutputState{}
|
|
|
|
eofExit := false
|
|
|
|
for {
|
|
|
|
ch := c.cur()
|
|
|
|
if ch == 0 {
|
|
|
|
eofExit = true
|
|
|
|
break
|
|
|
|
}
|
|
|
|
if ch == '"' {
|
|
|
|
c.Pos++
|
|
|
|
break
|
|
|
|
}
|
|
|
|
if ch == '$' && c.at(1) != 0 {
|
|
|
|
quoteWord := c.parseStrANSI()
|
|
|
|
if quoteWord == nil {
|
|
|
|
quoteWord = c.parseStrDDQ()
|
|
|
|
}
|
|
|
|
if quoteWord == nil {
|
|
|
|
quoteWord = c.parseExpansion()
|
|
|
|
}
|
|
|
|
if quoteWord != nil {
|
|
|
|
state.appendWord(quoteWord)
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if ch == '\\' && c.at(1) != 0 {
|
|
|
|
state.appendLiteral(c, ch)
|
|
|
|
state.appendLiteral(c, c.at(1))
|
|
|
|
c.Pos += 2
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
state.appendLiteral(c, ch)
|
|
|
|
c.Pos++
|
|
|
|
}
|
|
|
|
state.finish(c)
|
|
|
|
if len(state.Rtn) == 0 {
|
|
|
|
return nil, eofExit
|
|
|
|
}
|
|
|
|
if len(state.Rtn) == 1 && state.Rtn[0].Type == WordTypeGroup {
|
|
|
|
return ungroupWord(state.Rtn[0]), eofExit
|
|
|
|
}
|
|
|
|
return state.Rtn, eofExit
|
|
|
|
}
|
|
|
|
|
|
|
|
// returns (words, eofexit)
|
2022-11-16 21:00:44 +01:00
|
|
|
// backticks (WordTypeBQ) handle backslash in a special way, but that seems to mainly effect execution (not completion)
|
|
|
|
// de_backslash => removes initial backslash in \`, \\, and \$ before execution
|
2022-11-16 09:37:22 +01:00
|
|
|
func (c *parseContext) tokenizeRaw() ([]*wordType, bool) {
|
|
|
|
state := &tokenizeOutputState{}
|
|
|
|
isExpSubShell := c.QC.cur() == WordTypeDP
|
2022-11-16 21:00:44 +01:00
|
|
|
isInBQ := c.QC.cur() == WordTypeBQ
|
2022-11-16 09:37:22 +01:00
|
|
|
parenLevel := 0
|
|
|
|
eofExit := false
|
|
|
|
for {
|
|
|
|
ch := c.cur()
|
|
|
|
if ch == 0 {
|
|
|
|
eofExit = true
|
|
|
|
break
|
|
|
|
}
|
|
|
|
if isExpSubShell && ch == ')' && parenLevel == 0 {
|
|
|
|
c.Pos++
|
2022-11-15 09:36:30 +01:00
|
|
|
break
|
|
|
|
}
|
2022-11-16 21:00:44 +01:00
|
|
|
if isInBQ && ch == '`' {
|
|
|
|
c.Pos++
|
|
|
|
break
|
|
|
|
}
|
2022-11-15 09:36:30 +01:00
|
|
|
// fmt.Printf("ch %d %q\n", c.Pos, string([]rune{ch}))
|
|
|
|
foundOp, newOffset := c.parseOp(0)
|
|
|
|
if foundOp {
|
2022-11-16 20:13:15 +01:00
|
|
|
opVal := string(c.Input[c.Pos : c.Pos+newOffset])
|
|
|
|
if opVal == "(" {
|
2022-11-16 09:37:22 +01:00
|
|
|
arithWord := c.parseArith(true)
|
|
|
|
if arithWord != nil {
|
|
|
|
state.appendStandaloneWord(arithWord)
|
|
|
|
continue
|
|
|
|
} else {
|
|
|
|
parenLevel++
|
|
|
|
}
|
|
|
|
}
|
2022-11-16 20:13:15 +01:00
|
|
|
if opVal == ")" {
|
2022-11-16 09:37:22 +01:00
|
|
|
parenLevel--
|
|
|
|
}
|
2022-11-16 20:13:15 +01:00
|
|
|
opWord := c.makeWord(WordTypeOp, newOffset, true)
|
|
|
|
opWord.Val = opVal
|
2022-11-16 09:37:22 +01:00
|
|
|
state.appendStandaloneWord(opWord)
|
2022-11-15 09:36:30 +01:00
|
|
|
continue
|
|
|
|
}
|
|
|
|
var quoteWord *wordType
|
2022-11-16 09:37:22 +01:00
|
|
|
if ch == '\'' {
|
2022-11-15 09:36:30 +01:00
|
|
|
quoteWord = c.parseStrSQ()
|
2022-11-16 09:37:22 +01:00
|
|
|
}
|
|
|
|
if quoteWord == nil && ch == '"' {
|
2022-11-15 09:36:30 +01:00
|
|
|
quoteWord = c.parseStrDQ()
|
2022-11-16 09:37:22 +01:00
|
|
|
}
|
2022-11-16 21:00:44 +01:00
|
|
|
if quoteWord == nil && ch == '`' {
|
|
|
|
quoteWord = c.parseStrBQ()
|
|
|
|
}
|
2022-11-16 09:37:22 +01:00
|
|
|
isNextParen := isExpSubShell && c.at(1) == ')'
|
|
|
|
if quoteWord == nil && ch == '$' && !isNextParen {
|
2022-11-15 09:36:30 +01:00
|
|
|
quoteWord = c.parseStrANSI()
|
|
|
|
if quoteWord == nil {
|
|
|
|
quoteWord = c.parseStrDDQ()
|
|
|
|
}
|
|
|
|
if quoteWord == nil {
|
|
|
|
quoteWord = c.parseExpansion()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if quoteWord != nil {
|
|
|
|
state.appendWord(quoteWord)
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
if ch == '\\' && c.at(1) != 0 {
|
2022-11-16 09:37:22 +01:00
|
|
|
state.appendLiteral(c, ch)
|
|
|
|
state.appendLiteral(c, c.at(1))
|
2022-11-15 09:36:30 +01:00
|
|
|
c.Pos += 2
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
if unicode.IsSpace(ch) {
|
|
|
|
state.delimitWithSpace(ch)
|
|
|
|
c.Pos++
|
|
|
|
continue
|
|
|
|
}
|
2022-11-16 09:37:22 +01:00
|
|
|
state.appendLiteral(c, ch)
|
2022-11-15 09:36:30 +01:00
|
|
|
c.Pos++
|
|
|
|
}
|
|
|
|
state.finish(c)
|
2022-11-16 09:37:22 +01:00
|
|
|
return state.Rtn, eofExit
|
|
|
|
}
|
|
|
|
|
|
|
|
func Tokenize(cmd string) []*wordType {
|
|
|
|
c := &parseContext{Input: []rune(cmd)}
|
|
|
|
rtn, _ := c.tokenizeRaw()
|
|
|
|
return rtn
|
|
|
|
}
|
|
|
|
|
|
|
|
func (w *wordType) FullRawString() []rune {
|
|
|
|
if w.Type == WordTypeGroup {
|
|
|
|
var rtn []rune
|
|
|
|
for _, sw := range w.Subs {
|
|
|
|
rtn = append(rtn, sw.FullRawString()...)
|
|
|
|
}
|
|
|
|
return rtn
|
|
|
|
}
|
|
|
|
return w.Raw
|
2022-11-15 09:36:30 +01:00
|
|
|
}
|
2022-11-15 09:39:53 +01:00
|
|
|
|
|
|
|
func wordsToStr(words []*wordType) string {
|
|
|
|
var buf bytes.Buffer
|
|
|
|
for _, word := range words {
|
|
|
|
if len(word.Prefix) > 0 {
|
|
|
|
buf.WriteString(string(word.Prefix))
|
|
|
|
}
|
2022-11-16 09:37:22 +01:00
|
|
|
buf.WriteString(string(word.FullRawString()))
|
2022-11-15 09:39:53 +01:00
|
|
|
}
|
|
|
|
return buf.String()
|
|
|
|
}
|