// Copyright 2023, Command Line Inc. // SPDX-License-Identifier: Apache-2.0 package shparse import ( "bytes" "fmt" "github.com/wavetermdev/waveterm/waveshell/pkg/utilfn" ) // // cmds := cmd (sep cmd)* // sep := ';' | '&' | '&&' | '||' | '|' | '\n' // cmd := simple-cmd | compound-command redirect-list? // compound-command := brace-group | subshell | for-clause | case-clause | if-clause | while-clause | until-clause // brace-group := '{' cmds '}' // subshell := '(' cmds ')' // simple-command := cmd-prefix cmd-word (io-redirect)* // cmd-prefix := (io-redirect | assignment)* // cmd-suffix := (io-redirect | word)* // cmd-name := word // cmd-word := word // io-redirect := (io-number? io-file) | (io-number? io-here) // io-file := ('<' | '<&' | '>' | '>&' | '>>' | '>|' ) filename // io-here := ('<<' | '<<-') here_end // here-end := word // if-clause := 'if' compound-list 'then' compound-list else-part 'fi' // else-part := 'elif' compound-list 'then' compound-list // | 'elif' compount-list 'then' compound-list else-part // | 'else' compound-list // compound-list := linebreak term sep? // // // // A correctly-formed brace expansion must contain unquoted opening and closing braces, and at least one unquoted comma or a valid sequence expression // Any incorrectly formed brace expansion is left unchanged. // // ambiguity between $((...)) and $((ls); ls) // ambiguity between foo=([0]=hell) and foo=([abc) // tokenization https://pubs.opengroup.org/onlinepubs/7908799/xcu/chap2.html#tag_001_003 // can-extend: WordTypeLit, WordTypeSimpleVar, WordTypeVarBrace, WordTypeDQ, WordTypeDDQ, WordTypeSQ, WordTypeDSQ const ( WordTypeRaw = "raw" WordTypeLit = "lit" // (can-extend) WordTypeOp = "op" // single: & ; | ( ) < > \n multi(2): && || ;; << >> <& >& <> >| (( multi(3): <<- ('((' requires special processing) WordTypeKey = "key" // if then else elif fi do done case esac while until for in { } ! (( [[ WordTypeGroup = "grp" // contains other words e.g. "hello"foo'bar'$x (has-subs) (can-extend) WordTypeSimpleVar = "svar" // simplevar $ (can-extend) WordTypeDQ = "dq" // " (quote-context) (can-extend) (has-subs) WordTypeDDQ = "ddq" // $" (can-extend) (has-subs) (for quotecontext, uses WordTypeDQ) WordTypeVarBrace = "varb" // ${ (quote-context) (can-extend) (internals not parsed) WordTypeDP = "dp" // $( (quote-context) (has-subs) WordTypeBQ = "bq" // ` (quote-context) (has-subs) WordTypeSQ = "sq" // ' (can-extend) WordTypeDSQ = "dsq" // $' (can-extend) WordTypeDPP = "dpp" // $(( (internals not parsed) WordTypePP = "pp" // (( (internals not parsed) WordTypeDB = "db" // $[ (internals not parsed) ) const ( CmdTypeNone = "none" // holds control structures: '(' ')' 'for' 'while' etc. CmdTypeSimple = "simple" // holds real commands ) type WordType struct { Type string Offset int QC QuoteContext Raw []rune Complete bool Prefix []rune Subs []*WordType } type CmdType struct { Type string AssignmentWords []*WordType Words []*WordType NoneComplete bool // set to true when last-word is a "separator" } type QuoteContext []string var wordMetaMap map[string]wordMeta // same order as https://www.gnu.org/software/bash/manual/html_node/Reserved-Words.html var bashReservedWords = []string{ "if", "then", "elif", "else", "fi", "time", "for", "in", "until", "while", "do", "done", "case", "esac", "coproc", "select", "function", "{", "}", "[[", "]]", "!", } // special reserved words: "for", "in", "case", "select", "function", "[[", and "]]" var bashNoneRW = []string{ "if", "then", "elif", "else", "fi", "time", "until", "while", "do", "done", "esac", "coproc", "{", "}", "!", } type wordMeta struct { Type string EmptyWord []rune PrefixLen int SuffixLen int CanExtend bool QuoteContext bool } func (m wordMeta) getSuffix() string { if m.SuffixLen == 0 { return "" } return string(m.EmptyWord[len(m.EmptyWord)-m.SuffixLen:]) } func (m wordMeta) getPrefix() string { if m.PrefixLen == 0 { return "" } return string(m.EmptyWord[:m.PrefixLen]) } func makeWordMeta(wtype string, emptyWord string, prefixLen int, suffixLen int, canExtend bool, quoteContext bool) { if len(emptyWord) != prefixLen+suffixLen { panic(fmt.Sprintf("invalid empty word %s %d %d", emptyWord, prefixLen, suffixLen)) } wordMetaMap[wtype] = wordMeta{wtype, []rune(emptyWord), prefixLen, suffixLen, canExtend, quoteContext} } func init() { wordMetaMap = make(map[string]wordMeta) makeWordMeta(WordTypeRaw, "", 0, 0, false, false) makeWordMeta(WordTypeLit, "", 0, 0, true, false) makeWordMeta(WordTypeOp, "", 0, 0, false, false) makeWordMeta(WordTypeKey, "", 0, 0, false, false) makeWordMeta(WordTypeGroup, "", 0, 0, false, false) makeWordMeta(WordTypeSimpleVar, "$", 1, 0, true, false) makeWordMeta(WordTypeVarBrace, "${}", 2, 1, true, true) makeWordMeta(WordTypeDQ, `""`, 1, 1, true, true) makeWordMeta(WordTypeDDQ, `$""`, 2, 1, true, true) makeWordMeta(WordTypeDP, "$()", 2, 1, false, false) makeWordMeta(WordTypeBQ, "``", 1, 1, false, false) makeWordMeta(WordTypeSQ, "''", 1, 1, true, false) makeWordMeta(WordTypeDSQ, "$''", 2, 1, true, false) makeWordMeta(WordTypeDPP, "$(())", 3, 2, false, false) makeWordMeta(WordTypePP, "(())", 2, 2, false, false) makeWordMeta(WordTypeDB, "$[]", 2, 1, false, false) } func MakeEmptyWord(wtype string, qc QuoteContext, offset int, complete bool) *WordType { meta := wordMetaMap[wtype] if meta.Type == "" { meta = wordMetaMap[WordTypeRaw] } rtn := &WordType{Type: meta.Type, QC: qc, Offset: offset, Complete: complete} if len(meta.EmptyWord) > 0 { if complete { rtn.Raw = append([]rune(nil), meta.EmptyWord...) } else { rtn.Raw = append([]rune(nil), []rune(meta.getPrefix())...) } } return rtn } func (qc QuoteContext) push(q string) QuoteContext { rtn := make([]string, 0, len(qc)+1) rtn = append(rtn, qc...) rtn = append(rtn, q) return rtn } func (qc QuoteContext) cur() string { if len(qc) == 0 { return "" } return qc[len(qc)-1] } func (qc QuoteContext) clone() QuoteContext { if len(qc) == 0 { return nil } return append([]string(nil), qc...) } func makeRepeatStr(ch byte, slen int) string { if slen == 0 { return "" } rtn := make([]byte, slen) for i := 0; i < slen; i++ { rtn[i] = ch } return string(rtn) } func (w *WordType) isBlank() bool { return w.Type == WordTypeLit && len(w.Raw) == 0 } func (w *WordType) contentEndPos() int { if !w.Complete { return len(w.Raw) } wmeta := wordMetaMap[w.Type] return len(w.Raw) - wmeta.SuffixLen } func (w *WordType) contentStartPos() int { wmeta := wordMetaMap[w.Type] return wmeta.PrefixLen } func (w *WordType) canHaveSubs() bool { switch w.Type { case WordTypeGroup, WordTypeDQ, WordTypeDDQ, WordTypeDP, WordTypeBQ: return true default: return false } } func (w *WordType) uncompletable() bool { switch w.Type { case WordTypeRaw, WordTypeOp, WordTypeKey, WordTypeDPP, WordTypePP, WordTypeDB, WordTypeBQ, WordTypeDP: return true default: return false } } func (w *WordType) stringWithPos(pos int) string { notCompleteFlag := " " if !w.Complete { notCompleteFlag = "*" } str := string(w.Raw) if pos != -1 { str = utilfn.StrWithPos{Str: str, Pos: pos}.String() } return fmt.Sprintf("%-4s[%3d]%s %s%q", w.Type, w.Offset, notCompleteFlag, makeRepeatStr('_', len(w.Prefix)), str) } func (w *WordType) String() string { notCompleteFlag := " " if !w.Complete { notCompleteFlag = "*" } return fmt.Sprintf("%-4s[%3d]%s %s%q", w.Type, w.Offset, notCompleteFlag, makeRepeatStr('_', len(w.Prefix)), string(w.Raw)) } // offset = -1 for don't show func dumpWords(words []*WordType, indentStr string, offset int) { wrotePos := false for _, word := range words { posInWord := false if !wrotePos && offset != -1 && offset <= word.Offset { fmt.Printf("%s* [%3d] [*]\n", indentStr, offset) wrotePos = true } if !wrotePos && offset != -1 && offset < word.Offset+len(word.Raw) { fmt.Printf("%s%s\n", indentStr, word.stringWithPos(offset-word.Offset)) wrotePos = true posInWord = true } else { fmt.Printf("%s%s\n", indentStr, word.String()) } if len(word.Subs) > 0 { if posInWord { wmeta := wordMetaMap[word.Type] dumpWords(word.Subs, indentStr+" ", offset-word.Offset-wmeta.PrefixLen) } else { dumpWords(word.Subs, indentStr+" ", -1) } } } } func dumpCommands(cmds []*CmdType, indentStr string, pos int) { for _, cmd := range cmds { fmt.Printf("%sCMD: %s [%d] pos:%d\n", indentStr, cmd.Type, len(cmd.Words), pos) dumpWords(cmd.AssignmentWords, indentStr+" *", pos) dumpWords(cmd.Words, indentStr+" ", pos) } } func wordsToStr(words []*WordType) string { var buf bytes.Buffer for _, word := range words { if len(word.Prefix) > 0 { buf.WriteString(string(word.Prefix)) } buf.WriteString(string(word.Raw)) } return buf.String() } // recognizes reserved words in first position func convertToAnyReservedWord(w *WordType) bool { if w == nil || w.Type != WordTypeLit { return false } rawVal := string(w.Raw) for _, rw := range bashReservedWords { if rawVal == rw { w.Type = WordTypeKey return true } } return false } // recognizes the specific reserved-word given only ('in' and 'do' in 'for', 'case', and 'select' commands) func convertToReservedWord(w *WordType, reservedWord string) { if w == nil || w.Type != WordTypeLit { return } if string(w.Raw) == reservedWord { w.Type = WordTypeKey } } func isNoneReservedWord(w *WordType) bool { if w.Type != WordTypeKey { return false } rawVal := string(w.Raw) for _, rw := range bashNoneRW { if rawVal == rw { return true } } return false } type parseCmdState struct { Input []*WordType InputPos int Rtn []*CmdType Cur *CmdType } func (state *parseCmdState) isEof() bool { return state.InputPos >= len(state.Input) } func (state *parseCmdState) curWord() *WordType { if state.isEof() { return nil } return state.Input[state.InputPos] } func (state *parseCmdState) lastCmd() *CmdType { if len(state.Rtn) == 0 { return nil } return state.Rtn[len(state.Rtn)-1] } func (state *parseCmdState) makeNoneCmd(sep bool) { if state.Cur == nil || state.Cur.Type != CmdTypeNone { state.Cur = &CmdType{Type: CmdTypeNone} state.Rtn = append(state.Rtn, state.Cur) } state.Cur.Words = append(state.Cur.Words, state.curWord()) if sep { state.Cur.NoneComplete = true state.Cur = nil } state.InputPos++ } func (state *parseCmdState) handleKeyword(word *WordType) bool { if word.Type != WordTypeKey { return false } if isNoneReservedWord(word) { state.makeNoneCmd(true) return true } rw := string(word.Raw) if rw == "[[" { // just ignore everything between [[ and ]] for !state.isEof() { curWord := state.curWord() if curWord.Type == WordTypeLit && string(curWord.Raw) == "]]" { convertToReservedWord(curWord, "]]") state.makeNoneCmd(false) break } state.makeNoneCmd(false) } return true } if rw == "case" { // ignore everything between "case" and "esac" for !state.isEof() { curWord := state.curWord() if curWord.Type == WordTypeKey && string(curWord.Raw) == "esac" { state.makeNoneCmd(false) break } state.makeNoneCmd(false) } return true } if rw == "for" || rw == "select" { // ignore until a "do" for !state.isEof() { curWord := state.curWord() if curWord.Type == WordTypeKey && string(curWord.Raw) == "do" { state.makeNoneCmd(true) break } state.makeNoneCmd(false) } return true } if rw == "in" { // the "for" and "case" clauses should skip "in". so encountering an "in" here is a syntax error. // just treat it as a none and allow a new command after. state.makeNoneCmd(false) return true } if rw == "function" { // ignore until '{' for !state.isEof() { curWord := state.curWord() if curWord.Type == WordTypeKey && string(curWord.Raw) == "{" { state.makeNoneCmd(true) break } state.makeNoneCmd(false) } return true } state.makeNoneCmd(true) return true } func isCmdSeparatorOp(word *WordType) bool { if word.Type != WordTypeOp { return false } opVal := string(word.Raw) return opVal == ";" || opVal == "\n" || opVal == "&" || opVal == "|" || opVal == "|&" || opVal == "&&" || opVal == "||" || opVal == "(" || opVal == ")" } func (state *parseCmdState) handleOp(word *WordType) bool { opVal := string(word.Raw) // sequential separators if opVal == ";" || opVal == "\n" { state.makeNoneCmd(true) return true } // separator if opVal == "&" { state.makeNoneCmd(true) return true } // pipelines if opVal == "|" || opVal == "|&" { state.makeNoneCmd(true) return true } // lists if opVal == "&&" || opVal == "||" { state.makeNoneCmd(true) return true } // subshell if opVal == "(" || opVal == ")" { state.makeNoneCmd(true) return true } return false } func wordSliceBoundedIdx(words []*WordType, idx int) *WordType { if idx >= len(words) { return nil } return words[idx] } // note that a newline "op" can appear in the third position of "for" or "case". the "in" keyword is still converted because of wordNum == 0 func identifyReservedWords(words []*WordType) { wordNum := 0 lastReserved := false for idx, word := range words { if wordNum == 0 || lastReserved { convertToAnyReservedWord(word) } if word.Type == WordTypeKey { rwVal := string(word.Raw) switch rwVal { case "for": lastReserved = false third := wordSliceBoundedIdx(words, idx+2) convertToReservedWord(third, "in") convertToReservedWord(third, "do") case "case": lastReserved = false third := wordSliceBoundedIdx(words, idx+2) convertToReservedWord(third, "in") case "in": lastReserved = false default: lastReserved = true } continue } lastReserved = false if isCmdSeparatorOp(word) { wordNum = 0 continue } wordNum++ } } func ResetWordOffsets(words []*WordType, startIdx int) { pos := startIdx for _, word := range words { pos += len(word.Prefix) word.Offset = pos if len(word.Subs) > 0 { ResetWordOffsets(word.Subs, 0) } pos += len(word.Raw) } } func CommandsToWords(cmds []*CmdType) []*WordType { var rtn []*WordType for _, cmd := range cmds { rtn = append(rtn, cmd.Words...) } return rtn } func (c *CmdType) stripPrefix() []rune { if len(c.AssignmentWords) > 0 { w := c.AssignmentWords[0] prefix := w.Prefix if len(prefix) == 0 { return nil } newWord := *w newWord.Prefix = nil c.AssignmentWords[0] = &newWord return prefix } if len(c.Words) > 0 { w := c.Words[0] prefix := w.Prefix if len(prefix) == 0 { return nil } newWord := *w newWord.Prefix = nil c.Words[0] = &newWord return prefix } return nil } func (c *CmdType) isEmpty() bool { return len(c.AssignmentWords) == 0 && len(c.Words) == 0 } func (c *CmdType) lastWord() *WordType { if len(c.Words) > 0 { return c.Words[len(c.Words)-1] } if len(c.AssignmentWords) > 0 { return c.AssignmentWords[len(c.AssignmentWords)-1] } return nil } func (c *CmdType) firstWord() *WordType { if len(c.AssignmentWords) > 0 { return c.AssignmentWords[0] } if len(c.Words) > 0 { return c.Words[0] } return nil } func (c *CmdType) offset() int { firstWord := c.firstWord() if firstWord == nil { return 0 } return firstWord.Offset } func (c *CmdType) endOffset() int { lastWord := c.lastWord() if lastWord == nil { return 0 } return lastWord.Offset + len(lastWord.Raw) } func indexInRunes(arr []rune, ch rune) int { for idx, r := range arr { if r == ch { return idx } } return -1 } func isAssignmentWord(w *WordType) bool { if w.Type == WordTypeLit || w.Type == WordTypeGroup { eqIdx := indexInRunes(w.Raw, '=') if eqIdx == -1 { return false } prefix := w.Raw[0:eqIdx] return isSimpleVarName(prefix) } return false } // simple commands steal whitespace from subsequent commands func cmdWhitespaceFixup(cmds []*CmdType) { for idx := 0; idx < len(cmds)-1; idx++ { cmd := cmds[idx] if cmd.Type != CmdTypeSimple || cmd.isEmpty() { continue } nextCmd := cmds[idx+1] nextPrefix := nextCmd.stripPrefix() if len(nextPrefix) > 0 { blankWord := &WordType{Type: WordTypeLit, QC: cmd.lastWord().QC, Offset: cmd.endOffset() + len(nextPrefix), Prefix: nextPrefix, Complete: true} cmd.Words = append(cmd.Words, blankWord) } } } func ParseCommands(words []*WordType) []*CmdType { identifyReservedWords(words) state := parseCmdState{Input: words} for { if state.isEof() { break } word := state.curWord() if word.Type == WordTypeKey { done := state.handleKeyword(word) if done { continue } } if word.Type == WordTypeOp { done := state.handleOp(word) if done { continue } } if state.Cur == nil || state.Cur.Type != CmdTypeSimple { state.Cur = &CmdType{Type: CmdTypeSimple} state.Rtn = append(state.Rtn, state.Cur) } if len(state.Cur.Words) == 0 && isAssignmentWord(word) { state.Cur.AssignmentWords = append(state.Cur.AssignmentWords, word) } else { state.Cur.Words = append(state.Cur.Words, word) } state.InputPos++ } cmdWhitespaceFixup(state.Rtn) return state.Rtn }