waveterm/wavesrv/pkg/shparse/shparse.go
2023-10-16 21:31:13 -07:00

697 lines
17 KiB
Go

// Copyright 2023, Command Line Inc.
// SPDX-License-Identifier: Apache-2.0
package shparse
import (
"bytes"
"fmt"
"github.com/wavetermdev/waveterm/wavesrv/pkg/utilfn"
)
//
// cmds := cmd (sep cmd)*
// sep := ';' | '&' | '&&' | '||' | '|' | '\n'
// cmd := simple-command | compound-command redirect-list?
// compound-command := brace-group | subshell | for-clause | case-clause | if-clause | while-clause | until-clause
// brace-group := '{' cmds '}'
// subshell := '(' cmds ')'
// simple-command := cmd-prefix cmd-word (io-redirect)*
// cmd-prefix := (io-redirect | assignment)*
// cmd-suffix := (io-redirect | word)*
// cmd-name := word
// cmd-word := word
// io-redirect := (io-number? io-file) | (io-number? io-here)
// io-file := ('<' | '<&' | '>' | '>&' | '>>' | '>|' ) filename
// io-here := ('<<' | '<<-') here_end
// here-end := word
// if-clause := 'if' compound-list 'then' compound-list else-part 'fi'
// else-part := 'elif' compound-list 'then' compound-list
// | 'elif' compound-list 'then' compound-list else-part
// | 'else' compound-list
// compound-list := linebreak term sep?
//
//
//
// A correctly-formed brace expansion must contain unquoted opening and closing braces, and at least one unquoted comma or a valid sequence expression
// Any incorrectly formed brace expansion is left unchanged.
//
// ambiguity between $((...)) and $((ls); ls)
// ambiguity between foo=([0]=hell) and foo=([abc)
// tokenization https://pubs.opengroup.org/onlinepubs/7908799/xcu/chap2.html#tag_001_003
// can-extend: WordTypeLit, WordTypeSimpleVar, WordTypeVarBrace, WordTypeDQ, WordTypeDDQ, WordTypeSQ, WordTypeDSQ
// Word type identifiers stored in WordType.Type. Each one is registered in
// wordMetaMap by init together with its empty-word text and prefix/suffix lengths.
// The trailing notes mirror the flags used in this file: (can-extend),
// (quote-context) and (has-subs).
const (
WordTypeRaw = "raw"
WordTypeLit = "lit" // (can-extend)
WordTypeOp = "op" // single: & ; | ( ) < > \n multi(2): && || ;; << >> <& >& <> >| (( multi(3): <<- ('((' requires special processing)
WordTypeKey = "key" // if then else elif fi do done case esac while until for in { } ! (( [[
WordTypeGroup = "grp" // contains other words e.g. "hello"foo'bar'$x (has-subs) (can-extend)
WordTypeSimpleVar = "svar" // simplevar $ (can-extend)
WordTypeDQ = "dq" // " (quote-context) (can-extend) (has-subs)
WordTypeDDQ = "ddq" // $" (can-extend) (has-subs) (for quotecontext, uses WordTypeDQ)
WordTypeVarBrace = "varb" // ${ (quote-context) (can-extend) (internals not parsed)
WordTypeDP = "dp" // $( (quote-context) (has-subs)
WordTypeBQ = "bq" // ` (quote-context) (has-subs)
WordTypeSQ = "sq" // ' (can-extend)
WordTypeDSQ = "dsq" // $' (can-extend)
WordTypeDPP = "dpp" // $(( (internals not parsed)
WordTypePP = "pp" // (( (internals not parsed)
WordTypeDB = "db" // $[ (internals not parsed)
)
// Command type identifiers stored in CmdType.Type.
const (
CmdTypeNone = "none" // holds control structures: '(' ')' 'for' 'while' etc.
CmdTypeSimple = "simple" // holds real commands
)
// WordType is one token of a shell command line, possibly containing nested
// sub-words.
type WordType struct {
// Type is one of the WordType* constants.
Type string
// Offset is the position at which Raw begins; ResetWordOffsets assigns it
// after accounting for Prefix. Sub-words are numbered starting from 0.
Offset int
// QC is the quote context attached to the word.
// NOTE(review): presumably the stack of enclosing quote constructs — confirm against the tokenizer.
QC QuoteContext
// Raw is the text of the word; for delimited word types MakeEmptyWord seeds
// it with the type's prefix (and suffix, when complete).
Raw []rune
// Complete reports whether the word is finished; contentEndPos only
// excludes the closing suffix for complete words.
Complete bool
// Prefix is text that precedes Raw (wordsToStr emits it before Raw).
// NOTE(review): appears to be leading whitespace — confirm against the tokenizer.
Prefix []rune
// Subs holds the nested words, if any (see canHaveSubs).
Subs []*WordType
}
// CmdType is a group of words produced by ParseCommands.
type CmdType struct {
// Type is CmdTypeNone or CmdTypeSimple.
Type string
// AssignmentWords are the leading words of a simple command that satisfy
// isAssignmentWord; ParseCommands collects them only while Words is empty.
AssignmentWords []*WordType
// Words are the remaining words of the command.
Words []*WordType
NoneComplete bool // set to true when last-word is a "separator"
}
// QuoteContext is a stack of strings manipulated through push, cur and clone;
// cur returns the most recently pushed entry.
// NOTE(review): entries are presumably WordType* identifiers of the enclosing quote constructs — confirm.
type QuoteContext []string
// wordMetaMap maps a word type identifier to its wordMeta. It is allocated and
// filled by init through makeWordMeta.
var wordMetaMap map[string]wordMeta
// bashReservedWords lists every reserved word that convertToAnyReservedWord
// will turn from a literal into a keyword.
// same order as https://www.gnu.org/software/bash/manual/html_node/Reserved-Words.html
var bashReservedWords = []string{
"if", "then", "elif", "else", "fi", "time",
"for", "in", "until", "while", "do", "done",
"case", "esac", "coproc", "select", "function",
"{", "}", "[[", "]]", "!",
}
// bashNoneRW lists the reserved words that handleKeyword emits directly as a
// completed "none" command (checked through isNoneReservedWord). The words
// named in the next line are left out because handleKeyword treats them
// individually.
// special reserved words: "for", "in", "case", "select", "function", "[[", and "]]"
var bashNoneRW = []string{
"if", "then",
"elif", "else", "fi", "time",
"until", "while", "do", "done",
"esac", "coproc",
"{", "}", "!",
}
// wordMeta describes the static properties of one word type. Values are
// created by makeWordMeta, which fills the fields positionally, so the field
// order must not change.
type wordMeta struct {
// Type is the word type identifier this entry describes.
Type string
// EmptyWord is the text of an empty, complete word of this type: the prefix
// immediately followed by the suffix.
EmptyWord []rune
// PrefixLen is the number of leading runes of EmptyWord that form the prefix.
PrefixLen int
// SuffixLen is the number of trailing runes of EmptyWord that form the suffix.
SuffixLen int
// CanExtend mirrors the "(can-extend)" notes on the WordType* constants.
// NOTE(review): not read in the visible code — confirm its consumers.
CanExtend bool
// QuoteContext is passed in by makeWordMeta.
// NOTE(review): presumably marks types that open a new quote context — confirm.
QuoteContext bool
}
// getSuffix returns the last SuffixLen runes of EmptyWord as a string, or ""
// when the word type has no suffix.
func (m wordMeta) getSuffix() string {
	if m.SuffixLen != 0 {
		suffixStart := len(m.EmptyWord) - m.SuffixLen
		return string(m.EmptyWord[suffixStart:])
	}
	return ""
}
// getPrefix returns the first PrefixLen runes of EmptyWord as a string, or ""
// when the word type has no prefix.
func (m wordMeta) getPrefix() string {
	if m.PrefixLen != 0 {
		lead := m.EmptyWord[:m.PrefixLen]
		return string(lead)
	}
	return ""
}
// makeWordMeta registers the metadata for word type wtype in wordMetaMap.
//
// emptyWord is the text of an empty, complete word of that type; it must be
// exactly prefixLen+suffixLen runes long, and both lengths must be
// non-negative. The lengths are measured in runes because EmptyWord is stored
// as []rune and is sliced by PrefixLen/SuffixLen.
//
// It panics when the lengths are inconsistent, since that is a programming
// error in the registration table.
func makeWordMeta(wtype string, emptyWord string, prefixLen int, suffixLen int, canExtend bool, quoteContext bool) {
	emptyRunes := []rune(emptyWord)
	if prefixLen < 0 || suffixLen < 0 || len(emptyRunes) != prefixLen+suffixLen {
		panic(fmt.Sprintf("invalid empty word %s %d %d", emptyWord, prefixLen, suffixLen))
	}
	wordMetaMap[wtype] = wordMeta{
		Type:         wtype,
		EmptyWord:    emptyRunes,
		PrefixLen:    prefixLen,
		SuffixLen:    suffixLen,
		CanExtend:    canExtend,
		QuoteContext: quoteContext,
	}
}
// init allocates wordMetaMap and registers the metadata of every word type.
func init() {
	wordMetaMap = make(map[string]wordMeta)

	// One row per word type: empty-word text, prefix/suffix lengths and flags.
	type metaRow struct {
		wtype        string
		emptyWord    string
		prefixLen    int
		suffixLen    int
		canExtend    bool
		quoteContext bool
	}
	rows := []metaRow{
		{WordTypeRaw, "", 0, 0, false, false},
		{WordTypeLit, "", 0, 0, true, false},
		{WordTypeOp, "", 0, 0, false, false},
		{WordTypeKey, "", 0, 0, false, false},
		{WordTypeGroup, "", 0, 0, false, false},
		{WordTypeSimpleVar, "$", 1, 0, true, false},
		{WordTypeVarBrace, "${}", 2, 1, true, true},
		{WordTypeDQ, `""`, 1, 1, true, true},
		{WordTypeDDQ, `$""`, 2, 1, true, true},
		{WordTypeDP, "$()", 2, 1, false, false},
		{WordTypeBQ, "``", 1, 1, false, false},
		{WordTypeSQ, "''", 1, 1, true, false},
		{WordTypeDSQ, "$''", 2, 1, true, false},
		{WordTypeDPP, "$(())", 3, 2, false, false},
		{WordTypePP, "(())", 2, 2, false, false},
		{WordTypeDB, "$[]", 2, 1, false, false},
	}
	for _, row := range rows {
		makeWordMeta(row.wtype, row.emptyWord, row.prefixLen, row.suffixLen, row.canExtend, row.quoteContext)
	}
}
// MakeEmptyWord builds a content-less word of type wtype with the given quote
// context, offset and completeness. Unknown types fall back to WordTypeRaw.
//
// For word types that have delimiters, Raw is set to a fresh copy of the full
// empty word when complete is true, and to just the opening prefix otherwise.
func MakeEmptyWord(wtype string, qc QuoteContext, offset int, complete bool) *WordType {
	meta := wordMetaMap[wtype]
	if meta.Type == "" {
		meta = wordMetaMap[WordTypeRaw]
	}
	word := &WordType{Type: meta.Type, QC: qc, Offset: offset, Complete: complete}
	if len(meta.EmptyWord) == 0 {
		return word
	}
	seed := meta.EmptyWord
	if !complete {
		// an incomplete word only has its opening delimiter
		seed = meta.EmptyWord[:meta.PrefixLen]
	}
	word.Raw = append([]rune(nil), seed...)
	return word
}
// push returns a new QuoteContext consisting of qc followed by q. The receiver
// is never modified and shares no storage with the result.
func (qc QuoteContext) push(q string) QuoteContext {
	next := make(QuoteContext, len(qc), len(qc)+1)
	copy(next, qc)
	return append(next, q)
}
// cur returns the most recently pushed entry, or "" when the context is empty.
func (qc QuoteContext) cur() string {
	if n := len(qc); n > 0 {
		return qc[n-1]
	}
	return ""
}
// clone returns an independent copy of qc; an empty context clones to nil.
func (qc QuoteContext) clone() QuoteContext {
	if len(qc) == 0 {
		return nil
	}
	dup := make(QuoteContext, len(qc))
	copy(dup, qc)
	return dup
}
// makeRepeatStr returns a string made of slen copies of the byte ch.
// A zero or negative slen yields the empty string (a negative length would
// otherwise panic while allocating).
func makeRepeatStr(ch byte, slen int) string {
	if slen <= 0 {
		return ""
	}
	return string(bytes.Repeat([]byte{ch}, slen))
}
// isBlank reports whether w is a literal word with no text.
func (w *WordType) isBlank() bool {
	if w.Type != WordTypeLit {
		return false
	}
	return len(w.Raw) == 0
}
// contentEndPos returns the index in Raw just past the word's content: the
// length of Raw, minus the closing suffix when the word is complete.
func (w *WordType) contentEndPos() int {
	end := len(w.Raw)
	if w.Complete {
		end -= wordMetaMap[w.Type].SuffixLen
	}
	return end
}
// contentStartPos returns the index in Raw where the word's content begins,
// i.e. the prefix length registered for the word's type.
func (w *WordType) contentStartPos() int {
	return wordMetaMap[w.Type].PrefixLen
}
// canHaveSubs reports whether the word's type is one that may carry sub-words:
// group, double-quoted ("..." and $"..."), $(...) or backquoted.
func (w *WordType) canHaveSubs() bool {
	t := w.Type
	return t == WordTypeGroup ||
		t == WordTypeDQ ||
		t == WordTypeDDQ ||
		t == WordTypeDP ||
		t == WordTypeBQ
}
// uncompletable reports whether the word's type is in the fixed set of types
// this package treats as uncompletable (raw, op, key, $((, ((, $[, ` and $( words).
func (w *WordType) uncompletable() bool {
	blocked := []string{
		WordTypeRaw, WordTypeOp, WordTypeKey, WordTypeDPP,
		WordTypePP, WordTypeDB, WordTypeBQ, WordTypeDP,
	}
	for _, t := range blocked {
		if w.Type == t {
			return true
		}
	}
	return false
}
// stringWithPos formats the word for debugging like String, but when pos is
// not -1 the text is rendered through utilfn.StrWithPos with Pos set to pos.
func (w *WordType) stringWithPos(pos int) string {
	text := string(w.Raw)
	if pos != -1 {
		text = utilfn.StrWithPos{Str: text, Pos: pos}.String()
	}
	// "*" flags an incomplete word
	flag := "*"
	if w.Complete {
		flag = " "
	}
	// one underscore per prefix rune
	prefixMarks := makeRepeatStr('_', len(w.Prefix))
	return fmt.Sprintf("%-4s[%3d]%s %s%q", w.Type, w.Offset, flag, prefixMarks, text)
}
// String formats the word for debugging: type, offset, a "*" flag when the
// word is incomplete, one underscore per prefix rune, and the quoted raw text.
func (w *WordType) String() string {
	flag := "*"
	if w.Complete {
		flag = " "
	}
	prefixMarks := makeRepeatStr('_', len(w.Prefix))
	text := string(w.Raw)
	return fmt.Sprintf("%-4s[%3d]%s %s%q", w.Type, w.Offset, flag, prefixMarks, text)
}
// dumpWords prints one line per word to stdout, recursing into sub-words.
// offset is a position to highlight, or -1 for don't show. The position is
// reported at most once: as a standalone marker line when it lies at or before
// a word's start, or inside the first word whose raw text contains it.
func dumpWords(words []*WordType, indentStr string, offset int) {
	// pending is true while the position still has to be reported
	pending := offset != -1
	for _, word := range words {
		if pending && offset <= word.Offset {
			fmt.Printf("%s* [%3d] [*]\n", indentStr, offset)
			pending = false
		}
		// position handed to the sub-words; stays -1 unless it falls in this word
		subOffset := -1
		if pending && offset < word.Offset+len(word.Raw) {
			fmt.Printf("%s%s\n", indentStr, word.stringWithPos(offset-word.Offset))
			pending = false
			subOffset = offset - word.Offset - wordMetaMap[word.Type].PrefixLen
		} else {
			fmt.Printf("%s%s\n", indentStr, word.String())
		}
		if len(word.Subs) > 0 {
			dumpWords(word.Subs, indentStr+" ", subOffset)
		}
	}
}
// dumpCommands prints every command to stdout: a header line with its type,
// word count and pos, followed by its assignment words and then its words.
func dumpCommands(cmds []*CmdType, indentStr string, pos int) {
	for i := range cmds {
		c := cmds[i]
		fmt.Printf("%sCMD: %s [%d] pos:%d\n", indentStr, c.Type, len(c.Words), pos)
		dumpWords(c.AssignmentWords, indentStr+" *", pos)
		dumpWords(c.Words, indentStr+" ", pos)
	}
}
// wordsToStr reassembles the source text of words by concatenating each
// word's Prefix followed by its Raw text, in order.
func wordsToStr(words []*WordType) string {
	var out []rune
	for _, w := range words {
		out = append(out, w.Prefix...)
		out = append(out, w.Raw...)
	}
	return string(out)
}
// convertToAnyReservedWord recognizes reserved words in first position: when w
// is a literal whose text equals any entry of bashReservedWords, its type is
// changed to WordTypeKey and true is returned. Nil and non-literal words are
// left alone and yield false.
func convertToAnyReservedWord(w *WordType) bool {
	if w == nil || w.Type != WordTypeLit {
		return false
	}
	text := string(w.Raw)
	for i := range bashReservedWords {
		if bashReservedWords[i] != text {
			continue
		}
		w.Type = WordTypeKey
		return true
	}
	return false
}
// convertToReservedWord recognizes only the specific reserved word given ('in'
// and 'do' in 'for', 'case', and 'select' commands): a literal w whose text is
// exactly reservedWord becomes a WordTypeKey. Nil or non-literal words are ignored.
func convertToReservedWord(w *WordType, reservedWord string) {
	if w != nil && w.Type == WordTypeLit && string(w.Raw) == reservedWord {
		w.Type = WordTypeKey
	}
}
// isNoneReservedWord reports whether w is a keyword whose text appears in
// bashNoneRW.
func isNoneReservedWord(w *WordType) bool {
	if w.Type == WordTypeKey {
		text := string(w.Raw)
		for i := range bashNoneRW {
			if bashNoneRW[i] == text {
				return true
			}
		}
	}
	return false
}
// parseCmdState is the working state of ParseCommands while it walks the
// input words and groups them into commands.
type parseCmdState struct {
// Input is the full list of words being parsed.
Input []*WordType
// InputPos is the index in Input of the next word to consume.
InputPos int
// Rtn accumulates the commands produced so far, in order.
Rtn []*CmdType
// Cur is the command currently being extended (also the last element of
// Rtn), or nil when the next word must start a new command.
Cur *CmdType
}
// isEof reports whether every input word has been consumed.
func (state *parseCmdState) isEof() bool {
	remaining := len(state.Input) - state.InputPos
	return remaining <= 0
}
// curWord returns the word at the current input position, or nil once the
// input is exhausted.
func (state *parseCmdState) curWord() *WordType {
	if state.InputPos < len(state.Input) {
		return state.Input[state.InputPos]
	}
	return nil
}
// lastCmd returns the most recently created command, or nil when none exists.
func (state *parseCmdState) lastCmd() *CmdType {
	if n := len(state.Rtn); n > 0 {
		return state.Rtn[n-1]
	}
	return nil
}
// makeNoneCmd consumes the current word into a "none" command. The word is
// appended to the current command when that is already a none command;
// otherwise a new none command is started. When sep is true the command is
// marked NoneComplete and closed, so the next word starts a fresh command.
func (state *parseCmdState) makeNoneCmd(sep bool) {
	cmd := state.Cur
	if cmd == nil || cmd.Type != CmdTypeNone {
		cmd = &CmdType{Type: CmdTypeNone}
		state.Cur = cmd
		state.Rtn = append(state.Rtn, cmd)
	}
	cmd.Words = append(cmd.Words, state.curWord())
	state.InputPos++
	if sep {
		cmd.NoneComplete = true
		state.Cur = nil
	}
}
// handleKeyword consumes a keyword (and, for some constructs, the words that
// follow it) into "none" commands. word is expected to be the word at the
// current input position. It returns false without consuming anything when
// word is not a keyword, and true otherwise.
//
//   - words in bashNoneRW become a completed none command on their own
//   - "[[" swallows everything up to and including "]]"
//   - "case" swallows everything up to and including "esac"
//   - "for" / "select" swallow everything up to and including "do"
//   - "function" swallows everything up to and including "{"
//   - a stray "in" is consumed without closing the none command
//   - any other keyword becomes a completed none command
func (state *parseCmdState) handleKeyword(word *WordType) bool {
	if word.Type != WordTypeKey {
		return false
	}
	if isNoneReservedWord(word) {
		state.makeNoneCmd(true)
		return true
	}
	switch string(word.Raw) {
	case "[[":
		// just ignore everything between [[ and ]]
		for !state.isEof() {
			curWord := state.curWord()
			// "]]" may still be a literal, or may already have been converted to
			// a keyword by identifyReservedWords (e.g. when it directly follows a
			// ')' or another separator op). Both must terminate the construct,
			// otherwise the rest of the input would be swallowed.
			isWordLike := curWord.Type == WordTypeLit || curWord.Type == WordTypeKey
			if isWordLike && string(curWord.Raw) == "]]" {
				convertToReservedWord(curWord, "]]")
				state.makeNoneCmd(false)
				return true
			}
			state.makeNoneCmd(false)
		}
	case "case":
		// ignore everything between "case" and "esac"
		state.consumeNoneThroughKey("esac", false)
	case "for", "select":
		// ignore until a "do"
		state.consumeNoneThroughKey("do", true)
	case "function":
		// ignore until '{'
		state.consumeNoneThroughKey("{", true)
	case "in":
		// the "for" and "case" clauses should skip "in". so encountering an "in" here is a syntax error.
		// just treat it as a none and allow a new command after.
		state.makeNoneCmd(false)
	default:
		state.makeNoneCmd(true)
	}
	return true
}

// consumeNoneThroughKey consumes words into the none command until it has
// consumed a keyword whose text equals keyword, or the input ends. sepOnMatch
// is passed to makeNoneCmd for the terminating keyword.
func (state *parseCmdState) consumeNoneThroughKey(keyword string, sepOnMatch bool) {
	for !state.isEof() {
		curWord := state.curWord()
		if curWord.Type == WordTypeKey && string(curWord.Raw) == keyword {
			state.makeNoneCmd(sepOnMatch)
			return
		}
		state.makeNoneCmd(false)
	}
}
// isCmdSeparatorOp reports whether word is an operator after which a new
// command starts: ; newline & | |& && || ( or ).
func isCmdSeparatorOp(word *WordType) bool {
	if word.Type != WordTypeOp {
		return false
	}
	switch string(word.Raw) {
	case ";", "\n", "&", "|", "|&", "&&", "||", "(", ")":
		return true
	}
	return false
}
// handleOp consumes word as a completed "none" command when its text is one
// of the command-separating operators, and returns true. Any other text is
// left unconsumed and false is returned.
func (state *parseCmdState) handleOp(word *WordType) bool {
	switch string(word.Raw) {
	case ";", "\n": // sequential separators
	case "&": // separator
	case "|", "|&": // pipelines
	case "&&", "||": // lists
	case "(", ")": // subshell
	default:
		return false
	}
	state.makeNoneCmd(true)
	return true
}
// wordSliceBoundedIdx returns words[idx], or nil when idx lies outside the
// slice in either direction (negative, or at/after len(words)).
func wordSliceBoundedIdx(words []*WordType, idx int) *WordType {
	if idx < 0 || idx >= len(words) {
		return nil
	}
	return words[idx]
}
// identifyReservedWords converts literal words into keywords (WordTypeKey) in
// place, wherever the shell would recognize a reserved word:
//
//   - any reserved word in command position (the first word after a
//     command-separator op, keywords themselves not being counted) or
//     directly after another reserved word
//   - "in" or "do" as the third word of a "for" command
//   - "in" as the third word of a "case" or "select" command
//
// note that a newline "op" can appear in the third position of "for" or "case". the "in" keyword is still converted because of wordNum == 0
func identifyReservedWords(words []*WordType) {
	wordNum := 0 // non-keyword words seen since the last separator
	lastReserved := false
	for idx, word := range words {
		if wordNum == 0 || lastReserved {
			convertToAnyReservedWord(word)
		}
		if word.Type == WordTypeKey {
			rwVal := string(word.Raw)
			switch rwVal {
			case "for":
				lastReserved = false
				third := wordSliceBoundedIdx(words, idx+2)
				convertToReservedWord(third, "in")
				convertToReservedWord(third, "do")
			case "case", "select":
				// "select" follows the same rule as "case": only "in" is
				// recognized as its third word.
				lastReserved = false
				third := wordSliceBoundedIdx(words, idx+2)
				convertToReservedWord(third, "in")
			case "in":
				lastReserved = false
			default:
				lastReserved = true
			}
			continue
		}
		lastReserved = false
		if isCmdSeparatorOp(word) {
			wordNum = 0
			continue
		}
		wordNum++
	}
}
// ResetWordOffsets recomputes the Offset of every word, assuming the words are
// laid out back to back starting at startIdx. Each word's Offset points at the
// start of its Raw text, i.e. after its Prefix. Sub-words are renumbered
// recursively starting from 0.
func ResetWordOffsets(words []*WordType, startIdx int) {
	cursor := startIdx
	for i := range words {
		w := words[i]
		w.Offset = cursor + len(w.Prefix)
		if len(w.Subs) != 0 {
			ResetWordOffsets(w.Subs, 0)
		}
		cursor = w.Offset + len(w.Raw)
	}
}
// CommandsToWords flattens the Words of every command into a single slice, in
// command order. AssignmentWords are not included.
func CommandsToWords(cmds []*CmdType) []*WordType {
	var flat []*WordType
	for i := range cmds {
		flat = append(flat, cmds[i].Words...)
	}
	return flat
}
// stripPrefix removes and returns the Prefix of the command's first word (the
// first assignment word if there is one, otherwise the first regular word).
// The word is replaced in the command by a copy without the prefix, so the
// original WordType value is not mutated. It returns nil when the command has
// no words or the first word has no prefix.
func (c *CmdType) stripPrefix() []rune {
	// target shares its backing array with the command's slice, so writing
	// target[0] updates the command.
	target := c.AssignmentWords
	if len(target) == 0 {
		target = c.Words
	}
	if len(target) == 0 {
		return nil
	}
	first := target[0]
	if len(first.Prefix) == 0 {
		return nil
	}
	stripped := *first
	stripped.Prefix = nil
	target[0] = &stripped
	return first.Prefix
}
// isEmpty reports whether the command has neither assignment words nor words.
func (c *CmdType) isEmpty() bool {
	total := len(c.AssignmentWords) + len(c.Words)
	return total == 0
}
// lastWord returns the final word of the command: the last regular word, or
// failing that the last assignment word, or nil for an empty command.
func (c *CmdType) lastWord() *WordType {
	if n := len(c.Words); n > 0 {
		return c.Words[n-1]
	}
	if n := len(c.AssignmentWords); n > 0 {
		return c.AssignmentWords[n-1]
	}
	return nil
}
// firstWord returns the leading word of the command: the first assignment
// word, or failing that the first regular word, or nil for an empty command.
func (c *CmdType) firstWord() *WordType {
	switch {
	case len(c.AssignmentWords) > 0:
		return c.AssignmentWords[0]
	case len(c.Words) > 0:
		return c.Words[0]
	default:
		return nil
	}
}
// offset returns the Offset of the command's first word, or 0 when the
// command is empty.
func (c *CmdType) offset() int {
	if w := c.firstWord(); w != nil {
		return w.Offset
	}
	return 0
}
// endOffset returns the position just past the raw text of the command's last
// word, or 0 when the command is empty.
func (c *CmdType) endOffset() int {
	if w := c.lastWord(); w != nil {
		return w.Offset + len(w.Raw)
	}
	return 0
}
// indexInRunes returns the index of the first occurrence of ch in arr, or -1
// when ch does not occur.
func indexInRunes(arr []rune, ch rune) int {
	for i := 0; i < len(arr); i++ {
		if arr[i] == ch {
			return i
		}
	}
	return -1
}
// isAssignmentWord reports whether w looks like a variable assignment: a
// literal or group word containing '=' where the text before the first '=' is
// accepted by isSimpleVarName.
func isAssignmentWord(w *WordType) bool {
	if w.Type != WordTypeLit && w.Type != WordTypeGroup {
		return false
	}
	eqPos := indexInRunes(w.Raw, '=')
	if eqPos == -1 {
		return false
	}
	return isSimpleVarName(w.Raw[:eqPos])
}
// cmdWhitespaceFixup moves the leading prefix of each command onto the
// preceding command when that one is a non-empty simple command (simple
// commands steal whitespace from subsequent commands). The stolen prefix is
// attached to a new blank, complete literal word appended to the simple
// command; the blank word inherits the quote context of the command's
// previous last word.
func cmdWhitespaceFixup(cmds []*CmdType) {
	for next := 1; next < len(cmds); next++ {
		cmd := cmds[next-1]
		if cmd.isEmpty() || cmd.Type != CmdTypeSimple {
			continue
		}
		stolen := cmds[next].stripPrefix()
		if len(stolen) == 0 {
			continue
		}
		cmd.Words = append(cmd.Words, &WordType{
			Type:     WordTypeLit,
			QC:       cmd.lastWord().QC,
			Offset:   cmd.endOffset() + len(stolen),
			Prefix:   stolen,
			Complete: true,
		})
	}
}
// ParseCommands groups words into commands. Reserved words are identified
// first (word types are updated in place). Keywords and separator operators
// are then routed to "none" commands, while every other word extends the
// current simple command. Words that look like assignments and precede the
// first regular word of a simple command are stored in AssignmentWords.
// Finally, leading whitespace is re-attributed by cmdWhitespaceFixup.
func ParseCommands(words []*WordType) []*CmdType {
	identifyReservedWords(words)
	state := parseCmdState{Input: words}
	for !state.isEof() {
		word := state.curWord()
		switch word.Type {
		case WordTypeKey:
			if state.handleKeyword(word) {
				continue
			}
		case WordTypeOp:
			if state.handleOp(word) {
				continue
			}
		}
		// anything not consumed above belongs to a simple command
		cmd := state.Cur
		if cmd == nil || cmd.Type != CmdTypeSimple {
			cmd = &CmdType{Type: CmdTypeSimple}
			state.Cur = cmd
			state.Rtn = append(state.Rtn, cmd)
		}
		if len(cmd.Words) == 0 && isAssignmentWord(word) {
			cmd.AssignmentWords = append(cmd.AssignmentWords, word)
		} else {
			cmd.Words = append(cmd.Words, word)
		}
		state.InputPos++
	}
	cmdWhitespaceFixup(state.Rtn)
	return state.Rtn
}