// Source: waveterm/pkg/shparse/shparse.go

package shparse
import (
	"bytes"
	"fmt"

	"github.com/scripthaus-dev/sh2-server/pkg/utilfn"
)
//
// cmds := cmd (sep cmd)*
// sep := ';' | '&' | '&&' | '||' | '|' | '\n'
// cmd := simple-command | compound-command redirect-list?
// compound-command := brace-group | subshell | for-clause | case-clause | if-clause | while-clause | until-clause
// brace-group := '{' cmds '}'
// subshell := '(' cmds ')'
// simple-command := cmd-prefix cmd-word (io-redirect)*
// cmd-prefix := (io-redirect | assignment)*
// cmd-suffix := (io-redirect | word)*
// cmd-name := word
// cmd-word := word
// io-redirect := (io-number? io-file) | (io-number? io-here)
// io-file := ('<' | '<&' | '>' | '>&' | '>>' | '>|' ) filename
// io-here := ('<<' | '<<-') here_end
// here-end := word
// if-clause := 'if' compound-list 'then' compound-list else-part 'fi'
// else-part := 'elif' compound-list 'then' compound-list
//            | 'elif' compound-list 'then' compound-list else-part
//            | 'else' compound-list
// compound-list := linebreak term sep?
//
//
//
// A correctly-formed brace expansion must contain unquoted opening and closing braces, and at least one unquoted comma or a valid sequence expression
// Any incorrectly formed brace expansion is left unchanged.
//
// ambiguity between $((...)) and $((ls); ls)
// ambiguity between foo=([0]=hell) and foo=([abc)
// tokenization https://pubs.opengroup.org/onlinepubs/7908799/xcu/chap2.html#tag_001_003
// can-extend: WordTypeLit, WordTypeSimpleVar, WordTypeVarBrace, WordTypeDQ, WordTypeDDQ, WordTypeSQ, WordTypeDSQ
// Word type identifiers stored in WordType.Type.
// The trailing notes show the opening syntax of each type and whether it is
// annotated as (can-extend) and/or (quote-context).
const (
	WordTypeRaw       = "raw"
	WordTypeLit       = "lit"  // (can-extend)
	WordTypeOp        = "op"   // single: & ; | ( ) < > \n  multi(2): && || ;; << >> <& >& <> >| ((  multi(3): <<-  ('((' requires special processing)
	WordTypeKey       = "key"  // if then else elif fi do done case esac while until for in { } ! (( [[
	WordTypeGroup     = "grp"  // contains other words e.g. "hello"foo'bar'$x
	WordTypeSimpleVar = "svar" // simplevar $ (can-extend)

	WordTypeDQ       = "dq"   // " (quote-context) (can-extend)
	WordTypeDDQ      = "ddq"  // $" (quote-context) (can-extend)
	WordTypeVarBrace = "varb" // ${ (quote-context) (can-extend)
	WordTypeDP       = "dp"   // $( (quote-context)
	WordTypeBQ       = "bq"   // ` (quote-context)

	WordTypeSQ  = "sq"  // ' (can-extend)
	WordTypeDSQ = "dsq" // $' (can-extend)
	WordTypeDPP = "dpp" // $(( (internals not parsed)
	WordTypePP  = "pp"  // (( (internals not parsed)
	WordTypeDB  = "db"  // $[ (internals not parsed)
)
// Command kinds stored in CmdType.Type.
const (
	// CmdTypeNone holds control structures: '(' ')' 'for' 'while' etc.
	CmdTypeNone = "none"
	// CmdTypeSimple holds real commands.
	CmdTypeSimple = "simple"
)
// WordType is a single parsed shell word (token), possibly containing nested words.
type WordType struct {
	// Type is one of the WordType* identifiers.
	Type string
	// Offset is the position of Raw within its enclosing sequence, as
	// computed by ResetWordOffsets (counted in runes, after Prefix).
	Offset int
	// QC is the quote context the word was found in.
	QC QuoteContext
	// Raw is the text of the word itself.
	Raw []rune
	// Complete is rendered as a blank flag when true and '*' when false by
	// the debug String methods.
	// NOTE(review): presumably false means the word was not terminated — confirm.
	Complete bool
	// Prefix holds the runes that precede Raw; wordsToStr emits Prefix and
	// then Raw for every word.
	Prefix []rune
	// Subs are the nested words, if any.
	Subs []*WordType
}
// CmdType is one command produced by ParseCommands.
type CmdType struct {
	// Type is CmdTypeNone or CmdTypeSimple.
	Type string
	// AssignmentWords are the leading assignment words of a simple command
	// (collected only while Words is still empty).
	AssignmentWords []*WordType
	// Words are the remaining words of the command, in input order.
	Words []*WordType
}
// QuoteContext is a stack of quote-context identifiers, innermost last
// (push appends to the end and cur returns the last element).
type QuoteContext []string
2022-11-17 08:52:10 +01:00
var wordMetaMap map[string]wordMeta
// bashReservedWords lists every bash reserved word, in the same order as
// https://www.gnu.org/software/bash/manual/html_node/Reserved-Words.html
var bashReservedWords = []string{
	"if",
	"then",
	"elif",
	"else",
	"fi",
	"time",
	"for",
	"in",
	"until",
	"while",
	"do",
	"done",
	"case",
	"esac",
	"coproc",
	"select",
	"function",
	"{",
	"}",
	"[[",
	"]]",
	"!",
}
// bashNoneRW lists the reserved words that isNoneReservedWord accepts.
// It is the reserved-word list without the special reserved words:
// "for", "in", "case", "select", "function", "[[", and "]]".
var bashNoneRW = []string{
	"if", "then",
	"elif", "else", "fi", "time",
	"until", "while", "do", "done",
	"esac", "coproc",
	"{", "}", "!",
}
// wordMeta describes the fixed syntax of one word type.
// The field order is significant for positional struct literals.
type wordMeta struct {
	Type         string // word type identifier (one of the WordType* constants)
	EmptyWord    []rune // text of an empty word of this type (opening plus closing delimiters)
	PrefixLen    int    // number of leading delimiter runes in EmptyWord
	SuffixLen    int    // number of trailing delimiter runes in EmptyWord
	CanExtend    bool   // canExtend flag registered for the type
	QuoteContext bool   // quoteContext flag registered for the type
}
func makeWordMeta(wtype string, emptyWord string, prefixLen int, suffixLen int, canExtend bool, quoteContext bool) {
if len(emptyWord) != prefixLen+suffixLen {
panic(fmt.Sprintf("invalid empty word %s %d %d", emptyWord, prefixLen, suffixLen))
}
wordMetaMap[wtype] = wordMeta{wtype, []rune(emptyWord), prefixLen, suffixLen, canExtend, quoteContext}
2022-11-17 08:52:10 +01:00
}
func init() {
wordMetaMap = make(map[string]wordMeta)
makeWordMeta(WordTypeRaw, "", 0, 0, false, false)
makeWordMeta(WordTypeLit, "", 0, 0, true, false)
makeWordMeta(WordTypeOp, "", 0, 0, false, false)
makeWordMeta(WordTypeKey, "", 0, 0, false, false)
makeWordMeta(WordTypeGroup, "", 0, 0, false, false)
makeWordMeta(WordTypeSimpleVar, "$", 1, 0, true, false)
makeWordMeta(WordTypeVarBrace, "${}", 2, 1, true, true)
makeWordMeta(WordTypeDQ, `""`, 1, 1, true, true)
makeWordMeta(WordTypeDDQ, `$""`, 2, 1, true, true)
makeWordMeta(WordTypeDP, "$()", 2, 1, false, false)
makeWordMeta(WordTypeBQ, "``", 1, 1, false, false)
makeWordMeta(WordTypeSQ, "''", 1, 1, true, false)
makeWordMeta(WordTypeDSQ, "$''", 2, 1, true, false)
makeWordMeta(WordTypeDPP, "$(())", 3, 2, false, false)
makeWordMeta(WordTypePP, "(())", 2, 2, false, false)
makeWordMeta(WordTypeDB, "$[]", 2, 1, false, false)
2022-11-17 08:52:10 +01:00
}
func MakeEmptyWord(wtype string, qc QuoteContext, offset int) *WordType {
2022-11-17 08:52:10 +01:00
meta := wordMetaMap[wtype]
if meta.Type == "" {
meta = wordMetaMap[WordTypeRaw]
}
rtn := &WordType{Type: meta.Type, QC: qc, Offset: offset, Complete: true}
2022-11-17 08:52:10 +01:00
if len(meta.EmptyWord) > 0 {
rtn.Raw = append([]rune(nil), meta.EmptyWord...)
}
return rtn
}
func (qc QuoteContext) push(q string) QuoteContext {
2022-11-15 09:36:30 +01:00
rtn := make([]string, 0, len(qc)+1)
rtn = append(rtn, qc...)
rtn = append(rtn, q)
return rtn
}
func (qc QuoteContext) cur() string {
2022-11-15 09:36:30 +01:00
if len(qc) == 0 {
return ""
}
return qc[len(qc)-1]
}
// makeRepeatStr returns a string made of slen copies of the byte ch.
// A zero or negative slen yields the empty string.
func makeRepeatStr(ch byte, slen int) string {
	if slen <= 0 {
		// guard negative lengths too: allocating a negative-length slice would panic
		return ""
	}
	return string(bytes.Repeat([]byte{ch}, slen))
}
// isBlank reports whether w is a literal word with no raw text.
func (w *WordType) isBlank() bool {
	if w.Type != WordTypeLit {
		return false
	}
	return len(w.Raw) == 0
}
// stringWithPos renders w as a single debug line: type, offset, a '*' flag
// when the word is not complete, one '_' per prefix rune, and the quoted raw
// text. When pos is not -1 the raw text is first passed through
// utilfn.StrWithPos with that position.
func (w *WordType) stringWithPos(pos int) string {
	text := string(w.Raw)
	if pos != -1 {
		text = utilfn.StrWithPos{Str: text, Pos: pos}.String()
	}
	flag := "*"
	if w.Complete {
		flag = " "
	}
	prefixMarks := makeRepeatStr('_', len(w.Prefix))
	return fmt.Sprintf("%-4s[%3d]%s %s%q", w.Type, w.Offset, flag, prefixMarks, text)
}
func (w *WordType) String() string {
notCompleteFlag := " "
if !w.Complete {
notCompleteFlag = "*"
}
return fmt.Sprintf("%-4s[%3d]%s %s%q", w.Type, w.Offset, notCompleteFlag, makeRepeatStr('_', len(w.Prefix)), string(w.Raw))
2022-11-14 22:56:28 +01:00
}
// offset = -1 for don't show
func dumpWords(words []*WordType, indentStr string, offset int) {
wrotePos := false
for _, word := range words {
posInWord := false
if !wrotePos && offset != -1 && offset <= word.Offset {
fmt.Printf("%s* [%3d] [*]\n", indentStr, offset)
wrotePos = true
}
if !wrotePos && offset != -1 && offset < word.Offset+len(word.Raw) {
fmt.Printf("%s%s\n", indentStr, word.stringWithPos(offset-word.Offset))
wrotePos = true
posInWord = true
} else {
fmt.Printf("%s%s\n", indentStr, word.String())
}
if len(word.Subs) > 0 {
if posInWord {
wmeta := wordMetaMap[word.Type]
dumpWords(word.Subs, indentStr+" ", offset-word.Offset-wmeta.PrefixLen)
} else {
dumpWords(word.Subs, indentStr+" ", -1)
}
}
}
2022-11-14 22:56:28 +01:00
}
func dumpCommands(cmds []*CmdType, indentStr string, pos *CmdPos) {
for _, cmd := range cmds {
fmt.Printf("%sCMD: %s [%d]\n", indentStr, cmd.Type, len(cmd.Words))
dumpWords(cmd.AssignmentWords, indentStr+" *", -1)
dumpWords(cmd.Words, indentStr+" ", -1)
}
2022-11-14 22:56:28 +01:00
}
func wordsToStr(words []*WordType) string {
var buf bytes.Buffer
for _, word := range words {
if len(word.Prefix) > 0 {
buf.WriteString(string(word.Prefix))
}
buf.WriteString(string(word.Raw))
}
return buf.String()
2022-11-14 22:56:28 +01:00
}
// recognizes reserved words in first position
func convertToAnyReservedWord(w *WordType) bool {
if w == nil || w.Type != WordTypeLit {
return false
}
rawVal := string(w.Raw)
for _, rw := range bashReservedWords {
if rawVal == rw {
w.Type = WordTypeKey
return true
2022-11-14 22:56:28 +01:00
}
}
return false
2022-11-14 22:56:28 +01:00
}
// recognizes the specific reserved-word given only ('in' and 'do' in 'for', 'case', and 'select' commands)
func convertToReservedWord(w *WordType, reservedWord string) {
if w == nil || w.Type != WordTypeLit {
return
}
if string(w.Raw) == reservedWord {
w.Type = WordTypeKey
2022-11-14 22:56:28 +01:00
}
}
func isNoneReservedWord(w *WordType) bool {
if w.Type != WordTypeKey {
return false
}
rawVal := string(w.Raw)
for _, rw := range bashNoneRW {
if rawVal == rw {
return true
2022-11-15 19:27:36 +01:00
}
}
return false
2022-11-15 19:27:36 +01:00
}
type parseCmdState struct {
Input []*WordType
InputPos int
Rtn []*CmdType
Cur *CmdType
2022-11-14 22:56:28 +01:00
}
func (state *parseCmdState) isEof() bool {
return state.InputPos >= len(state.Input)
2022-11-14 22:56:28 +01:00
}
func (state *parseCmdState) curWord() *WordType {
if state.isEof() {
2022-11-14 22:56:28 +01:00
return nil
}
return state.Input[state.InputPos]
2022-11-14 22:56:28 +01:00
}
func (state *parseCmdState) lastCmd() *CmdType {
if len(state.Rtn) == 0 {
2022-11-14 22:56:28 +01:00
return nil
}
return state.Rtn[len(state.Rtn)-1]
2022-11-14 22:56:28 +01:00
}
2022-11-18 23:57:25 +01:00
func (state *parseCmdState) makeNoneCmd(sep bool) {
if state.Cur == nil || state.Cur.Type != CmdTypeNone {
state.Cur = &CmdType{Type: CmdTypeNone}
state.Rtn = append(state.Rtn, state.Cur)
}
state.Cur.Words = append(state.Cur.Words, state.curWord())
if sep {
state.Cur = nil
2022-11-14 22:56:28 +01:00
}
state.InputPos++
2022-11-14 22:56:28 +01:00
}
func (state *parseCmdState) handleKeyword(word *WordType) bool {
if word.Type != WordTypeKey {
return false
}
if isNoneReservedWord(word) {
2022-11-18 23:57:25 +01:00
state.makeNoneCmd(true)
return true
}
rw := string(word.Raw)
if rw == "[[" {
// just ignore everything between [[ and ]]
for !state.isEof() {
curWord := state.curWord()
if curWord.Type == WordTypeLit && string(curWord.Raw) == "]]" {
convertToReservedWord(curWord, "]]")
2022-11-18 23:57:25 +01:00
state.makeNoneCmd(false)
break
}
2022-11-18 23:57:25 +01:00
state.makeNoneCmd(false)
}
return true
2022-11-14 22:56:28 +01:00
}
if rw == "case" {
// ignore everything between "case" and "esac"
for !state.isEof() {
curWord := state.curWord()
if curWord.Type == WordTypeKey && string(curWord.Raw) == "esac" {
2022-11-18 23:57:25 +01:00
state.makeNoneCmd(false)
break
}
2022-11-18 23:57:25 +01:00
state.makeNoneCmd(false)
}
return true
}
if rw == "for" || rw == "select" {
// ignore until a "do"
for !state.isEof() {
curWord := state.curWord()
if curWord.Type == WordTypeKey && string(curWord.Raw) == "do" {
2022-11-18 23:57:25 +01:00
state.makeNoneCmd(true)
break
}
2022-11-18 23:57:25 +01:00
state.makeNoneCmd(false)
}
return true
2022-11-15 04:57:29 +01:00
}
if rw == "in" {
// the "for" and "case" clauses should skip "in". so encountering an "in" here is a syntax error.
// just treat it as a none and allow a new command after.
2022-11-18 23:57:25 +01:00
state.makeNoneCmd(false)
return true
2022-11-15 04:57:29 +01:00
}
if rw == "function" {
// ignore until '{'
for !state.isEof() {
curWord := state.curWord()
if curWord.Type == WordTypeKey && string(curWord.Raw) == "{" {
2022-11-18 23:57:25 +01:00
state.makeNoneCmd(true)
break
}
2022-11-18 23:57:25 +01:00
state.makeNoneCmd(false)
}
return true
2022-11-15 04:57:29 +01:00
}
2022-11-18 23:57:25 +01:00
state.makeNoneCmd(true)
return true
2022-11-15 04:57:29 +01:00
}
func isCmdSeparatorOp(word *WordType) bool {
if word.Type != WordTypeOp {
return false
2022-11-15 04:57:29 +01:00
}
opVal := string(word.Raw)
return opVal == ";" || opVal == "\n" || opVal == "&" || opVal == "|" || opVal == "|&" || opVal == "&&" || opVal == "||" || opVal == "(" || opVal == ")"
2022-11-15 04:57:29 +01:00
}
func (state *parseCmdState) handleOp(word *WordType) bool {
opVal := string(word.Raw)
// sequential separators
if opVal == ";" || opVal == "\n" {
2022-11-18 23:57:25 +01:00
state.makeNoneCmd(true)
return true
2022-11-15 09:36:30 +01:00
}
// separator
if opVal == "&" {
2022-11-18 23:57:25 +01:00
state.makeNoneCmd(true)
return true
2022-11-15 09:36:30 +01:00
}
// pipelines
if opVal == "|" || opVal == "|&" {
2022-11-18 23:57:25 +01:00
state.makeNoneCmd(true)
return true
2022-11-15 09:36:30 +01:00
}
// lists
if opVal == "&&" || opVal == "||" {
2022-11-18 23:57:25 +01:00
state.makeNoneCmd(true)
return true
}
// subshell
if opVal == "(" || opVal == ")" {
2022-11-18 23:57:25 +01:00
state.makeNoneCmd(true)
return true
}
return false
2022-11-15 09:36:30 +01:00
}
func wordSliceBoundedIdx(words []*WordType, idx int) *WordType {
if idx >= len(words) {
return nil
2022-11-15 04:57:29 +01:00
}
return words[idx]
2022-11-14 22:56:28 +01:00
}
// identifyReservedWords retypes literal words as keywords, in place.
//
// Any reserved word is recognized when it is the first word of a command or
// directly follows another reserved word. After "for" and "case" the word two
// positions ahead is additionally checked for "in" (and, for "for", "do").
// The reserved words "for", "case" and "in" do not make the following word
// eligible for conversion.
//
// note that a newline "op" can appear in the third position of "for" or "case". the "in" keyword is still converted because of wordNum == 0
func identifyReservedWords(words []*WordType) {
	cmdWordIdx := 0
	afterReserved := false
	for i, w := range words {
		if cmdWordIdx == 0 || afterReserved {
			convertToAnyReservedWord(w)
		}
		if w.Type != WordTypeKey {
			afterReserved = false
			if isCmdSeparatorOp(w) {
				// a separator starts a new command
				cmdWordIdx = 0
			} else {
				cmdWordIdx++
			}
			continue
		}
		// keywords do not advance the position within the command
		switch string(w.Raw) {
		case "for":
			afterReserved = false
			lookahead := wordSliceBoundedIdx(words, i+2)
			convertToReservedWord(lookahead, "in")
			convertToReservedWord(lookahead, "do")
		case "case":
			afterReserved = false
			lookahead := wordSliceBoundedIdx(words, i+2)
			convertToReservedWord(lookahead, "in")
		case "in":
			afterReserved = false
		default:
			afterReserved = true
		}
	}
}
type CmdPos struct {
2022-11-18 23:57:25 +01:00
CmdPos int // index into cmd array
Cmd *CmdType // nil if between commands (only if CmdPos == 0 || CmdPos == len(cmds), otherwise should be a valid entry into a command)
CmdOffset int // offset within the command
CmdWordPos int // (index into cmd) 0 = command-word, negative numbers are assignment-words. can be past the end of Words (means start new word)
CmdWord *WordType // nil if between words
CmdWordOffset int // offset into the word. when cmdword is nil, positive offset would mean in the prefix of next word
}
// FindCmdPos currently ignores both arguments and returns the zero CmdPos.
func FindCmdPos(cmds []*CmdType, offset int) CmdPos {
	var rtn CmdPos
	return rtn
}
// ResetWordOffsets recomputes Offset for every word in words, in place.
// Words are laid out back to back starting at 0: each word's Offset is the
// running position after its Prefix, and the position then advances past its
// Raw text. Subs are renumbered recursively, starting again from 0.
func ResetWordOffsets(words []*WordType) {
	cursor := 0
	for i := range words {
		w := words[i]
		start := cursor + len(w.Prefix)
		w.Offset = start
		if len(w.Subs) != 0 {
			ResetWordOffsets(w.Subs)
		}
		cursor = start + len(w.Raw)
	}
}
// CommandsToWords flattens cmds into a single slice holding every command's
// Words, in order. AssignmentWords are not included.
func CommandsToWords(cmds []*CmdType) []*WordType {
	var flat []*WordType
	for i := 0; i < len(cmds); i++ {
		flat = append(flat, cmds[i].Words...)
	}
	return flat
}
// stripPrefix removes and returns the Prefix of the command's first word
// (the first assignment word if there is one, otherwise the first regular
// word). It returns nil when the command has no words.
func (c *CmdType) stripPrefix() []rune {
	var first *WordType
	switch {
	case len(c.AssignmentWords) > 0:
		first = c.AssignmentWords[0]
	case len(c.Words) > 0:
		first = c.Words[0]
	default:
		return nil
	}
	stripped := first.Prefix
	first.Prefix = nil
	return stripped
}
// isEmpty reports whether the command has neither assignment words nor regular words.
func (c *CmdType) isEmpty() bool {
	hasWords := len(c.AssignmentWords) != 0 || len(c.Words) != 0
	return !hasWords
}
// lastWord returns the final word of the command: the last regular word if
// there is one, otherwise the last assignment word, otherwise nil.
func (c *CmdType) lastWord() *WordType {
	if n := len(c.Words); n > 0 {
		return c.Words[n-1]
	}
	if n := len(c.AssignmentWords); n > 0 {
		return c.AssignmentWords[n-1]
	}
	return nil
}
// endOffset returns the offset just past the raw text of the command's last
// word, or 0 when the command has no words.
func (c *CmdType) endOffset() int {
	if w := c.lastWord(); w != nil {
		return w.Offset + len(w.Raw)
	}
	return 0
}
// indexInRunes returns the index of the first occurrence of ch in arr, or -1
// when ch does not occur.
func indexInRunes(arr []rune, ch rune) int {
	for i := 0; i < len(arr); i++ {
		if arr[i] == ch {
			return i
		}
	}
	return -1
}
// isAssignmentWord reports whether w looks like a variable assignment: a
// literal or group word containing '=' where the text before the first '='
// is accepted by isSimpleVarName.
func isAssignmentWord(w *WordType) bool {
	if w.Type != WordTypeLit && w.Type != WordTypeGroup {
		return false
	}
	eqPos := indexInRunes(w.Raw, '=')
	if eqPos == -1 {
		return false
	}
	return isSimpleVarName(w.Raw[:eqPos])
}
// simple commands steal whitespace from subsequent commands
func cmdWhitespaceFixup(cmds []*CmdType) {
for idx := 0; idx < len(cmds)-1; idx++ {
cmd := cmds[idx]
if cmd.Type != CmdTypeSimple || cmd.isEmpty() {
continue
}
nextCmd := cmds[idx+1]
nextPrefix := nextCmd.stripPrefix()
2022-11-18 23:57:25 +01:00
if len(nextPrefix) > 0 {
blankWord := &WordType{Type: WordTypeLit, QC: cmd.lastWord().QC, Offset: cmd.endOffset(), Prefix: nextPrefix, Complete: true}
cmd.Words = append(cmd.Words, blankWord)
}
}
}
func ParseCommands(words []*WordType) []*CmdType {
identifyReservedWords(words)
state := parseCmdState{Input: words}
for {
if state.isEof() {
break
}
word := state.curWord()
if word.Type == WordTypeKey {
done := state.handleKeyword(word)
if done {
continue
}
}
if word.Type == WordTypeOp {
done := state.handleOp(word)
if done {
continue
}
}
2022-11-18 23:57:25 +01:00
if state.Cur == nil || state.Cur.Type != CmdTypeSimple {
state.Cur = &CmdType{Type: CmdTypeSimple}
state.Rtn = append(state.Rtn, state.Cur)
2022-11-15 09:36:30 +01:00
}
if len(state.Cur.Words) == 0 && isAssignmentWord(word) {
state.Cur.AssignmentWords = append(state.Cur.AssignmentWords, word)
} else {
state.Cur.Words = append(state.Cur.Words, word)
}
state.InputPos++
2022-11-14 22:56:28 +01:00
}
cmdWhitespaceFixup(state.Rtn)
return state.Rtn
2022-11-14 22:56:28 +01:00
}