waveterm/pkg/shparse/shparse.go

474 lines
12 KiB
Go
Raw Normal View History

2022-11-14 22:56:28 +01:00
package shparse
import (
"bytes"
2022-11-14 22:56:28 +01:00
"fmt"
)
//
// cmds := cmd (sep cmd)*
// sep := ';' | '&' | '&&' | '||' | '|' | '\n'
// cmd := simple-command | compound-command redirect-list?
// compound-command := brace-group | subshell | for-clause | case-clause | if-clause | while-clause | until-clause
// brace-group := '{' cmds '}'
// subshell := '(' cmds ')'
// simple-command := cmd-prefix cmd-word (io-redirect)*
// cmd-prefix := (io-redirect | assignment)*
// cmd-suffix := (io-redirect | word)*
// cmd-name := word
// cmd-word := word
// io-redirect := (io-number? io-file) | (io-number? io-here)
// io-file := ('<' | '<&' | '>' | '>&' | '>>' | '>|' ) filename
// io-here := ('<<' | '<<-') here_end
// here-end := word
// if-clause := 'if' compound-list 'then' compound-list else-part 'fi'
// else-part := 'elif' compound-list 'then' compound-list
// | 'elif' compound-list 'then' compound-list else-part
// | 'else' compound-list
// compound-list := linebreak term sep?
//
//
//
// A correctly-formed brace expansion must contain unquoted opening and closing braces, and at least one unquoted comma or a valid sequence expression
// Any incorrectly formed brace expansion is left unchanged.
//
2022-11-14 22:56:28 +01:00
// ambiguity between $((...)) and $((ls); ls)
// ambiguity between foo=([0]=hell) and foo=([abc)
2022-11-15 04:57:29 +01:00
// tokenization https://pubs.opengroup.org/onlinepubs/7908799/xcu/chap2.html#tag_001_003
2022-11-17 08:52:10 +01:00
// can-extend: WordTypeLit, WordTypeSimpleVar, WordTypeVarBrace, WordTypeDQ, WordTypeDDQ, WordTypeSQ, WordTypeDSQ
2022-11-14 22:56:28 +01:00
// Word type identifiers stored in WordType.Type (and used as keys of
// wordMetaMap). The trailing notes show the opening syntax of each type and
// carry the original (can-extend) / (quote-context) annotations.
const (
	WordTypeRaw       = "raw"
	WordTypeLit       = "lit"  // (can-extend)
	WordTypeOp        = "op"   // single: & ; | ( ) < > \n multi(2): && || ;; << >> <& >& <> >| (( multi(3): <<- ('((' requires special processing)
	WordTypeKey       = "key"  // if then else elif fi do done case esac while until for in { } ! (( [[
	WordTypeGroup     = "grp"  // contains other words e.g. "hello"foo'bar'$x
	WordTypeSimpleVar = "svar" // simplevar $ (can-extend)

	WordTypeDQ       = "dq"   // " (quote-context) (can-extend)
	WordTypeDDQ      = "ddq"  // $" (quote-context) (can-extend)
	WordTypeVarBrace = "varb" // ${ (quote-context) (can-extend)
	WordTypeDP       = "dp"   // $( (quote-context)
	WordTypeBQ       = "bq"   // ` (quote-context)

	WordTypeSQ  = "sq"  // ' (can-extend)
	WordTypeDSQ = "dsq" // $' (can-extend)
	WordTypeDPP = "dpp" // $(( (internals not parsed)
	WordTypePP  = "pp"  // (( (internals not parsed)
	WordTypeDB  = "db"  // $[ (internals not parsed)
)
// Command type identifiers stored in CmdType.Type.
const (
CmdTypeNone = "none" // holds control structures: '(' ')' 'for' 'while' etc.
CmdTypeSimple = "simple" // holds real commands
)
// WordType is one token ("word") of shell input. Group words carry their
// pieces in Subs; all other words carry their text in Raw.
type WordType struct {
// Type is one of the WordType* constants.
Type string
// Offset is the offset given when the word was created
// (presumably its position in the input -- TODO confirm).
Offset int
// QC is the quote context the word was created with.
QC QuoteContext
// Raw is the word's own text; FullRawString returns it for non-group words.
Raw []rune
// Complete is printed as a "*" flag by String when false
// (presumably marks an unterminated word -- TODO confirm).
Complete bool
// Prefix is text emitted before the word by wordsToStr
// (presumably leading whitespace -- TODO confirm).
Prefix []rune
// Subs holds nested words; FullRawString concatenates them for group words.
Subs []*WordType
}
// CmdType is one entry of the command list returned by ParseCommands.
type CmdType struct {
// Type is CmdTypeNone or CmdTypeSimple.
Type string
// AssignmentWords is not populated by the code visible in this file
// (presumably reserved for leading NAME=value words -- TODO confirm).
AssignmentWords []*WordType
// Words are the words belonging to this command, in input order.
Words []*WordType
}
// QuoteContext is a stack of quote markers: push returns a copy with one more
// entry, and cur returns the last (innermost) entry.
type QuoteContext []string
2022-11-17 08:52:10 +01:00
// wordMetaMap maps a word type (a WordType* constant) to its metadata.
// It is allocated and filled by init via makeWordMeta.
var wordMetaMap map[string]wordMeta
// bashReservedWords is the list convertToAnyReservedWord matches literal words
// against when deciding whether to retype them as keywords.
// same order as https://www.gnu.org/software/bash/manual/html_node/Reserved-Words.html
var bashReservedWords = []string{
"if", "then", "elif", "else", "fi", "time",
"for", "in", "until", "while", "do", "done",
"case", "esac", "coproc", "select", "function",
"{", "}", "[[", "]]", "!",
}
// bashNoneRW is the list isNoneReservedWord checks; handleKeyword sends these
// keywords straight to makeNoneCmd with no further processing.
// The special reserved words "for", "in", "case", "select", "function", "[[", and "]]"
// are NOT in this list; handleKeyword treats them separately.
var bashNoneRW = []string{
"if", "then", "elif", "else", "fi", "time",
"until", "while", "do", "done",
"esac", "coproc",
"{", "}", "!",
}
2022-11-17 08:52:10 +01:00
// wordMeta describes the static properties of one word type. Entries are
// created by makeWordMeta and stored in wordMetaMap.
type wordMeta struct {
// Type is the word type this entry describes (same value as its map key).
Type string
// EmptyWord is the text of an empty word of this type; MakeEmptyWord copies it into Raw.
EmptyWord []rune
// SuffixLen is presumably the length of the closing delimiter inside EmptyWord -- TODO confirm.
SuffixLen int
// CanExtend mirrors the (can-extend) annotation on the WordType* constants.
CanExtend bool
// QuoteContext presumably marks types that open a new quote context -- TODO confirm.
QuoteContext bool
}
func makeWordMeta(wtype string, emptyWord string, suffixLen int, canExtend bool, quoteContext bool) {
wordMetaMap[wtype] = wordMeta{wtype, []rune(emptyWord), suffixLen, canExtend, quoteContext}
}
func init() {
wordMetaMap = make(map[string]wordMeta)
makeWordMeta(WordTypeRaw, "", 0, false, false)
makeWordMeta(WordTypeLit, "", 0, true, false)
makeWordMeta(WordTypeOp, "", 0, false, false)
makeWordMeta(WordTypeKey, "", 0, false, false)
makeWordMeta(WordTypeGroup, "", 0, false, false)
makeWordMeta(WordTypeSimpleVar, "$", 0, true, false)
makeWordMeta(WordTypeVarBrace, "${}", 1, true, true)
makeWordMeta(WordTypeDQ, `""`, 1, true, true)
makeWordMeta(WordTypeDDQ, `$""`, 1, true, true)
makeWordMeta(WordTypeDP, "$()", 1, false, false)
makeWordMeta(WordTypeBQ, "``", 1, false, false)
makeWordMeta(WordTypeSQ, "''", 1, true, false)
makeWordMeta(WordTypeDSQ, "$''", 1, true, false)
makeWordMeta(WordTypeDPP, "$(())", 2, false, false)
makeWordMeta(WordTypePP, "(())", 2, false, false)
makeWordMeta(WordTypeDB, "$[]", 1, false, false)
}
func MakeEmptyWord(wtype string, qc QuoteContext, offset int) *WordType {
2022-11-17 08:52:10 +01:00
meta := wordMetaMap[wtype]
if meta.Type == "" {
meta = wordMetaMap[WordTypeRaw]
}
rtn := &WordType{Type: meta.Type, QC: qc, Offset: offset, Complete: true}
2022-11-17 08:52:10 +01:00
if len(meta.EmptyWord) > 0 {
rtn.Raw = append([]rune(nil), meta.EmptyWord...)
}
return rtn
}
func (qc QuoteContext) push(q string) QuoteContext {
2022-11-15 09:36:30 +01:00
rtn := make([]string, 0, len(qc)+1)
rtn = append(rtn, qc...)
rtn = append(rtn, q)
return rtn
}
func (qc QuoteContext) cur() string {
2022-11-15 09:36:30 +01:00
if len(qc) == 0 {
return ""
}
return qc[len(qc)-1]
}
// makeRepeatStr returns a string made of slen copies of the byte ch.
// A zero or negative slen yields the empty string (a negative length used to
// panic when allocating the buffer).
func makeRepeatStr(ch byte, slen int) string {
	if slen <= 0 {
		return ""
	}
	return string(bytes.Repeat([]byte{ch}, slen))
}
func (w *WordType) String() string {
notCompleteFlag := " "
if !w.Complete {
notCompleteFlag = "*"
}
return fmt.Sprintf("%4s[%3d]%s %s%q", w.Type, w.Offset, notCompleteFlag, makeRepeatStr('_', len(w.Prefix)), string(w.FullRawString()))
2022-11-14 22:56:28 +01:00
}
func dumpWords(words []*WordType, indentStr string) {
for _, word := range words {
fmt.Printf("%s%s\n", indentStr, word.String())
if len(word.Subs) > 0 {
dumpWords(word.Subs, indentStr+" ")
}
}
2022-11-14 22:56:28 +01:00
}
func dumpCommands(cmds []*CmdType, indentStr string) {
for _, cmd := range cmds {
fmt.Printf("%sCMD: %s [%d]\n", indentStr, cmd.Type, len(cmd.Words))
dumpWords(cmd.Words, indentStr+" ")
}
2022-11-14 22:56:28 +01:00
}
func (w *WordType) FullRawString() []rune {
if w.Type == WordTypeGroup {
var rtn []rune
for _, sw := range w.Subs {
rtn = append(rtn, sw.FullRawString()...)
}
return rtn
}
return w.Raw
2022-11-15 04:57:29 +01:00
}
func wordsToStr(words []*WordType) string {
var buf bytes.Buffer
for _, word := range words {
if len(word.Prefix) > 0 {
buf.WriteString(string(word.Prefix))
}
buf.WriteString(string(word.FullRawString()))
}
return buf.String()
2022-11-14 22:56:28 +01:00
}
// recognizes reserved words in first position
func convertToAnyReservedWord(w *WordType) bool {
if w == nil || w.Type != WordTypeLit {
return false
}
rawVal := string(w.Raw)
for _, rw := range bashReservedWords {
if rawVal == rw {
w.Type = WordTypeKey
return true
2022-11-14 22:56:28 +01:00
}
}
return false
2022-11-14 22:56:28 +01:00
}
// recognizes the specific reserved-word given only ('in' and 'do' in 'for', 'case', and 'select' commands)
func convertToReservedWord(w *WordType, reservedWord string) {
if w == nil || w.Type != WordTypeLit {
return
}
if string(w.Raw) == reservedWord {
w.Type = WordTypeKey
2022-11-14 22:56:28 +01:00
}
}
func isNoneReservedWord(w *WordType) bool {
if w.Type != WordTypeKey {
return false
}
rawVal := string(w.Raw)
for _, rw := range bashNoneRW {
if rawVal == rw {
return true
2022-11-15 19:27:36 +01:00
}
}
return false
2022-11-15 19:27:36 +01:00
}
type parseCmdState struct {
Input []*WordType
InputPos int
Rtn []*CmdType
Cur *CmdType
2022-11-14 22:56:28 +01:00
}
func (state *parseCmdState) isEof() bool {
return state.InputPos >= len(state.Input)
2022-11-14 22:56:28 +01:00
}
func (state *parseCmdState) curWord() *WordType {
if state.isEof() {
2022-11-14 22:56:28 +01:00
return nil
}
return state.Input[state.InputPos]
2022-11-14 22:56:28 +01:00
}
func (state *parseCmdState) lastCmd() *CmdType {
if len(state.Rtn) == 0 {
2022-11-14 22:56:28 +01:00
return nil
}
return state.Rtn[len(state.Rtn)-1]
2022-11-14 22:56:28 +01:00
}
func (state *parseCmdState) makeNoneCmd() {
lastCmd := state.lastCmd()
if lastCmd == nil || lastCmd.Type != CmdTypeNone {
lastCmd = &CmdType{Type: CmdTypeNone}
state.Rtn = append(state.Rtn, lastCmd)
2022-11-14 22:56:28 +01:00
}
lastCmd.Words = append(lastCmd.Words, state.curWord())
state.Cur = nil
state.InputPos++
2022-11-14 22:56:28 +01:00
}
func (state *parseCmdState) handleKeyword(word *WordType) bool {
if word.Type != WordTypeKey {
return false
}
if isNoneReservedWord(word) {
state.makeNoneCmd()
return true
}
rw := string(word.Raw)
if rw == "[[" {
// just ignore everything between [[ and ]]
for !state.isEof() {
curWord := state.curWord()
if curWord.Type == WordTypeLit && string(curWord.Raw) == "]]" {
convertToReservedWord(curWord, "]]")
state.makeNoneCmd()
break
}
state.makeNoneCmd()
}
return true
2022-11-14 22:56:28 +01:00
}
if rw == "case" {
// ignore everything between "case" and "esac"
for !state.isEof() {
curWord := state.curWord()
if curWord.Type == WordTypeKey && string(curWord.Raw) == "esac" {
state.makeNoneCmd()
break
}
state.makeNoneCmd()
}
return true
}
if rw == "for" || rw == "select" {
// ignore until a "do"
for !state.isEof() {
curWord := state.curWord()
if curWord.Type == WordTypeKey && string(curWord.Raw) == "do" {
state.makeNoneCmd()
break
}
state.makeNoneCmd()
}
return true
2022-11-15 04:57:29 +01:00
}
if rw == "in" {
// the "for" and "case" clauses should skip "in". so encountering an "in" here is a syntax error.
// just treat it as a none and allow a new command after.
state.makeNoneCmd()
return true
2022-11-15 04:57:29 +01:00
}
if rw == "function" {
// ignore until '{'
for !state.isEof() {
curWord := state.curWord()
if curWord.Type == WordTypeKey && string(curWord.Raw) == "{" {
state.makeNoneCmd()
break
}
state.makeNoneCmd()
}
return true
2022-11-15 04:57:29 +01:00
}
state.makeNoneCmd()
return true
2022-11-15 04:57:29 +01:00
}
func isCmdSeparatorOp(word *WordType) bool {
if word.Type != WordTypeOp {
return false
2022-11-15 04:57:29 +01:00
}
opVal := string(word.Raw)
return opVal == ";" || opVal == "\n" || opVal == "&" || opVal == "|" || opVal == "|&" || opVal == "&&" || opVal == "||" || opVal == "(" || opVal == ")"
2022-11-15 04:57:29 +01:00
}
func (state *parseCmdState) handleOp(word *WordType) bool {
opVal := string(word.Raw)
// sequential separators
if opVal == ";" || opVal == "\n" {
state.makeNoneCmd()
return true
2022-11-15 09:36:30 +01:00
}
// separator
if opVal == "&" {
state.makeNoneCmd()
return true
2022-11-15 09:36:30 +01:00
}
// pipelines
if opVal == "|" || opVal == "|&" {
state.makeNoneCmd()
return true
2022-11-15 09:36:30 +01:00
}
// lists
if opVal == "&&" || opVal == "||" {
state.makeNoneCmd()
return true
}
// subshell
if opVal == "(" || opVal == ")" {
state.makeNoneCmd()
return true
}
return false
2022-11-15 09:36:30 +01:00
}
func wordSliceBoundedIdx(words []*WordType, idx int) *WordType {
if idx >= len(words) {
return nil
2022-11-15 04:57:29 +01:00
}
return words[idx]
2022-11-14 22:56:28 +01:00
}
// identifyReservedWords walks the words once and retypes literals that are
// reserved words as keywords, in place. A word is a candidate when it is the
// first word of a command or directly follows another keyword. In addition,
// the word two positions after "for" may become "in" or "do", and the word
// two positions after "case" may become "in".
//
// note that a newline "op" can appear in the third position of "for" or
// "case". the "in" keyword is still converted because the newline resets the
// position-in-command counter to 0.
func identifyReservedWords(words []*WordType) {
	posInCmd := 0
	afterKeyword := false
	for i := range words {
		w := words[i]
		if afterKeyword || posInCmd == 0 {
			convertToAnyReservedWord(w)
		}
		if w.Type != WordTypeKey {
			afterKeyword = false
			if isCmdSeparatorOp(w) {
				posInCmd = 0
			} else {
				posInCmd++
			}
			continue
		}
		// Keywords never advance the position counter. "for", "case" and "in"
		// are followed by ordinary words, every other keyword by a possible
		// further keyword.
		kw := string(w.Raw)
		afterKeyword = kw != "for" && kw != "case" && kw != "in"
		if kw == "for" || kw == "case" {
			third := wordSliceBoundedIdx(words, i+2)
			convertToReservedWord(third, "in")
			if kw == "for" {
				convertToReservedWord(third, "do")
			}
		}
	}
}
func ParseCommands(words []*WordType) []*CmdType {
identifyReservedWords(words)
state := parseCmdState{Input: words}
for {
if state.isEof() {
break
}
word := state.curWord()
if word.Type == WordTypeKey {
done := state.handleKeyword(word)
if done {
continue
}
}
if word.Type == WordTypeOp {
done := state.handleOp(word)
if done {
continue
}
}
if state.Cur == nil {
state.Cur = &CmdType{Type: CmdTypeSimple}
state.Rtn = append(state.Rtn, state.Cur)
2022-11-15 09:36:30 +01:00
}
state.Cur.Words = append(state.Cur.Words, word)
state.InputPos++
2022-11-14 22:56:28 +01:00
}
return state.Rtn
2022-11-14 22:56:28 +01:00
}