waveterm/wavesrv/pkg/shparse/shparse.go
Mike Sawka 422338c04b
zsh support (#227)
adds zsh support to waveterm.  big change, lots going on here.  lots of other improvements and bug fixes added while debugging and building out the feature.

Commits:

* refactor shexec parser.go into new package shellenv.  separate out bash specific parsing from generic functions

* checkpoint

* work on refactoring shexec.  created two new packages shellapi (for bash/zsh specific stuff), and shellutil (shared between shellapi and shexec)

* more refactoring

* create shellapi interface to abstract bash specific functionality

* more refactoring, move bash shell state parsing to shellapi

* move makeRcFile to shellapi.  remove all of the 'client' options CLI options from waveshell

* get shellType passed through to server/single paths for waveshell

* add a local shelltype detector

* mock out a zshapi

* move shelltype through more of the code

* get a command to run via zsh

* zsh can now switch directories.  poc, needs cleanup

* working on ShellState encoding differences between zsh/bash.  Working on parsing zsh decls.  move utilfn package into waveshell (shouldn't have been in wavesrv)

* switch to use []byte for vardecl serialization + diffs

* progress on zsh environment.  still have issues reconciling init environment with trap environment

* fix typeset argument parsing

* parse promptvars, more zsh specific ignores

* fix bug with promptvar not getting set (wrong check in FeState func)

* add sdk (issue #188) to list of rtnstate commands

* more zsh compatibility -- working with a larger ohmyzsh environment.  ignore more variables, handle exit trap better.  unique path/fpath.  add a processtype variable to base.

* must return a value

* zsh alias parsing/restoring.  diff changes (and rtnstate changes).  introduces linediff v1.

* force zmodload of zsh/parameter

* starting work on zsh functions

* need a v1 of mapdiff as well (to handle null chars)

* pack/unpack of ints was wrong (one used int and one used uint).  turned out we only ever encoded '0' so it worked.  that also means it is safe to change unpack to unpackUInt

* reworking for binary encoding of aliases and functions (because of zsh allows any character, including nulls, in names and values)

* fixes, working on functions, issue with line endings

* zsh functions.  lots of ugliness here around dealing with line discipline and cooked stty.  new runcommand function to grab output from a non-tty fd.  note that we still need to run the actual command in a stty to get the proper output.

* write uuid tempdir, cleanup with tmprcfilename code

* hack in some simple zsh function declaration finding code for rtnstate.  create function diff for rtnstate that supports zsh

* make sure key order is constant so shell hashes are consistent

* fix problems with state diffs to support new zsh formats.  add diff/apply code to shellapi (moved from shellenv), that is now specific to zsh or bash

* add log packet and new shellstate packets

* switch to shellstate map that's also keyed by shelltype

* add shelltype to remoteinstance

* remove shell argument from waveshell

* added new shelltype statemap to remote.go (msh), deal with fallout

* move shellstate out of init packet, and move to an explicit reinit call.  try to initialize all of the active shell states

* change dont always store init state (only store on demand).  initialize shell states on demand (if not already initialized).  allow reset to change shells

* add shellpref field to remote table.  use to drive the default shell choice for new tabs

* show shelltag on cmdinput, pass through ri and remote (defaultshellstate)

* bump mshell version to v0.4

* better version validation for shellstate.  also relax compatibility requirements for diffing states (shelltype + major version need to match)

* better error handling, check shellstate compatibility during run (on waveshell server)

* add extra separator for bash shellstate processing to deal with spurious output from rc files

* special migration for v30 -- flag invalid bash shell states and show special button in UI to fix

* format

* remove zsh-decls (unused)

* remove test code

* remove debug print

* fix typo
2024-01-16 16:11:04 -08:00

697 lines
17 KiB
Go

// Copyright 2023, Command Line Inc.
// SPDX-License-Identifier: Apache-2.0
package shparse
import (
"bytes"
"fmt"
"github.com/wavetermdev/waveterm/waveshell/pkg/utilfn"
)
//
// cmds := cmd (sep cmd)*
// sep := ';' | '&' | '&&' | '||' | '|' | '\n'
// cmd := simple-cmd | compound-command redirect-list?
// compound-command := brace-group | subshell | for-clause | case-clause | if-clause | while-clause | until-clause
// brace-group := '{' cmds '}'
// subshell := '(' cmds ')'
// simple-command := cmd-prefix cmd-word (io-redirect)*
// cmd-prefix := (io-redirect | assignment)*
// cmd-suffix := (io-redirect | word)*
// cmd-name := word
// cmd-word := word
// io-redirect := (io-number? io-file) | (io-number? io-here)
// io-file := ('<' | '<&' | '>' | '>&' | '>>' | '>|' ) filename
// io-here := ('<<' | '<<-') here_end
// here-end := word
// if-clause := 'if' compound-list 'then' compound-list else-part 'fi'
// else-part := 'elif' compound-list 'then' compound-list
// | 'elif' compound-list 'then' compound-list else-part
// | 'else' compound-list
// compound-list := linebreak term sep?
//
//
//
// A correctly-formed brace expansion must contain unquoted opening and closing braces, and at least one unquoted comma or a valid sequence expression
// Any incorrectly formed brace expansion is left unchanged.
//
// ambiguity between $((...)) and $((ls); ls)
// ambiguity between foo=([0]=hell) and foo=([abc)
// tokenization https://pubs.opengroup.org/onlinepubs/7908799/xcu/chap2.html#tag_001_003
// can-extend: WordTypeLit, WordTypeSimpleVar, WordTypeVarBrace, WordTypeDQ, WordTypeDDQ, WordTypeSQ, WordTypeDSQ
// Word type identifiers stored in WordType.Type and used as keys of wordMetaMap.
// The trailing note on each entry shows the shell syntax that introduces the word
// and the author's parsing notes for it.
const (
WordTypeRaw = "raw"
WordTypeLit = "lit" // (can-extend)
WordTypeOp = "op" // single: & ; | ( ) < > \n multi(2): && || ;; << >> <& >& <> >| (( multi(3): <<- ('((' requires special processing)
WordTypeKey = "key" // if then else elif fi do done case esac while until for in { } ! (( [[
WordTypeGroup = "grp" // contains other words e.g. "hello"foo'bar'$x (has-subs) (can-extend)
WordTypeSimpleVar = "svar" // simplevar $ (can-extend)
WordTypeDQ = "dq" // " (quote-context) (can-extend) (has-subs)
WordTypeDDQ = "ddq" // $" (can-extend) (has-subs) (for quotecontext, uses WordTypeDQ)
WordTypeVarBrace = "varb" // ${ (quote-context) (can-extend) (internals not parsed)
WordTypeDP = "dp" // $( (quote-context) (has-subs)
WordTypeBQ = "bq" // ` (quote-context) (has-subs)
WordTypeSQ = "sq" // ' (can-extend)
WordTypeDSQ = "dsq" // $' (can-extend)
WordTypeDPP = "dpp" // $(( (internals not parsed)
WordTypePP = "pp" // (( (internals not parsed)
WordTypeDB = "db" // $[ (internals not parsed)
)
// Command type identifiers stored in CmdType.Type.
const (
CmdTypeNone = "none" // holds control structures: '(' ')' 'for' 'while' etc.
CmdTypeSimple = "simple" // holds real commands
)
// WordType is one parsed shell word (token). Words that can contain other words
// keep them in Subs.
type WordType struct {
// Type is one of the WordType* constants.
Type string
// Offset is the position of Raw (after Prefix) within the enclosing word sequence;
// see ResetWordOffsets, which restarts at 0 for each Subs list.
Offset int
// QC is the quote context associated with the word.
QC QuoteContext
// Raw is the word's text; for delimited types it includes the opening/closing delimiters
// (MakeEmptyWord seeds it from the type's EmptyWord).
Raw []rune
// Complete is reported as "*" in the debug output when false.
Complete bool
// Prefix holds the runes emitted before Raw by wordsToStr
// (presumably the whitespace preceding the word -- confirm against the tokenizer).
Prefix []rune
// Subs are the nested words, if any.
Subs []*WordType
}
// CmdType is one command produced by ParseCommands.
type CmdType struct {
// Type is CmdTypeNone or CmdTypeSimple.
Type string
// AssignmentWords are the assignment words ParseCommands collected before the
// command received its first entry in Words.
AssignmentWords []*WordType
// Words are the remaining words of the command.
Words []*WordType
NoneComplete bool // set to true when last-word is a "separator"
}
// QuoteContext is a stack of quote-context markers; push appends to the end and
// cur returns the last (most recently pushed) entry.
type QuoteContext []string
// wordMetaMap maps a word type string to its wordMeta. It is allocated and filled
// in init through makeWordMeta.
var wordMetaMap map[string]wordMeta
// bashReservedWords is the list convertToAnyReservedWord matches literal words against.
// same order as https://www.gnu.org/software/bash/manual/html_node/Reserved-Words.html
var bashReservedWords = []string{
"if", "then", "elif", "else", "fi", "time",
"for", "in", "until", "while", "do", "done",
"case", "esac", "coproc", "select", "function",
"{", "}", "[[", "]]", "!",
}
// bashNoneRW lists the reserved words accepted by isNoneReservedWord; handleKeyword
// turns each of them directly into a completed "none" command. It is bashReservedWords
// without the special words listed below, which handleKeyword treats individually.
// special reserved words: "for", "in", "case", "select", "function", "[[", and "]]"
var bashNoneRW = []string{
"if", "then",
"elif", "else", "fi", "time",
"until", "while", "do", "done",
"esac", "coproc",
"{", "}", "!",
}
// wordMeta is the static description of one word type.
// EmptyWord is the text of an empty, complete word of that type (opening delimiter
// followed by closing delimiter); PrefixLen and SuffixLen give the rune lengths of
// those two delimiters. Field order matters: makeWordMeta builds values positionally.
type wordMeta struct {
	Type         string
	EmptyWord    []rune
	PrefixLen    int
	SuffixLen    int
	CanExtend    bool
	QuoteContext bool
}

// getSuffix returns the last SuffixLen runes of EmptyWord as a string,
// or "" when the type has no suffix.
func (m wordMeta) getSuffix() string {
	if m.SuffixLen != 0 {
		start := len(m.EmptyWord) - m.SuffixLen
		return string(m.EmptyWord[start:])
	}
	return ""
}

// getPrefix returns the first PrefixLen runes of EmptyWord as a string,
// or "" when the type has no prefix.
func (m wordMeta) getPrefix() string {
	if m.PrefixLen != 0 {
		opening := m.EmptyWord[:m.PrefixLen]
		return string(opening)
	}
	return ""
}
func makeWordMeta(wtype string, emptyWord string, prefixLen int, suffixLen int, canExtend bool, quoteContext bool) {
if len(emptyWord) != prefixLen+suffixLen {
panic(fmt.Sprintf("invalid empty word %s %d %d", emptyWord, prefixLen, suffixLen))
}
wordMetaMap[wtype] = wordMeta{wtype, []rune(emptyWord), prefixLen, suffixLen, canExtend, quoteContext}
}
// init allocates wordMetaMap and registers the metadata of every word type.
func init() {
	wordMetaMap = make(map[string]wordMeta)
	// One row per word type: type, empty word, prefix length, suffix length,
	// can-extend flag, quote-context flag.
	specs := []struct {
		wtype        string
		emptyWord    string
		prefixLen    int
		suffixLen    int
		canExtend    bool
		quoteContext bool
	}{
		{WordTypeRaw, "", 0, 0, false, false},
		{WordTypeLit, "", 0, 0, true, false},
		{WordTypeOp, "", 0, 0, false, false},
		{WordTypeKey, "", 0, 0, false, false},
		{WordTypeGroup, "", 0, 0, false, false},
		{WordTypeSimpleVar, "$", 1, 0, true, false},
		{WordTypeVarBrace, "${}", 2, 1, true, true},
		{WordTypeDQ, `""`, 1, 1, true, true},
		{WordTypeDDQ, `$""`, 2, 1, true, true},
		{WordTypeDP, "$()", 2, 1, false, false},
		{WordTypeBQ, "``", 1, 1, false, false},
		{WordTypeSQ, "''", 1, 1, true, false},
		{WordTypeDSQ, "$''", 2, 1, true, false},
		{WordTypeDPP, "$(())", 3, 2, false, false},
		{WordTypePP, "(())", 2, 2, false, false},
		{WordTypeDB, "$[]", 2, 1, false, false},
	}
	for _, s := range specs {
		makeWordMeta(s.wtype, s.emptyWord, s.prefixLen, s.suffixLen, s.canExtend, s.quoteContext)
	}
}
// MakeEmptyWord builds a content-less word of type wtype at the given offset.
// An unregistered wtype falls back to WordTypeRaw. For types with delimiters, Raw
// is a fresh copy of the full empty word when complete is true, and of just the
// opening prefix otherwise; for other types Raw stays nil.
func MakeEmptyWord(wtype string, qc QuoteContext, offset int, complete bool) *WordType {
	meta := wordMetaMap[wtype]
	if meta.Type == "" {
		meta = wordMetaMap[WordTypeRaw]
	}
	word := &WordType{Type: meta.Type, QC: qc, Offset: offset, Complete: complete}
	if len(meta.EmptyWord) == 0 {
		return word
	}
	seed := meta.EmptyWord
	if !complete {
		seed = []rune(meta.getPrefix())
	}
	// copy so callers can never mutate the shared metadata
	word.Raw = append([]rune(nil), seed...)
	return word
}
// push returns a new QuoteContext consisting of qc followed by q.
// The receiver is left untouched and shares no storage with the result.
func (qc QuoteContext) push(q string) QuoteContext {
	grown := make([]string, len(qc), len(qc)+1)
	copy(grown, qc)
	return append(grown, q)
}
// cur returns the last entry of qc, or "" when qc is empty.
func (qc QuoteContext) cur() string {
	if n := len(qc); n > 0 {
		return qc[n-1]
	}
	return ""
}
// clone returns an independent copy of qc; an empty context clones to nil.
func (qc QuoteContext) clone() QuoteContext {
	if len(qc) == 0 {
		return nil
	}
	var dup []string
	return append(dup, qc...)
}
// makeRepeatStr returns a string consisting of ch repeated slen times.
// A zero or negative slen yields "" (a negative length used to panic inside make).
func makeRepeatStr(ch byte, slen int) string {
	if slen <= 0 {
		return ""
	}
	return string(bytes.Repeat([]byte{ch}, slen))
}
// isBlank reports whether w is a literal word with no text.
func (w *WordType) isBlank() bool {
	if w.Type != WordTypeLit {
		return false
	}
	return len(w.Raw) == 0
}
// contentEndPos returns the index in Raw where the word's content ends:
// the end of Raw for an incomplete word, otherwise the end of Raw minus the
// length of the type's closing suffix.
func (w *WordType) contentEndPos() int {
	end := len(w.Raw)
	if w.Complete {
		end -= wordMetaMap[w.Type].SuffixLen
	}
	return end
}
// contentStartPos returns the index in Raw where the word's content starts,
// which is the length of the type's opening prefix.
func (w *WordType) contentStartPos() int {
	return wordMetaMap[w.Type].PrefixLen
}
// canHaveSubs reports whether w is of a type that may contain sub-words
// (group, double-quoted, $"-quoted, $(...) or backquoted).
func (w *WordType) canHaveSubs() bool {
	t := w.Type
	return t == WordTypeGroup ||
		t == WordTypeDQ ||
		t == WordTypeDDQ ||
		t == WordTypeDP ||
		t == WordTypeBQ
}
// uncompletable reports whether w belongs to one of the word types this
// package marks as uncompletable.
func (w *WordType) uncompletable() bool {
	switch w.Type {
	case WordTypeRaw, WordTypeOp, WordTypeKey:
		return true
	case WordTypeDPP, WordTypePP, WordTypeDB:
		return true
	case WordTypeBQ, WordTypeDP:
		return true
	}
	return false
}
// stringWithPos renders w for debug output as: type, offset, a "*" flag when the
// word is incomplete, one '_' per prefix rune, then the quoted raw text.
// When pos is not -1 the raw text is first passed through utilfn.StrWithPos with
// that position.
func (w *WordType) stringWithPos(pos int) string {
	flag := "*"
	if w.Complete {
		flag = " "
	}
	text := string(w.Raw)
	if pos != -1 {
		marked := utilfn.StrWithPos{Str: text, Pos: pos}
		text = marked.String()
	}
	pad := makeRepeatStr('_', len(w.Prefix))
	return fmt.Sprintf("%-4s[%3d]%s %s%q", w.Type, w.Offset, flag, pad, text)
}
// String renders w for debug output as: type, offset, a "*" flag when the word
// is incomplete, one '_' per prefix rune, then the quoted raw text.
func (w *WordType) String() string {
	flag := "*"
	if w.Complete {
		flag = " "
	}
	pad := makeRepeatStr('_', len(w.Prefix))
	text := string(w.Raw)
	return fmt.Sprintf("%-4s[%3d]%s %s%q", w.Type, w.Offset, flag, pad, text)
}
// dumpWords prints one line per word (and, indented, per sub-word) to stdout.
// offset is a position to highlight, or -1 for don't show: the position is
// reported once, either as a standalone marker line before the first word that
// starts at or after it, or inside the first word whose raw text covers it.
// Sub-words only receive a position when their parent word contained it.
func dumpWords(words []*WordType, indentStr string, offset int) {
	marked := false
	showPos := offset != -1
	for _, w := range words {
		inside := false
		if showPos && !marked && offset <= w.Offset {
			fmt.Printf("%s* [%3d] [*]\n", indentStr, offset)
			marked = true
		}
		if showPos && !marked && offset < w.Offset+len(w.Raw) {
			fmt.Printf("%s%s\n", indentStr, w.stringWithPos(offset-w.Offset))
			marked, inside = true, true
		} else {
			fmt.Printf("%s%s\n", indentStr, w.String())
		}
		if len(w.Subs) == 0 {
			continue
		}
		subOffset := -1
		if inside {
			// translate into the sub-words' coordinates, which start after the prefix
			subOffset = offset - w.Offset - wordMetaMap[w.Type].PrefixLen
		}
		dumpWords(w.Subs, indentStr+" ", subOffset)
	}
}
// dumpCommands prints each command's header line followed by its assignment
// words (marked with '*') and its regular words, highlighting pos via dumpWords.
func dumpCommands(cmds []*CmdType, indentStr string, pos int) {
	for i := range cmds {
		c := cmds[i]
		fmt.Printf("%sCMD: %s [%d] pos:%d\n", indentStr, c.Type, len(c.Words), pos)
		dumpWords(c.AssignmentWords, indentStr+" *", pos)
		dumpWords(c.Words, indentStr+" ", pos)
	}
}
// wordsToStr concatenates, for each word in order, its Prefix followed by its Raw
// text and returns the result as a string.
func wordsToStr(words []*WordType) string {
	var joined []rune
	for _, w := range words {
		joined = append(joined, w.Prefix...)
		joined = append(joined, w.Raw...)
	}
	return string(joined)
}
// convertToAnyReservedWord recognizes reserved words in first position: when w is
// a literal whose text equals any entry of bashReservedWords, its type is changed
// to WordTypeKey and true is returned. A nil or non-literal w is left alone.
func convertToAnyReservedWord(w *WordType) bool {
	if w == nil || w.Type != WordTypeLit {
		return false
	}
	text := string(w.Raw)
	for i := 0; i < len(bashReservedWords); i++ {
		if bashReservedWords[i] != text {
			continue
		}
		w.Type = WordTypeKey
		return true
	}
	return false
}
// convertToReservedWord recognizes only the given reserved word ('in' and 'do' in
// 'for', 'case', and 'select' commands): a literal w whose text equals
// reservedWord becomes a WordTypeKey word. A nil or non-literal w is left alone.
func convertToReservedWord(w *WordType, reservedWord string) {
	isLit := w != nil && w.Type == WordTypeLit
	if isLit && string(w.Raw) == reservedWord {
		w.Type = WordTypeKey
	}
}
// isNoneReservedWord reports whether w is a keyword word whose text is listed in
// bashNoneRW.
func isNoneReservedWord(w *WordType) bool {
	if w.Type != WordTypeKey {
		return false
	}
	text := string(w.Raw)
	for i := range bashNoneRW {
		if bashNoneRW[i] == text {
			return true
		}
	}
	return false
}
// parseCmdState is the working state of ParseCommands.
type parseCmdState struct {
// Input is the word list being split into commands.
Input []*WordType
// InputPos is the index in Input of the next word to consume.
InputPos int
// Rtn accumulates the commands produced so far, in order.
Rtn []*CmdType
// Cur is the command currently being extended; nil means the next word starts a new command.
Cur *CmdType
}
// isEof reports whether every input word has been consumed.
func (state *parseCmdState) isEof() bool {
	return len(state.Input) <= state.InputPos
}
// curWord returns the word at the current input position, or nil at end of input.
func (state *parseCmdState) curWord() *WordType {
	if !state.isEof() {
		return state.Input[state.InputPos]
	}
	return nil
}
// lastCmd returns the most recently emitted command, or nil when there is none.
func (state *parseCmdState) lastCmd() *CmdType {
	n := len(state.Rtn)
	if n == 0 {
		return nil
	}
	return state.Rtn[n-1]
}
// makeNoneCmd consumes the current word into a "none" command. The current
// command is reused when it already is a none command; otherwise a new one is
// started and emitted. When sep is true the command is marked complete and
// closed, so the next word starts a fresh command.
func (state *parseCmdState) makeNoneCmd(sep bool) {
	cur := state.Cur
	if cur == nil || cur.Type != CmdTypeNone {
		cur = &CmdType{Type: CmdTypeNone}
		state.Cur = cur
		state.Rtn = append(state.Rtn, cur)
	}
	cur.Words = append(cur.Words, state.curWord())
	state.InputPos++
	if sep {
		cur.NoneComplete = true
		state.Cur = nil
	}
}
// handleKeyword consumes the keyword at the current input position (word must be
// that current word) into "none" commands.
//
// It returns false, consuming nothing, when word is not a keyword; otherwise it
// returns true after consuming at least the keyword itself:
//   - words listed in bashNoneRW become a completed none command on their own;
//   - "[[" swallows everything up to and including the closing "]]";
//   - "case" swallows everything through "esac";
//   - "for" / "select" swallow everything through "do";
//   - "function" swallows everything through "{";
//   - a stray "in" is consumed without closing the none command;
//   - any other keyword becomes a completed none command.
//
// Each swallowing form stops at end of input when the closing word is missing.
func (state *parseCmdState) handleKeyword(word *WordType) bool {
	if word.Type != WordTypeKey {
		return false
	}
	if isNoneReservedWord(word) {
		state.makeNoneCmd(true)
		return true
	}
	// skipThroughKey folds words (starting with the keyword itself) into the
	// current none command until the closing keyword has been consumed; sep says
	// whether the closing keyword also completes the none command.
	skipThroughKey := func(closer string, sep bool) {
		for !state.isEof() {
			cur := state.curWord()
			if cur.Type == WordTypeKey && string(cur.Raw) == closer {
				state.makeNoneCmd(sep)
				return
			}
			state.makeNoneCmd(false)
		}
	}
	switch string(word.Raw) {
	case "[[":
		// just ignore everything between [[ and ]]
		for !state.isEof() {
			cur := state.curWord()
			// "]]" is normally still a literal here, but identifyReservedWords has
			// already turned it into a keyword when it directly follows a separator
			// op such as ")" (e.g. `[[ ( -f x ) ]]`). Accept both forms, otherwise
			// the rest of the input would be swallowed.
			closes := cur.Type == WordTypeLit || cur.Type == WordTypeKey
			if closes && string(cur.Raw) == "]]" {
				convertToReservedWord(cur, "]]") // no-op when already a keyword
				state.makeNoneCmd(false)
				return true
			}
			state.makeNoneCmd(false)
		}
	case "case":
		// ignore everything between "case" and "esac"
		skipThroughKey("esac", false)
	case "for", "select":
		// ignore until a "do"
		skipThroughKey("do", true)
	case "in":
		// the "for" and "case" clauses should skip "in". so encountering an "in" here is a syntax error.
		// just treat it as a none and allow a new command after.
		state.makeNoneCmd(false)
	case "function":
		// ignore until '{'
		skipThroughKey("{", true)
	default:
		state.makeNoneCmd(true)
	}
	return true
}
// isCmdSeparatorOp reports whether word is an operator that separates commands:
// ; newline & | |& && || ( or ).
func isCmdSeparatorOp(word *WordType) bool {
	if word.Type != WordTypeOp {
		return false
	}
	switch string(word.Raw) {
	case ";", "\n", "&", "|", "|&", "&&", "||", "(", ")":
		return true
	}
	return false
}
// handleOp consumes the current word as a completed "none" command when its text
// is a command-separating operator, and reports whether it did so. The word's
// type is not checked here; the caller only passes operator words.
func (state *parseCmdState) handleOp(word *WordType) bool {
	switch string(word.Raw) {
	case ";", "\n": // sequential separators
	case "&": // separator
	case "|", "|&": // pipelines
	case "&&", "||": // lists
	case "(", ")": // subshell
	default:
		return false
	}
	state.makeNoneCmd(true)
	return true
}
// wordSliceBoundedIdx returns words[idx], or nil when idx lies outside the slice
// (negative, or at/after its end).
func wordSliceBoundedIdx(words []*WordType, idx int) *WordType {
	if idx < 0 || idx >= len(words) {
		return nil
	}
	return words[idx]
}
// identifyReservedWords retypes literal words that act as reserved words, in place.
// A word is checked against the full reserved-word list when it is the first word
// since the last command separator or directly follows a keyword other than
// "for", "case" or "in". After "for" / "case" the word two positions later is
// additionally checked for "in" (and, for "for", "do").
//
// note that a newline "op" can appear in the third position of "for" or "case".
// the "in" keyword is still converted because of the first-word rule (the newline
// resets the word count to 0).
func identifyReservedWords(words []*WordType) {
	wordsSinceSep := 0
	afterKeyword := false
	for i, w := range words {
		if afterKeyword || wordsSinceSep == 0 {
			convertToAnyReservedWord(w)
		}
		if w.Type != WordTypeKey {
			afterKeyword = false
			if isCmdSeparatorOp(w) {
				wordsSinceSep = 0
			} else {
				wordsSinceSep++
			}
			continue
		}
		// keywords never count as command words
		kw := string(w.Raw)
		afterKeyword = kw != "for" && kw != "case" && kw != "in"
		if kw == "for" || kw == "case" {
			third := wordSliceBoundedIdx(words, i+2)
			convertToReservedWord(third, "in")
			if kw == "for" {
				convertToReservedWord(third, "do")
			}
		}
	}
}
// ResetWordOffsets recomputes Offset for every word, assuming the words are laid
// out back to back starting at startIdx: each word's Offset points just past its
// Prefix, and the next word begins right after its Raw text. Sub-words are
// renumbered recursively, starting from 0 within their parent.
func ResetWordOffsets(words []*WordType, startIdx int) {
	cursor := startIdx
	for i := range words {
		w := words[i]
		w.Offset = cursor + len(w.Prefix)
		if len(w.Subs) != 0 {
			ResetWordOffsets(w.Subs, 0)
		}
		cursor = w.Offset + len(w.Raw)
	}
}
// CommandsToWords flattens cmds back into a single word list. For every command
// its AssignmentWords are emitted first, followed by its Words (the same order
// firstWord and stripPrefix assume), so no word held by a command is dropped.
// It returns nil when there are no words at all.
func CommandsToWords(cmds []*CmdType) []*WordType {
	var rtn []*WordType
	for _, cmd := range cmds {
		rtn = append(rtn, cmd.AssignmentWords...)
		rtn = append(rtn, cmd.Words...)
	}
	return rtn
}
// stripPrefix removes and returns the Prefix of the command's first word (the
// first assignment word when there is one, otherwise the first regular word).
// The word itself is not modified: it is replaced in the command by a copy whose
// Prefix is nil. Returns nil when the command is empty or the first word has no prefix.
func (c *CmdType) stripPrefix() []rune {
	list := c.AssignmentWords
	if len(list) == 0 {
		list = c.Words
	}
	if len(list) == 0 {
		return nil
	}
	lead := list[0]
	if len(lead.Prefix) == 0 {
		return nil
	}
	stripped := *lead
	stripped.Prefix = nil
	// list shares its backing array with the command's own slice
	list[0] = &stripped
	return lead.Prefix
}
// isEmpty reports whether the command holds no words of either kind.
func (c *CmdType) isEmpty() bool {
	return len(c.AssignmentWords)+len(c.Words) == 0
}
// lastWord returns the final word of the command: the last regular word, or the
// last assignment word when there are no regular words, or nil when empty.
func (c *CmdType) lastWord() *WordType {
	if n := len(c.Words); n > 0 {
		return c.Words[n-1]
	}
	if n := len(c.AssignmentWords); n > 0 {
		return c.AssignmentWords[n-1]
	}
	return nil
}
// firstWord returns the leading word of the command: the first assignment word,
// or the first regular word when there are no assignment words, or nil when empty.
func (c *CmdType) firstWord() *WordType {
	switch {
	case len(c.AssignmentWords) > 0:
		return c.AssignmentWords[0]
	case len(c.Words) > 0:
		return c.Words[0]
	}
	return nil
}
// offset returns the Offset of the command's first word, or 0 for an empty command.
func (c *CmdType) offset() int {
	if w := c.firstWord(); w != nil {
		return w.Offset
	}
	return 0
}
// endOffset returns the position just past the raw text of the command's last
// word, or 0 for an empty command.
func (c *CmdType) endOffset() int {
	if w := c.lastWord(); w != nil {
		return w.Offset + len(w.Raw)
	}
	return 0
}
// indexInRunes returns the index of the first occurrence of ch in arr, or -1
// when ch does not occur.
func indexInRunes(arr []rune, ch rune) int {
	for i := 0; i < len(arr); i++ {
		if arr[i] == ch {
			return i
		}
	}
	return -1
}
// isAssignmentWord reports whether w looks like NAME=...: a literal or group word
// containing '=' whose text before the first '=' passes isSimpleVarName.
func isAssignmentWord(w *WordType) bool {
	switch w.Type {
	case WordTypeLit, WordTypeGroup:
	default:
		return false
	}
	eq := indexInRunes(w.Raw, '=')
	if eq == -1 {
		return false
	}
	return isSimpleVarName(w.Raw[:eq])
}
// cmdWhitespaceFixup makes simple commands steal whitespace from subsequent
// commands: for each non-empty simple command, the prefix of the next command's
// first word is stripped and re-attached as a trailing blank literal word (empty
// Raw, Prefix = the stolen runes) on the simple command.
func cmdWhitespaceFixup(cmds []*CmdType) {
	for i := 1; i < len(cmds); i++ {
		prev := cmds[i-1]
		if prev.Type != CmdTypeSimple || prev.isEmpty() {
			continue
		}
		stolen := cmds[i].stripPrefix()
		if len(stolen) == 0 {
			continue
		}
		prev.Words = append(prev.Words, &WordType{
			Type:     WordTypeLit,
			QC:       prev.lastWord().QC,
			Offset:   prev.endOffset() + len(stolen),
			Prefix:   stolen,
			Complete: true,
		})
	}
}
// ParseCommands splits a flat word list into commands. Reserved words are
// identified first (retyping words in place); keywords and separator operators
// then become "none" commands, while every other word is added to the current
// simple command -- as an assignment word while the command has no regular words
// yet and the word looks like an assignment, as a regular word otherwise.
// Finally whitespace prefixes are moved onto preceding simple commands.
func ParseCommands(words []*WordType) []*CmdType {
	identifyReservedWords(words)
	state := parseCmdState{Input: words}
	for !state.isEof() {
		word := state.curWord()
		switch word.Type {
		case WordTypeKey:
			if state.handleKeyword(word) {
				continue
			}
		case WordTypeOp:
			if state.handleOp(word) {
				continue
			}
		}
		cur := state.Cur
		if cur == nil || cur.Type != CmdTypeSimple {
			cur = &CmdType{Type: CmdTypeSimple}
			state.Cur = cur
			state.Rtn = append(state.Rtn, cur)
		}
		if len(cur.Words) == 0 && isAssignmentWord(word) {
			cur.AssignmentWords = append(cur.AssignmentWords, word)
		} else {
			cur.Words = append(cur.Words, word)
		}
		state.InputPos++
	}
	cmdWhitespaceFixup(state.Rtn)
	return state.Rtn
}