working on integrating position. identify cmd assignment words. make group words consistent. always copy Raw runes. fixup command whitespace

This commit is contained in:
sawka 2022-11-18 14:26:52 -08:00
parent a599dc473a
commit a630606020
5 changed files with 279 additions and 68 deletions

View File

@ -64,30 +64,26 @@ func (w *WordType) writeRune(ch rune) {
return
}
// cloneRaw replaces w.Raw with a freshly allocated copy of the same runes, so
// later appends to w.Raw cannot write into a backing array shared with another
// slice. A word whose Raw is empty is left untouched.
func (w *WordType) cloneRaw() {
	n := len(w.Raw)
	if n == 0 {
		return
	}
	dup := make([]rune, n)
	copy(dup, w.Raw)
	w.Raw = dup
}
// extendContext is the working state built by makeExtendContext.
type extendContext struct {
Input []*WordType // NOTE(review): presumably the words still to be consumed — not set in the visible code, confirm
InputPos int // NOTE(review): presumably the index of the next entry of Input — confirm
QC QuoteContext // quote context passed to makeExtendContext
Rtn []*WordType // accumulated result words; seeded with the starting word when one is given
CurWord *WordType // word currently being extended; nil when no starting word was given
Intention string // word type being targeted: the starting word's Type, or WordTypeLit without one
}
// makeExtendContext builds an extendContext for quote context qc.
// With a nil word the context starts with no result words and an Intention of
// WordTypeLit. Otherwise the word becomes both the only entry of Rtn and
// CurWord, and Intention is taken from the word's Type.
func makeExtendContext(qc QuoteContext, w *WordType) *extendContext {
rtn := &extendContext{QC: qc, Intention: WordTypeLit}
if w != nil {
w.cloneRaw()
rtn.Rtn = []*WordType{w}
rtn.CurWord = w
rtn.Intention = w.Type
func makeExtendContext(qc QuoteContext, word *WordType) *extendContext {
rtn := &extendContext{QC: qc}
if word == nil {
rtn.Intention = WordTypeLit
return rtn
} else {
rtn.Intention = word.Type
rtn.Rtn = []*WordType{word}
rtn.CurWord = word
return rtn
}
return rtn
}
func (ec *extendContext) appendWord(w *WordType) {

View File

@ -3,6 +3,8 @@ package shparse
import (
"bytes"
"fmt"
"github.com/scripthaus-dev/sh2-server/pkg/utilfn"
)
//
@ -103,33 +105,37 @@ var bashNoneRW = []string{
// wordMeta holds the static properties registered for one word type.
type wordMeta struct {
Type string // word type this entry describes (also its key in wordMetaMap)
EmptyWord []rune // raw text of an empty word of this type, e.g. `""` or "${}"
PrefixLen int // number of leading EmptyWord characters (the opening delimiter)
SuffixLen int // number of trailing EmptyWord characters (the closing delimiter)
CanExtend bool // NOTE(review): presumably whether text can be appended inside a word of this type — confirm against users of the flag
QuoteContext bool // NOTE(review): presumably whether the word establishes its own quote context — confirm
}
// makeWordMeta registers the metadata for word type wtype in wordMetaMap,
// storing emptyWord as runes alongside the given lengths and flags.
func makeWordMeta(wtype string, emptyWord string, suffixLen int, canExtend bool, quoteContext bool) {
wordMetaMap[wtype] = wordMeta{wtype, []rune(emptyWord), suffixLen, canExtend, quoteContext}
func makeWordMeta(wtype string, emptyWord string, prefixLen int, suffixLen int, canExtend bool, quoteContext bool) {
// the empty word must consist of exactly the prefix plus the suffix; a
// mismatch is a programming error in the registration table, so panic
if len(emptyWord) != prefixLen+suffixLen {
panic(fmt.Sprintf("invalid empty word %s %d %d", emptyWord, prefixLen, suffixLen))
}
wordMetaMap[wtype] = wordMeta{wtype, []rune(emptyWord), prefixLen, suffixLen, canExtend, quoteContext}
}
// init allocates wordMetaMap and registers one wordMeta entry per word type.
// Each makeWordMeta call lists the type's empty-word text followed by its
// delimiter length(s) and its canExtend / quoteContext flags.
func init() {
wordMetaMap = make(map[string]wordMeta)
makeWordMeta(WordTypeRaw, "", 0, false, false)
makeWordMeta(WordTypeLit, "", 0, true, false)
makeWordMeta(WordTypeOp, "", 0, false, false)
makeWordMeta(WordTypeKey, "", 0, false, false)
makeWordMeta(WordTypeGroup, "", 0, false, false)
makeWordMeta(WordTypeSimpleVar, "$", 0, true, false)
makeWordMeta(WordTypeVarBrace, "${}", 1, true, true)
makeWordMeta(WordTypeDQ, `""`, 1, true, true)
makeWordMeta(WordTypeDDQ, `$""`, 1, true, true)
makeWordMeta(WordTypeDP, "$()", 1, false, false)
makeWordMeta(WordTypeBQ, "``", 1, false, false)
makeWordMeta(WordTypeSQ, "''", 1, true, false)
makeWordMeta(WordTypeDSQ, "$''", 1, true, false)
makeWordMeta(WordTypeDPP, "$(())", 2, false, false)
makeWordMeta(WordTypePP, "(())", 2, false, false)
makeWordMeta(WordTypeDB, "$[]", 1, false, false)
// six-argument form: prefix length and suffix length are given separately
makeWordMeta(WordTypeRaw, "", 0, 0, false, false)
makeWordMeta(WordTypeLit, "", 0, 0, true, false)
makeWordMeta(WordTypeOp, "", 0, 0, false, false)
makeWordMeta(WordTypeKey, "", 0, 0, false, false)
makeWordMeta(WordTypeGroup, "", 0, 0, false, false)
makeWordMeta(WordTypeSimpleVar, "$", 1, 0, true, false)
makeWordMeta(WordTypeVarBrace, "${}", 2, 1, true, true)
makeWordMeta(WordTypeDQ, `""`, 1, 1, true, true)
makeWordMeta(WordTypeDDQ, `$""`, 2, 1, true, true)
makeWordMeta(WordTypeDP, "$()", 2, 1, false, false)
makeWordMeta(WordTypeBQ, "``", 1, 1, false, false)
makeWordMeta(WordTypeSQ, "''", 1, 1, true, false)
makeWordMeta(WordTypeDSQ, "$''", 2, 1, true, false)
makeWordMeta(WordTypeDPP, "$(())", 3, 2, false, false)
makeWordMeta(WordTypePP, "(())", 2, 2, false, false)
makeWordMeta(WordTypeDB, "$[]", 2, 1, false, false)
}
func MakeEmptyWord(wtype string, qc QuoteContext, offset int) *WordType {
@ -169,48 +175,72 @@ func makeRepeatStr(ch byte, slen int) string {
return string(rtn)
}
// isBlank reports whether w is a literal word with no raw text.
func (w *WordType) isBlank() bool {
	if w.Type != WordTypeLit {
		return false
	}
	return len(w.Raw) == 0
}
// stringWithPos renders w as a one-line debug string, like String, but with a
// cursor marker embedded in the raw text at position pos. Passing -1 for pos
// omits the marker.
func (w *WordType) stringWithPos(pos int) string {
	text := string(w.Raw)
	if pos != -1 {
		text = utilfn.StrWithPos{Str: text, Pos: pos}.String()
	}
	// incomplete words are flagged with '*'
	flag := "*"
	if w.Complete {
		flag = " "
	}
	// one underscore per prefix rune keeps the leading whitespace visible
	lead := makeRepeatStr('_', len(w.Prefix))
	return fmt.Sprintf("%-4s[%3d]%s %s%q", w.Type, w.Offset, flag, lead, text)
}
// String renders the word as a single debug line: its type, its offset, a '*'
// flag when the word is not Complete, one '_' per Prefix rune, and the quoted
// raw text.
func (w *WordType) String() string {
notCompleteFlag := " "
if !w.Complete {
notCompleteFlag = "*"
}
return fmt.Sprintf("%4s[%3d]%s %s%q", w.Type, w.Offset, notCompleteFlag, makeRepeatStr('_', len(w.Prefix)), string(w.FullRawString()))
return fmt.Sprintf("%-4s[%3d]%s %s%q", w.Type, w.Offset, notCompleteFlag, makeRepeatStr('_', len(w.Prefix)), string(w.Raw))
}
// dumpWords prints one line per word to stdout, each prefixed with indentStr,
// and recurses into every word's Subs with a deeper indent.
func dumpWords(words []*WordType, indentStr string) {
// offset is the cursor position to mark in the dump; pass -1 to show no cursor
func dumpWords(words []*WordType, indentStr string, offset int) {
wrotePos := false // the cursor is printed at most once per call
for _, word := range words {
fmt.Printf("%s%s\n", indentStr, word.String())
posInWord := false
// cursor sits at or before the start of this word: print a standalone marker line
if !wrotePos && offset != -1 && offset <= word.Offset {
fmt.Printf("%s* [%3d] [*]\n", indentStr, offset)
wrotePos = true
}
// cursor falls inside this word's raw text: print the word with the marker embedded
if !wrotePos && offset != -1 && offset < word.Offset+len(word.Raw) {
fmt.Printf("%s%s\n", indentStr, word.stringWithPos(offset-word.Offset))
wrotePos = true
posInWord = true
} else {
fmt.Printf("%s%s\n", indentStr, word.String())
}
if len(word.Subs) > 0 {
dumpWords(word.Subs, indentStr+" ")
if posInWord {
// rebase the cursor for the sub-words: subtract this word's offset and
// the length of its type's opening delimiter
wmeta := wordMetaMap[word.Type]
dumpWords(word.Subs, indentStr+" ", offset-word.Offset-wmeta.PrefixLen)
} else {
dumpWords(word.Subs, indentStr+" ", -1)
}
}
}
}
// dumpCommands prints each command's type and word count to stdout, followed
// by a dump of its words. Assignment words are dumped first, with a '*' in the
// indent to tell them apart.
func dumpCommands(cmds []*CmdType, indentStr string) {
// NOTE(review): pos is not used by any line visible here
func dumpCommands(cmds []*CmdType, indentStr string, pos *CmdPos) {
for _, cmd := range cmds {
fmt.Printf("%sCMD: %s [%d]\n", indentStr, cmd.Type, len(cmd.Words))
dumpWords(cmd.Words, indentStr+" ")
dumpWords(cmd.AssignmentWords, indentStr+" *", -1)
dumpWords(cmd.Words, indentStr+" ", -1)
}
}
// FullRawString returns the raw runes of w. For a group word the result is the
// concatenation of the FullRawString of every sub-word, in order; for any
// other word it is w.Raw itself (not a copy).
func (w *WordType) FullRawString() []rune {
	if w.Type != WordTypeGroup {
		return w.Raw
	}
	var joined []rune
	for idx := range w.Subs {
		joined = append(joined, w.Subs[idx].FullRawString()...)
	}
	return joined
}
// wordsToStr concatenates, for each word in order, its Prefix followed by its
// raw text. testParse compares the result against the tokenizer input.
func wordsToStr(words []*WordType) string {
var buf bytes.Buffer
for _, word := range words {
if len(word.Prefix) > 0 {
buf.WriteString(string(word.Prefix))
}
buf.WriteString(string(word.FullRawString()))
buf.WriteString(string(word.Raw))
}
return buf.String()
}
@ -442,6 +472,132 @@ func identifyReservedWords(words []*WordType) {
}
}
// CmdPos describes a position inside a list of parsed commands.
// NOTE(review): nothing visible here populates it yet (FindCmdPos is still
// commented out), so the field notes below are assumptions to confirm.
type CmdPos struct {
CmdPos int // presumably the index of the command containing the position
CmdOffset int // presumably the offset of that command within the input
CurWord *WordType // nil if between words
CurWordOffset int // presumably the offset of CurWord
CmdWordPos int // presumably the index of the word within the command
OffsetInWord int // if BetweenWords is set, this offset can be negative (position is inside of prefix)
BetweenWords bool // presumably true when the position is not inside any word's raw text
}
// func FindCmdPos(cmds []*CmdType, offset int) CmdPos {
// if len(words) == 0 {
// return WordsPos{[]int{0}, 0, true}
// }
// pos := 0
// for idx, word := range words {
// if offset <= word.Offset+len(word.Raw) {
// if offset <= word.Offset {
// // in the prefix, so we are between-words with a possibly negative offset
// return WordPos{WordPos: idx, OffsetInWord: offset - word.Offset, BetweenWords: true}
// }
// if offset == pos+fullWordLen {
// return WordPos{WordPos: idx + 1, OffsetInWord: 0, BetweenWords: true}
// }
// return WordPos{WordPos: idx, OffsetInWord: offset - word.Offset, BetweenWords: false}
// }
// pos += fullWordLen
// }
// return WordPos{WordPos: []int{len(words)}, OffsetInWord: 0, BetweenWords: true}
// }
// ResetWordOffsets recomputes the Offset of every word in words, assuming the
// list starts at position 0. Each word's Offset is the position just after its
// Prefix, and the next word begins right after the previous word's Raw.
// Sub-words are renumbered recursively, again starting from 0.
func ResetWordOffsets(words []*WordType) {
	cursor := 0
	for idx := range words {
		cur := words[idx]
		cur.Offset = cursor + len(cur.Prefix)
		if len(cur.Subs) != 0 {
			ResetWordOffsets(cur.Subs)
		}
		cursor = cur.Offset + len(cur.Raw)
	}
}
// CommandsToWords flattens cmds back into a single word list, in command
// order. Within each command the assignment words come first, followed by the
// regular words. ParseCommands only records assignment words before a
// command's first regular word, so this reproduces the original word order.
func CommandsToWords(cmds []*CmdType) []*WordType {
	var rtn []*WordType
	for _, cmd := range cmds {
		// assignment words are stored separately from Words; leaving them out
		// would silently drop leading `x=1` style words from the result
		rtn = append(rtn, cmd.AssignmentWords...)
		rtn = append(rtn, cmd.Words...)
	}
	return rtn
}
// stripPrefix removes and returns the Prefix of the command's first word. The
// first assignment word is used if there is one, otherwise the first regular
// word. It returns nil when the command has no words at all.
func (c *CmdType) stripPrefix() []rune {
	var first *WordType
	switch {
	case len(c.AssignmentWords) > 0:
		first = c.AssignmentWords[0]
	case len(c.Words) > 0:
		first = c.Words[0]
	default:
		return nil
	}
	stripped := first.Prefix
	first.Prefix = nil
	return stripped
}
// isEmpty reports whether the command has neither assignment words nor
// regular words.
func (c *CmdType) isEmpty() bool {
	total := len(c.AssignmentWords) + len(c.Words)
	return total == 0
}
// lastWord returns the final word of the command: the last regular word if
// there is one, otherwise the last assignment word, otherwise nil.
func (c *CmdType) lastWord() *WordType {
	for _, list := range [][]*WordType{c.Words, c.AssignmentWords} {
		if n := len(list); n > 0 {
			return list[n-1]
		}
	}
	return nil
}
// endOffset returns the position just past the raw text of the command's last
// word, or 0 when the command has no words.
func (c *CmdType) endOffset() int {
	if tail := c.lastWord(); tail != nil {
		return tail.Offset + len(tail.Raw)
	}
	return 0
}
// indexInRunes returns the index of the first occurrence of ch in arr, or -1
// when ch does not occur.
func indexInRunes(arr []rune, ch rune) int {
	for pos := 0; pos < len(arr); pos++ {
		if arr[pos] == ch {
			return pos
		}
	}
	return -1
}
// isAssignmentWord reports whether w looks like a shell variable assignment:
// a literal or group word whose raw text contains '=' and whose text before
// the first '=' is a simple variable name.
func isAssignmentWord(w *WordType) bool {
	if w.Type != WordTypeLit && w.Type != WordTypeGroup {
		return false
	}
	eqPos := indexInRunes(w.Raw, '=')
	if eqPos < 0 {
		return false
	}
	return isSimpleVarName(w.Raw[:eqPos])
}
// cmdWhitespaceFixup moves the whitespace that precedes each command onto the
// end of the previous command, when that previous command is a non-empty
// simple command. The whitespace is removed from the next command's first
// word and attached as the Prefix of a blank literal word appended to the
// previous command's Words.
//
// simple commands steal whitespace from subsequent commands
func cmdWhitespaceFixup(cmds []*CmdType) {
	for idx := 0; idx < len(cmds)-1; idx++ {
		cmd := cmds[idx]
		if cmd.Type != CmdTypeSimple || cmd.isEmpty() {
			continue
		}
		nextPrefix := cmds[idx+1].stripPrefix()
		blankWord := &WordType{
			Type: WordTypeLit,
			QC:   cmd.lastWord().QC, // non-nil: empty commands were skipped above
			// a word's Offset is the start of its Raw, i.e. just after its
			// Prefix (see ResetWordOffsets), so the stolen prefix has to be
			// counted here
			Offset:   cmd.endOffset() + len(nextPrefix),
			Prefix:   nextPrefix,
			Complete: true,
		}
		cmd.Words = append(cmd.Words, blankWord)
	}
}
func ParseCommands(words []*WordType) []*CmdType {
identifyReservedWords(words)
state := parseCmdState{Input: words}
@ -466,8 +622,13 @@ func ParseCommands(words []*WordType) []*CmdType {
state.Cur = &CmdType{Type: CmdTypeSimple}
state.Rtn = append(state.Rtn, state.Cur)
}
state.Cur.Words = append(state.Cur.Words, word)
if len(state.Cur.Words) == 0 && isAssignmentWord(word) {
state.Cur.AssignmentWords = append(state.Cur.AssignmentWords, word)
} else {
state.Cur.Words = append(state.Cur.Words, word)
}
state.InputPos++
}
cmdWhitespaceFixup(state.Rtn)
return state.Rtn
}

View File

@ -15,14 +15,13 @@ import (
// testParse tokenizes s, dumps the resulting words to stdout for inspection,
// and fails the test if joining the words back together does not reproduce s.
func testParse(t *testing.T, s string) {
words := Tokenize(s)
fmt.Printf("%s\n", s)
dumpWords(words, " ")
fmt.Printf("\n")
fmt.Printf("parse <<\n%s\n>>\n", s)
// 8 is a fixed cursor offset, used to exercise the position marker in the dump
dumpWords(words, " ", 8)
// round-trip check: prefixes plus raw text must equal the original input
outStr := wordsToStr(words)
if outStr != s {
t.Errorf("tokenization output does not match input: %q => %q", s, outStr)
}
fmt.Printf("------\n\n")
}
func Test1(t *testing.T) {
@ -46,6 +45,7 @@ func Test1(t *testing.T) {
testParse(t, "echo `ls $x \"hello $x\" \\`ls\\`; ./foo`")
testParse(t, `echo $"hello $x $(ls)"`)
testParse(t, "echo 'hello'\nls\n")
testParse(t, "echo 'hello'abc$'\a'")
}
func lastWord(words []*WordType) *WordType {
@ -88,15 +88,17 @@ func testParseCommands(t *testing.T, str string) {
fmt.Printf("parse: %q\n", str)
words := Tokenize(str)
cmds := ParseCommands(words)
dumpCommands(cmds, " ")
dumpCommands(cmds, " ", nil)
fmt.Printf("\n")
}
// TestCmd runs testParseCommands over a set of sample command lines: simple
// commands, function definitions, pipelines and lists, compound commands,
// quoted words, and leading variable assignments.
func TestCmd(t *testing.T) {
testParseCommands(t, "ls foo")
testParseCommands(t, "function foo () { echo hello; }")
testParseCommands(t, "ls foo && ls bar; ./run $x hello | xargs foo; ")
testParseCommands(t, "if [[ 2 > 1 ]]; then echo hello\nelse echo world; echo next; done")
testParseCommands(t, "case lots of stuff; i don\\'t know how to parse; esac; ls foo")
testParseCommands(t, "(ls & ./x); for x in $vars 3; do { echo $x; ls foo; } done")
testParseCommands(t, "function foo () { echo hello; }")
testParseCommands(t, "(ls & ./x \n\n); for x in $vars 3; do { echo $x; ls foo; } done")
testParseCommands(t, `ls f"oo" "${x:"hello$y"}"`)
testParseCommands(t, `x="foo $y" z=10 ls`)
}

View File

@ -16,6 +16,13 @@ type tokenizeOutputState struct {
SavedPrefix []rune
}
// copyRunes returns a copy of rarr that shares no backing storage with it.
// An empty or nil input yields nil.
func copyRunes(rarr []rune) []rune {
	var dup []rune
	if len(rarr) > 0 {
		dup = append(dup, rarr...)
	}
	return dup
}
// does not set CurWord
func (state *tokenizeOutputState) appendStandaloneWord(word *WordType) {
state.delimitCurWord()
@ -36,7 +43,9 @@ func (state *tokenizeOutputState) appendWord(word *WordType) {
return
}
state.ensureGroupWord()
word.Offset = word.Offset - state.CurWord.Offset
state.CurWord.Subs = append(state.CurWord.Subs, word)
state.CurWord.Raw = append(state.CurWord.Raw, word.Raw...)
}
func (state *tokenizeOutputState) ensureGroupWord() {
@ -46,29 +55,34 @@ func (state *tokenizeOutputState) ensureGroupWord() {
if state.CurWord.Type == WordTypeGroup {
return
}
// moves the prefix from CurWord to the new group word
// moves the prefix from CurWord to the new group word, resets offsets
groupWord := &WordType{
Type: WordTypeGroup,
Offset: state.CurWord.Offset,
QC: state.CurWord.QC,
Raw: copyRunes(state.CurWord.Raw),
Complete: true,
Prefix: state.CurWord.Prefix,
}
state.CurWord.Prefix = nil
state.CurWord.Offset = 0
groupWord.Subs = []*WordType{state.CurWord}
state.CurWord = groupWord
}
// ungroupWord returns the words contained in a group word. A word that is not
// a group is returned unchanged as a one-element slice. For a group, its Subs
// are returned after the group's Prefix has been prepended to the first
// sub-word's Prefix.
func ungroupWord(w *WordType) []*WordType {
if w.Type != WordTypeGroup {
return []*WordType{w}
func ungroupWord(groupWord *WordType) []*WordType {
if groupWord.Type != WordTypeGroup {
return []*WordType{groupWord}
}
rtn := w.Subs
if len(w.Prefix) > 0 && len(rtn) > 0 {
newPrefix := append([]rune{}, w.Prefix...)
rtn := groupWord.Subs
if len(groupWord.Prefix) > 0 && len(rtn) > 0 {
// build the combined prefix in a fresh slice so the group's Prefix is not modified
newPrefix := append([]rune{}, groupWord.Prefix...)
newPrefix = append(newPrefix, rtn[0].Prefix...)
rtn[0].Prefix = newPrefix
}
// sub-word offsets are stored relative to the group; shift them by the group's offset
for _, word := range rtn {
word.Offset = word.Offset + groupWord.Offset
}
return rtn
}
@ -89,6 +103,7 @@ func (state *tokenizeOutputState) ensureLitCurWord(pc *parseContext) {
panic("invalid state, there can be no saved prefix")
}
litWord := pc.makeWord(WordTypeLit, 0, true)
litWord.Offset = litWord.Offset - state.CurWord.Offset
state.CurWord.Subs = append(state.CurWord.Subs, litWord)
}
}
@ -115,6 +130,7 @@ func (state *tokenizeOutputState) appendLiteral(pc *parseContext, ch rune) {
panic(fmt.Sprintf("invalid curword type (group) %q", state.CurWord.Type))
}
lastWord.Raw = append(lastWord.Raw, ch)
state.CurWord.Raw = append(state.CurWord.Raw, ch)
} else {
panic(fmt.Sprintf("invalid curword type %q", state.CurWord.Type))
}
@ -355,7 +371,7 @@ func (c *parseContext) makeWord(t string, length int, complete bool) *WordType {
rtn := &WordType{Type: t}
rtn.Offset = c.Pos
rtn.QC = c.QC
rtn.Raw = c.Input[c.Pos : c.Pos+length]
rtn.Raw = copyRunes(c.Input[c.Pos : c.Pos+length])
rtn.Complete = complete
c.Pos += length
return rtn
@ -565,6 +581,19 @@ func (c *parseContext) parseSimpleVarName(offset int) int {
}
}
// isSimpleVarName reports whether rstr is a non-empty identifier made only of
// ASCII letters, underscores and ASCII digits, where the first character is
// not a digit.
func isSimpleVarName(rstr []rune) bool {
	if len(rstr) == 0 {
		return false
	}
	for idx, ch := range rstr {
		switch {
		case ch == '_':
		case 'a' <= ch && ch <= 'z':
		case 'A' <= ch && ch <= 'Z':
		case idx > 0 && '0' <= ch && ch <= '9':
			// digits are allowed anywhere except the first position
		default:
			return false
		}
	}
	return true
}
func Tokenize(cmd string) []*WordType {
c := &parseContext{Input: []rune(cmd)}
rtn, _ := c.tokenizeRaw()

View File

@ -118,3 +118,26 @@ func ContainsStr(strs []string, test string) bool {
}
return false
}
// StrWithPos pairs a string with a cursor position inside it. Its String
// method renders the string with a "[*]" marker at that position.
type StrWithPos struct {
Str string // the text
Pos int // cursor position within Str; values below 0 or past the end render the marker outside the text
}
// String renders sp.Str with a "[*]" cursor marker placed at sp.Pos.
func (sp StrWithPos) String() string {
	text, cursor := sp.Str, sp.Pos
	return strWithCursor(text, cursor)
}
// strWithCursor returns str with the cursor marker "[*]" inserted at position
// pos, where pos counts characters (runes), not bytes.
//
//   - pos < 0:                the marker goes before the text as "[*]_"
//   - pos == number of runes: the marker is appended as "[*]"
//   - pos >  number of runes: the marker is appended as "_[*]"
//
// The extra underscore signals that the cursor lies outside the text.
func strWithCursor(str string, pos int) string {
	if pos < 0 {
		return "[*]_" + str
	}
	// Walk the string rune by rune to find the byte index of the pos-th rune.
	// Slicing by pos directly would cut multi-byte characters in half.
	runeCount := 0
	for byteIdx := range str {
		if runeCount == pos {
			return str[:byteIdx] + "[*]" + str[byteIdx:]
		}
		runeCount++
	}
	if pos == runeCount {
		return str + "[*]"
	}
	return str + "_[*]"
}