From ec2de4609ba01bc8311af80302381c0d3695e2f3 Mon Sep 17 00:00:00 2001 From: sawka Date: Wed, 16 Nov 2022 11:13:15 -0800 Subject: [PATCH] add QC to word, refactor makeWord, fix special variables --- pkg/shparse/shparse.go | 181 +++++----------------------------------- pkg/shparse/tokenize.go | 20 ++--- 2 files changed, 32 insertions(+), 169 deletions(-) diff --git a/pkg/shparse/shparse.go b/pkg/shparse/shparse.go index acb219983..192f6b88c 100644 --- a/pkg/shparse/shparse.go +++ b/pkg/shparse/shparse.go @@ -123,6 +123,7 @@ type parseContext struct { type wordType struct { Type string Offset int + QC quoteContext Raw []rune Complete bool Val string // only for Op and Key (does *not* store string values of quoted expressions or expansions) @@ -166,12 +167,12 @@ func (c *parseContext) match3(ch rune, ch2 rune, ch3 rune) bool { return c.at(0) == ch && c.at(1) == ch2 && c.at(2) == ch3 } -func (c *parseContext) newOp(length int) *wordType { - rtn := &wordType{Type: WordTypeOp} +func (c *parseContext) makeWord(t string, length int, complete bool) *wordType { + rtn := &wordType{Type: t} rtn.Offset = c.Pos + rtn.QC = c.QC rtn.Raw = c.Input[c.Pos : c.Pos+length] - rtn.Val = string(rtn.Raw) - rtn.Complete = true + rtn.Complete = complete c.Pos += length return rtn } @@ -251,13 +252,7 @@ func (c *parseContext) parseStrSQ() *wordType { return nil } newOffset, complete := c.skipToChar(1, '\'', false) - w := &wordType{ - Type: WordTypeDQ, - Offset: c.Pos, - Raw: c.Input[c.Pos : c.Pos+newOffset], - Complete: complete, - } - c.Pos = c.Pos + newOffset + w := c.makeWord(WordTypeSQ, newOffset, complete) return w } @@ -268,14 +263,8 @@ func (c *parseContext) parseStrDQ() *wordType { newContext := c.clone(c.Pos+1, WordTypeDQ) subWords, eofExit := newContext.tokenizeDQ() newOffset := newContext.Pos + 1 - w := &wordType{ - Type: WordTypeDQ, - Offset: c.Pos, - Raw: c.Input[c.Pos : c.Pos+newOffset], - Complete: !eofExit, - Subs: subWords, - } - c.Pos = c.Pos + newOffset + w := c.makeWord(WordTypeDQ, newOffset, !eofExit) + w.Subs = subWords return w } @@ -284,13 +273,7 @@ func (c *parseContext) parseStrBQ() *wordType { return nil } newOffset, complete := c.skipToChar(1, '`', true) - w := &wordType{ - Type: WordTypeBQ, - Offset: c.Pos, - Raw: c.Input[c.Pos : c.Pos+newOffset], - Complete: complete, - } - c.Pos = c.Pos + newOffset + w := c.makeWord(WordTypeBQ, newOffset, complete) return w } @@ -299,13 +282,7 @@ func (c *parseContext) parseStrANSI() *wordType { return nil } newOffset, complete := c.skipToChar(2, '\'', true) - w := &wordType{ - Type: WordTypeDSQ, - Offset: c.Pos, - Raw: c.Input[c.Pos : c.Pos+newOffset], - Complete: complete, - } - c.Pos = c.Pos + newOffset + w := c.makeWord(WordTypeDSQ, newOffset, complete) return w } @@ -314,13 +291,7 @@ func (c *parseContext) parseStrDDQ() *wordType { return nil } newOffset, complete := c.skipToChar(2, '"', true) - w := &wordType{ - Type: WordTypeDDQ, - Offset: c.Pos, - Raw: c.Input[c.Pos : c.Pos+newOffset], - Complete: complete, - } - c.Pos = c.Pos + newOffset + w := c.makeWord(WordTypeDDQ, newOffset, complete) return w } @@ -332,8 +303,7 @@ func (c *parseContext) parseArith(mustComplete bool) *wordType { if mustComplete && !complete { return nil } - w := &wordType{Type: WordTypeArith, Offset: c.Pos, Raw: c.Input[c.Pos : c.Pos+newOffset], Complete: complete} - c.Pos = c.Pos + newOffset + w := c.makeWord(WordTypeArith, newOffset, complete) return w } @@ -343,8 +313,7 @@ func (c *parseContext) parseExpansion() *wordType { } if c.match3('$', '(', '(') { newOffset, complete := c.skipToChar2(3, ')', ')', false) - w := &wordType{Type: WordTypeDPP, Offset: c.Pos, Raw: c.Input[c.Pos : c.Pos+newOffset], Complete: complete} - c.Pos = c.Pos + newOffset + w := c.makeWord(WordTypeDPP, newOffset, complete) return w } if c.match2('$', '(') { @@ -352,17 +321,14 @@ func (c *parseContext) parseExpansion() *wordType { newContext := c.clone(c.Pos+2, WordTypeDP) subWords, eofExit := newContext.tokenizeRaw() newOffset := newContext.Pos + 2 - // newOffset, complete := c.skipToChar(2, ')', false) - w := &wordType{Type: WordTypeDP, Offset: c.Pos, Raw: c.Input[c.Pos : c.Pos+newOffset], Complete: !eofExit} + w := c.makeWord(WordTypeDP, newOffset, !eofExit) w.Subs = subWords - c.Pos = c.Pos + newOffset return w } if c.match2('$', '[') { // deprecated arith expansion newOffset, complete := c.skipToChar(2, ']', false) - w := &wordType{Type: WordTypeDB, Offset: c.Pos, Raw: c.Input[c.Pos : c.Pos+newOffset], Complete: complete} - c.Pos = c.Pos + newOffset + w := c.makeWord(WordTypeDB, newOffset, complete) return w } if c.match2('$', '{') { @@ -370,8 +336,7 @@ func (c *parseContext) parseExpansion() *wordType { newContext := c.clone(c.Pos+2, WordTypeVarBrace) _, eofExit := newContext.tokenizeVarBrace() newOffset := newContext.Pos + 2 - w := &wordType{Type: WordTypeVarBrace, Offset: c.Pos, Raw: c.Input[c.Pos : c.Pos+newOffset], Complete: !eofExit} - c.Pos = c.Pos + newOffset + w := c.makeWord(WordTypeVarBrace, newOffset, !eofExit) return w } ch2 := c.at(1) @@ -382,24 +347,15 @@ func (c *parseContext) parseExpansion() *wordType { newOffset := c.parseSimpleVarName(1) if newOffset > 1 { // simple variable name - rtn := &wordType{ - Type: WordTypeSimpleVar, - Offset: c.Pos, - Raw: c.Input[c.Pos : c.Pos+newOffset], - Complete: true, - } - c.Pos = c.Pos + newOffset - return rtn + w := c.makeWord(WordTypeSimpleVar, newOffset, true) + return w } - // single character variable name, e.g. $@, $_, $1, etc. - rtn := &wordType{ - Type: WordTypeSimpleVar, - Offset: c.Pos, - Raw: c.Input[c.Pos : c.Pos+2], - Complete: true, + if ch2 == '*' || ch2 == '@' || ch2 == '#' || ch2 == '?' || ch2 == '-' || ch2 == '$' || ch2 == '!' || (ch2 >= '0' && ch2 <= '9') { + // single character variable name, e.g. $@, $_, $1, etc. + w := c.makeWord(WordTypeSimpleVar, 2, true) + return w } - c.Pos += 2 - return rtn + return nil } func (c *parseContext) parseShellTest() *wordType { @@ -433,97 +389,6 @@ func (c *parseContext) parseSimpleVarName(offset int) int { } } -func parseInput(inputStr string) []*wordType { - c := &parseContext{Input: []rune(inputStr)} - var rtn []*wordType - var litWord *wordType - for { - var quoteWord *wordType - ch := c.cur() - if ch == 0 { - break - } - switch ch { - case '\'': - quoteWord = c.parseStrSQ() - - case '"': - quoteWord = c.parseStrDQ() - - case '$': - quoteWord = c.parseStrANSI() - if quoteWord == nil { - quoteWord = c.parseStrDDQ() - } - if quoteWord == nil { - quoteWord = c.parseExpansion() - } - } - if quoteWord != nil { - if litWord != nil { - rtn = append(rtn, litWord) - litWord = nil - } - rtn = append(rtn, quoteWord) - continue - } - if litWord == nil { - litWord = &wordType{Type: WordTypeLit, Offset: c.Pos, Complete: true} - } - if ch == '\\' && c.at(1) != 0 { - litWord.Raw = append(litWord.Raw, ch, c.at(1)) - c.Pos += 2 - continue - } - litWord.Raw = append(litWord.Raw, ch) - c.Pos++ - } - if litWord != nil { - rtn = append(rtn, litWord) - } - - // now we want to expand ops - rtn = expandAllOps(rtn) - - return rtn -} - -func expandOps(word *wordType) []*wordType { - if word.Type != WordTypeLit { - return nil - } - var lastBackSlash bool - var rtn []*wordType - for _, ch := range word.Raw { - if ch == 0 { - break - } - if ch == '\\' { - lastBackSlash = true - continue - } - if lastBackSlash { - lastBackSlash = false - continue - } - } - rtn = append(rtn, word) - return rtn -} - -func expandAllOps(words []*wordType) []*wordType { - var rtn []*wordType - for _, word := range words { - exWords := expandOps(word) - if len(exWords) == 0 { - rtn = append(rtn, word) - } else { - rtn = append(rtn, exWords...) - } - } - return rtn -} - func makeSpaceStr(slen int) string { if slen == 0 { return "" diff --git a/pkg/shparse/tokenize.go b/pkg/shparse/tokenize.go index d2b45b683..67f45688c 100644 --- a/pkg/shparse/tokenize.go +++ b/pkg/shparse/tokenize.go @@ -51,6 +51,7 @@ func (state *tokenizeOutputState) ensureGroupWord() { groupWord := &wordType{ Type: WordTypeGroup, Offset: state.CurWord.Offset, + QC: state.CurWord.QC, Complete: true, Prefix: state.CurWord.Prefix, } @@ -74,7 +75,8 @@ func ungroupWord(w *wordType) []*wordType { func (state *tokenizeOutputState) ensureLitCurWord(pc *parseContext) { if state.CurWord == nil { - state.CurWord = &wordType{Type: WordTypeLit, Offset: pc.Pos, Complete: true, Prefix: state.SavedPrefix} + state.CurWord = pc.makeWord(WordTypeLit, 0, true) + state.CurWord.Prefix = state.SavedPrefix state.SavedPrefix = nil return } @@ -85,12 +87,9 @@ func (state *tokenizeOutputState) ensureLitCurWord(pc *parseContext) { lastWord := state.CurWord.Subs[len(state.CurWord.Subs)-1] if lastWord.Type != WordTypeLit { if len(state.SavedPrefix) > 0 { - dumpWords(state.Rtn, "**") - dumpWords([]*wordType{state.CurWord}, ">>") - fmt.Printf("sp: %q\n", state.SavedPrefix) panic("invalid state, there can be no saved prefix") } - litWord := &wordType{Type: WordTypeLit, Offset: pc.Pos, Complete: true} + litWord := pc.makeWord(WordTypeLit, 0, true) state.CurWord.Subs = append(state.CurWord.Subs, litWord) } } @@ -240,10 +239,8 @@ func (c *parseContext) tokenizeRaw() ([]*wordType, bool) { // fmt.Printf("ch %d %q\n", c.Pos, string([]rune{ch})) foundOp, newOffset := c.parseOp(0) if foundOp { - rawOp := c.Input[c.Pos : c.Pos+newOffset] - opVal := string(rawOp) - opWord := &wordType{Type: WordTypeOp, Offset: c.Pos, Raw: rawOp, Val: opVal, Complete: true} - if opWord.Val == "(" { + opVal := string(c.Input[c.Pos : c.Pos+newOffset]) + if opVal == "(" { arithWord := c.parseArith(true) if arithWord != nil { state.appendStandaloneWord(arithWord) @@ -252,10 +249,11 @@ func (c *parseContext) tokenizeRaw() ([]*wordType, bool) { parenLevel++ } } - if opWord.Val == ")" { + if opVal == ")" { parenLevel-- } - c.Pos = c.Pos + newOffset + opWord := c.makeWord(WordTypeOp, newOffset, true) + opWord.Val = opVal state.appendStandaloneWord(opWord) continue }