waveterm/waveshell/pkg/statediff/linediff.go
Mike Sawka 422338c04b
zsh support (#227)
adds zsh support to waveterm.  big change, lots going on here.  lots of other improvements and bug fixes added while debugging and building out the feature.

Commits:

* refactor shexec parser.go into new package shellenv.  separate out bash specific parsing from generic functions

* checkpoint

* work on refactoring shexec.  created two new packages shellapi (for bash/zsh specific stuff), and shellutil (shared between shellapi and shexec)

* more refactoring

* create shellapi interface to abstract bash specific functionality

* more refactoring, move bash shell state parsing to shellapi

* move makeRcFile to shellapi.  remove all of the 'client' options CLI options from waveshell

* get shellType passed through to server/single paths for waveshell

* add a local shelltype detector

* mock out a zshapi

* move shelltype through more of the code

* get a command to run via zsh

* zsh can now switch directories.  poc, needs cleanup

* working on ShellState encoding differences between zsh/bash.  Working on parsing zsh decls.  move utilfn package into waveshell (shouldn't have been in wavesrv)

* switch to use []byte for vardecl serialization + diffs

* progress on zsh environment.  still have issues reconciling init environment with trap environment

* fix typeset argument parsing

* parse promptvars, more zsh specific ignores

* fix bug with promptvar not getting set (wrong check in FeState func)

* add sdk (issue #188) to list of rtnstate commands

* more zsh compatibility -- working with a larger ohmyzsh environment.  ignore more variables, handle exit trap better.  unique path/fpath.  add a processtype variable to base.

* must return a value

* zsh alias parsing/restoring.  diff changes (and rtnstate changes).  introduces linediff v1.

* force zmodload of zsh/parameter

* starting work on zsh functions

* need a v1 of mapdiff as well (to handle null chars)

* pack/unpack of ints was wrong (one used int and one used uint).  turned out we only ever encoded '0' so it worked.  that also means it is safe to change unpack to unpackUInt

* reworking for binary encoding of aliases and functions (because of zsh allows any character, including nulls, in names and values)

* fixes, working on functions, issue with line endings

* zsh functions.  lots of ugliness here around dealing with line discipline and cooked stty.  new runcommand function to grab output from a non-tty fd.  note that we still need to run the actual command in a stty to get the proper output.

* write uuid tempdir, cleanup with tmprcfilename code

* hack in some simple zsh function declaration finding code for rtnstate.  create function diff for rtnstate that supports zsh

* make sure key order is constant so shell hashes are consistent

* fix problems with state diffs to support new zsh formats.  add diff/apply code to shellapi (moved from shellenv), that is now specific to zsh or bash

* add log packet and new shellstate packets

* switch to shellstate map that's also keyed by shelltype

* add shelltype to remoteinstance

* remove shell argument from waveshell

* added new shelltype statemap to remote.go (msh), deal with fallout

* move shellstate out of init packet, and move to an explicit reinit call.  try to initialize all of the active shell states

* change dont always store init state (only store on demand).  initialize shell states on demand (if not already initialized).  allow reset to change shells

* add shellpref field to remote table.  use to drive the default shell choice for new tabs

* show shelltag on cmdinput, pass through ri and remote (defaultshellstate)

* bump mshell version to v0.4

* better version validation for shellstate.  also relax compatibility requirements for diffing states (shelltype + major version need to match)

* better error handling, check shellstate compatibility during run (on waveshell server)

* add extra separator for bash shellstate processing to deal with spurious output from rc files

* special migration for v30 -- flag invalid bash shell states and show special button in UI to fix

* format

* remove zsh-decls (unused)

* remove test code

* remove debug print

* fix typo
2024-01-16 16:11:04 -08:00

308 lines
8.0 KiB
Go

// Copyright 2023, Command Line Inc.
// SPDX-License-Identifier: Apache-2.0
package statediff
import (
"bytes"
"encoding/binary"
"fmt"
"strings"
)
// Version numbers written as the first uvarint of an encoded line diff.
const (
	// LineDiffVersion_0 is the legacy layout. It is still accepted by Decode.
	LineDiffVersion_0 = 0
	// LineDiffVersion is the current layout, the one written by Encode.
	LineDiffVersion = 1
)
// SingleLineEntry is one run of consecutive output lines in a line diff.
// A LineVal of 0 means the run's lines are read from NewData; a non-zero
// LineVal is the 1-indexed position in the old data where the run starts.
type SingleLineEntry struct {
LineVal int
// Run is the number of consecutive lines covered by this entry.
Run int
}
// LineDiffType describes how to rebuild a new list of lines from an old one.
type LineDiffType struct {
// Version is the encoding version (LineDiffVersion_0 or LineDiffVersion).
Version int
SplitString string // added in version 1; separator used to split and re-join the data
// Lines is the ordered list of runs that are replayed to produce the new data.
Lines []SingleLineEntry
// NewData holds the lines that are not copied from the old data; they are
// consumed in order by the entries whose LineVal is 0.
NewData []string
}
// Clear resets the diff to an empty value: no split string, no lines, no new
// data, and Version set to the current LineDiffVersion.
func (diff *LineDiffType) Clear() {
	*diff = LineDiffType{Version: LineDiffVersion}
}
// Dump prints a human-readable listing of the diff to stdout: the version,
// then one row per line entry (output position range and LineVal), then the
// NewData strings numbered from 1.
func (diff LineDiffType) Dump() {
	fmt.Printf("DIFF: v%d\n", diff.Version)
	start := 1
	for i := range diff.Lines {
		end := start + diff.Lines[i].Run
		fmt.Printf(" %d-%d: %d\n", start, end, diff.Lines[i].LineVal)
		start = end
	}
	for i := range diff.NewData {
		fmt.Printf(" n%d: %s\n", i+1, diff.NewData[i])
	}
}
// applyDiff rebuilds the new list of lines from oldData by replaying
// diff.Lines in order. Each entry produces Run consecutive output lines:
//   - LineVal == 0: the lines are consumed, in order, from diff.NewData
//   - LineVal != 0: the lines are copied from oldData, starting at the
//     1-indexed position LineVal
//
// An error is returned when NewData runs out or an old-data index falls
// outside oldData.
func (diff LineDiffType) applyDiff(oldData []string) ([]string, error) {
	out := make([]string, 0, len(diff.Lines))
	nextNew := 0
	for _, entry := range diff.Lines {
		if entry.LineVal != 0 {
			first := entry.LineVal - 1 // LineVal is 1-indexed
			for offset := 0; offset < entry.Run; offset++ {
				pos := first + offset
				if pos < 0 || pos >= len(oldData) {
					return nil, fmt.Errorf("diff index out of bounds %d old-data-len:%d", pos, len(oldData))
				}
				out = append(out, oldData[pos])
			}
			continue
		}
		for remaining := entry.Run; remaining > 0; remaining-- {
			if nextNew >= len(diff.NewData) {
				return nil, fmt.Errorf("not enough newdata for diff")
			}
			out = append(out, diff.NewData[nextNew])
			nextNew++
		}
	}
	return out, nil
}
// putUVarint appends ival to buf as an unsigned varint. viBuf is scratch space
// for the encoding; a slice of binary.MaxVarintLen64 bytes is always enough.
func putUVarint(buf *bytes.Buffer, viBuf []byte, ival int) {
	n := binary.PutUvarint(viBuf, uint64(ival))
	encoded := viBuf[:n]
	buf.Write(encoded)
}
// putEncodedString writes str to buf in length-prefixed form: a uvarint
// holding the byte length of str, followed by the raw bytes of str.
// viBuf is scratch space for the varint encoding.
func putEncodedString(buf *bytes.Buffer, viBuf []byte, str string) {
	prefixLen := binary.PutUvarint(viBuf, uint64(len(str)))
	prefix := viBuf[:prefixLen]
	buf.Write(prefix)
	buf.WriteString(str)
}
// readEncodedString reads one length-prefixed string from buf: a uvarint byte
// length followed by that many bytes of data.
//
// It returns an error if the length cannot be read, or if the declared length
// is larger than the number of bytes left in buf. The length is validated as a
// uint64 before it is converted to int, so a corrupt length can neither wrap
// to a negative int (which would make bytes.Buffer.Next panic) nor cause a
// partial read.
func readEncodedString(buf *bytes.Buffer) (string, error) {
	strLen64, err := binary.ReadUvarint(buf)
	if err != nil {
		return "", fmt.Errorf("invalid diff, cannot read string length: %w", err)
	}
	if remaining := buf.Len(); strLen64 > uint64(remaining) {
		return "", fmt.Errorf("invalid diff, partial read, expected %d, got %d", strLen64, remaining)
	}
	if strLen64 == 0 {
		return "", nil
	}
	// Safe conversion: strLen64 <= buf.Len(), which is an int.
	return string(buf.Next(int(strLen64))), nil
}
// Encode_v0 serializes the diff in the legacy version-0 layout. Version 0 is
// no longer used, but the encoder is kept as a reference for decoding.
//
// Layout (all integers are uvarints):
//
//	[version] [len(Lines)] ([LineVal] [Run]) x len(Lines) [NewData joined by SplitString]
func (diff LineDiffType) Encode_v0() []byte {
	var out bytes.Buffer
	scratch := make([]byte, binary.MaxVarintLen64)
	putUVarint(&out, scratch, LineDiffVersion_0)
	putUVarint(&out, scratch, len(diff.Lines))
	for i := range diff.Lines {
		putUVarint(&out, scratch, diff.Lines[i].LineVal)
		putUVarint(&out, scratch, diff.Lines[i].Run)
	}
	// The new data fills the rest of the output, separated (not terminated)
	// by SplitString.
	for i, chunk := range diff.NewData {
		if i > 0 {
			out.WriteString(diff.SplitString)
		}
		out.WriteString(chunk)
	}
	return out.Bytes()
}
// Encode serializes the diff in the current (version 1) layout. Version 1
// adds the split-string and stores every string length-prefixed.
//
// Layout (integers are uvarints):
//
//	[version] [SplitString] [len(Lines)] ([LineVal] [Run]) x len(Lines) [NewData string array]
func (diff LineDiffType) Encode() []byte {
	var out bytes.Buffer
	scratch := make([]byte, binary.MaxVarintLen64)
	putUVarint(&out, scratch, LineDiffVersion)
	putEncodedString(&out, scratch, diff.SplitString)
	putUVarint(&out, scratch, len(diff.Lines))
	for i := range diff.Lines {
		putUVarint(&out, scratch, diff.Lines[i].LineVal)
		putUVarint(&out, scratch, diff.Lines[i].Run)
	}
	writeEncodedStringArray(&out, scratch, diff.NewData)
	return out.Bytes()
}
// readEncodedLines reads the line-entry table from buf into diff.Lines: a
// uvarint entry count followed by that many (LineVal, Run) uvarint pairs.
//
// The declared count is checked against the bytes remaining in buf before any
// allocation. Every entry occupies at least two bytes, so a larger count can
// only come from corrupt input; rejecting it up front avoids a makeslice panic
// or a huge allocation. diff.Lines is only assigned when the whole table has
// been read successfully.
func (diff *LineDiffType) readEncodedLines(buf *bytes.Buffer) error {
	linesLen64, err := binary.ReadUvarint(buf)
	if err != nil {
		return fmt.Errorf("invalid diff, cannot read lines length: %w", err)
	}
	if remaining := buf.Len(); linesLen64 > uint64(remaining)/2 {
		return fmt.Errorf("invalid diff, lines length %d exceeds available data (%d bytes)", linesLen64, remaining)
	}
	lines := make([]SingleLineEntry, int(linesLen64))
	for idx := range lines {
		lineVal64, err := binary.ReadUvarint(buf)
		if err != nil {
			return fmt.Errorf("invalid diff, cannot read line %d: %w", idx, err)
		}
		lineRun64, err := binary.ReadUvarint(buf)
		if err != nil {
			return fmt.Errorf("invalid diff, cannot read line-run %d: %w", idx, err)
		}
		lines[idx] = SingleLineEntry{LineVal: int(lineVal64), Run: int(lineRun64)}
	}
	diff.Lines = lines
	return nil
}
// writeEncodedStringArray writes strArr to buf as a uvarint element count
// followed by each element written with putEncodedString. viBuf is scratch
// space for the varint encoding.
func writeEncodedStringArray(buf *bytes.Buffer, viBuf []byte, strArr []string) {
	count := len(strArr)
	putUVarint(buf, viBuf, count)
	for i := 0; i < count; i++ {
		putEncodedString(buf, viBuf, strArr[i])
	}
}
// readEncodedStringArray reads a string array from buf: a uvarint element
// count followed by that many strings, each read with readEncodedString.
//
// The declared count is checked against the bytes remaining in buf before any
// allocation. Every element occupies at least one byte (its length prefix), so
// a larger count can only come from corrupt input; rejecting it up front
// avoids a makeslice panic or a huge allocation.
func readEncodedStringArray(buf *bytes.Buffer) ([]string, error) {
	strArrLen64, err := binary.ReadUvarint(buf)
	if err != nil {
		return nil, fmt.Errorf("invalid diff, cannot read string-array length: %w", err)
	}
	if remaining := buf.Len(); strArrLen64 > uint64(remaining) {
		return nil, fmt.Errorf("invalid diff, string-array length %d exceeds available data (%d bytes)", strArrLen64, remaining)
	}
	rtn := make([]string, int(strArrLen64))
	for idx := range rtn {
		str, err := readEncodedString(buf)
		if err != nil {
			return nil, err
		}
		rtn[idx] = str
	}
	return rtn, nil
}
// Decode parses an encoded diff into the receiver, clearing any previous
// contents first. Version 0 input is handed to Decode_v0. Version 1 input is
// parsed here as: split-string, line entries, then the new-data string array.
// Any other version is reported as an error.
func (diff *LineDiffType) Decode(diffBytes []byte) error {
	diff.Clear()
	r := bytes.NewBuffer(diffBytes)
	version, err := binary.ReadUvarint(r)
	if err != nil {
		return fmt.Errorf("invalid diff, cannot read version: %v", err)
	}
	switch version {
	case LineDiffVersion_0:
		// The legacy decoder re-reads the version, so it gets the full input.
		return diff.Decode_v0(diffBytes)
	case LineDiffVersion:
		diff.Version = int(version)
	default:
		return fmt.Errorf("invalid diff, bad version: %d", version)
	}
	sep, err := readEncodedString(r)
	if err != nil {
		return fmt.Errorf("invalid diff, cannot read split-string: %v", err)
	}
	diff.SplitString = sep
	if err = diff.readEncodedLines(r); err != nil {
		return err
	}
	diff.NewData, err = readEncodedStringArray(r)
	return err
}
// Decode_v0 parses a legacy version-0 diff into the receiver.
//
// Version 0 has no encoded split-string: the separator is always "\n", and
// everything after the line-entry table is the new data joined by "\n".
//
// The receiver is cleared first, so calling Decode_v0 directly on a reused
// value cannot leave NewData from an earlier decode behind when the input
// carries no trailing data.
func (diff *LineDiffType) Decode_v0(diffBytes []byte) error {
	diff.Clear()
	r := bytes.NewBuffer(diffBytes)
	version, err := binary.ReadUvarint(r)
	if err != nil {
		return fmt.Errorf("invalid diff, cannot read version: %w", err)
	}
	if version != LineDiffVersion_0 {
		return fmt.Errorf("invalid diff, bad version: %d", version)
	}
	diff.Version = int(version)
	diff.SplitString = "\n" // added when we added version 1
	if err := diff.readEncodedLines(r); err != nil {
		return err
	}
	// An empty remainder means "no new data", not a single empty line.
	if r.Len() > 0 {
		diff.NewData = strings.Split(r.String(), "\n")
	}
	return nil
}
// makeLineDiff computes a diff that turns oldData into newData.
//
// Each new line is matched against the first occurrence of the same string in
// oldData. Consecutive new lines that continue the current run (the next old
// line in sequence, or another unmatched line) extend that run; otherwise a
// new entry is started. Unmatched lines are recorded in NewData under entries
// whose LineVal is 0.
func makeLineDiff(oldData []string, newData []string, splitString string) LineDiffType {
	rtn := LineDiffType{Version: LineDiffVersion, SplitString: splitString}

	// Map each distinct old line to its first 1-indexed position. Walking
	// backwards lets earlier occurrences overwrite later ones.
	firstIndex := make(map[string]int)
	for i := len(oldData) - 1; i >= 0; i-- {
		firstIndex[oldData[i]] = i + 1
	}

	entries := make([]SingleLineEntry, 0)
	for _, line := range newData {
		oldIdx, found := firstIndex[line]
		if n := len(entries); n > 0 {
			last := &entries[n-1]
			if last.LineVal != 0 {
				// 0-indexed position of the old line that would extend the run
				next := last.LineVal + last.Run - 1
				if next < len(oldData) && oldData[next] == line {
					last.Run++
					continue
				}
			} else if !found {
				last.Run++
				rtn.NewData = append(rtn.NewData, line)
				continue
			}
		}
		if !found {
			rtn.NewData = append(rtn.NewData, line)
		}
		// oldIdx is 0 when the line was not found, which is exactly the
		// "read from NewData" marker.
		entries = append(entries, SingleLineEntry{LineVal: oldIdx, Run: 1})
	}
	rtn.Lines = entries
	return rtn
}
// MakeLineDiff returns an encoded diff that transforms str1 into str2, with
// both strings split on splitString. It returns nil when the two strings are
// equal.
func MakeLineDiff(str1 string, str2 string, splitString string) []byte {
	if str1 == str2 {
		return nil
	}
	oldLines := strings.Split(str1, splitString)
	newLines := strings.Split(str2, splitString)
	return makeLineDiff(oldLines, newLines, splitString).Encode()
}
// ApplyLineDiff applies an encoded diff to str1 and returns the resulting
// string. An empty diff leaves str1 unchanged. The separator used to split
// str1 and to re-join the result comes from the decoded diff.
func ApplyLineDiff(str1 string, diffBytes []byte) (string, error) {
	if len(diffBytes) == 0 {
		return str1, nil
	}
	var diff LineDiffType
	if err := diff.Decode(diffBytes); err != nil {
		return "", err
	}
	newLines, err := diff.applyDiff(strings.Split(str1, diff.SplitString))
	if err != nil {
		return "", err
	}
	return strings.Join(newLines, diff.SplitString), nil
}