waveterm/pkg/blockstore/blockstore.go


// Copyright 2024, Command Line Inc.
// SPDX-License-Identifier: Apache-2.0
package blockstore
// the blockstore package implements a write cache for block files
// it is not a read cache (reads still go to the DB, unless the item happens to be cached),
// but all writes go to the cache first, and the cache is periodically flushed to the DB
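//
// A quick usage sketch (hypothetical blockId and file name; error handling
// elided -- illustrative only, not a prescribed calling pattern):
//
//	ctx := context.Background()
//	blockId := "example-block-id"
//	opts := FileOptsType{MaxSize: 1024 * 1024, Circular: true}
//	err := GBS.MakeFile(ctx, blockId, "term-output", nil, opts)
//	if err != nil && !errors.Is(err, fs.ErrExist) {
//		// handle error
//	}
//	_ = GBS.AppendData(ctx, blockId, "term-output", []byte("hello\n"))
//	_, data, _ := GBS.ReadFile(ctx, blockId, "term-output")
//	_ = data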
import (
	"context"
	"fmt"
	"io/fs"
	"sync"
	"sync/atomic"
	"time"
)
const DefaultPartDataSize = 64 * 1024
const DefaultFlushTime = 5 * time.Second
const NoPartIdx = -1

// for unit tests
var warningCount = &atomic.Int32{}
var flushErrorCount = &atomic.Int32{}

var partDataSize int64 = DefaultPartDataSize // overridden in tests
var stopFlush = &atomic.Bool{}

var GBS *BlockStore = &BlockStore{
	Lock:  &sync.Mutex{},
	Cache: make(map[cacheKey]*CacheEntry),
}
type FileOptsType struct {
	MaxSize  int64
	Circular bool
	IJson    bool
}

type FileMeta = map[string]any
type BlockFile struct {
	// these fields are static (not updated)
	BlockId   string       `json:"blockid"`
	Name      string       `json:"name"`
	Opts      FileOptsType `json:"opts"`
	CreatedTs int64        `json:"createdts"`

	// these fields are mutable
	Size  int64    `json:"size"`
	ModTs int64    `json:"modts"`
	Meta  FileMeta `json:"meta"` // only top-level keys can be updated (lower levels are immutable)
}
// this works because lower levels are immutable
func copyMeta(meta FileMeta) FileMeta {
	newMeta := make(FileMeta)
	for k, v := range meta {
		newMeta[k] = v
	}
	return newMeta
}
func (f *BlockFile) DeepCopy() *BlockFile {
	if f == nil {
		return nil
	}
	newFile := *f
	newFile.Meta = copyMeta(f.Meta)
	return &newFile
}

func (BlockFile) UseDBMap() {}

type BlockData struct {
	BlockId string `json:"blockid"`
	Name    string `json:"name"`
	PartIdx int    `json:"partidx"`
	Data    []byte `json:"data"`
}

func (BlockData) UseDBMap() {}
// synchronous (the new file record goes straight to the DB, not through the flush cycle)
func (s *BlockStore) MakeFile(ctx context.Context, blockId string, name string, meta FileMeta, opts FileOptsType) error {
	if opts.MaxSize < 0 {
		return fmt.Errorf("max size must be non-negative")
	}
	if opts.Circular && opts.MaxSize <= 0 {
		return fmt.Errorf("circular file must have a max size")
	}
	if opts.Circular && opts.IJson {
		return fmt.Errorf("circular file cannot be ijson")
	}
	if opts.Circular {
		if opts.MaxSize%partDataSize != 0 {
			// round MaxSize up to a whole number of parts
			opts.MaxSize = (opts.MaxSize/partDataSize + 1) * partDataSize
		}
	}
	return withLock(s, blockId, name, func(entry *CacheEntry) error {
		if entry.File != nil {
			return fs.ErrExist
		}
		now := time.Now().UnixMilli()
		file := &BlockFile{
			BlockId:   blockId,
			Name:      name,
			Size:      0,
			CreatedTs: now,
			ModTs:     now,
			Opts:      opts,
			Meta:      meta,
		}
		return dbInsertFile(ctx, file)
	})
}
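
// Example: with the default partDataSize of 64 KiB (65536 bytes), a circular
// file requested with MaxSize = 100000 is rounded up to 131072 (two full parts).
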
func (s *BlockStore) DeleteFile(ctx context.Context, blockId string, name string) error {
	return withLock(s, blockId, name, func(entry *CacheEntry) error {
		err := dbDeleteFile(ctx, blockId, name)
		if err != nil {
			return fmt.Errorf("error deleting file: %v", err)
		}
		entry.clear()
		return nil
	})
}
func (s *BlockStore) DeleteBlock(ctx context.Context, blockId string) error {
	fileNames, err := dbGetBlockFileNames(ctx, blockId)
	if err != nil {
		return fmt.Errorf("error getting block files: %v", err)
	}
	for _, name := range fileNames {
		err := s.DeleteFile(ctx, blockId, name)
		if err != nil {
			return fmt.Errorf("error deleting file %q: %v", name, err)
		}
	}
	return nil
}
// if file doesn't exist, returns fs.ErrNotExist
func (s *BlockStore) Stat(ctx context.Context, blockId string, name string) (*BlockFile, error) {
	return withLockRtn(s, blockId, name, func(entry *CacheEntry) (*BlockFile, error) {
		file, err := entry.loadFileForRead(ctx)
		if err != nil {
			return nil, fmt.Errorf("error getting file: %v", err)
		}
		return file.DeepCopy(), nil
	})
}
func (s *BlockStore) ListFiles(ctx context.Context, blockId string) ([]*BlockFile, error) {
	files, err := dbGetBlockFiles(ctx, blockId)
	if err != nil {
		return nil, fmt.Errorf("error getting block files: %v", err)
	}
	// overlay cached versions, which may be newer than what the DB returned
	for idx, file := range files {
		withLock(s, file.BlockId, file.Name, func(entry *CacheEntry) error {
			if entry.File != nil {
				files[idx] = entry.File.DeepCopy()
			}
			return nil
		})
	}
	return files, nil
}
func (s *BlockStore) WriteMeta(ctx context.Context, blockId string, name string, meta FileMeta, merge bool) error {
	return withLock(s, blockId, name, func(entry *CacheEntry) error {
		err := entry.loadFileIntoCache(ctx)
		if err != nil {
			return err
		}
		if merge {
			// merge semantics: a nil value deletes the key, anything else overwrites it
			for k, v := range meta {
				if v == nil {
					delete(entry.File.Meta, k)
					continue
				}
				entry.File.Meta[k] = v
			}
		} else {
			entry.File.Meta = meta
		}
		entry.File.ModTs = time.Now().UnixMilli()
		return nil
	})
}
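
// Example of merge semantics: merging {"color": "red", "size": nil} into an
// existing meta of {"size": 10, "name": "x"} yields {"color": "red", "name": "x"}.
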
func (s *BlockStore) WriteFile(ctx context.Context, blockId string, name string, data []byte) error {
	return withLock(s, blockId, name, func(entry *CacheEntry) error {
		err := entry.loadFileIntoCache(ctx)
		if err != nil {
			return err
		}
		entry.writeAt(0, data, true)
		// since WriteFile can *truncate* the file, we need to flush the file to the DB immediately
		return entry.flushToDB(ctx, true)
	})
}
func (s *BlockStore) WriteAt(ctx context.Context, blockId string, name string, offset int64, data []byte) error {
	if offset < 0 {
		return fmt.Errorf("offset must be non-negative")
	}
	return withLock(s, blockId, name, func(entry *CacheEntry) error {
		err := entry.loadFileIntoCache(ctx)
		if err != nil {
			return err
		}
		file := entry.File
		if offset > file.Size {
			return fmt.Errorf("offset is past the end of the file")
		}
		// parts this write only partially covers must be loaded first,
		// so their existing bytes are not clobbered
		partMap := file.computePartMap(offset, int64(len(data)))
		incompleteParts := incompletePartsFromMap(partMap)
		err = entry.loadDataPartsIntoCache(ctx, incompleteParts)
		if err != nil {
			return err
		}
		entry.writeAt(offset, data, true)
		return nil
	})
}
func (s *BlockStore) AppendData(ctx context.Context, blockId string, name string, data []byte) error {
	return withLock(s, blockId, name, func(entry *CacheEntry) error {
		err := entry.loadFileIntoCache(ctx)
		if err != nil {
			return err
		}
		// if the file ends mid-part, that part must be cached before we can append to it
		lastPartIdx := entry.File.getLastIncompletePartNum()
		if lastPartIdx != NoPartIdx {
			err = entry.loadDataPartsIntoCache(ctx, []int{lastPartIdx})
			if err != nil {
				return err
			}
		}
		entry.writeAt(entry.File.Size, data, false)
		return nil
	})
}
func (s *BlockStore) GetAllBlockIds(ctx context.Context) ([]string, error) {
	return dbGetAllBlockIds(ctx)
}
// returns (offset, data, error)
// we return the offset because the offset may have been adjusted if the size was too big (for circular files)
func (s *BlockStore) ReadAt(ctx context.Context, blockId string, name string, offset int64, size int64) (rtnOffset int64, rtnData []byte, rtnErr error) {
	withLock(s, blockId, name, func(entry *CacheEntry) error {
		rtnOffset, rtnData, rtnErr = entry.readAt(ctx, offset, size, false)
		return nil
	})
	return
}
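
// For circular files only the most recent MaxSize bytes are retained, so a read
// that starts before the retained window has its offset advanced. Hypothetical
// numbers: with MaxSize = 128 KiB and a file that has grown to 1 MiB, a ReadAt
// at offset 0 cannot return the overwritten bytes; the returned rtnOffset
// reports where the surviving data actually begins (917504 in this example,
// assuming the window is part-aligned at Size-MaxSize).
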
// returns (offset, data, error)
func (s *BlockStore) ReadFile(ctx context.Context, blockId string, name string) (rtnOffset int64, rtnData []byte, rtnErr error) {
	withLock(s, blockId, name, func(entry *CacheEntry) error {
		rtnOffset, rtnData, rtnErr = entry.readAt(ctx, 0, 0, true)
		return nil
	})
	return
}
func (s *BlockStore) FlushCache(ctx context.Context) error {
	wasFlushing := s.setUnlessFlushing()
	if wasFlushing {
		return fmt.Errorf("flush already in progress")
	}
	defer s.setIsFlushing(false)
	// get a copy of dirty keys so we can iterate without the lock
	dirtyCacheKeys := s.getDirtyCacheKeys()
	for _, key := range dirtyCacheKeys {
		err := withLock(s, key.BlockId, key.Name, func(entry *CacheEntry) error {
			return entry.flushToDB(ctx, false)
		})
		if ctx.Err() != nil {
			// transient error (also must stop the loop)
			return ctx.Err()
		}
		if err != nil {
			return fmt.Errorf("error flushing cache entry[%v]: %v", key, err)
		}
	}
	return nil
}
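
// A minimal sketch of a periodic flusher that could drive FlushCache, honoring
// stopFlush and DefaultFlushTime (illustrative only -- the real wiring lives
// outside this excerpt):
//
//	go func() {
//		for {
//			time.Sleep(DefaultFlushTime)
//			if stopFlush.Load() {
//				return
//			}
//			ctx, cancel := context.WithTimeout(context.Background(), DefaultFlushTime)
//			if err := GBS.FlushCache(ctx); err != nil {
//				flushErrorCount.Add(1)
//			}
//			cancel()
//		}
//	}()
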
///////////////////////////////////

func (f *BlockFile) getLastIncompletePartNum() int {
	if f.Size%partDataSize == 0 {
		return NoPartIdx
	}
	return f.partIdxAtOffset(f.Size)
}
func (f *BlockFile) partIdxAtOffset(offset int64) int {
	partIdx := int(offset / partDataSize)
	if f.Opts.Circular {
		// circular files wrap around and reuse a fixed window of parts
		maxPart := int(f.Opts.MaxSize / partDataSize)
		partIdx = partIdx % maxPart
	}
	return partIdx
}
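
// Example: with partDataSize = 65536 and a circular file of MaxSize = 131072
// (maxPart = 2), offset 140000 maps to raw part 2 (140000/65536), which wraps
// to part 0 (2 % 2).
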
func incompletePartsFromMap(partMap map[int]int) []int {
	var incompleteParts []int
	for partIdx, size := range partMap {
		if size != int(partDataSize) {
			incompleteParts = append(incompleteParts, partIdx)
		}
	}
	return incompleteParts
}
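
// Example: for a partMap of {0: 65536, 1: 100}, only part 1 is returned; part 0
// is fully covered by the write, so its existing contents never need loading.
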
func getPartIdxsFromMap(partMap map[int]int) []int {
	var partIdxs []int
	for partIdx := range partMap {
		partIdxs = append(partIdxs, partIdx)
	}
	return partIdxs
}
// returns a map of partIdx to amount of data to write to that part
func (file *BlockFile) computePartMap(startOffset int64, size int64) map[int]int {
	partMap := make(map[int]int)
	endOffset := startOffset + size
	startBlockOffset := startOffset - (startOffset % partDataSize)
	for testOffset := startBlockOffset; testOffset < endOffset; testOffset += partDataSize {
		partIdx := file.partIdxAtOffset(testOffset)
		partStartOffset := testOffset
		partEndOffset := testOffset + partDataSize
		partWriteStartOffset := 0
		partWriteEndOffset := int(partDataSize)
		if startOffset > partStartOffset && startOffset < partEndOffset {
			partWriteStartOffset = int(startOffset - partStartOffset)
		}
		if endOffset > partStartOffset && endOffset < partEndOffset {
			partWriteEndOffset = int(endOffset - partStartOffset)
		}
		partMap[partIdx] = partWriteEndOffset - partWriteStartOffset
	}
	return partMap
}
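
// Worked example, with partDataSize = 65536: computePartMap(65000, 2000) spans
// offsets 65000..67000, so part 0 is written for 536 bytes (65000..65536) and
// part 1 for 1464 bytes (65536..67000); 536 + 1464 == 2000.
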
func (s *BlockStore) getDirtyCacheKeys() []cacheKey {
	s.Lock.Lock()
	defer s.Lock.Unlock()
	var dirtyCacheKeys []cacheKey
	for key, entry := range s.Cache {
		if entry.File != nil {
			dirtyCacheKeys = append(dirtyCacheKeys, key)
		}
	}
	return dirtyCacheKeys
}
func (s *BlockStore) setIsFlushing(flushing bool) {
	s.Lock.Lock()
	defer s.Lock.Unlock()
	s.IsFlushing = flushing
}

// returns old value of IsFlushing
func (s *BlockStore) setUnlessFlushing() bool {
	s.Lock.Lock()
	defer s.Lock.Unlock()
	if s.IsFlushing {
		return true
	}
	s.IsFlushing = true
	return false
}
func minInt64(a, b int64) int64 {
	if a < b {
		return a
	}
	return b
}