perf(stream): improve file stream range reading and caching mechanism (#1001)

* perf(stream): improve file stream range reading and caching mechanism

* 。

* add bytes_test.go

* fix(stream): handle EOF and buffer reading more gracefully

* 注释

* refactor: update CacheFullAndWriter to accept pointer for UpdateProgress

* update tests

* Update drivers/google_drive/util.go

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Signed-off-by: j2rong4cn <36783515+j2rong4cn@users.noreply.github.com>

* 更优雅的克隆Link

* 修复stream已缓存但无法重复读取

* 将Bytes类型重命名为Reader

* 修复栈溢出

* update tests

---------

Signed-off-by: j2rong4cn <36783515+j2rong4cn@users.noreply.github.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
This commit is contained in:
j2rong4cn
2025-08-11 23:41:22 +08:00
committed by GitHub
parent 8c244a984d
commit 57fceabcf4
48 changed files with 657 additions and 380 deletions

92
pkg/buffer/bytes.go Normal file
View File

@@ -0,0 +1,92 @@
package buffer
import (
"errors"
"io"
)
// Reader is a readable, seekable view over a sequence of []byte chunks that
// are stored as-is and not reused.
//
// The zero value is an empty Reader that is ready for Append.
type Reader struct {
	bufs   [][]byte // chunks, in logical order
	length int      // total number of bytes across all chunks
	offset int      // current position used by Read and Seek
}

// Compile-time checks that Reader satisfies the interfaces it is written for.
var (
	_ io.ReadSeeker = (*Reader)(nil)
	_ io.ReaderAt   = (*Reader)(nil)
)

// Len returns the total number of bytes held by the Reader. It does not
// depend on the current read position.
func (r *Reader) Len() int {
	return r.length
}

// Append adds buf as the last chunk. The slice is retained rather than
// copied, so later changes to its contents are visible through the Reader.
func (r *Reader) Append(buf []byte) {
	r.length += len(buf)
	r.bufs = append(r.bufs, buf)
}

// Read copies bytes starting at the current position into p and advances the
// position by the number of bytes copied. The error is whatever ReadAt
// returns for that position, so io.EOF may accompany a final partial read.
func (r *Reader) Read(p []byte) (int, error) {
	n, err := r.ReadAt(p, int64(r.offset))
	if n > 0 {
		r.offset += n
	}
	return n, err
}

// ReadAt copies bytes starting at absolute offset off into p without touching
// the current position.
//
// It returns io.EOF when off is at or past the end, and also together with a
// short count when fewer than len(p) bytes were available. A negative off is
// an invalid argument and yields a distinct, non-EOF error.
func (r *Reader) ReadAt(p []byte, off int64) (int, error) {
	if off < 0 {
		return 0, errors.New("buffer.Reader.ReadAt: negative offset")
	}
	if off >= int64(r.length) {
		return 0, io.EOF
	}
	n := 0
	start := int64(0) // absolute offset of the first byte of the current chunk
	for _, buf := range r.bufs {
		end := start + int64(len(buf))
		if off < end {
			n += copy(p[n:], buf[int(off-start):])
			if n == len(p) {
				return n, nil
			}
			// Every following chunk is consumed from its first byte.
			off = end
		}
		start = end
	}
	return n, io.EOF
}

// Seek sets the position for the next Read according to whence (io.SeekStart,
// io.SeekCurrent or io.SeekEnd) and returns the new absolute position.
//
// The target must lie within [0, Len()]; anything else is rejected and the
// position is left unchanged. The arithmetic is done in int64 so that a large
// offset cannot be truncated into the valid range on 32-bit platforms.
func (r *Reader) Seek(offset int64, whence int) (int64, error) {
	var abs int64
	switch whence {
	case io.SeekStart:
		abs = offset
	case io.SeekCurrent:
		abs = int64(r.offset) + offset
	case io.SeekEnd:
		abs = int64(r.length) + offset
	default:
		return 0, errors.New("Seek: invalid whence")
	}
	if abs < 0 || abs > int64(r.length) {
		return 0, errors.New("Seek: invalid offset")
	}
	r.offset = int(abs)
	return abs, nil
}

// Reset drops every chunk and returns the Reader to its empty state. The
// chunk references are cleared before the slice is released.
func (r *Reader) Reset() {
	clear(r.bufs)
	r.bufs = nil
	r.length = 0
	r.offset = 0
}

// NewReader returns a Reader holding the given chunks in order.
func NewReader(buf ...[]byte) *Reader {
	b := &Reader{}
	for _, b1 := range buf {
		b.Append(b1)
	}
	return b
}

95
pkg/buffer/bytes_test.go Normal file
View File

@@ -0,0 +1,95 @@
package buffer
import (
"errors"
"io"
"testing"
)
// TestReader_ReadAt checks ReadAt on a Reader built from several chunks:
// a read that ends exactly on a chunk boundary, a read that spans several
// chunks, and a read that runs past the end and must report io.EOF together
// with the bytes that were available.
func TestReader_ReadAt(t *testing.T) {
	bs := &Reader{}
	for _, chunk := range []string{"github.com", "/", "OpenList", "Team/", "OpenList"} {
		bs.Append([]byte(chunk))
	}

	tests := []struct {
		name    string
		size    int    // length of the destination slice
		off     int64  // absolute offset passed to ReadAt
		want    string // bytes expected in p[:n]
		wantErr error  // nil means the read must fill p completely
	}{
		{name: "readAt len 10 offset 0", size: 10, off: 0, want: "github.com"},
		{name: "readAt len 12 offset 11", size: 12, off: 11, want: "OpenListTeam"},
		{name: "readAt len 50 offset 24", size: 50, off: 24, want: "OpenList", wantErr: io.EOF},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			p := make([]byte, tt.size)
			n, err := bs.ReadAt(p, tt.off)
			if n != len(tt.want) {
				t.Errorf("Reader.ReadAt() n = %d, want %d", n, len(tt.want))
				return
			}
			if got := string(p[:n]); got != tt.want {
				t.Errorf("Reader.ReadAt() content = %q, want %q", got, tt.want)
			}
			if !errors.Is(err, tt.wantErr) {
				t.Errorf("Reader.ReadAt() error = %v, want %v", err, tt.wantErr)
			}
		})
	}
}

View File

@@ -53,11 +53,12 @@ func (g *Group) Go(do func(ctx context.Context) error) {
}
// Lifecycle bundles the Before, Do and After callbacks described below.
// NOTE(review): this span comes from a diff view, so the removed and the added
// comment lines both appear in it; the comments below merge them.
type Lifecycle struct {
// Before is thread-safe in OrderedGroup.
// It is called only once.
Before func(ctx context.Context) error
// Do is not called if Before returns an error.
Do func(ctx context.Context) error
// After is called last, and only once.
After func(err error)
}

View File

@@ -194,32 +194,32 @@ type SyncClosersIF interface {
// SyncClosers holds a list of io.Closer values together with a reference
// counter that is read and updated atomically by its methods.
type SyncClosers struct {
// closers are the values appended by Add and AddIfCloser.
closers []io.Closer
// NOTE(review): the two ref declarations below are the removed and the added
// line of a diff hunk (atomic.Int32 replaced by a plain int32 used with the
// sync/atomic functions); only one of them exists in the real file.
ref atomic.Int32
ref int32
}
// Compile-time check that *SyncClosers implements SyncClosersIF.
var _ SyncClosersIF = (*SyncClosers)(nil)
// AcquireReference atomically increments the reference counter and reports
// whether the resulting count is positive. When it is not, the counter is
// set to math.MinInt16 and false is returned.
//
// NOTE(review): statements appear in pairs because this span is a diff hunk;
// in each pair the first line is the removed form and the second the added one.
func (c *SyncClosers) AcquireReference() bool {
ref := c.ref.Add(1)
ref := atomic.AddInt32(&c.ref, 1)
if ref > 0 {
// log.Debugf("SyncClosers.AcquireReference %p,ref=%d\n", c, ref)
return true
}
// The incremented count is not positive: store the math.MinInt16 marker.
c.ref.Store(math.MinInt16)
atomic.StoreInt32(&c.ref, math.MinInt16)
return false
}
func (c *SyncClosers) Close() error {
ref := c.ref.Add(-1)
ref := atomic.AddInt32(&c.ref, -1)
if ref < -1 {
c.ref.Store(math.MinInt16)
atomic.StoreInt32(&c.ref, math.MinInt16)
return nil
}
// log.Debugf("SyncClosers.Close %p,ref=%d\n", c, ref+1)
if ref > 0 {
return nil
}
c.ref.Store(math.MinInt16)
atomic.StoreInt32(&c.ref, math.MinInt16)
var errs []error
for _, closer := range c.closers {
@@ -234,7 +234,7 @@ func (c *SyncClosers) Close() error {
func (c *SyncClosers) Add(closer io.Closer) {
if closer != nil {
if c.ref.Load() < 0 {
if atomic.LoadInt32(&c.ref) < 0 {
panic("Not reusable")
}
c.closers = append(c.closers, closer)
@@ -243,7 +243,7 @@ func (c *SyncClosers) Add(closer io.Closer) {
func (c *SyncClosers) AddIfCloser(a any) {
if closer, ok := a.(io.Closer); ok {
if c.ref.Load() < 0 {
if atomic.LoadInt32(&c.ref) < 0 {
panic("Not reusable")
}
c.closers = append(c.closers, closer)