From ee2c77acd86c7d06b1a625da53717fd1e18c505b Mon Sep 17 00:00:00 2001 From: KirCute <951206789@qq.com> Date: Mon, 10 Nov 2025 19:08:50 +0800 Subject: [PATCH] fix(archive/zip): user specific encoding for non-EFS zips (#1599) * fix(archive/zip): user specific encoding for non-EFS zips * fix(stream): simplify head cache initialization and improve reader retrieval logic * fix: support multipart zips (.z01) * chore(deps): update github.com/KirCute/zip to v1.0.1 --------- Co-authored-by: j2rong4cn Co-authored-by: Pikachu Ren <40362270+PIKACHUIM@users.noreply.github.com> --- go.mod | 3 +- go.sum | 6 +- internal/archive/rardecode/rardecode.go | 3 +- internal/archive/sevenzip/sevenzip.go | 3 +- internal/archive/tool/base.go | 3 +- internal/archive/zip/utils.go | 164 +++++++----------------- internal/archive/zip/zip.go | 46 ++++--- internal/bootstrap/data/setting.go | 1 + internal/conf/const.go | 1 + internal/op/archive.go | 107 +++++++++++----- internal/stream/stream.go | 59 +++++---- 11 files changed, 188 insertions(+), 208 deletions(-) diff --git a/go.mod b/go.mod index 44279e78..c34fc6f3 100644 --- a/go.mod +++ b/go.mod @@ -5,6 +5,7 @@ go 1.23.4 require ( github.com/Azure/azure-sdk-for-go/sdk/azcore v1.18.1 github.com/Azure/azure-sdk-for-go/sdk/storage/azblob v1.6.2 + github.com/KirCute/zip v1.0.1 github.com/OpenListTeam/go-cache v0.1.0 github.com/OpenListTeam/sftpd-openlist v1.0.1 github.com/OpenListTeam/tache v0.2.1 @@ -57,7 +58,6 @@ require ( github.com/pquerna/otp v1.5.0 github.com/quic-go/quic-go v0.54.1 github.com/rclone/rclone v1.70.3 - github.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d github.com/shirou/gopsutil/v4 v4.25.5 github.com/sirupsen/logrus v1.9.3 github.com/spf13/afero v1.14.0 @@ -68,7 +68,6 @@ require ( github.com/u2takey/ffmpeg-go v0.5.0 github.com/upyun/go-sdk/v3 v3.0.4 github.com/winfsp/cgofuse v1.6.0 - github.com/yeka/zip v0.0.0-20231116150916-03d6312748a9 github.com/zzzhr1990/go-common-entity v0.0.0-20250202070650-1a200048f0d3 golang.org/x/crypto v0.40.0 golang.org/x/image v0.29.0 diff --git a/go.sum b/go.sum index ea666e38..7d45ace2 100644 --- a/go.sum +++ b/go.sum @@ -39,6 +39,8 @@ github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03 github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= github.com/Da3zKi7/saferith v0.33.0-fixed h1:fnIWTk7EP9mZAICf7aQjeoAwpfrlCrkOvqmi6CbWdTk= github.com/Da3zKi7/saferith v0.33.0-fixed/go.mod h1:QKJhjoqUtBsXCAVEjw38mFqoi7DebT7kthcD7UzbnoA= +github.com/KirCute/zip v1.0.1 h1:L/tVZglOiDVKDi9Ud+fN49htgKdQ3Z0H80iX8OZk13c= +github.com/KirCute/zip v1.0.1/go.mod h1:xhF7dCB+Bjvy+5a56lenYCKBsH+gxDNPZSy5Cp+nlXk= github.com/Masterminds/semver/v3 v3.2.0 h1:3MEsd0SM6jqZojhjLWWeBY+Kcjy9i6MQAeY7YgDP83g= github.com/Masterminds/semver/v3 v3.2.0/go.mod h1:qvl/7zhW3nngYb5+80sSMF+FG2BjYrf8m9wsX0PNOMQ= github.com/Max-Sum/base32768 v0.0.0-20230304063302-18e6ce5945fd h1:nzE1YQBdx1bq9IlZinHa+HVffy+NmVRoKr+wHN8fpLE= @@ -639,8 +641,6 @@ github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQD github.com/rwcarlsen/goexif v0.0.0-20190401172101-9e8deecbddbd/go.mod h1:hPqNNc0+uJM6H+SuU8sEs5K5IQeKccPqeSjfgcKGgPk= github.com/ryszard/goskiplist v0.0.0-20150312221310-2dfbae5fcf46 h1:GHRpF1pTW19a8tTFrMLUcfWwyC0pnifVo2ClaLq+hP8= github.com/ryszard/goskiplist v0.0.0-20150312221310-2dfbae5fcf46/go.mod h1:uAQ5PCi+MFsC7HjREoAz1BU+Mq60+05gifQSsHSDG/8= -github.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d 
h1:hrujxIzL1woJ7AwssoOcM/tq5JjjG2yYOc8odClEiXA= -github.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d/go.mod h1:uugorj2VCxiV1x+LzaIdVa9b4S4qGAcH6cbhh4qVxOU= github.com/secsy/goftp v0.0.0-20200609142545-aa2de14babf4 h1:PT+ElG/UUFMfqy5HrxJxNzj3QBOf7dZwupeVC+mG1Lo= github.com/secsy/goftp v0.0.0-20200609142545-aa2de14babf4/go.mod h1:MnkX001NG75g3p8bhFycnyIjeQoOjGL6CEIsdE/nKSY= github.com/shabbyrobe/gocovmerge v0.0.0-20230507112040-c3350d9342df h1:S77Pf5fIGMa7oSwp8SQPp7Hb4ZiI38K3RNBKD2LLeEM= @@ -713,8 +713,6 @@ github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e h1:JVG44RsyaB9T2KIHavM github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e/go.mod h1:RbqR21r5mrJuqunuUZ/Dhy/avygyECGrLceyNeo4LiM= github.com/xyproto/randomstring v1.0.5 h1:YtlWPoRdgMu3NZtP45drfy1GKoojuR7hmRcnhZqKjWU= github.com/xyproto/randomstring v1.0.5/go.mod h1:rgmS5DeNXLivK7YprL0pY+lTuhNQW3iGxZ18UQApw/E= -github.com/yeka/zip v0.0.0-20231116150916-03d6312748a9 h1:K8gF0eekWPEX+57l30ixxzGhHH/qscI3JCnuhbN6V4M= -github.com/yeka/zip v0.0.0-20231116150916-03d6312748a9/go.mod h1:9BnoKCcgJ/+SLhfAXj15352hTOuVmG5Gzo8xNRINfqI= github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= github.com/yuin/goldmark v1.7.13 h1:GPddIs617DnBLFFVJFgpo1aBfe/4xcvMc3SB5t/D0pA= github.com/yuin/goldmark v1.7.13/go.mod h1:ip/1k0VRfGynBgxOz0yCqHrbZXhcjxyuS66Brc7iBKg= diff --git a/internal/archive/rardecode/rardecode.go b/internal/archive/rardecode/rardecode.go index 13a22e3e..d81d0694 100644 --- a/internal/archive/rardecode/rardecode.go +++ b/internal/archive/rardecode/rardecode.go @@ -4,6 +4,7 @@ import ( "io" "os" "path/filepath" + "regexp" "strings" "github.com/OpenListTeam/OpenList/v4/internal/archive/tool" @@ -21,7 +22,7 @@ func (RarDecoder) AcceptedExtensions() []string { func (RarDecoder) AcceptedMultipartExtensions() map[string]tool.MultipartExtension { return map[string]tool.MultipartExtension{ - ".part1.rar": {".part%d.rar", 2}, + ".part1.rar": {regexp.MustCompile("^.*\\.part(\\d+)\\.rar$"), 2}, } } diff --git a/internal/archive/sevenzip/sevenzip.go b/internal/archive/sevenzip/sevenzip.go index c59cf27c..9cc2df8d 100644 --- a/internal/archive/sevenzip/sevenzip.go +++ b/internal/archive/sevenzip/sevenzip.go @@ -2,6 +2,7 @@ package sevenzip import ( "io" + "regexp" "strings" "github.com/OpenListTeam/OpenList/v4/internal/archive/tool" @@ -18,7 +19,7 @@ func (SevenZip) AcceptedExtensions() []string { func (SevenZip) AcceptedMultipartExtensions() map[string]tool.MultipartExtension { return map[string]tool.MultipartExtension{ - ".7z.001": {".7z.%.3d", 2}, + ".7z.001": {regexp.MustCompile("^.*\\.7z\\.(\\d+)$"), 2}, } } diff --git a/internal/archive/tool/base.go b/internal/archive/tool/base.go index 60609453..571ada74 100644 --- a/internal/archive/tool/base.go +++ b/internal/archive/tool/base.go @@ -2,13 +2,14 @@ package tool import ( "io" + "regexp" "github.com/OpenListTeam/OpenList/v4/internal/model" "github.com/OpenListTeam/OpenList/v4/internal/stream" ) type MultipartExtension struct { - PartFileFormat string + PartFileFormat *regexp.Regexp SecondPartIndex int } diff --git a/internal/archive/zip/utils.go b/internal/archive/zip/utils.go index 0cd1a637..4f367a63 100644 --- a/internal/archive/zip/utils.go +++ b/internal/archive/zip/utils.go @@ -4,22 +4,15 @@ import ( "bytes" "io" "io/fs" - stdpath "path" "strings" + "github.com/KirCute/zip" "github.com/OpenListTeam/OpenList/v4/internal/archive/tool" + "github.com/OpenListTeam/OpenList/v4/internal/conf" "github.com/OpenListTeam/OpenList/v4/internal/errs" + 
"github.com/OpenListTeam/OpenList/v4/internal/setting" "github.com/OpenListTeam/OpenList/v4/internal/stream" - "github.com/saintfish/chardet" - "github.com/yeka/zip" - "golang.org/x/text/encoding" - "golang.org/x/text/encoding/charmap" - "golang.org/x/text/encoding/japanese" - "golang.org/x/text/encoding/korean" - "golang.org/x/text/encoding/simplifiedchinese" - "golang.org/x/text/encoding/traditionalchinese" - "golang.org/x/text/encoding/unicode" - "golang.org/x/text/encoding/unicode/utf32" + "golang.org/x/text/encoding/ianaindex" "golang.org/x/text/transform" ) @@ -37,10 +30,11 @@ func (r *WrapReader) Files() []tool.SubFile { type WrapFileInfo struct { fs.FileInfo + efs bool } func (f *WrapFileInfo) Name() string { - return decodeName(f.FileInfo.Name()) + return decodeName(f.FileInfo.Name(), f.efs) } type WrapFile struct { @@ -48,11 +42,11 @@ type WrapFile struct { } func (f *WrapFile) Name() string { - return decodeName(f.f.Name) + return decodeName(f.f.Name, isEFS(f.f.Flags)) } func (f *WrapFile) FileInfo() fs.FileInfo { - return &WrapFileInfo{FileInfo: f.f.FileInfo()} + return &WrapFileInfo{FileInfo: f.f.FileInfo(), efs: isEFS(f.f.Flags)} } func (f *WrapFile) Open() (io.ReadCloser, error) { @@ -67,16 +61,33 @@ func (f *WrapFile) SetPassword(password string) { f.f.SetPassword(password) } -func getReader(ss []*stream.SeekableStream) (*zip.Reader, error) { - if len(ss) > 1 && stdpath.Ext(ss[1].GetName()) == ".z01" { - // FIXME: Incorrect parsing method for standard multipart zip format - ss = append(ss[1:], ss[0]) - } - reader, err := stream.NewMultiReaderAt(ss) +func makePart(ss *stream.SeekableStream) (zip.SizeReaderAt, error) { + ra, err := stream.NewReadAtSeeker(ss, 0) if err != nil { return nil, err } - return zip.NewReader(reader, reader.Size()) + return &inlineSizeReaderAt{ReaderAt: ra, size: ss.GetSize()}, nil +} + +func (z *Zip) getReader(ss []*stream.SeekableStream) (*zip.Reader, error) { + if len(ss) > 1 && z.traditionalSecondPartRegExp.MatchString(ss[1].GetName()) { + ss = append(ss[1:], ss[0]) + ras := make([]zip.SizeReaderAt, 0, len(ss)) + for _, s := range ss { + ra, err := makePart(s) + if err != nil { + return nil, err + } + ras = append(ras, ra) + } + return zip.NewMultipartReader(ras) + } else { + reader, err := stream.NewMultiReaderAt(ss) + if err != nil { + return nil, err + } + return zip.NewReader(reader, reader.Size()) + } } func filterPassword(err error) error { @@ -86,110 +97,29 @@ func filterPassword(err error) error { return err } -func decodeName(name string) string { - b := []byte(name) - detector := chardet.NewTextDetector() - results, err := detector.DetectAll(b) +func decodeName(name string, efs bool) string { + if efs { + return name + } + enc, err := ianaindex.IANA.Encoding(setting.GetStr(conf.NonEFSZipEncoding)) if err != nil { return name } - var ce, re, enc encoding.Encoding - for _, r := range results { - if r.Confidence > 30 { - ce = getCommonEncoding(r.Charset) - if ce != nil { - break - } - } - if re == nil { - re = getEncoding(r.Charset) - } - } - if ce != nil { - enc = ce - } else if re != nil { - enc = re - } else { - return name - } - i := bytes.NewReader(b) + i := bytes.NewReader([]byte(name)) decoder := transform.NewReader(i, enc.NewDecoder()) content, _ := io.ReadAll(decoder) return string(content) } -func getCommonEncoding(name string) (enc encoding.Encoding) { - switch name { - case "UTF-8": - enc = unicode.UTF8 - case "UTF-16LE": - enc = unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM) - case "Shift_JIS": - enc = japanese.ShiftJIS 
- case "GB-18030": - enc = simplifiedchinese.GB18030 - case "EUC-KR": - enc = korean.EUCKR - case "Big5": - enc = traditionalchinese.Big5 - default: - enc = nil - } - return +func isEFS(flags uint16) bool { + return (flags & 0x800) > 0 } -func getEncoding(name string) (enc encoding.Encoding) { - switch name { - case "UTF-8": - enc = unicode.UTF8 - case "UTF-16BE": - enc = unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM) - case "UTF-16LE": - enc = unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM) - case "UTF-32BE": - enc = utf32.UTF32(utf32.BigEndian, utf32.IgnoreBOM) - case "UTF-32LE": - enc = utf32.UTF32(utf32.LittleEndian, utf32.IgnoreBOM) - case "ISO-8859-1": - enc = charmap.ISO8859_1 - case "ISO-8859-2": - enc = charmap.ISO8859_2 - case "ISO-8859-3": - enc = charmap.ISO8859_3 - case "ISO-8859-4": - enc = charmap.ISO8859_4 - case "ISO-8859-5": - enc = charmap.ISO8859_5 - case "ISO-8859-6": - enc = charmap.ISO8859_6 - case "ISO-8859-7": - enc = charmap.ISO8859_7 - case "ISO-8859-8": - enc = charmap.ISO8859_8 - case "ISO-8859-8-I": - enc = charmap.ISO8859_8I - case "ISO-8859-9": - enc = charmap.ISO8859_9 - case "windows-1251": - enc = charmap.Windows1251 - case "windows-1256": - enc = charmap.Windows1256 - case "KOI8-R": - enc = charmap.KOI8R - case "Shift_JIS": - enc = japanese.ShiftJIS - case "GB-18030": - enc = simplifiedchinese.GB18030 - case "EUC-JP": - enc = japanese.EUCJP - case "EUC-KR": - enc = korean.EUCKR - case "Big5": - enc = traditionalchinese.Big5 - case "ISO-2022-JP": - enc = japanese.ISO2022JP - default: - enc = nil - } - return +type inlineSizeReaderAt struct { + io.ReaderAt + size int64 +} + +func (i *inlineSizeReaderAt) Size() int64 { + return i.size } diff --git a/internal/archive/zip/zip.go b/internal/archive/zip/zip.go index 8ed83ac7..2c886c29 100644 --- a/internal/archive/zip/zip.go +++ b/internal/archive/zip/zip.go @@ -3,6 +3,7 @@ package zip import ( "io" stdpath "path" + "regexp" "strings" "github.com/OpenListTeam/OpenList/v4/internal/archive/tool" @@ -12,34 +13,39 @@ import ( ) type Zip struct { + traditionalSecondPartRegExp *regexp.Regexp } -func (Zip) AcceptedExtensions() []string { +func (z *Zip) AcceptedExtensions() []string { return []string{} } -func (Zip) AcceptedMultipartExtensions() map[string]tool.MultipartExtension { +func (z *Zip) AcceptedMultipartExtensions() map[string]tool.MultipartExtension { return map[string]tool.MultipartExtension{ - ".zip": {".z%.2d", 1}, - ".zip.001": {".zip.%.3d", 2}, + ".zip": {regexp.MustCompile("^.*\\.z(\\d+)$"), 1}, + ".zip.001": {regexp.MustCompile("^.*\\.zip\\.(\\d+)$"), 2}, } } -func (Zip) GetMeta(ss []*stream.SeekableStream, args model.ArchiveArgs) (model.ArchiveMeta, error) { - zipReader, err := getReader(ss) +func (z *Zip) GetMeta(ss []*stream.SeekableStream, args model.ArchiveArgs) (model.ArchiveMeta, error) { + zipReader, err := z.getReader(ss) if err != nil { return nil, err } + efs := true + if len(zipReader.File) > 0 { + efs = isEFS(zipReader.File[0].Flags) + } encrypted, tree := tool.GenerateMetaTreeFromFolderTraversal(&WrapReader{Reader: zipReader}) return &model.ArchiveMetaInfo{ - Comment: zipReader.Comment, + Comment: decodeName(zipReader.Comment, efs), Encrypted: encrypted, Tree: tree, }, nil } -func (Zip) List(ss []*stream.SeekableStream, args model.ArchiveInnerArgs) ([]model.Obj, error) { - zipReader, err := getReader(ss) +func (z *Zip) List(ss []*stream.SeekableStream, args model.ArchiveInnerArgs) ([]model.Obj, error) { + zipReader, err := z.getReader(ss) if err != nil { return nil, err } @@ 
-57,7 +63,7 @@ func (Zip) List(ss []*stream.SeekableStream, args model.ArchiveInnerArgs) ([]mod _ = rc.Close() passVerified = true } - name := strings.TrimSuffix(decodeName(file.Name), "/") + name := strings.TrimSuffix(decodeName(file.Name, isEFS(file.Flags)), "/") if strings.Contains(name, "/") { // 有些压缩包不压缩第一个文件夹 strs := strings.Split(name, "/") @@ -70,7 +76,7 @@ func (Zip) List(ss []*stream.SeekableStream, args model.ArchiveInnerArgs) ([]mod } continue } - ret = append(ret, tool.MakeModelObj(&WrapFileInfo{FileInfo: file.FileInfo()})) + ret = append(ret, tool.MakeModelObj(&WrapFileInfo{FileInfo: file.FileInfo(), efs: isEFS(file.Flags)})) } if len(ret) == 0 && dir != nil { ret = append(ret, dir) @@ -81,13 +87,13 @@ func (Zip) List(ss []*stream.SeekableStream, args model.ArchiveInnerArgs) ([]mod ret := make([]model.Obj, 0) exist := false for _, file := range zipReader.File { - name := decodeName(file.Name) + name := decodeName(file.Name, isEFS(file.Flags)) dir := stdpath.Dir(strings.TrimSuffix(name, "/")) + "/" if dir != innerPath { continue } exist = true - ret = append(ret, tool.MakeModelObj(&WrapFileInfo{file.FileInfo()})) + ret = append(ret, tool.MakeModelObj(&WrapFileInfo{file.FileInfo(), isEFS(file.Flags)})) } if !exist { return nil, errs.ObjectNotFound @@ -96,14 +102,14 @@ func (Zip) List(ss []*stream.SeekableStream, args model.ArchiveInnerArgs) ([]mod } } -func (Zip) Extract(ss []*stream.SeekableStream, args model.ArchiveInnerArgs) (io.ReadCloser, int64, error) { - zipReader, err := getReader(ss) +func (z *Zip) Extract(ss []*stream.SeekableStream, args model.ArchiveInnerArgs) (io.ReadCloser, int64, error) { + zipReader, err := z.getReader(ss) if err != nil { return nil, 0, err } innerPath := strings.TrimPrefix(args.InnerPath, "/") for _, file := range zipReader.File { - if decodeName(file.Name) == innerPath { + if decodeName(file.Name, isEFS(file.Flags)) == innerPath { if file.IsEncrypted() { file.SetPassword(args.Password) } @@ -117,8 +123,8 @@ func (Zip) Extract(ss []*stream.SeekableStream, args model.ArchiveInnerArgs) (io return nil, 0, errs.ObjectNotFound } -func (Zip) Decompress(ss []*stream.SeekableStream, outputPath string, args model.ArchiveInnerArgs, up model.UpdateProgress) error { - zipReader, err := getReader(ss) +func (z *Zip) Decompress(ss []*stream.SeekableStream, outputPath string, args model.ArchiveInnerArgs, up model.UpdateProgress) error { + zipReader, err := z.getReader(ss) if err != nil { return err } @@ -128,5 +134,7 @@ func (Zip) Decompress(ss []*stream.SeekableStream, outputPath string, args model var _ tool.Tool = (*Zip)(nil) func init() { - tool.RegisterTool(Zip{}) + tool.RegisterTool(&Zip{ + traditionalSecondPartRegExp: regexp.MustCompile("^.*\\.z0*1$"), + }) } diff --git a/internal/bootstrap/data/setting.go b/internal/bootstrap/data/setting.go index 667bdb94..58b6b568 100644 --- a/internal/bootstrap/data/setting.go +++ b/internal/bootstrap/data/setting.go @@ -154,6 +154,7 @@ func InitialSettings() []model.SettingItem { {Key: conf.SharePreviewArchivesByDefault, Value: "false", Type: conf.TypeBool, Group: model.PREVIEW}, {Key: conf.ReadMeAutoRender, Value: "true", Type: conf.TypeBool, Group: model.PREVIEW}, {Key: conf.FilterReadMeScripts, Value: "true", Type: conf.TypeBool, Group: model.PREVIEW}, + {Key: conf.NonEFSZipEncoding, Value: "IBM437", Type: conf.TypeString, Group: model.PREVIEW}, // global settings {Key: conf.HideFiles, Value: "/\\/README.md/i", Type: conf.TypeText, Group: model.GLOBAL}, {Key: "package_download", Value: "true", Type: 
conf.TypeBool, Group: model.GLOBAL}, diff --git a/internal/conf/const.go b/internal/conf/const.go index 9f7c06b0..a0b1a157 100644 --- a/internal/conf/const.go +++ b/internal/conf/const.go @@ -38,6 +38,7 @@ const ( SharePreviewArchivesByDefault = "share_preview_archives_by_default" ReadMeAutoRender = "readme_autorender" FilterReadMeScripts = "filter_readme_scripts" + NonEFSZipEncoding = "non_efs_zip_encoding" // global HideFiles = "hide_files" diff --git a/internal/op/archive.go b/internal/op/archive.go index 50e05141..2cdcb2b2 100644 --- a/internal/op/archive.go +++ b/internal/op/archive.go @@ -3,19 +3,18 @@ package op import ( "context" stderrors "errors" - "fmt" "io" stdpath "path" + "strconv" "strings" "time" "github.com/OpenListTeam/OpenList/v4/internal/archive/tool" "github.com/OpenListTeam/OpenList/v4/internal/cache" - "github.com/OpenListTeam/OpenList/v4/internal/stream" - "github.com/OpenListTeam/OpenList/v4/internal/driver" "github.com/OpenListTeam/OpenList/v4/internal/errs" "github.com/OpenListTeam/OpenList/v4/internal/model" + "github.com/OpenListTeam/OpenList/v4/internal/stream" "github.com/OpenListTeam/OpenList/v4/pkg/singleflight" "github.com/OpenListTeam/OpenList/v4/pkg/utils" gocache "github.com/OpenListTeam/go-cache" @@ -61,20 +60,25 @@ func GetArchiveToolAndStream(ctx context.Context, storage driver.Driver, path st if err != nil { return nil, nil, nil, errors.WithMessagef(err, "failed get [%s] link", path) } - baseName, ext, found := strings.Cut(obj.GetName(), ".") - if !found { - _ = l.Close() - return nil, nil, nil, errors.Errorf("failed get archive tool: the obj does not have an extension.") - } - partExt, t, err := tool.GetArchiveTool("." + ext) - if err != nil { - var e error - partExt, t, e = tool.GetArchiveTool(stdpath.Ext(obj.GetName())) - if e != nil { + + // Get archive tool + var partExt *tool.MultipartExtension + var t tool.Tool + ext := obj.GetName() + for { + var found bool + _, ext, found = strings.Cut(ext, ".") + if !found { _ = l.Close() - return nil, nil, nil, errors.WithMessagef(stderrors.Join(err, e), "failed get archive tool: %s", ext) + return nil, nil, nil, errors.Errorf("failed get archive tool: the obj does not have an extension.") + } + partExt, t, err = tool.GetArchiveTool("." 
+ ext) + if err == nil { + break } } + + // Get first part stream ss, err := stream.NewSeekableStream(&stream.FileStream{Ctx: ctx, Obj: obj}, l) if err != nil { _ = l.Close() @@ -83,29 +87,62 @@ func GetArchiveToolAndStream(ctx context.Context, storage driver.Driver, path st ret := []*stream.SeekableStream{ss} if partExt == nil { return obj, t, ret, nil - } else { - index := partExt.SecondPartIndex - dir := stdpath.Dir(path) - for { - p := stdpath.Join(dir, baseName+fmt.Sprintf(partExt.PartFileFormat, index)) - var o model.Obj - l, o, err = Link(ctx, storage, p, args) - if err != nil { - break - } - ss, err = stream.NewSeekableStream(&stream.FileStream{Ctx: ctx, Obj: o}, l) - if err != nil { - _ = l.Close() - for _, s := range ret { - _ = s.Close() - } - return nil, nil, nil, errors.WithMessagef(err, "failed get [%s] stream", path) - } - ret = append(ret, ss) - index++ - } + } + + // Merge multi-part archive + dir := stdpath.Dir(path) + objs, err := List(ctx, storage, dir, model.ListArgs{}) + if err != nil { return obj, t, ret, nil } + for _, o := range objs { + submatch := partExt.PartFileFormat.FindStringSubmatch(o.GetName()) + if submatch == nil { + continue + } + partIdx, e := strconv.Atoi(submatch[1]) + if e != nil { + continue + } + partIdx = partIdx - partExt.SecondPartIndex + 1 + if partIdx < 1 { + continue + } + p := stdpath.Join(dir, o.GetName()) + l1, o1, e := Link(ctx, storage, p, args) + if e != nil { + err = errors.WithMessagef(e, "failed get [%s] link", p) + break + } + ss1, e := stream.NewSeekableStream(&stream.FileStream{Ctx: ctx, Obj: o1}, l1) + if e != nil { + _ = l1.Close() + err = errors.WithMessagef(e, "failed get [%s] stream", p) + break + } + for partIdx >= len(ret) { + ret = append(ret, nil) + } + ret[partIdx] = ss1 + } + closeAll := func(r []*stream.SeekableStream) { + for _, s := range r { + if s != nil { + _ = s.Close() + } + } + } + if err != nil { + closeAll(ret) + return nil, nil, nil, err + } + for i, ss1 := range ret { + if ss1 == nil { + closeAll(ret) + return nil, nil, nil, errors.Errorf("failed merge [%s] parts, missing part %d", path, i) + } + } + return obj, t, ret, nil } func getArchiveMeta(ctx context.Context, storage driver.Driver, path string, args model.ArchiveMetaArgs) (model.Obj, *model.ArchiveMetaProvider, error) { diff --git a/internal/stream/stream.go b/internal/stream/stream.go index fad03f6f..4c823810 100644 --- a/internal/stream/stream.go +++ b/internal/stream/stream.go @@ -456,7 +456,7 @@ func (r *headCache) Close() error { } func (r *RangeReadReadAtSeeker) InitHeadCache() { - if r.ss.GetFile() == nil && r.masterOff == 0 { + if r.masterOff == 0 { value, _ := r.readerMap.LoadAndDelete(int64(0)) r.headCache = &headCache{reader: value.(io.Reader)} r.ss.Closers.Add(r.headCache) @@ -464,12 +464,12 @@ func (r *RangeReadReadAtSeeker) InitHeadCache() { } func NewReadAtSeeker(ss *SeekableStream, offset int64, forceRange ...bool) (model.File, error) { - if ss.GetFile() != nil { - _, err := ss.GetFile().Seek(offset, io.SeekStart) + if cache := ss.GetFile(); cache != nil { + _, err := cache.Seek(offset, io.SeekStart) if err != nil { return nil, err } - return ss.GetFile(), nil + return cache, nil } r := &RangeReadReadAtSeeker{ ss: ss, @@ -479,10 +479,11 @@ func NewReadAtSeeker(ss *SeekableStream, offset int64, forceRange ...bool) (mode if offset < 0 || offset > ss.GetSize() { return nil, errors.New("offset out of range") } - _, err := r.getReaderAtOffset(offset) + reader, err := r.getReaderAtOffset(offset) if err != nil { return nil, err } + 
r.readerMap.Store(int64(offset), reader) } else { r.readerMap.Store(int64(offset), ss) } @@ -502,39 +503,41 @@ func NewMultiReaderAt(ss []*SeekableStream) (readerutil.SizeReaderAt, error) { } func (r *RangeReadReadAtSeeker) getReaderAtOffset(off int64) (io.Reader, error) { - var rr io.Reader - var cur int64 = -1 - r.readerMap.Range(func(key, value any) bool { - k := key.(int64) - if off == k { - cur = k - rr = value.(io.Reader) - return false + for { + var cur int64 = -1 + r.readerMap.Range(func(key, value any) bool { + k := key.(int64) + if off == k { + cur = k + return false + } + if off > k && off-k <= 4*utils.MB && k > cur { + cur = k + } + return true + }) + if cur < 0 { + break } - if off > k && off-k <= 4*utils.MB && (rr == nil || k < cur) { - rr = value.(io.Reader) - cur = k + v, ok := r.readerMap.LoadAndDelete(int64(cur)) + if !ok { + continue + } + rr := v.(io.Reader) + if off == int64(cur) { + // logrus.Debugf("getReaderAtOffset match_%d", off) + return rr, nil } - return true - }) - if cur >= 0 { - r.readerMap.Delete(int64(cur)) - } - if off == int64(cur) { - // logrus.Debugf("getReaderAtOffset match_%d", off) - return rr, nil - } - - if rr != nil { n, _ := utils.CopyWithBufferN(io.Discard, rr, off-cur) cur += n if cur == off { // logrus.Debugf("getReaderAtOffset old_%d", off) return rr, nil } + break } - // logrus.Debugf("getReaderAtOffset new_%d", off) + // logrus.Debugf("getReaderAtOffset new_%d", off) reader, err := r.ss.RangeRead(http_range.Range{Start: off, Length: -1}) if err != nil { return nil, err
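As a quick illustration of the non-EFS handling introduced in internal/archive/zip/utils.go above, the following is a minimal, self-contained sketch. The helper name decodeLegacyName, the "GBK" encoding choice, and the sample bytes are illustrative only; the real decodeName reads the encoding name from the new non_efs_zip_encoding setting (default "IBM437", the traditional OEM code page for ZIP names) and is driven by the entry's EFS flag.

// Sketch only: mirrors the idea of the patch's decodeName, not its exact code.
package main

import (
	"fmt"

	"golang.org/x/text/encoding/ianaindex"
)

// decodeLegacyName decodes a ZIP entry name. Entries whose EFS flag
// (bit 11 of the general purpose flags) is unset store their names in a
// locale-dependent code page, so they are decoded with a user-chosen
// IANA encoding; EFS entries are already UTF-8 per the ZIP spec.
func decodeLegacyName(raw string, efs bool, encodingName string) string {
	if efs {
		return raw
	}
	enc, err := ianaindex.IANA.Encoding(encodingName)
	if err != nil || enc == nil {
		return raw // unknown or unsupported encoding: keep the stored bytes
	}
	decoded, err := enc.NewDecoder().String(raw)
	if err != nil {
		return raw
	}
	return decoded
}

func main() {
	// Hypothetical archive created on a Simplified-Chinese system:
	// 0xD6D0 0xCEC4 is "中文" in GBK.
	raw := string([]byte{0xD6, 0xD0, 0xCE, 0xC4, '.', 't', 'x', 't'})
	fmt.Println(decodeLegacyName(raw, false, "GBK")) // 中文.txt
}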
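The regexp-based MultipartExtension used by GetArchiveToolAndStream can be summarized with a similar sketch, assuming the slot arithmetic shown in internal/op/archive.go above (part number minus SecondPartIndex plus one, with slot 0 reserved for the file the user opened). The helper partSlot and the file names are illustrative, not part of the patch.

// Sketch only: how part file names map to positions in the stream slice.
package main

import (
	"fmt"
	"regexp"
	"strconv"
)

type MultipartExtension struct {
	PartFileFormat  *regexp.Regexp
	SecondPartIndex int
}

// partSlot returns the slice slot for a sibling file name, or -1 if the
// name is not a part of this archive. Slot 0 is the first part.
func partSlot(ext MultipartExtension, name string) int {
	m := ext.PartFileFormat.FindStringSubmatch(name)
	if m == nil {
		return -1
	}
	n, err := strconv.Atoi(m[1])
	if err != nil {
		return -1
	}
	slot := n - ext.SecondPartIndex + 1
	if slot < 1 {
		return -1
	}
	return slot
}

func main() {
	// Traditional split zip: a.zip holds the central directory, a.z01 is
	// the next part on disk, so ".z01" maps to slot 1.
	zip := MultipartExtension{regexp.MustCompile(`^.*\.z(\d+)$`), 1}
	fmt.Println(partSlot(zip, "a.z01"), partSlot(zip, "a.z02")) // 1 2

	// RAR-style naming: the second volume is .part2.rar.
	rar := MultipartExtension{regexp.MustCompile(`^.*\.part(\d+)\.rar$`), 2}
	fmt.Println(partSlot(rar, "a.part2.rar"), partSlot(rar, "a.part3.rar")) // 1 2
}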
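Finally, the reworked getReaderAtOffset in internal/stream/stream.go keeps cached readers keyed by their current offset and reuses one that sits at most 4 MiB behind the requested offset by discarding the gap. The sketch below shows that idea with a plain map and made-up names (readerCache, take); the real code uses a sync.Map, retries when a concurrent caller removed the entry, and falls back to a new RangeRead when nothing is reusable.

// Sketch only: reuse a nearby cached reader instead of opening a new range.
package main

import (
	"fmt"
	"io"
	"strings"
)

const reuseWindow = 4 << 20 // 4 MiB

type readerCache map[int64]io.Reader

// take returns a reader positioned at off, advancing a cached one when
// possible; ok is false when the caller must open a fresh range request.
func (c readerCache) take(off int64) (io.Reader, bool) {
	best := int64(-1)
	for k := range c {
		if k == off || (k < off && off-k <= reuseWindow && k > best) {
			best = k
			if k == off {
				break
			}
		}
	}
	if best < 0 {
		return nil, false
	}
	r := c[best]
	delete(c, best)
	if best == off {
		return r, true
	}
	// Skip the gap; if the reader ends early, fall back to a new one.
	if n, _ := io.CopyN(io.Discard, r, off-best); best+n != off {
		return nil, false
	}
	return r, true
}

func main() {
	c := readerCache{0: strings.NewReader("0123456789")}
	r, ok := c.take(4) // reuse the reader cached at offset 0
	b, _ := io.ReadAll(r)
	fmt.Println(ok, string(b)) // true 456789
}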