mirror of
https://github.com/GopeedLab/gopeed.git
synced 2026-03-13 08:31:47 +08:00
fix: gbk filename encoding in ZIP extraction (#1260)
This commit is contained in:
8
go.sum
8
go.sum
@@ -164,13 +164,9 @@ github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ3
|
||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/dlclark/regexp2 v1.11.4 h1:rPYF9/LECdNymJufQKmri9gV604RvvABwgOA8un7yAo=
|
||||
github.com/dlclark/regexp2 v1.11.4/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
|
||||
github.com/dlclark/regexp2 v1.11.5 h1:Q/sSnsKerHeCkc/jSTNq1oCm7KiVgUMZRDUoRu0JQZQ=
|
||||
github.com/dlclark/regexp2 v1.11.5/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
|
||||
github.com/docopt/docopt-go v0.0.0-20180111231733-ee0de3bc6815/go.mod h1:WwZ+bS3ebgob9U8Nd0kOddGdZWjyMGR8Wziv+TBNwSE=
|
||||
github.com/dop251/goja v0.0.0-20240919115326-6c7d1df7ff05 h1:oK4+QcKsczZjHYTHD0JAdkvq5w74JEkG95J0XNBx/BI=
|
||||
github.com/dop251/goja v0.0.0-20240919115326-6c7d1df7ff05/go.mod h1:MxLav0peU43GgvwVgNbLAj1s/bSGboKkhuULvq/7hx4=
|
||||
github.com/dop251/goja v0.0.0-20260106131823-651366fbe6e3 h1:bVp3yUzvSAJzu9GqID+Z96P+eu5TKnIMJSV4QaZMauM=
|
||||
github.com/dop251/goja v0.0.0-20260106131823-651366fbe6e3/go.mod h1:MxLav0peU43GgvwVgNbLAj1s/bSGboKkhuULvq/7hx4=
|
||||
github.com/dop251/goja_nodejs v0.0.0-20240728170619-29b559befffc h1:MKYt39yZJi0Z9xEeRmDX2L4ocE0ETKcHKw6MVL3R+co=
|
||||
@@ -286,8 +282,6 @@ github.com/google/pprof v0.0.0-20181206194817-3ea8567a2e57/go.mod h1:zfwlbNMJ+OI
|
||||
github.com/google/pprof v0.0.0-20190515194954-54271f7e092f/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc=
|
||||
github.com/google/pprof v0.0.0-20200212024743-f11f1df84d12/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM=
|
||||
github.com/google/pprof v0.0.0-20240227163752-401108e1b7e7/go.mod h1:czg5+yv1E0ZGTi6S6vVK1mke0fV+FaUhNGcd6VRS9Ik=
|
||||
github.com/google/pprof v0.0.0-20260106004452-d7df1bf2cac7 h1:kmPAX+IJBcUAFTddx2+xC0H7sk2U9ijIIxZLLrPLNng=
|
||||
github.com/google/pprof v0.0.0-20260106004452-d7df1bf2cac7/go.mod h1:67FPmZWbr+KDT/VlpWtw6sO9XSjpJmLuHpoLmWiTGgY=
|
||||
github.com/google/pprof v0.0.0-20260115054156-294ebfa9ad83 h1:z2ogiKUYzX5Is6zr/vP9vJGqPwcdqsWjOt+V8J7+bTc=
|
||||
github.com/google/pprof v0.0.0-20260115054156-294ebfa9ad83/go.mod h1:MxpfABSjhmINe3F1It9d+8exIHFvUqtLIRCdOGNXqiI=
|
||||
github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI=
|
||||
@@ -741,8 +735,6 @@ golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
|
||||
golang.org/x/text v0.8.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
|
||||
golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
|
||||
golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
|
||||
golang.org/x/text v0.32.0 h1:ZD01bjUt1FQ9WJ0ClOL5vxgxOI/sVCNgX1YtKwcY0mU=
|
||||
golang.org/x/text v0.32.0/go.mod h1:o/rUWzghvpD5TXrTIBuJU77MTaN0ljMWE47kxGJQ7jY=
|
||||
golang.org/x/text v0.33.0 h1:B3njUFyqtHDUI5jMn1YIr5B0IE2U0qck04r6d4KPAxE=
|
||||
golang.org/x/text v0.33.0/go.mod h1:LuMebE6+rBincTi9+xWTY8TztLzKHc/9C1uBCG27+q8=
|
||||
golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
|
||||
|
||||
@@ -12,6 +12,7 @@ import (
|
||||
"sync/atomic"
|
||||
|
||||
"github.com/mholt/archives"
|
||||
"golang.org/x/text/encoding/simplifiedchinese"
|
||||
)
|
||||
|
||||
// supportedArchiveExtensions contains file extensions supported by mholt/archives library
|
||||
@@ -70,6 +71,16 @@ type ArchivePartInfo struct {
|
||||
// ExtractProgressCallback is called to report extraction progress
|
||||
type ExtractProgressCallback func(extractedFiles int, totalFiles int, progress int)
|
||||
|
||||
// newZipFormat creates a Zip format with proper character encoding support.
|
||||
// It uses GB18030 encoding to handle Chinese characters in filenames that may
|
||||
// be encoded with legacy GBK/GB18030 instead of UTF-8.
|
||||
func newZipFormat() archives.Zip {
|
||||
return archives.Zip{
|
||||
// GB18030 is a superset of GBK and handles Chinese characters correctly
|
||||
TextEncoding: simplifiedchinese.GB18030,
|
||||
}
|
||||
}
|
||||
|
||||
// isArchiveFile checks if a file is a supported archive format
|
||||
func isArchiveFile(filename string) bool {
|
||||
lowerName := strings.ToLower(filename)
|
||||
@@ -109,7 +120,8 @@ func openArchive(archivePath string, password string) (*archiveInfo, error) {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Handle password-protected archives
|
||||
// Configure format-specific settings
|
||||
// Handle password-protected archives and character encoding
|
||||
if password != "" {
|
||||
if rar, ok := format.(archives.Rar); ok {
|
||||
rar.Password = password
|
||||
@@ -121,6 +133,12 @@ func openArchive(archivePath string, password string) (*archiveInfo, error) {
|
||||
}
|
||||
}
|
||||
|
||||
// For ZIP files, configure character encoding to handle non-UTF8 filenames
|
||||
// This is essential for Chinese characters encoded in GBK/GB18030
|
||||
if _, ok := format.(archives.Zip); ok {
|
||||
format = newZipFormat()
|
||||
}
|
||||
|
||||
return &archiveInfo{
|
||||
file: file,
|
||||
stat: stat,
|
||||
|
||||
@@ -10,6 +10,8 @@ import (
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"golang.org/x/text/encoding/simplifiedchinese"
|
||||
)
|
||||
|
||||
func TestIsArchiveFile(t *testing.T) {
|
||||
@@ -243,6 +245,69 @@ func createTestZipWithMultipleFiles(path string, numFiles int) error {
|
||||
return w.Close()
|
||||
}
|
||||
|
||||
// createTestZipWithChineseFilenames creates a test ZIP file with Chinese filenames encoded in GBK
|
||||
func createTestZipWithChineseFilenames(path string) error {
|
||||
zipFile, err := os.Create(path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer zipFile.Close()
|
||||
|
||||
w := zip.NewWriter(zipFile)
|
||||
|
||||
// Encode Chinese filenames in GBK (as some legacy Windows applications do)
|
||||
encoder := simplifiedchinese.GBK.NewEncoder()
|
||||
|
||||
// Add a file with Chinese filename
|
||||
chineseFilename := "测试文件.txt"
|
||||
gbkFilename, err := encoder.String(chineseFilename)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Create a FileHeader and manually set the Name with GBK encoding
|
||||
// We need to mark it as non-UTF8 by not setting the UTF-8 flag
|
||||
header := &zip.FileHeader{
|
||||
Name: gbkFilename,
|
||||
Method: zip.Deflate,
|
||||
}
|
||||
// Clear the UTF-8 bit (bit 11) to indicate non-UTF8 encoding
|
||||
header.Flags = 0
|
||||
|
||||
f, err := w.CreateHeader(header)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
_, err = f.Write([]byte("这是测试内容"))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Add a file in a subdirectory with Chinese name
|
||||
chineseDirAndFile := "文件夹/中文内容.txt"
|
||||
gbkDirAndFile, err := encoder.String(chineseDirAndFile)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
header2 := &zip.FileHeader{
|
||||
Name: gbkDirAndFile,
|
||||
Method: zip.Deflate,
|
||||
}
|
||||
header2.Flags = 0
|
||||
|
||||
f2, err := w.CreateHeader(header2)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
_, err = f2.Write([]byte("中文子文件内容"))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return w.Close()
|
||||
}
|
||||
|
||||
func TestOpenArchive_NonExistentFile(t *testing.T) {
|
||||
_, err := openArchive("/nonexistent/path/file.zip", "")
|
||||
if err == nil {
|
||||
@@ -2382,3 +2447,47 @@ func TestExtractZipMultiPart_Progress(t *testing.T) {
|
||||
t.Error("Expected progress callbacks")
|
||||
}
|
||||
}
|
||||
|
||||
// Test extracting ZIP files with Chinese filenames encoded in GBK/GB18030
|
||||
func TestExtractArchive_ChineseFilenames(t *testing.T) {
|
||||
tempDir, err := os.MkdirTemp("", "extract_chinese_test")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer os.RemoveAll(tempDir)
|
||||
|
||||
// Create a test ZIP file with Chinese filenames encoded in GBK
|
||||
zipPath := filepath.Join(tempDir, "chinese.zip")
|
||||
destDir := filepath.Join(tempDir, "extracted")
|
||||
|
||||
if err := createTestZipWithChineseFilenames(zipPath); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// Extract the archive
|
||||
err = extractArchive(zipPath, destDir, "", nil)
|
||||
if err != nil {
|
||||
t.Fatalf("extractArchive failed: %v", err)
|
||||
}
|
||||
|
||||
// Verify the extracted files with proper Chinese filenames
|
||||
expectedFiles := []string{
|
||||
filepath.Join(destDir, "测试文件.txt"),
|
||||
filepath.Join(destDir, "文件夹", "中文内容.txt"),
|
||||
}
|
||||
|
||||
for _, path := range expectedFiles {
|
||||
if _, err := os.Stat(path); os.IsNotExist(err) {
|
||||
t.Errorf("expected file %q not found after extraction", path)
|
||||
}
|
||||
}
|
||||
|
||||
// Verify content of the Chinese file
|
||||
content, err := os.ReadFile(filepath.Join(destDir, "测试文件.txt"))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if string(content) != "这是测试内容" {
|
||||
t.Errorf("unexpected content: %q", string(content))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -34,7 +34,7 @@ func extractZipMultiPart(firstPartPath string, destDir string, password string,
|
||||
|
||||
// First pass: count files for progress
|
||||
totalFiles := 0
|
||||
zip := archives.Zip{}
|
||||
zip := newZipFormat()
|
||||
err = zip.Extract(context.Background(), io.NewSectionReader(multiReader, 0, totalSize), func(ctx context.Context, fileInfo archives.FileInfo) error {
|
||||
if !fileInfo.IsDir() {
|
||||
totalFiles++
|
||||
|
||||
Reference in New Issue
Block a user