fix: gbk filename encoding in ZIP extraction (#1260)

This commit is contained in:
Copilot
2026-01-29 22:37:12 +08:00
committed by GitHub
parent 93088b22f1
commit 4c90b38247
4 changed files with 129 additions and 10 deletions

8
go.sum
View File

@@ -164,13 +164,9 @@ github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ3
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/dlclark/regexp2 v1.11.4 h1:rPYF9/LECdNymJufQKmri9gV604RvvABwgOA8un7yAo=
github.com/dlclark/regexp2 v1.11.4/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
github.com/dlclark/regexp2 v1.11.5 h1:Q/sSnsKerHeCkc/jSTNq1oCm7KiVgUMZRDUoRu0JQZQ=
github.com/dlclark/regexp2 v1.11.5/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
github.com/docopt/docopt-go v0.0.0-20180111231733-ee0de3bc6815/go.mod h1:WwZ+bS3ebgob9U8Nd0kOddGdZWjyMGR8Wziv+TBNwSE=
github.com/dop251/goja v0.0.0-20240919115326-6c7d1df7ff05 h1:oK4+QcKsczZjHYTHD0JAdkvq5w74JEkG95J0XNBx/BI=
github.com/dop251/goja v0.0.0-20240919115326-6c7d1df7ff05/go.mod h1:MxLav0peU43GgvwVgNbLAj1s/bSGboKkhuULvq/7hx4=
github.com/dop251/goja v0.0.0-20260106131823-651366fbe6e3 h1:bVp3yUzvSAJzu9GqID+Z96P+eu5TKnIMJSV4QaZMauM=
github.com/dop251/goja v0.0.0-20260106131823-651366fbe6e3/go.mod h1:MxLav0peU43GgvwVgNbLAj1s/bSGboKkhuULvq/7hx4=
github.com/dop251/goja_nodejs v0.0.0-20240728170619-29b559befffc h1:MKYt39yZJi0Z9xEeRmDX2L4ocE0ETKcHKw6MVL3R+co=
@@ -286,8 +282,6 @@ github.com/google/pprof v0.0.0-20181206194817-3ea8567a2e57/go.mod h1:zfwlbNMJ+OI
github.com/google/pprof v0.0.0-20190515194954-54271f7e092f/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc=
github.com/google/pprof v0.0.0-20200212024743-f11f1df84d12/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM=
github.com/google/pprof v0.0.0-20240227163752-401108e1b7e7/go.mod h1:czg5+yv1E0ZGTi6S6vVK1mke0fV+FaUhNGcd6VRS9Ik=
github.com/google/pprof v0.0.0-20260106004452-d7df1bf2cac7 h1:kmPAX+IJBcUAFTddx2+xC0H7sk2U9ijIIxZLLrPLNng=
github.com/google/pprof v0.0.0-20260106004452-d7df1bf2cac7/go.mod h1:67FPmZWbr+KDT/VlpWtw6sO9XSjpJmLuHpoLmWiTGgY=
github.com/google/pprof v0.0.0-20260115054156-294ebfa9ad83 h1:z2ogiKUYzX5Is6zr/vP9vJGqPwcdqsWjOt+V8J7+bTc=
github.com/google/pprof v0.0.0-20260115054156-294ebfa9ad83/go.mod h1:MxpfABSjhmINe3F1It9d+8exIHFvUqtLIRCdOGNXqiI=
github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI=
@@ -741,8 +735,6 @@ golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
golang.org/x/text v0.8.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
golang.org/x/text v0.32.0 h1:ZD01bjUt1FQ9WJ0ClOL5vxgxOI/sVCNgX1YtKwcY0mU=
golang.org/x/text v0.32.0/go.mod h1:o/rUWzghvpD5TXrTIBuJU77MTaN0ljMWE47kxGJQ7jY=
golang.org/x/text v0.33.0 h1:B3njUFyqtHDUI5jMn1YIr5B0IE2U0qck04r6d4KPAxE=
golang.org/x/text v0.33.0/go.mod h1:LuMebE6+rBincTi9+xWTY8TztLzKHc/9C1uBCG27+q8=
golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=

View File

@@ -12,6 +12,7 @@ import (
"sync/atomic"
"github.com/mholt/archives"
"golang.org/x/text/encoding/simplifiedchinese"
)
// supportedArchiveExtensions contains file extensions supported by mholt/archives library
@@ -70,6 +71,16 @@ type ArchivePartInfo struct {
// ExtractProgressCallback is called to report extraction progress
type ExtractProgressCallback func(extractedFiles int, totalFiles int, progress int)
// newZipFormat returns an archives.Zip value configured for legacy filename
// encodings. Its TextEncoding is set to GB18030 so that Chinese filenames
// stored as GBK/GB18030 bytes rather than UTF-8 can be decoded.
func newZipFormat() archives.Zip {
	var zipFormat archives.Zip
	// GB18030 is a superset of GBK, so a single decoder covers both.
	zipFormat.TextEncoding = simplifiedchinese.GB18030
	return zipFormat
}
// isArchiveFile checks if a file is a supported archive format
func isArchiveFile(filename string) bool {
lowerName := strings.ToLower(filename)
@@ -109,7 +120,8 @@ func openArchive(archivePath string, password string) (*archiveInfo, error) {
return nil, err
}
// Handle password-protected archives
// Configure format-specific settings
// Handle password-protected archives and character encoding
if password != "" {
if rar, ok := format.(archives.Rar); ok {
rar.Password = password
@@ -121,6 +133,12 @@ func openArchive(archivePath string, password string) (*archiveInfo, error) {
}
}
// For ZIP files, configure character encoding to handle non-UTF8 filenames
// This is essential for Chinese characters encoded in GBK/GB18030
if _, ok := format.(archives.Zip); ok {
format = newZipFormat()
}
return &archiveInfo{
file: file,
stat: stat,

View File

@@ -10,6 +10,8 @@ import (
"path/filepath"
"strings"
"testing"
"golang.org/x/text/encoding/simplifiedchinese"
)
func TestIsArchiveFile(t *testing.T) {
@@ -243,6 +245,69 @@ func createTestZipWithMultipleFiles(path string, numFiles int) error {
return w.Close()
}
// createTestZipWithChineseFilenames writes a ZIP archive to path whose entry
// names are encoded in GBK instead of UTF-8 (as some legacy Windows
// applications do).
//
// The archive contains two deflated entries:
//   - "测试文件.txt" with content "这是测试内容"
//   - "文件夹/中文内容.txt" with content "中文子文件内容"
//
// It returns the first error encountered while creating the file, encoding a
// name, writing an entry, or finalizing the archive.
func createTestZipWithChineseFilenames(path string) error {
	zipFile, err := os.Create(path)
	if err != nil {
		return err
	}
	defer zipFile.Close()

	w := zip.NewWriter(zipFile)

	// Only the entry names are GBK-encoded; the contents are written as the
	// UTF-8 bytes of the Go string literals.
	entries := []struct {
		name    string
		content string
	}{
		{name: "测试文件.txt", content: "这是测试内容"},
		{name: "文件夹/中文内容.txt", content: "中文子文件内容"},
	}

	encoder := simplifiedchinese.GBK.NewEncoder()
	for _, entry := range entries {
		gbkName, err := encoder.String(entry.name)
		if err != nil {
			return err
		}

		header := &zip.FileHeader{
			Name:   gbkName,
			Method: zip.Deflate,
			// NonUTF8 makes CreateHeader keep the UTF-8 flag (bit 11) cleared.
			// Zeroing Flags by hand is not enough: CreateHeader recomputes
			// that bit from the name bytes unless NonUTF8 is set.
			NonUTF8: true,
		}

		f, err := w.CreateHeader(header)
		if err != nil {
			return err
		}
		if _, err := f.Write([]byte(entry.content)); err != nil {
			return err
		}
	}

	return w.Close()
}
func TestOpenArchive_NonExistentFile(t *testing.T) {
_, err := openArchive("/nonexistent/path/file.zip", "")
if err == nil {
@@ -2382,3 +2447,47 @@ func TestExtractZipMultiPart_Progress(t *testing.T) {
t.Error("Expected progress callbacks")
}
}
// TestExtractArchive_ChineseFilenames verifies that extractArchive restores
// the original Chinese file and directory names, and the file contents, of a
// ZIP archive whose entry names are encoded in GBK.
func TestExtractArchive_ChineseFilenames(t *testing.T) {
	// t.TempDir is cleaned up automatically when the test finishes.
	tempDir := t.TempDir()
	zipPath := filepath.Join(tempDir, "chinese.zip")
	destDir := filepath.Join(tempDir, "extracted")

	// Create a test ZIP file with Chinese filenames encoded in GBK.
	if err := createTestZipWithChineseFilenames(zipPath); err != nil {
		t.Fatal(err)
	}

	// Extract the archive.
	if err := extractArchive(zipPath, destDir, "", nil); err != nil {
		t.Fatalf("extractArchive failed: %v", err)
	}

	// Each entry must exist under its decoded name and carry the content the
	// fixture wrote. Reading the file reports every failure mode (missing,
	// unreadable, wrong type), not just a not-exist error.
	expected := []struct {
		path    string
		content string
	}{
		{path: filepath.Join(destDir, "测试文件.txt"), content: "这是测试内容"},
		{path: filepath.Join(destDir, "文件夹", "中文内容.txt"), content: "中文子文件内容"},
	}

	for _, want := range expected {
		got, err := os.ReadFile(want.path)
		if err != nil {
			t.Errorf("expected file %q not readable after extraction: %v", want.path, err)
			continue
		}
		if string(got) != want.content {
			t.Errorf("unexpected content in %q: got %q, want %q", want.path, string(got), want.content)
		}
	}
}

View File

@@ -34,7 +34,7 @@ func extractZipMultiPart(firstPartPath string, destDir string, password string,
// First pass: count files for progress
totalFiles := 0
zip := archives.Zip{}
zip := newZipFormat()
err = zip.Extract(context.Background(), io.NewSectionReader(multiReader, 0, totalSize), func(ctx context.Context, fileInfo archives.FileInfo) error {
if !fileInfo.IsDir() {
totalFiles++