Slug: Use urlencoding to support non-ASCII characters (#70691)

This commit is contained in:
Emil Tullstedt
2023-07-10 21:05:39 +02:00
committed by GitHub
parent d358bce562
commit 5c19272065
4 changed files with 58 additions and 35 deletions

View File

@ -32,7 +32,7 @@ package slugify
import (
"bytes"
"encoding/base64"
"fmt"
"strings"
"unicode/utf8"
@ -42,20 +42,18 @@ import (
// simpleSlugger is the package-level slugger that the Slugify function
// delegates to. It is configured with validCharacter as the pass-through
// predicate, '-' as replaceCharacter, and the default replacement and
// omission tables.
var simpleSlugger = &slugger{
	isValidCharacter: validCharacter,
	replaceCharacter: '-',
	replacementMap:   getDefaultReplacements(),
	omitMap:          getDefaultOmitments(),
}
// Slugify creates a URL safe latin slug for a given value
// Slugify creates a URL safe version from a given string that is at most 50 bytes long.
//
// NOTE(review): this span looks like a rendered diff with removed and added
// lines interleaved: `s` is declared twice with `:=` and the braces do not
// balance, so it cannot compile as shown. Confirm against the repository which
// lines belong to the current revision before editing the logic.
func Slugify(value string) string {
// presumably the pre-change body: slug the raw value without trimming — TODO confirm
s := simpleSlugger.Slugify(value)
if s == "" {
// no slug could be built: fall back to the unpadded base64url form of the raw value
s = base64.RawURLEncoding.EncodeToString([]byte(value))
if len(s) > 50 || s == "" {
// still empty or over 50 bytes: use a SHA-1 name-based UUID of the raw value
// (the uuid package import is outside this view)
s = uuid.NewSHA1(uuid.NameSpaceOID, []byte(value)).String()
}
// presumably the post-change body: surrounding whitespace is trimmed before slugging — TODO confirm
s := simpleSlugger.Slugify(strings.TrimSpace(value))
if len(s) > 50 || s == "" {
// empty or over 50 bytes: use a SHA-1 name-based UUID of the raw (untrimmed) value
s = uuid.NewSHA1(uuid.NameSpaceOID, []byte(value)).String()
}
return s
}
@ -66,21 +64,23 @@ func validCharacter(c rune) bool {
if c >= '0' && c <= '9' {
return true
}
if c == '_' || c == '-' {
return true
}
return false
}
// slugger is a slugifier based on settings: it holds the per-rune rules that
// its Slugify method applies.
type slugger struct {
	// isValidCharacter reports whether a rune is copied to the slug unchanged.
	isValidCharacter func(c rune) bool
	// replaceCharacter is not read by Slugify; it is retained so that slugger
	// literals which still set it keep compiling.
	replaceCharacter rune
	// replacementMap maps a rune to the text written in its place.
	replacementMap map[rune]string
	// omitMap is the set of runes that are dropped from the slug.
	omitMap map[rune]struct{}
}

// Slugify creates a slug for a string.
//
// The value is lower-cased and then handled rune by rune; the first matching
// rule wins:
//
//  1. a rune found in replacementMap is replaced by its mapped text;
//  2. a rune accepted by isValidCharacter is copied unchanged;
//  3. a rune found in omitMap is dropped;
//  4. any other rune is written as its UTF-8 bytes, each percent-encoded as
//     '%' followed by two lowercase hex digits.
//
// isValidCharacter must be non-nil; nil maps are treated as empty.
func (s slugger) Slugify(value string) string {
	value = strings.ToLower(value)

	var (
		buffer bytes.Buffer
		// One scratch array for the whole call instead of a fresh slice per rune.
		scratch [utf8.UTFMax]byte
	)

	for len(value) > 0 {
		c, size := utf8.DecodeRuneInString(value)
		// Consume the decoded rune; without this the loop never terminates.
		value = value[size:]

		if replacement, ok := s.replacementMap[c]; ok {
			buffer.WriteString(replacement)
			continue
		}

		if s.isValidCharacter(c) {
			buffer.WriteRune(c)
			continue
		}

		if _, ok := s.omitMap[c]; ok {
			continue
		}

		n := utf8.EncodeRune(scratch[:], c)
		for _, b := range scratch[:n] {
			// %02x, not %x: bytes below 0x10 (e.g. tab) must still produce a
			// two-digit triplet such as "%09" to be valid percent-encoding.
			fmt.Fprintf(&buffer, "%%%02x", b)
		}
	}

	return buffer.String()
}
// getDefaultOmitments returns a freshly allocated set of the runes used as the
// default omitMap: comma, double and single quote, line feed, carriage
// return, NUL, question mark, full stop and both parentheses.
func getDefaultOmitments() map[rune]struct{} {
	omitted := []rune{',', '"', '\'', '\n', '\r', '\x00', '?', '.', '(', ')'}

	set := make(map[rune]struct{}, len(omitted))
	for _, r := range omitted {
		set[r] = struct{}{}
	}
	return set
}
func getDefaultReplacements() map[rune]string {
return map[rune]string{
' ': "-",
'&': "and",
'@': "at",
'©': "c",

View File

@ -7,12 +7,12 @@ import (
func TestSlugify(t *testing.T) {
results := make(map[string]string)
results["hello-playground"] = "Hello, playground"
results["hello-it-s-paradise"] = "😢 😣 😤 😥 😦 😧 😨 😩 😪 😫 😬 Hello, it's paradise"
results["00a4bc92-3695-5702-9ddf-6719fdf11567"] = "😢 😣 😤 😥 😦 😧 😨 😩 😪 😫 😬 Hello, it's paradise"
results["61db60b5-f1e7-5853-9b81-0f074fc268ea"] = "😢 😣 😤 😥 😦 😧 😨 😩 😪 😫 😬"
results["8J-YoiAt"] = "😢 -"
results["a"] = "?,a . \n "
results["%f0%9f%98%a2--"] = "😢 -"
results["a-"] = "?,a . \n "
results["0a68eb57-c88a-5f34-9e9d-27f85e68af4f"] = "" // empty input has a slug!
results["hi-this-is-a-test"] = "方向盤後面 hi this is a test خلف المقو"
results["3cbb528a-0ebf-54ad-bed2-2a188cd1824e"] = "方向盤後面 hi this is a test خلف المقو"
results["cong-hoa-xa-hoi-chu-nghia-viet-nam"] = "Cộng hòa xã hội chủ nghĩa Việt Nam"
results["noi-nang-canh-canh-ben-long-bieng-khuay"] = "Nỗi nàng canh cánh bên lòng biếng khuây" // This line in a poem called Truyen Kieu
@ -39,16 +39,16 @@ func BenchmarkSlugifyLongString(b *testing.B) {
😢 😣 😤 😥 😦 😧 😨 😩 😪 😫 😬 Hello, it's paradise
😢 😣 😤 😥 😦 😧 😨 😩 😪 😫 😬 Hello, it's paradise
Lorem ipsum dolor sit amet, consectetur adipiscing elit.
Aliquam sapien nisl, laoreet quis vestibulum ut, cursus
in turpis. Sed magna mi, blandit id nisi vel, imperdiet
mollis turpis. Fusce vel fringilla mauris. Donec cursus
rhoncus bibendum. Aliquam erat volutpat. Maecenas
faucibus turpis ex, quis lacinia ligula ultrices non.
Sed gravida justo augue. Nulla bibendum dignissim tellus
vitae lobortis. Suspendisse fermentum vel purus in pulvinar.
Vivamus eu fermentum purus, sit amet tempor orci.
Praesent congue convallis turpis, ac ullamcorper lorem
semper id.
Aliquam sapien nisl, laoreet quis vestibulum ut, cursus
in turpis. Sed magna mi, blandit id nisi vel, imperdiet
mollis turpis. Fusce vel fringilla mauris. Donec cursus
rhoncus bibendum. Aliquam erat volutpat. Maecenas
faucibus turpis ex, quis lacinia ligula ultrices non.
Sed gravida justo augue. Nulla bibendum dignissim tellus
vitae lobortis. Suspendisse fermentum vel purus in pulvinar.
Vivamus eu fermentum purus, sit amet tempor orci.
Praesent congue convallis turpis, ac ullamcorper lorem
semper id.
`)
}
}