mirror of
https://github.com/navidrome/navidrome.git
synced 2025-04-24 07:30:57 +03:00
Better support for Chinese metadata; reduce duplication caused by Traditional/Simplified Chinese variants
This commit is contained in:
parent
15a3d2ca66
commit
217678af2d
4
go.mod
4
go.mod
@ -35,6 +35,7 @@ require (
|
||||
github.com/kardianos/service v1.2.2
|
||||
github.com/kr/pretty v0.3.1
|
||||
github.com/lestrrat-go/jwx/v2 v2.1.3
|
||||
github.com/liuzl/gocc v0.0.0-20231231122217-0372e1059ca5
|
||||
github.com/matoous/go-nanoid/v2 v2.1.0
|
||||
github.com/mattn/go-sqlite3 v1.14.24
|
||||
github.com/microcosm-cc/bluemonday v1.0.27
|
||||
@ -66,6 +67,7 @@ require (
|
||||
)
|
||||
|
||||
require (
|
||||
github.com/adamzy/cedar-go v0.0.0-20170805034717-80a9c64b256d // indirect
|
||||
github.com/aymerick/douceur v0.2.0 // indirect
|
||||
github.com/beorn7/perks v1.0.1 // indirect
|
||||
github.com/cespare/xxhash/v2 v2.3.0 // indirect
|
||||
@ -90,6 +92,8 @@ require (
|
||||
github.com/lestrrat-go/httprc v1.0.6 // indirect
|
||||
github.com/lestrrat-go/iter v1.0.2 // indirect
|
||||
github.com/lestrrat-go/option v1.0.1 // indirect
|
||||
github.com/liuzl/cedar-go v0.0.0-20170805034717-80a9c64b256d // indirect
|
||||
github.com/liuzl/da v0.0.0-20180704015230-14771aad5b1d // indirect
|
||||
github.com/magiconair/properties v1.8.9 // indirect
|
||||
github.com/mfridman/interpolate v0.0.2 // indirect
|
||||
github.com/mitchellh/mapstructure v1.5.0 // indirect
|
||||
|
8
go.sum
8
go.sum
@ -4,6 +4,8 @@ github.com/Masterminds/squirrel v1.5.4 h1:uUcX/aBc8O7Fg9kaISIUsHXdKuqehiXAMQTYX8
|
||||
github.com/Masterminds/squirrel v1.5.4/go.mod h1:NNaOrjSoIDfDA40n7sr2tPNZRfjzjA400rg+riTZj10=
|
||||
github.com/RaveNoX/go-jsoncommentstrip v1.0.0 h1:t527LHHE3HmiHrq74QMpNPZpGCIJzTx+apLkMKt4HC0=
|
||||
github.com/RaveNoX/go-jsoncommentstrip v1.0.0/go.mod h1:78ihd09MekBnJnxpICcwzCMzGrKSKYe4AqU6PDYYpjk=
|
||||
github.com/adamzy/cedar-go v0.0.0-20170805034717-80a9c64b256d h1:ir/IFJU5xbja5UaBEQLjcvn7aAU01nqU/NUyOBEU+ew=
|
||||
github.com/adamzy/cedar-go v0.0.0-20170805034717-80a9c64b256d/go.mod h1:PRWNwWq0yifz6XDPZu48aSld8BWwBfr2JKB2bGWiEd4=
|
||||
github.com/andybalholm/cascadia v1.3.3 h1:AG2YHrzJIm4BZ19iwJ/DAua6Btl3IwJX+VI4kktS1LM=
|
||||
github.com/andybalholm/cascadia v1.3.3/go.mod h1:xNd9bqTn98Ln4DwST8/nG+H0yuB8Hmgu1YHNnWw0GeA=
|
||||
github.com/aymerick/douceur v0.2.0 h1:Mv+mAeH1Q+n9Fr+oyamOlAkUNPWPlA8PPGR0QAaYuPk=
|
||||
@ -129,6 +131,12 @@ github.com/lestrrat-go/jwx/v2 v2.1.3 h1:Ud4lb2QuxRClYAmRleF50KrbKIoM1TddXgBrneT5
|
||||
github.com/lestrrat-go/jwx/v2 v2.1.3/go.mod h1:q6uFgbgZfEmQrfJfrCo90QcQOcXFMfbI/fO0NqRtvZo=
|
||||
github.com/lestrrat-go/option v1.0.1 h1:oAzP2fvZGQKWkvHa1/SAcFolBEca1oN+mQ7eooNBEYU=
|
||||
github.com/lestrrat-go/option v1.0.1/go.mod h1:5ZHFbivi4xwXxhxY9XHDe2FHo6/Z7WWmtT7T5nBBp3I=
|
||||
github.com/liuzl/cedar-go v0.0.0-20170805034717-80a9c64b256d h1:qSmEGTgjkESUX5kPMSGJ4pcBUtYVDdkNzMrjQyvRvp0=
|
||||
github.com/liuzl/cedar-go v0.0.0-20170805034717-80a9c64b256d/go.mod h1:x7SghIWwLVcJObXbjK7S2ENsT1cAcdJcPl7dRaSFog0=
|
||||
github.com/liuzl/da v0.0.0-20180704015230-14771aad5b1d h1:hTRDIpJ1FjS9ULJuEzu69n3qTgc18eI+ztw/pJv47hs=
|
||||
github.com/liuzl/da v0.0.0-20180704015230-14771aad5b1d/go.mod h1:7xD3p0XnHvJFQ3t/stEJd877CSIMkH/fACVWen5pYnc=
|
||||
github.com/liuzl/gocc v0.0.0-20231231122217-0372e1059ca5 h1:wnbHIeP1UX8ClYEWKGnw66PfYvReCHu9G5lXSte3Sqc=
|
||||
github.com/liuzl/gocc v0.0.0-20231231122217-0372e1059ca5/go.mod h1:7KaV9YIR92M1FpbczAcfYQ3UZ5ayT27pNtunDmXvLBo=
|
||||
github.com/magiconair/properties v1.8.9 h1:nWcCbLq1N2v/cpNsy5WvQ37Fb+YElfq20WJ/a8RkpQM=
|
||||
github.com/magiconair/properties v1.8.9/go.mod h1:Dhd985XPs7jluiymwWYZ0G4Z61jb3vdS329zhj2hYo0=
|
||||
github.com/matoous/go-nanoid/v2 v2.1.0 h1:P64+dmq21hhWdtvZfEAofnvJULaRR1Yib0+PnU669bE=
|
||||
|
@ -15,6 +15,7 @@ import (
|
||||
"github.com/navidrome/navidrome/log"
|
||||
"github.com/navidrome/navidrome/model"
|
||||
"github.com/navidrome/navidrome/utils/slice"
|
||||
"github.com/navidrome/navidrome/utils/str"
|
||||
)
|
||||
|
||||
type Info struct {
|
||||
@ -368,6 +369,9 @@ func sanitize(filePath string, tagName model.TagName, tag model.TagConf, value s
|
||||
log.Trace("Invalid UUID tag value", "tag", tagName, "value", value)
|
||||
return ""
|
||||
}
|
||||
default:
|
||||
// normalize the tag value to reduce duplication
|
||||
value = str.NormalizeText(value)
|
||||
}
|
||||
return value
|
||||
}
|
||||
|
@ -15,12 +15,6 @@ func formatFullText(text ...string) string {
|
||||
}
|
||||
|
||||
func (r sqlRepository) doSearch(sq SelectBuilder, q string, offset, size int, includeMissing bool, results any, orderBys ...string) error {
|
||||
q = strings.TrimSpace(q)
|
||||
q = strings.TrimSuffix(q, "*")
|
||||
if len(q) < 2 {
|
||||
return nil
|
||||
}
|
||||
|
||||
//sq := r.newSelect().Columns(r.tableName + ".*")
|
||||
//sq = r.withAnnotation(sq, r.tableName+".id")
|
||||
//sq = r.withBookmark(sq, r.tableName+".id")
|
||||
@ -41,7 +35,7 @@ func (r sqlRepository) doSearch(sq SelectBuilder, q string, offset, size int, in
|
||||
}
|
||||
|
||||
func fullTextExpr(tableName string, s string) Sqlizer {
|
||||
q := str.SanitizeStrings(s)
|
||||
q := str.NormalizeText(str.SanitizeStrings(s))
|
||||
if q == "" {
|
||||
return nil
|
||||
}
|
||||
|
@ -5,7 +5,7 @@ import (
|
||||
. "github.com/onsi/gomega"
|
||||
)
|
||||
|
||||
var _ = Describe("sqlRepository", func() {
|
||||
var _ = Describe("Search", func() {
|
||||
Describe("formatFullText", func() {
|
||||
It("prefixes with a space", func() {
|
||||
Expect(formatFullText("legiao urbana")).To(Equal(" legiao urbana"))
|
||||
|
@ -2,8 +2,22 @@ package str
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"unicode"
|
||||
|
||||
"github.com/liuzl/gocc"
|
||||
"golang.org/x/text/unicode/norm"
|
||||
)
|
||||
|
||||
func init() {
|
||||
var err error
|
||||
opencc, err = gocc.New("t2s")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
|
||||
var opencc *gocc.OpenCC
|
||||
|
||||
var utf8ToAscii = func() *strings.Replacer {
|
||||
var utf8Map = map[string]string{
|
||||
"'": `‘’‛′`,
|
||||
@ -39,3 +53,32 @@ func LongestCommonPrefix(list []string) string {
|
||||
}
|
||||
return list[0]
|
||||
}
|
||||
|
||||
// NormalizeText performs normalization on the given text
|
||||
// This includes
|
||||
// - converts input to Unicode NFC
|
||||
// - converts all Chinese character to simplified
|
||||
func NormalizeText(s string) string {
|
||||
transformFuncs := []func(s string) string{
|
||||
norm.NFC.String,
|
||||
ToSimplifiedChinese,
|
||||
}
|
||||
|
||||
for _, f := range transformFuncs {
|
||||
s = f(s)
|
||||
}
|
||||
|
||||
return s
|
||||
}
|
||||
|
||||
// ToSimplifiedChinese converts the given string from Traditional Chinese to Simplified
|
||||
// Original string is returned if it contains no Chinese character
|
||||
func ToSimplifiedChinese(s string) string {
|
||||
for _, r := range s {
|
||||
if unicode.Is(unicode.Han, r) {
|
||||
s, _ = opencc.Convert(s)
|
||||
break
|
||||
}
|
||||
}
|
||||
return s
|
||||
}
|
||||
|
@ -31,6 +31,26 @@ var _ = Describe("String Utils", func() {
|
||||
Expect(str.LongestCommonPrefix(albums)).To(Equal("/artist/album"))
|
||||
})
|
||||
})
|
||||
|
||||
Describe("NormalizeText", func() {
|
||||
It("traditional Chinese should be replaced with simplified", func() {
|
||||
Expect(str.NormalizeText("週傑倫")).To(Equal("周杰伦"))
|
||||
})
|
||||
It("simplified Chinese should be unchanged", func() {
|
||||
Expect(str.NormalizeText("简体")).To(Equal("简体"))
|
||||
})
|
||||
It("alphabet should be unchanged", func() {
|
||||
for _, letter := range letters {
|
||||
Expect(str.NormalizeText(letter)).To(Equal(letter))
|
||||
}
|
||||
})
|
||||
It("Japanese should be unchanged", func() {
|
||||
Expect(str.NormalizeText("にっぽんご")).To(Equal("にっぽんご"))
|
||||
})
|
||||
It("unicode normalization", func() {
|
||||
Expect(str.NormalizeText("e\u0301")).To(Equal("\u00e9"))
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
var testPaths = []string{
|
||||
@ -146,3 +166,5 @@ var testPaths = []string{
|
||||
"/Music/iTunes 1/iTunes Media/Music/War/Why Can't We Be Friends/Low Rider.m4a",
|
||||
"/Music/iTunes 1/iTunes Media/Music/Yes/Fragile/01 Roundabout.m4a",
|
||||
}
|
||||
|
||||
// letters holds the lowercase ASCII letters "a" through "z", one per element.
// The "alphabet should be unchanged" spec feeds each one through NormalizeText.
var letters = []string{"a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z"}
|
||||
|
Loading…
x
Reference in New Issue
Block a user