diff --git a/go.mod b/go.mod
index edd5006ec..371adcda7 100644
--- a/go.mod
+++ b/go.mod
@@ -35,6 +35,7 @@ require (
 	github.com/kardianos/service v1.2.2
 	github.com/kr/pretty v0.3.1
 	github.com/lestrrat-go/jwx/v2 v2.1.3
+	github.com/liuzl/gocc v0.0.0-20231231122217-0372e1059ca5
 	github.com/matoous/go-nanoid/v2 v2.1.0
 	github.com/mattn/go-sqlite3 v1.14.24
 	github.com/microcosm-cc/bluemonday v1.0.27
@@ -66,6 +67,7 @@ require (
 )
 
 require (
+	github.com/adamzy/cedar-go v0.0.0-20170805034717-80a9c64b256d // indirect
 	github.com/aymerick/douceur v0.2.0 // indirect
 	github.com/beorn7/perks v1.0.1 // indirect
 	github.com/cespare/xxhash/v2 v2.3.0 // indirect
@@ -90,6 +92,8 @@ require (
 	github.com/lestrrat-go/httprc v1.0.6 // indirect
 	github.com/lestrrat-go/iter v1.0.2 // indirect
 	github.com/lestrrat-go/option v1.0.1 // indirect
+	github.com/liuzl/cedar-go v0.0.0-20170805034717-80a9c64b256d // indirect
+	github.com/liuzl/da v0.0.0-20180704015230-14771aad5b1d // indirect
 	github.com/magiconair/properties v1.8.9 // indirect
 	github.com/mfridman/interpolate v0.0.2 // indirect
 	github.com/mitchellh/mapstructure v1.5.0 // indirect
diff --git a/go.sum b/go.sum
index 198379a28..6055046ce 100644
--- a/go.sum
+++ b/go.sum
@@ -4,6 +4,8 @@ github.com/Masterminds/squirrel v1.5.4 h1:uUcX/aBc8O7Fg9kaISIUsHXdKuqehiXAMQTYX8
 github.com/Masterminds/squirrel v1.5.4/go.mod h1:NNaOrjSoIDfDA40n7sr2tPNZRfjzjA400rg+riTZj10=
 github.com/RaveNoX/go-jsoncommentstrip v1.0.0 h1:t527LHHE3HmiHrq74QMpNPZpGCIJzTx+apLkMKt4HC0=
 github.com/RaveNoX/go-jsoncommentstrip v1.0.0/go.mod h1:78ihd09MekBnJnxpICcwzCMzGrKSKYe4AqU6PDYYpjk=
+github.com/adamzy/cedar-go v0.0.0-20170805034717-80a9c64b256d h1:ir/IFJU5xbja5UaBEQLjcvn7aAU01nqU/NUyOBEU+ew=
+github.com/adamzy/cedar-go v0.0.0-20170805034717-80a9c64b256d/go.mod h1:PRWNwWq0yifz6XDPZu48aSld8BWwBfr2JKB2bGWiEd4=
 github.com/andybalholm/cascadia v1.3.3 h1:AG2YHrzJIm4BZ19iwJ/DAua6Btl3IwJX+VI4kktS1LM=
 github.com/andybalholm/cascadia v1.3.3/go.mod h1:xNd9bqTn98Ln4DwST8/nG+H0yuB8Hmgu1YHNnWw0GeA=
 github.com/aymerick/douceur v0.2.0 h1:Mv+mAeH1Q+n9Fr+oyamOlAkUNPWPlA8PPGR0QAaYuPk=
@@ -129,6 +131,12 @@ github.com/lestrrat-go/jwx/v2 v2.1.3 h1:Ud4lb2QuxRClYAmRleF50KrbKIoM1TddXgBrneT5
 github.com/lestrrat-go/jwx/v2 v2.1.3/go.mod h1:q6uFgbgZfEmQrfJfrCo90QcQOcXFMfbI/fO0NqRtvZo=
 github.com/lestrrat-go/option v1.0.1 h1:oAzP2fvZGQKWkvHa1/SAcFolBEca1oN+mQ7eooNBEYU=
 github.com/lestrrat-go/option v1.0.1/go.mod h1:5ZHFbivi4xwXxhxY9XHDe2FHo6/Z7WWmtT7T5nBBp3I=
+github.com/liuzl/cedar-go v0.0.0-20170805034717-80a9c64b256d h1:qSmEGTgjkESUX5kPMSGJ4pcBUtYVDdkNzMrjQyvRvp0=
+github.com/liuzl/cedar-go v0.0.0-20170805034717-80a9c64b256d/go.mod h1:x7SghIWwLVcJObXbjK7S2ENsT1cAcdJcPl7dRaSFog0=
+github.com/liuzl/da v0.0.0-20180704015230-14771aad5b1d h1:hTRDIpJ1FjS9ULJuEzu69n3qTgc18eI+ztw/pJv47hs=
+github.com/liuzl/da v0.0.0-20180704015230-14771aad5b1d/go.mod h1:7xD3p0XnHvJFQ3t/stEJd877CSIMkH/fACVWen5pYnc=
+github.com/liuzl/gocc v0.0.0-20231231122217-0372e1059ca5 h1:wnbHIeP1UX8ClYEWKGnw66PfYvReCHu9G5lXSte3Sqc=
+github.com/liuzl/gocc v0.0.0-20231231122217-0372e1059ca5/go.mod h1:7KaV9YIR92M1FpbczAcfYQ3UZ5ayT27pNtunDmXvLBo=
 github.com/magiconair/properties v1.8.9 h1:nWcCbLq1N2v/cpNsy5WvQ37Fb+YElfq20WJ/a8RkpQM=
 github.com/magiconair/properties v1.8.9/go.mod h1:Dhd985XPs7jluiymwWYZ0G4Z61jb3vdS329zhj2hYo0=
 github.com/matoous/go-nanoid/v2 v2.1.0 h1:P64+dmq21hhWdtvZfEAofnvJULaRR1Yib0+PnU669bE=
diff --git a/model/metadata/metadata.go b/model/metadata/metadata.go
index 3d5d64dd1..1cbeb616c 100644
--- a/model/metadata/metadata.go
+++ b/model/metadata/metadata.go
@@ -15,6 +15,7 @@ import (
 	"github.com/navidrome/navidrome/log"
 	"github.com/navidrome/navidrome/model"
 	"github.com/navidrome/navidrome/utils/slice"
+	"github.com/navidrome/navidrome/utils/str"
 )
 
 type Info struct {
@@ -368,6 +369,9 @@ func sanitize(filePath string, tagName model.TagName, tag model.TagConf, value s
 			log.Trace("Invalid UUID tag value", "tag", tagName, "value", value)
 			return ""
 		}
+	default:
+		// normalize the tag value to reduce duplication
+		value = str.NormalizeText(value)
 	}
 	return value
 }
diff --git a/persistence/sql_search.go b/persistence/sql_search.go
index 9ac171263..e711a128a 100644
--- a/persistence/sql_search.go
+++ b/persistence/sql_search.go
@@ -15,12 +15,6 @@ func formatFullText(text ...string) string {
 }
 
 func (r sqlRepository) doSearch(sq SelectBuilder, q string, offset, size int, includeMissing bool, results any, orderBys ...string) error {
-	q = strings.TrimSpace(q)
-	q = strings.TrimSuffix(q, "*")
-	if len(q) < 2 {
-		return nil
-	}
-
 	//sq := r.newSelect().Columns(r.tableName + ".*")
 	//sq = r.withAnnotation(sq, r.tableName+".id")
 	//sq = r.withBookmark(sq, r.tableName+".id")
@@ -41,7 +35,7 @@ func (r sqlRepository) doSearch(sq SelectBuilder, q string, offset, size int, in
 }
 
 func fullTextExpr(tableName string, s string) Sqlizer {
-	q := str.SanitizeStrings(s)
+	q := str.NormalizeText(str.SanitizeStrings(s))
 	if q == "" {
 		return nil
 	}
diff --git a/persistence/sql_search_test.go b/persistence/sql_search_test.go
index 6bfd88d9f..ded1286e3 100644
--- a/persistence/sql_search_test.go
+++ b/persistence/sql_search_test.go
@@ -5,7 +5,7 @@ import (
 	. "github.com/onsi/gomega"
 )
 
-var _ = Describe("sqlRepository", func() {
+var _ = Describe("Search", func() {
 	Describe("formatFullText", func() {
 		It("prefixes with a space", func() {
 			Expect(formatFullText("legiao urbana")).To(Equal(" legiao urbana"))
diff --git a/utils/str/str.go b/utils/str/str.go
index 8a94488de..4c9ed9de8 100644
--- a/utils/str/str.go
+++ b/utils/str/str.go
@@ -2,8 +2,25 @@ package str
 
 import (
 	"strings"
+	"unicode"
+
+	"github.com/liuzl/gocc"
+	"golang.org/x/text/unicode/norm"
 )
 
+// init builds the shared "t2s" converter used by ToSimplifiedChinese and
+// panics if it cannot be created.
+func init() {
+	var err error
+	opencc, err = gocc.New("t2s")
+	if err != nil {
+		panic(err)
+	}
+}
+
+// opencc is the package-wide converter, set once by init.
+var opencc *gocc.OpenCC
+
 var utf8ToAscii = func() *strings.Replacer {
 	var utf8Map = map[string]string{
 		"'": `‘’‛′`,
@@ -39,3 +56,37 @@ func LongestCommonPrefix(list []string) string {
 	}
 	return list[0]
 }
+
+// NormalizeText returns s after applying the following transformations, in
+// order:
+//   - Unicode NFC normalization
+//   - conversion of Traditional Chinese characters to Simplified Chinese
+func NormalizeText(s string) string {
+	transformFuncs := []func(s string) string{
+		norm.NFC.String,
+		ToSimplifiedChinese,
+	}
+
+	for _, f := range transformFuncs {
+		s = f(s)
+	}
+
+	return s
+}
+
+// ToSimplifiedChinese converts the given string from Traditional Chinese to
+// Simplified Chinese. The original string is returned unchanged if it
+// contains no Han characters or if the conversion fails.
+func ToSimplifiedChinese(s string) string {
+	for _, r := range s {
+		if unicode.Is(unicode.Han, r) {
+			// Convert the whole string once, on the first Han rune. On error keep
+			// the input instead of replacing it with whatever Convert returned.
+			if converted, err := opencc.Convert(s); err == nil {
+				return converted
+			}
+			break
+		}
+	}
+	return s
+}
diff --git a/utils/str/str_test.go b/utils/str/str_test.go
index 0c3524e4e..741081f20 100644
--- a/utils/str/str_test.go
+++ b/utils/str/str_test.go
@@ -31,6 +31,26 @@ var _ = Describe("String Utils", func() {
 			Expect(str.LongestCommonPrefix(albums)).To(Equal("/artist/album"))
 		})
 	})
+
+	Describe("NormalizeText", func() {
+		It("traditional Chinese should be replaced with simplified", func() {
+			Expect(str.NormalizeText("週傑倫")).To(Equal("周杰伦"))
+		})
+		It("simplified Chinese should be unchanged", func() {
+			Expect(str.NormalizeText("简体")).To(Equal("简体"))
+		})
+		It("alphabet should be unchanged", func() {
+			for _, letter := range letters {
+				Expect(str.NormalizeText(letter)).To(Equal(letter))
+			}
+		})
+		It("Japanese should be unchanged", func() {
+			Expect(str.NormalizeText("にっぽんご")).To(Equal("にっぽんご"))
+		})
+		It("unicode normalization", func() {
+			Expect(str.NormalizeText("e\u0301")).To(Equal("\u00e9"))
+		})
+	})
 })
 
 var testPaths = []string{
@@ -146,3 +166,5 @@ var testPaths = []string{
 	"/Music/iTunes 1/iTunes Media/Music/War/Why Can't We Be Friends/Low Rider.m4a",
 	"/Music/iTunes 1/iTunes Media/Music/Yes/Fragile/01 Roundabout.m4a",
 }
+
+var letters = []string{"a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z"}