mirror of
https://github.com/ergochat/ergo.git
synced 2025-12-21 18:41:58 -08:00
migrate additional dependencies to ergochat
This commit is contained in:
parent
9dad717c04
commit
7944871eb6
14 changed files with 21 additions and 9 deletions
2
vendor/github.com/ergochat/confusables/.gitignore
generated
vendored
Normal file
2
vendor/github.com/ergochat/confusables/.gitignore
generated
vendored
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
/maketables
|
||||
confusables.txt
|
||||
28
vendor/github.com/ergochat/confusables/LICENSE
generated
vendored
Normal file
28
vendor/github.com/ergochat/confusables/LICENSE
generated
vendored
Normal file
|
|
@ -0,0 +1,28 @@
|
|||
Copyright (c) 2013 Michael Tibben. All rights reserved.
|
||||
Copyright (c) 2014 Filippo Valsorda. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Google Inc. nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
17
vendor/github.com/ergochat/confusables/README.md
generated
vendored
Normal file
17
vendor/github.com/ergochat/confusables/README.md
generated
vendored
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
# Unicode confusables
|
||||
|
||||
This Go library implements the `Skeleton` algorithm from Unicode TR39.
|
||||
|
||||
See http://www.unicode.org/reports/tr39/
|
||||
|
||||
### Examples
|
||||
```
|
||||
import "github.com/ergochat/confusables"
|
||||
|
||||
confusables.Skeleton("𝔭𝒶ỿ𝕡𝕒ℓ") # "paypal"
|
||||
confusables.Confusable("𝔭𝒶ỿ𝕡𝕒ℓ", "paypal") # true
|
||||
```
|
||||
|
||||
*Note on the use of `Skeleton`, from TR39:*
|
||||
|
||||
> A skeleton is intended only for internal use for testing confusability of strings; the resulting text is not suitable for display to users, because it will appear to be a hodgepodge of different scripts. In particular, the result of mapping an identifier will not necessarily be an identifier. Thus the confusability mappings can be used to test whether two identifiers are confusable (if their skeletons are the same), but should definitely not be used as a "normalization" of identifiers.
|
||||
82
vendor/github.com/ergochat/confusables/confusables.go
generated
vendored
Normal file
82
vendor/github.com/ergochat/confusables/confusables.go
generated
vendored
Normal file
|
|
@ -0,0 +1,82 @@
|
|||
//go:generate go run maketables.go > tables.go
|
||||
|
||||
package confusables
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
|
||||
"golang.org/x/text/unicode/norm"
|
||||
)
|
||||
|
||||
// TODO: document casefolding approaches
|
||||
// (suggest to force casefold strings; explain how to catch paypal - pAypal)
|
||||
// TODO: DOC you might want to store the Skeleton and check against it later
|
||||
// TODO: implement xidmodifications.txt restricted characters
|
||||
|
||||
// lookupFunc maps a rune to its replacement string. An empty result means
// the rune has no replacement and is kept unchanged by skeletonBase.
type lookupFunc func(rune) string
|
||||
|
||||
func lookupReplacement(r rune) string {
|
||||
return confusablesMap[r]
|
||||
}
|
||||
|
||||
func lookupReplacementTweaked(r rune) string {
|
||||
if replacement, ok := tweaksMap[r]; ok {
|
||||
return replacement
|
||||
}
|
||||
return confusablesMap[r]
|
||||
}
|
||||
|
||||
func skeletonBase(s string, lookup lookupFunc) string {
|
||||
|
||||
// 1. Converting X to NFD format
|
||||
s = norm.NFD.String(s)
|
||||
|
||||
// 2. Successively mapping each source character in X to the target string
|
||||
// according to the specified data table
|
||||
var buf bytes.Buffer
|
||||
changed := false // fast path: if this remains false, keep s intact
|
||||
prevPos := 0
|
||||
var replacement string
|
||||
for i, r := range s {
|
||||
if changed && replacement == "" {
|
||||
buf.WriteString(s[prevPos:i])
|
||||
}
|
||||
prevPos = i
|
||||
replacement = lookup(r)
|
||||
if replacement != "" {
|
||||
if !changed {
|
||||
changed = true
|
||||
// first replacement: copy over the previously unmodified text
|
||||
buf.WriteString(s[:i])
|
||||
}
|
||||
buf.WriteString(replacement)
|
||||
}
|
||||
}
|
||||
if changed && replacement == "" {
|
||||
buf.WriteString(s[prevPos:]) // loop-and-a-half
|
||||
}
|
||||
if changed {
|
||||
s = buf.String()
|
||||
}
|
||||
|
||||
// 3. Reapplying NFD
|
||||
s = norm.NFD.String(s)
|
||||
|
||||
return s
|
||||
}
|
||||
|
||||
// Skeleton converts a string to its "skeleton" form
|
||||
// as described in http://www.unicode.org/reports/tr39/#Confusable_Detection
|
||||
func Skeleton(s string) string {
|
||||
return skeletonBase(s, lookupReplacement)
|
||||
}
|
||||
|
||||
// SkeletonTweaked is like Skeleton, but it implements some custom overrides
|
||||
// to the confusables table (currently it removes the m -> rn mapping):
|
||||
func SkeletonTweaked(s string) string {
|
||||
return skeletonBase(s, lookupReplacementTweaked)
|
||||
}
|
||||
|
||||
func Confusable(x, y string) bool {
|
||||
return Skeleton(x) == Skeleton(y)
|
||||
}
|
||||
6332
vendor/github.com/ergochat/confusables/tables.go
generated
vendored
Normal file
6332
vendor/github.com/ergochat/confusables/tables.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load diff
38
vendor/github.com/ergochat/confusables/tweaks.go
generated
vendored
Normal file
38
vendor/github.com/ergochat/confusables/tweaks.go
generated
vendored
Normal file
|
|
@ -0,0 +1,38 @@
|
|||
package confusables
|
||||
|
||||
// tweaksMap holds overrides for the standard confusables table. For a
// given rune:
//
//   - an entry mapped to "" means "don't map this rune";
//   - an entry mapped to a replacement means "replace with this";
//   - no entry means "defer to the standard table".
var tweaksMap = map[rune]string{
	// ASCII-to-ASCII mapping that is removed: m -> rn.
	'm': "",

	// These characters are confusable with m, which is why the official
	// table maps them to rn
	// (`grep "LATIN SMALL LETTER R, LATIN SMALL LETTER N" confusables.txt`).
	'\U000118E3': "m", // 118E3 ; 0072 006E ; MA # ( 𑣣 → rn ) WARANG CITI DIGIT THREE → LATIN SMALL LETTER R, LATIN SMALL LETTER N
	'\U00011700': "m", // 11700 ; 0072 006E ; MA # ( 𑜀 → rn ) AHOM LETTER KA → LATIN SMALL LETTER R, LATIN SMALL LETTER N

	// The table considers this one confusable with m̦, but it is treated
	// here as confusable with plain m.
	'\u0271': "m", // 0271 ; 0072 006E 0326 ; MA # ( ɱ → rn̦ ) LATIN SMALL LETTER M WITH HOOK → LATIN SMALL LETTER R, LATIN SMALL LETTER N, COMBINING COMMA BELOW # →m̡→

	// The entries below are disabled.
	//
	// ASCII-to-ASCII mapping that we are removing:
	//	0x49: "", // I -> l
	// these characters are confusable with I, hence the official table
	// maps them to l (`grep "LATIN SMALL LETTER L" confusables.txt`)
	//	0x0399: "I", // 0399 ; 006C ; MA # ( Ι → l ) GREEK CAPITAL LETTER IOTA → LATIN SMALL LETTER L #
	//	0x0406: "I", // 0406 ; 006C ; MA # ( І → l ) CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I → LATIN SMALL LETTER L #
	//	0x04C0: "I", // 04C0 ; 006C ; MA # ( Ӏ → l ) CYRILLIC LETTER PALOCHKA → LATIN SMALL LETTER L #
	//
	// ASCII-to-ASCII mapping that we are removing:
	//	0x31: "", // 1 -> l
	// these characters are confusable with 1, hence the official table
	// maps them to l (`grep "LATIN SMALL LETTER L" confusables.txt`)
	//	[nothing yet]
	//
	// ASCII-to-ASCII mapping that we are removing:
	//	0x30: "", // 0 -> O
	// these characters are confusable with 0, hence the official table
	// maps them to O (`grep "LATIN CAPITAL LETTER O\>" confusables.txt`)
	//	[nothing yet]
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue