migrate additional dependencies to ergochat

This commit is contained in:
Shivaram Lingamneni 2021-05-26 21:58:29 -04:00
parent 9dad717c04
commit 7944871eb6
14 changed files with 21 additions and 9 deletions

2
vendor/github.com/ergochat/confusables/.gitignore generated vendored Normal file
View file

@ -0,0 +1,2 @@
/maketables
confusables.txt

28
vendor/github.com/ergochat/confusables/LICENSE generated vendored Normal file
View file

@ -0,0 +1,28 @@
Copyright (c) 2013 Michael Tibben. All rights reserved.
Copyright (c) 2014 Filippo Valsorda. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

17
vendor/github.com/ergochat/confusables/README.md generated vendored Normal file
View file

@ -0,0 +1,17 @@
# Unicode confusables
This Go library implements the `Skeleton` algorithm from Unicode TR39.
See http://www.unicode.org/reports/tr39/
### Examples
```
import "github.com/ergochat/confusables"
confusables.Skeleton("𝔭𝒶ỿ𝕡𝕒") // "paypal"
confusables.Confusable("𝔭𝒶ỿ𝕡𝕒", "paypal") // true
```
*Note on the use of `Skeleton`, from TR39:*
> A skeleton is intended only for internal use for testing confusability of strings; the resulting text is not suitable for display to users, because it will appear to be a hodgepodge of different scripts. In particular, the result of mapping an identifier will not necessary be an identifier. Thus the confusability mappings can be used to test whether two identifiers are confusable (if their skeletons are the same), but should definitely not be used as a "normalization" of identifiers.

82
vendor/github.com/ergochat/confusables/confusables.go generated vendored Normal file
View file

@ -0,0 +1,82 @@
//go:generate go run maketables.go > tables.go
package confusables
import (
"bytes"
"golang.org/x/text/unicode/norm"
)
// TODO: document casefolding approaches
// (suggest to force casefold strings; explain how to catch paypal - pAypal)
// TODO: DOC you might want to store the Skeleton and check against it later
// TODO: implement xidmodifications.txt restricted characters
// lookupFunc maps a single rune to its replacement string. An empty result
// means the rune has no replacement and must be kept as it is.
type lookupFunc func(rune) string
// lookupReplacement returns the entry stored for r in confusablesMap.
// NOTE(review): confusablesMap is declared in the generated tables.go;
// presumably a missing key yields "", which callers treat as "no replacement".
func lookupReplacement(r rune) string {
	target := confusablesMap[r]
	return target
}
// lookupReplacementTweaked resolves r against tweaksMap first and only falls
// back to confusablesMap when tweaksMap has no entry for r. An entry in
// tweaksMap wins even when its value is the empty string.
func lookupReplacementTweaked(r rune) string {
	override, overridden := tweaksMap[r]
	if !overridden {
		return confusablesMap[r]
	}
	return override
}
// skeletonBase computes the skeleton of s using lookup as the mapping table:
//
//  1. convert s to NFD;
//  2. replace every rune for which lookup returns a non-empty string by that
//     string, keeping every other rune's bytes untouched;
//  3. apply NFD again to the result.
func skeletonBase(s string, lookup lookupFunc) string {
	// Step 1: NFD-normalize the input.
	decomposed := norm.NFD.String(s)

	// Step 2: map each rune through the table. Nothing is written to out
	// until the first replacement is found, so text without any mapped rune
	// never gets copied.
	var out bytes.Buffer
	mappedAny := false
	// pending is the byte offset where the current run of not-yet-copied,
	// unmapped text starts, or -1 when there is no such run.
	pending := 0
	for pos, r := range decomposed {
		target := lookup(r)
		if target == "" {
			// unmapped rune: it joins (or opens) the pending run
			if pending < 0 {
				pending = pos
			}
			continue
		}
		// mapped rune: flush the untouched text before it, then the mapping
		if pending >= 0 {
			out.WriteString(decomposed[pending:pos])
			pending = -1
		}
		out.WriteString(target)
		mappedAny = true
	}

	result := decomposed
	if mappedAny {
		// copy whatever unmapped text trails the last replacement
		if pending >= 0 {
			out.WriteString(decomposed[pending:])
		}
		result = out.String()
	}

	// Step 3: reapply NFD.
	return norm.NFD.String(result)
}
// Skeleton converts a string to its "skeleton" form
// as described in http://www.unicode.org/reports/tr39/#Confusable_Detection
func Skeleton(s string) string {
return skeletonBase(s, lookupReplacement)
}
// SkeletonTweaked is like Skeleton, but it implements some custom overrides
// to the confusables table (currently it removes the m -> rn mapping):
func SkeletonTweaked(s string) string {
return skeletonBase(s, lookupReplacementTweaked)
}
// Confusable reports whether x and y have the same skeleton, as computed by
// Skeleton.
func Confusable(x, y string) bool {
	left := Skeleton(x)
	right := Skeleton(y)
	return left == right
}

6332
vendor/github.com/ergochat/confusables/tables.go generated vendored Normal file

File diff suppressed because it is too large Load diff

38
vendor/github.com/ergochat/confusables/tweaks.go generated vendored Normal file
View file

@ -0,0 +1,38 @@
package confusables
// tweaksMap lists overrides for the standard confusables table. For a rune:
//   - an entry mapping to "" means "don't map this rune";
//   - an entry mapping to a replacement means "replace with this";
//   - no entry means "defer to the standard table".
var tweaksMap = map[rune]string{
	// ASCII-to-ASCII mapping that we are removing: m -> rn
	'm': "",

	// These characters are confusable with m, which is why the official table
	// maps them to rn
	// (`grep "LATIN SMALL LETTER R, LATIN SMALL LETTER N" confusables.txt`):
	//   118E3 ; 0072 006E ; MA # ( 𑣣 → rn ) WARANG CITI DIGIT THREE → LATIN SMALL LETTER R, LATIN SMALL LETTER N
	'\U000118E3': "m",
	//   11700 ; 0072 006E ; MA # ( 𑜀 → rn ) AHOM LETTER KA → LATIN SMALL LETTER R, LATIN SMALL LETTER N
	'\U00011700': "m",

	// The table thinks this one is confusable with m̦ but I think it's
	// confusable with m:
	//   0271 ; 0072 006E 0326 ; MA # ( ɱ → rn̦ ) LATIN SMALL LETTER M WITH HOOK → LATIN SMALL LETTER R, LATIN SMALL LETTER N, COMBINING COMMA BELOW # →m̡→
	'\u0271': "m",

	/*
		Further candidate overrides, currently disabled:

		// ASCII-to-ASCII mapping that we are removing:
		0x49: "", // I -> l
		// these characters are confusable with I, hence the official table
		// maps them to l (`grep "LATIN SMALL LETTER L" confusables.txt`)
		0x0399: "I", // 0399 ; 006C ; MA # ( Ι → l ) GREEK CAPITAL LETTER IOTA → LATIN SMALL LETTER L #
		0x0406: "I", // 0406 ; 006C ; MA # ( І → l ) CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I → LATIN SMALL LETTER L #
		0x04C0: "I", // 04C0 ; 006C ; MA # ( Ӏ → l ) CYRILLIC LETTER PALOCHKA → LATIN SMALL LETTER L #

		// ASCII-to-ASCII mapping that we are removing:
		0x31: "", // 1 -> l
		// these characters are confusable with 1, hence the official table
		// maps them to l (`grep "LATIN SMALL LETTER L" confusables.txt`)
		// [nothing yet]

		// ASCII-to-ASCII mapping that we are removing:
		0x30: "", // 0 -> O
		// these characters are confusable with 0, hence the official table
		// maps them to O (`grep "LATIN CAPITAL LETTER O\>" confusables.txt`)
		// [nothing yet]
	*/
}