From a4f9e08a850256ce24103b368fa9306ef1e85c25 Mon Sep 17 00:00:00 2001 From: Shivaram Lingamneni Date: Mon, 22 Jun 2020 14:54:43 -0400 Subject: [PATCH] fix #1151 --- conventional.yaml | 7 ++++++- default.yaml | 7 ++++++- irc/client.go | 7 ++++++- irc/commands.go | 5 +++++ irc/config.go | 1 + irc/errors.go | 1 + irc/handlers.go | 6 ++++++ irc/ircconn.go | 9 ++++++--- irc/server.go | 3 +++ irc/socket.go | 3 +++ irc/strings.go | 2 ++ 11 files changed, 45 insertions(+), 6 deletions(-) diff --git a/conventional.yaml b/conventional.yaml index 574b6997..d1c1afab 100644 --- a/conventional.yaml +++ b/conventional.yaml @@ -100,7 +100,7 @@ server: # casemapping controls what kinds of strings are permitted as identifiers (nicknames, # channel names, account names, etc.), and how they are normalized for case. - # with the recommended default of 'precis', utf-8 identifiers that are "sane" + # with the recommended default of 'precis', UTF8 identifiers that are "sane" # (according to RFC 8265) are allowed, and the server additionally tries to protect # against confusable characters ("homoglyph attacks"). # the other options are 'ascii' (traditional ASCII-only identifiers), and 'permissive', @@ -110,6 +110,11 @@ server: # already up and running is problematic). casemapping: "precis" + # enforce-utf8 controls whether the server allows non-UTF8 bytes in messages + # (as in traditional IRC) or preemptively discards non-UTF8 messages (since + # they cannot be relayed to websocket clients). + enforce-utf8: true + # whether to look up user hostnames with reverse DNS. # (disabling this will expose user IPs instead of hostnames; # to make IP/hostname information private, see the ip-cloaking section) diff --git a/default.yaml b/default.yaml index d161462c..a075b3d7 100644 --- a/default.yaml +++ b/default.yaml @@ -126,7 +126,7 @@ server: # casemapping controls what kinds of strings are permitted as identifiers (nicknames, # channel names, account names, etc.), and how they are normalized for case. - # with the recommended default of 'precis', utf-8 identifiers that are "sane" + # with the recommended default of 'precis', UTF8 identifiers that are "sane" # (according to RFC 8265) are allowed, and the server additionally tries to protect # against confusable characters ("homoglyph attacks"). # the other options are 'ascii' (traditional ASCII-only identifiers), and 'permissive', @@ -136,6 +136,11 @@ server: # already up and running is problematic). casemapping: "precis" + # enforce-utf8 controls whether the server allows non-UTF8 bytes in messages + # (as in traditional IRC) or preemptively discards non-UTF8 messages (since + # they cannot be relayed to websocket clients). + enforce-utf8: true + # whether to look up user hostnames with reverse DNS. # (disabling this will expose user IPs instead of hostnames; # to make IP/hostname information private, see the ip-cloaking section) diff --git a/irc/client.go b/irc/client.go index 4a36e926..f934d001 100644 --- a/irc/client.go +++ b/irc/client.go @@ -615,8 +615,11 @@ func (client *Client) run(session *Session) { firstLine := !isReattach for { + var invalidUtf8 bool line, err := session.socket.Read() - if err != nil { + if err == errInvalidUtf8 { + invalidUtf8 = true // handle as normal, including labeling + } else if err != nil { quitMessage := "connection closed" if err == errReadQ { quitMessage = "readQ exceeded" @@ -676,6 +679,8 @@ func (client *Client) run(session *Session) { cmd, exists := Commands[msg.Command] if !exists { cmd = unknownCommand + } else if invalidUtf8 { + cmd = invalidUtf8Command } isExiting := cmd.Run(client.server, client, session, msg) diff --git a/irc/commands.go b/irc/commands.go index 01423c65..0870c80b 100644 --- a/irc/commands.go +++ b/irc/commands.go @@ -79,6 +79,11 @@ var unknownCommand = Command{ usablePreReg: true, } +var invalidUtf8Command = Command{ + handler: invalidUtf8Handler, + usablePreReg: true, +} + // Commands holds all commands executable by a client connected to us. var Commands map[string]Command diff --git a/irc/config.go b/irc/config.go index 3e8723f4..7f3373d8 100644 --- a/irc/config.go +++ b/irc/config.go @@ -518,6 +518,7 @@ type Config struct { supportedCaps *caps.Set capValues caps.Values Casemapping Casemapping + EnforceUtf8 bool `yaml:"enforce-utf8"` OutputPath string `yaml:"output-path"` } diff --git a/irc/errors.go b/irc/errors.go index 6f199677..09afdf8a 100644 --- a/irc/errors.go +++ b/irc/errors.go @@ -66,6 +66,7 @@ var ( errCredsExternallyManaged = errors.New("Credentials are externally managed and cannot be changed here") errInvalidMultilineBatch = errors.New("Invalid multiline batch") errTimedOut = errors.New("Operation timed out") + errInvalidUtf8 = errors.New("Message rejected for invalid utf8") ) // Socket Errors diff --git a/irc/handlers.go b/irc/handlers.go index c4f9d515..d4ae95e6 100644 --- a/irc/handlers.go +++ b/irc/handlers.go @@ -2927,3 +2927,9 @@ func unknownCommandHandler(server *Server, client *Client, msg ircmsg.IrcMessage rb.Add(nil, server.name, ERR_UNKNOWNCOMMAND, client.Nick(), utils.SafeErrorParam(msg.Command), client.t("Unknown command")) return false } + +// fake handler for invalid utf8 +func invalidUtf8Handler(server *Server, client *Client, msg ircmsg.IrcMessage, rb *ResponseBuffer) bool { + rb.Add(nil, server.name, ERR_UNKNOWNERROR, client.Nick(), utils.SafeErrorParam(msg.Command), client.t("Message rejected for containing invalid UTF-8")) + return false +} diff --git a/irc/ircconn.go b/irc/ircconn.go index bf53bad4..87f9b584 100644 --- a/irc/ircconn.go +++ b/irc/ircconn.go @@ -77,6 +77,9 @@ func (cc *IRCStreamConn) ReadLine() (line []byte, err error) { return nil, errReadQ } line = bytes.TrimSuffix(line, crlf) + if globalUtf8EnforcementSetting && !utf8.Valid(line) { + err = errInvalidUtf8 + } return } @@ -101,9 +104,9 @@ func (wc IRCWSConn) UnderlyingConn() *utils.WrappedConn { func (wc IRCWSConn) WriteLine(buf []byte) (err error) { buf = bytes.TrimSuffix(buf, crlf) - // there's not much we can do about this; - // silently drop the message - if !utf8.Valid(buf) { + if !globalUtf8EnforcementSetting && !utf8.Valid(buf) { + // there's not much we can do about this; + // silently drop the message return nil } return wc.conn.WriteMessage(websocket.TextMessage, buf) diff --git a/irc/server.go b/irc/server.go index a6f2c168..98725553 100644 --- a/irc/server.go +++ b/irc/server.go @@ -487,6 +487,7 @@ func (server *Server) applyConfig(config *Config) (err error) { server.name = config.Server.Name server.nameCasefolded = config.Server.nameCasefolded globalCasemappingSetting = config.Server.Casemapping + globalUtf8EnforcementSetting = config.Server.EnforceUtf8 } else { // enforce configs that can't be changed after launch: if server.name != config.Server.Name { @@ -495,6 +496,8 @@ func (server *Server) applyConfig(config *Config) (err error) { return fmt.Errorf("Datastore path cannot be changed after launching the server, rehash aborted") } else if globalCasemappingSetting != config.Server.Casemapping { return fmt.Errorf("Casemapping cannot be changed after launching the server, rehash aborted") + } else if globalUtf8EnforcementSetting != config.Server.EnforceUtf8 { + return fmt.Errorf("UTF-8 enforcement cannot be changed after launching the server, rehash aborted") } else if oldConfig.Accounts.Multiclient.AlwaysOn != config.Accounts.Multiclient.AlwaysOn { return fmt.Errorf("Default always-on setting cannot be changed after launching the server, rehash aborted") } diff --git a/irc/socket.go b/irc/socket.go index 289b7e2e..6304a1a4 100644 --- a/irc/socket.go +++ b/irc/socket.go @@ -75,6 +75,9 @@ func (socket *Socket) Read() (string, error) { if err == io.EOF && strings.TrimSpace(line) != "" { // don't do anything + } else if err == errInvalidUtf8 { + // pass the data through so we can parse the command at least + return line, err } else if err != nil { return "", err } diff --git a/irc/strings.go b/irc/strings.go index 6c8100f6..7f3f6076 100644 --- a/irc/strings.go +++ b/irc/strings.go @@ -50,6 +50,8 @@ const ( // this happens-before all IRC connections and all casefolding operations. var globalCasemappingSetting Casemapping = CasemappingPRECIS +var globalUtf8EnforcementSetting bool + // Each pass of PRECIS casefolding is a composition of idempotent operations, // but not idempotent itself. Therefore, the spec says "do it four times and hope // it converges" (lolwtf). Golang's PRECIS implementation has a "repeat" option,