From 69f118d2306082de7641686272b726a1753ed118 Mon Sep 17 00:00:00 2001 From: Mikkel Krautz Date: Fri, 17 Jun 2011 16:49:19 +0200 Subject: [PATCH] Move htmlfilter to its own package. --- Makefile | 3 + pkg/htmlfilter/Makefile | 7 ++ pkg/htmlfilter/htmlfilter.go | 157 +++++++++++++++++++++++++++++++++++ server.go | 135 +++--------------------------- 4 files changed, 178 insertions(+), 124 deletions(-) create mode 100644 pkg/htmlfilter/Makefile create mode 100644 pkg/htmlfilter/htmlfilter.go diff --git a/Makefile b/Makefile index 8b4d3fe..8713da6 100644 --- a/Makefile +++ b/Makefile @@ -17,6 +17,7 @@ PACKAGES = \ pkg/serverconf \ pkg/sessionpool \ pkg/ban \ + pkg/htmlfilter \ pkg/sqlite GCFLAGS = \ @@ -27,6 +28,7 @@ GCFLAGS = \ -Ipkg/serverconf/_obj \ -Ipkg/sessionpool/_obj \ -Ipkg/ban/_obj \ + -Ipkg/htmlfilter/_obj \ -Ipkg/sqlite/_obj LDFLAGS = \ @@ -37,6 +39,7 @@ LDFLAGS = \ -Lpkg/serverconf/_obj \ -Lpkg/sessionpool/_obj \ -Lpkg/ban/_obj \ + -Lpkg/htmlfilter/_obj \ -Lpkg/sqlite/_obj GOFILES = \ diff --git a/pkg/htmlfilter/Makefile b/pkg/htmlfilter/Makefile new file mode 100644 index 0000000..62d6612 --- /dev/null +++ b/pkg/htmlfilter/Makefile @@ -0,0 +1,7 @@ +include $(GOROOT)/src/Make.inc + +TARG = grumble/htmlfilter +GOFILES = \ + htmlfilter.go + +include $(GOROOT)/src/Make.pkg diff --git a/pkg/htmlfilter/htmlfilter.go b/pkg/htmlfilter/htmlfilter.go new file mode 100644 index 0000000..a754f2e --- /dev/null +++ b/pkg/htmlfilter/htmlfilter.go @@ -0,0 +1,157 @@ +// Copyright (c) 2011 The Grumble Authors +// The use of this source code is goverened by a BSD-style +// license that can be found in the LICENSE-file. + +package htmlfilter + +import ( + "bytes" + "os" + "strings" + "xml" +) + +type Options struct { + StripHTML bool + MaxTextMessageLength int + MaxImageMessageLength int +} + +var defaultOptions Options = Options{ + StripHTML: true, + MaxTextMessageLength: 1024, + MaxImageMessageLength: 1024 * 1024, +} + +var ( + ErrExceedsTextMessageLength = os.NewError("Exceeds text message length") + ErrExceedsImageMessageLength = os.NewError("Exceeds image message length") +) + +// Filter text according to options. +func Filter(text string, options *Options) (filtered string, err os.Error) { + // This function filters incoming text from clients according to the three options: + // + // StripHTML: + // If true, all HTML shall be stripped. + // When stripping br tags, append a newline to the output stream. + // When stripping p tags, append a newline after the end tag. + // + // MaxTextsageLength: + // Text length for "plain" messages (messages without images) + // + // MaxImageMessageLength: + // Text length for messages with images. + + if options == nil { + options = &defaultOptions + } + + max := options.MaxTextMessageLength + maximg := options.MaxImageMessageLength + + if options.StripHTML { + // Does the message include HTML? If not, take the fast path. + if strings.Index(text, "<") == -1 { + filtered = strings.TrimSpace(text) + } else { + // Strip away all HTML + out := bytes.NewBuffer(nil) + buf := bytes.NewBufferString(text) + parser := xml.NewParser(buf) + parser.Strict = false + parser.AutoClose = xml.HTMLAutoClose + parser.Entity = xml.HTMLEntity + for { + tok, err := parser.Token() + if err == os.EOF { + break + } else if err != nil { + return "", err + } + + switch t := tok.(type) { + case xml.CharData: + out.Write(t) + case xml.EndElement: + if t.Name.Local == "p" || t.Name.Local == "br" { + out.WriteString("\n") + } + } + } + filtered = strings.TrimSpace(out.String()) + } + if max != 0 && len(filtered) > max { + return "", ErrExceedsTextMessageLength + } + } else { + // No limits + if max == 0 && maximg == 0 { + return text, nil + } + + // Too big for images? + if maximg != 0 && len(text) > maximg { + return "", ErrExceedsImageMessageLength + } + + // Under max plain length? + if max == 0 || len(text) <= max { + return text, nil + } + + // Over max length, under image limit. If text doesn't include + // any HTML, this is a no-go. If there is HTML, we can attempt to + // strip away data URIs to see if we can get the message to fit + // into the plain message limit. + if strings.Index(text, "<") == -1 { + return "", ErrExceedsTextMessageLength + } + + // Simplify the received HTML data by stripping away data URIs + out := bytes.NewBuffer(nil) + buf := bytes.NewBufferString(text) + parser := xml.NewParser(buf) + parser.Strict = false + parser.AutoClose = xml.HTMLAutoClose + parser.Entity = xml.HTMLEntity + for { + tok, err := parser.Token() + if err == os.EOF { + break + } else if err != nil { + return "", err + } + + switch t := tok.(type) { + case xml.CharData: + out.Write(t) + case xml.StartElement: + out.WriteString("<") + xml.Escape(out, []byte(t.Name.Local)) + for _, attr := range t.Attr { + if t.Name.Local == "img" && attr.Name.Local == "src" { + continue + } + out.WriteString(" ") + xml.Escape(out, []byte(attr.Name.Local)) + out.WriteString(`="`) + out.WriteString(attr.Value) + out.WriteString(`"`) + } + out.WriteString(">") + case xml.EndElement: + out.WriteString("") + } + } + + filtered = strings.TrimSpace(out.String()) + if len(filtered) > max { + return "", ErrExceedsTextMessageLength + } + } + + return +} diff --git a/server.go b/server.go index 9fe56cf..c85ad5e 100644 --- a/server.go +++ b/server.go @@ -24,6 +24,7 @@ import ( "grumble/ban" "grumble/blobstore" "grumble/cryptstate" + "grumble/htmlfilter" "grumble/serverconf" "grumble/sessionpool" "hash" @@ -31,7 +32,6 @@ import ( "path/filepath" "strings" "time" - "xml" ) // The default port a Murmur server listens on @@ -172,11 +172,11 @@ func NewServer(id int64, addr string, port int) (s *Server, err os.Error) { // Get a pointer to the root channel func (server *Server) RootChannel() *Channel { - root, exists := server.Channels[0] - if !exists { - server.Fatalf("Not Root channel found for server") - } - return root + root, exists := server.Channels[0] + if !exists { + server.Fatalf("Not Root channel found for server") + } + return root } // Set password as the new SuperUser password @@ -1193,125 +1193,12 @@ func (server *Server) IsBanned(conn net.Conn) bool { // Filter incoming text according to the server's current rules. func (server *Server) FilterText(text string) (filtered string, err os.Error) { - // This function filters incoming text from clients according to three server settings: - // - // AllowHTML: - // If false, all HTML shall be stripped. - // When stripping br tags, append a newline to the output stream. - // When stripping p tags, append a newline after the end tag. - // - // MaxTextMessageLength: - // Text length for "plain" messages (messages without images) - // - // MaxImageTextMessageLength: - // Text length for messages with images. - - max := server.cfg.IntValue("MaxTextMessageLength") - maximg := server.cfg.IntValue("MaxImageMessageLength") - - if !server.cfg.BoolValue("AllowHTML") { - if strings.Index(text, "<") == -1 { - filtered = strings.TrimSpace(text) - } else { - // Strip away all HTML - out := bytes.NewBuffer(nil) - buf := bytes.NewBufferString(text) - parser := xml.NewParser(buf) - parser.Strict = false - parser.AutoClose = xml.HTMLAutoClose - parser.Entity = xml.HTMLEntity - for { - tok, err := parser.Token() - if err == os.EOF { - break - } else if err != nil { - return "", err - } - - switch t := tok.(type) { - case xml.CharData: - out.Write(t) - case xml.EndElement: - if t.Name.Local == "p" || t.Name.Local == "br" { - out.WriteString("\n") - } - } - } - filtered = strings.TrimSpace(out.String()) - } - if max != 0 && len(filtered) > max { - return "", os.NewError("Message exceeds max length") - } - } else { - // No limits - if max == 0 && maximg == 0 { - return text, nil - } - - // Too big for images? - if maximg != 0 && len(text) > maximg { - return "", os.NewError("Message exceeds max image message length") - } - - // Under max plain length? - if max == 0 || len(text) <= max { - return text, nil - } - - // Over max length, under image limit. If text doesn't include - // any HTML, this is a no-go. If there is XML, we can attempt to - // strip away data URIs to see if we can get the message to fit - // into the plain message limit. - if strings.Index(text, "<") == -1 { - return "", os.NewError("Over plain length") - } - - // Simplify the received HTML data by stripping away data URIs - out := bytes.NewBuffer(nil) - buf := bytes.NewBufferString(text) - parser := xml.NewParser(buf) - parser.Strict = false - parser.AutoClose = xml.HTMLAutoClose - parser.Entity = xml.HTMLEntity - for { - tok, err := parser.Token() - if err == os.EOF { - break - } else if err != nil { - return "", err - } - - switch t := tok.(type) { - case xml.CharData: - out.Write(t) - case xml.StartElement: - out.WriteString("<") - xml.Escape(out, []byte(t.Name.Local)) - for _, attr := range t.Attr { - if t.Name.Local == "img" && attr.Name.Local == "src" { - continue - } - out.WriteString(" ") - xml.Escape(out, []byte(attr.Name.Local)) - out.WriteString(`="`) - out.WriteString(attr.Value) - out.WriteString(`"`) - } - out.WriteString(">") - case xml.EndElement: - out.WriteString("") - } - } - - filtered = strings.TrimSpace(out.String()) - if len(filtered) > max { - return "", os.NewError("Data URI stripped message longer than max length") - } + options := &htmlfilter.Options{ + StripHTML: !server.cfg.BoolValue("AllowHTML"), + MaxTextMessageLength: server.cfg.IntValue("MaxTextMessageLength"), + MaxImageMessageLength: server.cfg.IntValue("MaxImageMessageLength"), } - - return + return htmlfilter.Filter(text, options) } // The accept loop of the server.