Move htmlfilter to its own package.

This commit is contained in:
Mikkel Krautz 2011-06-17 16:49:19 +02:00
parent 71b9483e00
commit 69f118d230
4 changed files with 178 additions and 124 deletions

View file

@ -17,6 +17,7 @@ PACKAGES = \
pkg/serverconf \
pkg/sessionpool \
pkg/ban \
pkg/htmlfilter \
pkg/sqlite
GCFLAGS = \
@ -27,6 +28,7 @@ GCFLAGS = \
-Ipkg/serverconf/_obj \
-Ipkg/sessionpool/_obj \
-Ipkg/ban/_obj \
-Ipkg/htmlfilter/_obj \
-Ipkg/sqlite/_obj
LDFLAGS = \
@ -37,6 +39,7 @@ LDFLAGS = \
-Lpkg/serverconf/_obj \
-Lpkg/sessionpool/_obj \
-Lpkg/ban/_obj \
-Lpkg/htmlfilter/_obj \
-Lpkg/sqlite/_obj
GOFILES = \

7
pkg/htmlfilter/Makefile Normal file
View file

@ -0,0 +1,7 @@
include $(GOROOT)/src/Make.inc
TARG = grumble/htmlfilter
GOFILES = \
htmlfilter.go
include $(GOROOT)/src/Make.pkg

View file

@ -0,0 +1,157 @@
// Copyright (c) 2011 The Grumble Authors
// The use of this source code is governed by a BSD-style
// license that can be found in the LICENSE-file.
package htmlfilter
import (
"bytes"
"os"
"strings"
"xml"
)
// Options controls how Filter treats an incoming text message.
type Options struct {
// StripHTML, when true, makes Filter remove all HTML markup from the
// message instead of passing it through.
StripHTML bool
// MaxTextMessageLength is the maximum length, in bytes, of a "plain"
// message (a message without images). Zero disables the limit.
MaxTextMessageLength int
// MaxImageMessageLength is the maximum length, in bytes, of a message
// that carries images. It is only consulted when StripHTML is false.
// Zero disables the limit.
MaxImageMessageLength int
}
// defaultOptions is what Filter uses when it is called with nil options:
// HTML is stripped, plain messages are capped at 1024 bytes and image
// messages at 1024*1024 bytes.
var defaultOptions = Options{
	StripHTML:             true,
	MaxTextMessageLength:  1024,
	MaxImageMessageLength: 1024 * 1024,
}
// Errors returned by Filter when a message is rejected because of its size.
var (
// ErrExceedsTextMessageLength is returned when the (possibly stripped)
// message is longer than Options.MaxTextMessageLength.
ErrExceedsTextMessageLength = os.NewError("Exceeds text message length")
// ErrExceedsImageMessageLength is returned when the raw message is longer
// than Options.MaxImageMessageLength.
ErrExceedsImageMessageLength = os.NewError("Exceeds image message length")
)
// Filter text according to options.
func Filter(text string, options *Options) (filtered string, err os.Error) {
// This function filters incoming text from clients according to the three options:
//
// StripHTML:
// If true, all HTML shall be stripped.
// When stripping br tags, append a newline to the output stream.
// When stripping p tags, append a newline after the end tag.
//
// MaxTextsageLength:
// Text length for "plain" messages (messages without images)
//
// MaxImageMessageLength:
// Text length for messages with images.
if options == nil {
options = &defaultOptions
}
max := options.MaxTextMessageLength
maximg := options.MaxImageMessageLength
if options.StripHTML {
// Does the message include HTML? If not, take the fast path.
if strings.Index(text, "<") == -1 {
filtered = strings.TrimSpace(text)
} else {
// Strip away all HTML
out := bytes.NewBuffer(nil)
buf := bytes.NewBufferString(text)
parser := xml.NewParser(buf)
parser.Strict = false
parser.AutoClose = xml.HTMLAutoClose
parser.Entity = xml.HTMLEntity
for {
tok, err := parser.Token()
if err == os.EOF {
break
} else if err != nil {
return "", err
}
switch t := tok.(type) {
case xml.CharData:
out.Write(t)
case xml.EndElement:
if t.Name.Local == "p" || t.Name.Local == "br" {
out.WriteString("\n")
}
}
}
filtered = strings.TrimSpace(out.String())
}
if max != 0 && len(filtered) > max {
return "", ErrExceedsTextMessageLength
}
} else {
// No limits
if max == 0 && maximg == 0 {
return text, nil
}
// Too big for images?
if maximg != 0 && len(text) > maximg {
return "", ErrExceedsImageMessageLength
}
// Under max plain length?
if max == 0 || len(text) <= max {
return text, nil
}
// Over max length, under image limit. If text doesn't include
// any HTML, this is a no-go. If there is HTML, we can attempt to
// strip away data URIs to see if we can get the message to fit
// into the plain message limit.
if strings.Index(text, "<") == -1 {
return "", ErrExceedsTextMessageLength
}
// Simplify the received HTML data by stripping away data URIs
out := bytes.NewBuffer(nil)
buf := bytes.NewBufferString(text)
parser := xml.NewParser(buf)
parser.Strict = false
parser.AutoClose = xml.HTMLAutoClose
parser.Entity = xml.HTMLEntity
for {
tok, err := parser.Token()
if err == os.EOF {
break
} else if err != nil {
return "", err
}
switch t := tok.(type) {
case xml.CharData:
out.Write(t)
case xml.StartElement:
out.WriteString("<")
xml.Escape(out, []byte(t.Name.Local))
for _, attr := range t.Attr {
if t.Name.Local == "img" && attr.Name.Local == "src" {
continue
}
out.WriteString(" ")
xml.Escape(out, []byte(attr.Name.Local))
out.WriteString(`="`)
out.WriteString(attr.Value)
out.WriteString(`"`)
}
out.WriteString(">")
case xml.EndElement:
out.WriteString("</")
xml.Escape(out, []byte(t.Name.Local))
out.WriteString(">")
}
}
filtered = strings.TrimSpace(out.String())
if len(filtered) > max {
return "", ErrExceedsTextMessageLength
}
}
return
}

135
server.go
View file

@ -24,6 +24,7 @@ import (
"grumble/ban"
"grumble/blobstore"
"grumble/cryptstate"
"grumble/htmlfilter"
"grumble/serverconf"
"grumble/sessionpool"
"hash"
@ -31,7 +32,6 @@ import (
"path/filepath"
"strings"
"time"
"xml"
)
// The default port a Murmur server listens on
@ -172,11 +172,11 @@ func NewServer(id int64, addr string, port int) (s *Server, err os.Error) {
// Get a pointer to the root channel
func (server *Server) RootChannel() *Channel {
root, exists := server.Channels[0]
if !exists {
server.Fatalf("Not Root channel found for server")
}
return root
root, exists := server.Channels[0]
if !exists {
server.Fatalf("Not Root channel found for server")
}
return root
}
// Set password as the new SuperUser password
@ -1193,125 +1193,12 @@ func (server *Server) IsBanned(conn net.Conn) bool {
// Filter incoming text according to the server's current rules.
func (server *Server) FilterText(text string) (filtered string, err os.Error) {
// This function filters incoming text from clients according to three server settings:
//
// AllowHTML:
// If false, all HTML shall be stripped.
// When stripping br tags, append a newline to the output stream.
// When stripping p tags, append a newline after the end tag.
//
// MaxTextMessageLength:
// Text length for "plain" messages (messages without images)
//
// MaxImageTextMessageLength:
// Text length for messages with images.
max := server.cfg.IntValue("MaxTextMessageLength")
maximg := server.cfg.IntValue("MaxImageMessageLength")
if !server.cfg.BoolValue("AllowHTML") {
if strings.Index(text, "<") == -1 {
filtered = strings.TrimSpace(text)
} else {
// Strip away all HTML
out := bytes.NewBuffer(nil)
buf := bytes.NewBufferString(text)
parser := xml.NewParser(buf)
parser.Strict = false
parser.AutoClose = xml.HTMLAutoClose
parser.Entity = xml.HTMLEntity
for {
tok, err := parser.Token()
if err == os.EOF {
break
} else if err != nil {
return "", err
}
switch t := tok.(type) {
case xml.CharData:
out.Write(t)
case xml.EndElement:
if t.Name.Local == "p" || t.Name.Local == "br" {
out.WriteString("\n")
}
}
}
filtered = strings.TrimSpace(out.String())
}
if max != 0 && len(filtered) > max {
return "", os.NewError("Message exceeds max length")
}
} else {
// No limits
if max == 0 && maximg == 0 {
return text, nil
}
// Too big for images?
if maximg != 0 && len(text) > maximg {
return "", os.NewError("Message exceeds max image message length")
}
// Under max plain length?
if max == 0 || len(text) <= max {
return text, nil
}
// Over max length, under image limit. If text doesn't include
// any HTML, this is a no-go. If there is XML, we can attempt to
// strip away data URIs to see if we can get the message to fit
// into the plain message limit.
if strings.Index(text, "<") == -1 {
return "", os.NewError("Over plain length")
}
// Simplify the received HTML data by stripping away data URIs
out := bytes.NewBuffer(nil)
buf := bytes.NewBufferString(text)
parser := xml.NewParser(buf)
parser.Strict = false
parser.AutoClose = xml.HTMLAutoClose
parser.Entity = xml.HTMLEntity
for {
tok, err := parser.Token()
if err == os.EOF {
break
} else if err != nil {
return "", err
}
switch t := tok.(type) {
case xml.CharData:
out.Write(t)
case xml.StartElement:
out.WriteString("<")
xml.Escape(out, []byte(t.Name.Local))
for _, attr := range t.Attr {
if t.Name.Local == "img" && attr.Name.Local == "src" {
continue
}
out.WriteString(" ")
xml.Escape(out, []byte(attr.Name.Local))
out.WriteString(`="`)
out.WriteString(attr.Value)
out.WriteString(`"`)
}
out.WriteString(">")
case xml.EndElement:
out.WriteString("</")
xml.Escape(out, []byte(t.Name.Local))
out.WriteString(">")
}
}
filtered = strings.TrimSpace(out.String())
if len(filtered) > max {
return "", os.NewError("Data URI stripped message longer than max length")
}
options := &htmlfilter.Options{
StripHTML: !server.cfg.BoolValue("AllowHTML"),
MaxTextMessageLength: server.cfg.IntValue("MaxTextMessageLength"),
MaxImageMessageLength: server.cfg.IntValue("MaxImageMessageLength"),
}
return
return htmlfilter.Filter(text, options)
}
// The accept loop of the server.