libgo: Update to current sources.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@192704 138bc75d-0d04-0410-961f-82ee72b054a4
author: ian <ian@138bc75d-0d04-0410-961f-82ee72b054a4> 2012-10-23 04:31:11 +0000
committer: ian <ian@138bc75d-0d04-0410-961f-82ee72b054a4> 2012-10-23 04:31:11 +0000
commit: fb08d0057f91d420b6f48c112264fc87dc91b532 (patch)
tree: 46bb86f514fbf6bad82da48e69a18fb09d878834 /libgo/go/strings
parent: f507227a181bb31fa87d23a082485f99f3ef9183 (diff)
download: ppe42-gcc-fb08d0057f91d420b6f48c112264fc87dc91b532.tar.gz
ppe42-gcc-fb08d0057f91d420b6f48c112264fc87dc91b532.zip
6 files changed, 999 insertions, 148 deletions
diff --git a/libgo/go/strings/export_test.go b/libgo/go/strings/export_test.go
index dcfec513ccf..17c806aa563 100644
--- a/libgo/go/strings/export_test.go
+++ b/libgo/go/strings/export_test.go
@@ -7,3 +7,39 @@ package strings
 func (r *Replacer) Replacer() interface{} {
 	return r.r
 }
+
+func (r *Replacer) PrintTrie() string {
+	gen := r.r.(*genericReplacer)
+	return gen.printNode(&gen.root, 0)
+}
+
+func (r *genericReplacer) printNode(t *trieNode, depth int) (s string) {
+	if t.priority > 0 {
+		s += "+"
+	} else {
+		s += "-"
+	}
+	s += "\n"
+
+	if t.prefix != "" {
+		s += Repeat(".", depth) + t.prefix
+		s += r.printNode(t.next, depth+len(t.prefix))
+	} else if t.table != nil {
+		for b, m := range r.mapping {
+			if int(m) != r.tableSize && t.table[m] != nil {
+				s += Repeat(".", depth) + string([]byte{byte(b)})
+				s += r.printNode(t.table[m], depth+1)
+			}
+		}
+	}
+	return
+}
+
+func StringFind(pattern, text string) int {
+	return makeStringFinder(pattern).next(text)
+}
+
+func DumpTables(pattern string) ([]int, []int) {
+	finder := makeStringFinder(pattern)
+	return finder.badCharSkip[:], finder.goodSuffixSkip
+}
diff --git a/libgo/go/strings/replace.go b/libgo/go/strings/replace.go
index f53a96ee0f9..f63b1792c56 100644
--- a/libgo/go/strings/replace.go
+++ b/libgo/go/strings/replace.go
@@ -33,47 +33,45 @@ func NewReplacer(oldnew ...string) *Replacer {
 		panic("strings.NewReplacer: odd argument count")
 	}
 
-	// Possible implementations.
-	var (
-		bb  byteReplacer
-		bs  byteStringReplacer
-		gen genericReplacer
-	)
-
-	allOldBytes, allNewBytes := true, true
-	for len(oldnew) > 0 {
-		old, new := oldnew[0], oldnew[1]
-		oldnew = oldnew[2:]
-		if len(old) != 1 {
-			allOldBytes = false
+	if len(oldnew) == 2 && len(oldnew[0]) > 1 {
+		return &Replacer{r: makeSingleStringReplacer(oldnew[0], oldnew[1])}
+	}
+
+	allNewBytes := true
+	for i := 0; i < len(oldnew); i += 2 {
+		if len(oldnew[i]) != 1 {
+			return &Replacer{r: makeGenericReplacer(oldnew)}
 		}
-		if len(new) != 1 {
+		if len(oldnew[i+1]) != 1 {
 			allNewBytes = false
 		}
+	}
 
-		// generic
-		gen.p = append(gen.p, pair{old, new})
-
-		// byte -> string
-		if allOldBytes {
-			bs.old.set(old[0])
-			bs.new[old[0]] = []byte(new)
-		}
-
-		// byte -> byte
-		if allOldBytes && allNewBytes {
-			bb.old.set(old[0])
-			bb.new[old[0]] = new[0]
+	if allNewBytes {
+		bb := &byteReplacer{}
+		for i := 0; i < len(oldnew); i += 2 {
+			o, n := oldnew[i][0], oldnew[i+1][0]
+			if bb.old[o>>5]&uint32(1<<(o&31)) != 0 {
+				// Later old->new maps do not override previous ones with the same old string.
+				continue
+			}
+			bb.old.set(o)
+			bb.new[o] = n
 		}
+		return &Replacer{r: bb}
 	}
 
-	if allOldBytes && allNewBytes {
-		return &Replacer{r: &bb}
-	}
-	if allOldBytes {
-		return &Replacer{r: &bs}
+	bs := &byteStringReplacer{}
+	for i := 0; i < len(oldnew); i += 2 {
+		o, new := oldnew[i][0], oldnew[i+1]
+		if bs.old[o>>5]&uint32(1<<(o&31)) != 0 {
+			// Later old->new maps do not override previous ones with the same old string.
+			continue
+		}
+		bs.old.set(o)
+		bs.new[o] = []byte(new)
 	}
-	return &Replacer{r: &gen}
+	return &Replacer{r: bs}
 }
 
 // Replace returns a copy of s with all replacements performed.
@@ -86,79 +84,326 @@ func (r *Replacer) WriteString(w io.Writer, s string) (n int, err error) {
 	return r.r.WriteString(w, s)
 }
 
-// genericReplacer is the fully generic (and least optimized) algorithm.
+// trieNode is a node in a lookup trie for prioritized key/value pairs. Keys
+// and values may be empty. For example, the trie containing keys "ax", "ay",
+// "bcbc", "x" and "xy" could have eight nodes:
+//
+//  n0  -
+//  n1  a-
+//  n2  .x+
+//  n3  .y+
+//  n4  b-
+//  n5  .cbc+
+//  n6  x+
+//  n7  .y+
+//
+// n0 is the root node, and its children are n1, n4 and n6; n1's children are
+// n2 and n3; n4's child is n5; n6's child is n7. Nodes n0, n1 and n4 (marked
+// with a trailing "-") are partial keys, and nodes n2, n3, n5, n6 and n7
+// (marked with a trailing "+") are complete keys.
+type trieNode struct {
+	// value is the value of the trie node's key/value pair. It is empty if
+	// this node is not a complete key.
+	value string
+	// priority is the priority (higher is more important) of the trie node's
+	// key/value pair; keys are not necessarily matched shortest- or longest-
+	// first. Priority is positive if this node is a complete key, and zero
+	// otherwise. In the example above, positive/zero priorities are marked
+	// with a trailing "+" or "-".
+	priority int
+
+	// A trie node may have zero, one or more child nodes:
+	//  * if the remaining fields are zero, there are no children.
+	//  * if prefix and next are non-zero, there is one child in next.
+	//  * if table is non-zero, it defines all the children.
+	//
+	// Prefixes are preferred over tables when there is one child, but the
+	// root node always uses a table for lookup efficiency.
+
+	// prefix is the difference in keys between this trie node and the next.
+	// In the example above, node n4 has prefix "cbc" and n4's next node is n5.
+	// Node n5 has no children and so has zero prefix, next and table fields.
+	prefix string
+	next   *trieNode
+
+	// table is a lookup table indexed by the next byte in the key, after
+	// remapping that byte through genericReplacer.mapping to create a dense
+	// index. In the example above, the keys only use 'a', 'b', 'c', 'x' and
+	// 'y', which remap to 0, 1, 2, 3 and 4. All other bytes remap to 5, and
+	// genericReplacer.tableSize will be 5. Node n0's table will be
+	// []*trieNode{ 0:n1, 1:n4, 3:n6 }, where the 0, 1 and 3 are the remapped
+	// 'a', 'b' and 'x'.
+	table []*trieNode
+}
+
+func (t *trieNode) add(key, val string, priority int, r *genericReplacer) {
+	if key == "" {
+		if t.priority == 0 {
+			t.value = val
+			t.priority = priority
+		}
+		return
+	}
+
+	if t.prefix != "" {
+		// Need to split the prefix among multiple nodes.
+		var n int // length of the longest common prefix
+		for ; n < len(t.prefix) && n < len(key); n++ {
+			if t.prefix[n] != key[n] {
+				break
+			}
+		}
+		if n == len(t.prefix) {
+			t.next.add(key[n:], val, priority, r)
+		} else if n == 0 {
+			// First byte differs, start a new lookup table here. Looking up
+			// what is currently t.prefix[0] will lead to prefixNode, and
+			// looking up key[0] will lead to keyNode.
+			var prefixNode *trieNode
+			if len(t.prefix) == 1 {
+				prefixNode = t.next
+			} else {
+				prefixNode = &trieNode{
+					prefix: t.prefix[1:],
+					next:   t.next,
+				}
+			}
+			keyNode := new(trieNode)
+			t.table = make([]*trieNode, r.tableSize)
+			t.table[r.mapping[t.prefix[0]]] = prefixNode
+			t.table[r.mapping[key[0]]] = keyNode
+			t.prefix = ""
+			t.next = nil
+			keyNode.add(key[1:], val, priority, r)
+		} else {
+			// Insert new node after the common section of the prefix.
+			next := &trieNode{
+				prefix: t.prefix[n:],
+				next:   t.next,
+			}
+			t.prefix = t.prefix[:n]
+			t.next = next
+			next.add(key[n:], val, priority, r)
+		}
+	} else if t.table != nil {
+		// Insert into existing table.
+		m := r.mapping[key[0]]
+		if t.table[m] == nil {
+			t.table[m] = new(trieNode)
+		}
+		t.table[m].add(key[1:], val, priority, r)
+	} else {
+		t.prefix = key
+		t.next = new(trieNode)
+		t.next.add("", val, priority, r)
+	}
+}
+
+func (r *genericReplacer) lookup(s string, ignoreRoot bool) (val string, keylen int, found bool) {
+	// Iterate down the trie to the end, and grab the value and keylen with
+	// the highest priority.
+	bestPriority := 0
+	node := &r.root
+	n := 0
+	for node != nil {
+		if node.priority > bestPriority && !(ignoreRoot && node == &r.root) {
+			bestPriority = node.priority
+			val = node.value
+			keylen = n
+			found = true
+		}
+
+		if s == "" {
+			break
+		}
+		if node.table != nil {
+			index := r.mapping[s[0]]
+			if int(index) == r.tableSize {
+				break
+			}
+			node = node.table[index]
+			s = s[1:]
+			n++
+		} else if node.prefix != "" && HasPrefix(s, node.prefix) {
+			n += len(node.prefix)
+			s = s[len(node.prefix):]
+			node = node.next
+		} else {
+			break
+		}
+	}
+	return
+}
+
+// genericReplacer is the fully generic algorithm.
 // It's used as a fallback when nothing faster can be used.
 type genericReplacer struct {
-	p []pair
+	root trieNode
+	// tableSize is the size of a trie node's lookup table. It is the number
+	// of unique key bytes.
+	tableSize int
+	// mapping maps from key bytes to a dense index for trieNode.table.
+	mapping [256]byte
 }
 
-type pair struct{ old, new string }
+func makeGenericReplacer(oldnew []string) *genericReplacer {
+	r := new(genericReplacer)
+	// Find each byte used, then assign them each an index.
+	for i := 0; i < len(oldnew); i += 2 {
+		key := oldnew[i]
+		for j := 0; j < len(key); j++ {
+			r.mapping[key[j]] = 1
+		}
+	}
+
+	for _, b := range r.mapping {
+		r.tableSize += int(b)
+	}
+
+	var index byte
+	for i, b := range r.mapping {
+		if b == 0 {
+			r.mapping[i] = byte(r.tableSize)
+		} else {
+			r.mapping[i] = index
+			index++
+		}
+	}
+	// Ensure root node uses a lookup table (for performance).
+	r.root.table = make([]*trieNode, r.tableSize)
 
-type appendSliceWriter struct {
-	b []byte
+	for i := 0; i < len(oldnew); i += 2 {
+		r.root.add(oldnew[i], oldnew[i+1], len(oldnew)-i, r)
+	}
+	return r
 }
 
+type appendSliceWriter []byte
+
+// Write writes to the buffer to satisfy io.Writer.
 func (w *appendSliceWriter) Write(p []byte) (int, error) {
-	w.b = append(w.b, p...)
+	*w = append(*w, p...)
 	return len(p), nil
 }
 
+// WriteString writes to the buffer without string->[]byte->string allocations.
+func (w *appendSliceWriter) WriteString(s string) (int, error) {
+	*w = append(*w, s...)
+	return len(s), nil
+}
+
+type stringWriterIface interface {
+	WriteString(string) (int, error)
+}
+
+type stringWriter struct {
+	w io.Writer
+}
+
+func (w stringWriter) WriteString(s string) (int, error) {
+	return w.w.Write([]byte(s))
+}
+
+func getStringWriter(w io.Writer) stringWriterIface {
+	sw, ok := w.(stringWriterIface)
+	if !ok {
+		sw = stringWriter{w}
+	}
+	return sw
+}
+
 func (r *genericReplacer) Replace(s string) string {
-	// TODO(bradfitz): optimized version
-	n, _ := r.WriteString(discard, s)
-	w := appendSliceWriter{make([]byte, 0, n)}
-	r.WriteString(&w, s)
-	return string(w.b)
+	buf := make(appendSliceWriter, 0, len(s))
+	r.WriteString(&buf, s)
+	return string(buf)
 }
 
 func (r *genericReplacer) WriteString(w io.Writer, s string) (n int, err error) {
-	lastEmpty := false // the last replacement was of the empty string
-Input:
-	// TODO(bradfitz): optimized version
-	for i := 0; i < len(s); {
-		for _, p := range r.p {
-			if p.old == "" && lastEmpty {
-				// Don't let old match twice in a row.
-				// (it doesn't advance the input and
-				// would otherwise loop forever)
-				continue
+	sw := getStringWriter(w)
+	var last, wn int
+	var prevMatchEmpty bool
+	for i := 0; i <= len(s); {
+		// Ignore the empty match iff the previous loop found the empty match.
+		val, keylen, match := r.lookup(s[i:], prevMatchEmpty)
+		prevMatchEmpty = match && keylen == 0
+		if match {
+			wn, err = sw.WriteString(s[last:i])
+			n += wn
+			if err != nil {
+				return
 			}
-			if HasPrefix(s[i:], p.old) {
-				if p.new != "" {
-					wn, err := w.Write([]byte(p.new))
-					n += wn
-					if err != nil {
-						return n, err
-					}
-				}
-				i += len(p.old)
-				lastEmpty = p.old == ""
-				continue Input
+			wn, err = sw.WriteString(val)
+			n += wn
+			if err != nil {
+				return
 			}
-		}
-		wn, err := w.Write([]byte{s[i]})
-		n += wn
-		if err != nil {
-			return n, err
+			i += keylen
+			last = i
+			continue
 		}
 		i++
 	}
+	if last != len(s) {
+		wn, err = sw.WriteString(s[last:])
+		n += wn
+	}
+	return
+}
 
-	// Final empty match at end.
-	for _, p := range r.p {
-		if p.old == "" {
-			if p.new != "" {
-				wn, err := w.Write([]byte(p.new))
-				n += wn
-				if err != nil {
-					return n, err
-				}
-			}
+// singleStringReplacer is the implementation that's used when there is only
+// one string to replace (and that string has more than one byte).
+type singleStringReplacer struct {
+	finder *stringFinder
+	// value is the new string that replaces that pattern when it's found.
+	value string
+}
+
+func makeSingleStringReplacer(pattern string, value string) *singleStringReplacer {
+	return &singleStringReplacer{finder: makeStringFinder(pattern), value: value}
+}
+
+func (r *singleStringReplacer) Replace(s string) string {
+	var buf []byte
+	i := 0
+	for {
+		match := r.finder.next(s[i:])
+		if match == -1 {
 			break
 		}
+		buf = append(buf, s[i:i+match]...)
+		buf = append(buf, r.value...)
+		i += match + len(r.finder.pattern)
+	}
+	if buf == nil {
+		return s
 	}
+	buf = append(buf, s[i:]...)
+	return string(buf)
+}
 
-	return n, nil
+func (r *singleStringReplacer) WriteString(w io.Writer, s string) (n int, err error) {
+	sw := getStringWriter(w)
+	var i, wn int
+	for {
+		match := r.finder.next(s[i:])
+		if match == -1 {
+			break
+		}
+		wn, err = sw.WriteString(s[i : i+match])
+		n += wn
+		if err != nil {
+			return
+		}
+		wn, err = sw.WriteString(r.value)
+		n += wn
+		if err != nil {
+			return
+		}
+		i += match + len(r.finder.pattern)
+	}
+	wn, err = sw.WriteString(s[i:])
+	n += wn
+	return
 }
 
 // byteReplacer is the implementation that's used when all the "old"
@@ -301,12 +546,3 @@ func (r *byteStringReplacer) WriteString(w io.Writer, s string) (n int, err erro
 	}
 	return n, nil
 }
-
-// strings is too low-level to import io/ioutil
-var discard io.Writer = devNull(0)
-
-type devNull int
-
-func (devNull) Write(p []byte) (int, error) {
-	return len(p), nil
-}
diff --git a/libgo/go/strings/replace_test.go b/libgo/go/strings/replace_test.go
index 23c7e2e5334..d33dea95b09 100644
--- a/libgo/go/strings/replace_test.go
+++ b/libgo/go/strings/replace_test.go
@@ -7,105 +7,390 @@ package strings_test
 import (
 	"bytes"
 	"fmt"
-	"log"
 	. "strings"
 	"testing"
 )
 
-var _ = log.Printf
-
-type ReplacerTest struct {
-	r   *Replacer
-	in  string
-	out string
-}
+var htmlEscaper = NewReplacer(
+	"&", "&amp;",
+	"<", "&lt;",
+	">", "&gt;",
+	`"`, "&quot;",
+	"'", "&apos;",
+)
 
-var htmlEscaper = NewReplacer("&", "&amp;", "<", "&lt;", ">", "&gt;", "\"", "&quot;")
+var htmlUnescaper = NewReplacer(
+	"&amp;", "&",
+	"&lt;", "<",
+	"&gt;", ">",
+	"&quot;", `"`,
+	"&apos;", "'",
+)
 
 // The http package's old HTML escaping function.
-func oldhtmlEscape(s string) string {
+func oldHTMLEscape(s string) string {
 	s = Replace(s, "&", "&amp;", -1)
 	s = Replace(s, "<", "&lt;", -1)
 	s = Replace(s, ">", "&gt;", -1)
-	s = Replace(s, "\"", "&quot;", -1)
+	s = Replace(s, `"`, "&quot;", -1)
 	s = Replace(s, "'", "&apos;", -1)
 	return s
 }
 
-var replacer = NewReplacer("aaa", "3[aaa]", "aa", "2[aa]", "a", "1[a]", "i", "i",
-	"longerst", "most long", "longer", "medium", "long", "short",
-	"X", "Y", "Y", "Z")
-
 var capitalLetters = NewReplacer("a", "A", "b", "B")
 
-var blankToXReplacer = NewReplacer("", "X", "o", "O")
+// TestReplacer tests the replacer implementations.
+func TestReplacer(t *testing.T) {
+	type testCase struct {
+		r       *Replacer
+		in, out string
+	}
+	var testCases []testCase
 
-var ReplacerTests = []ReplacerTest{
-	// byte->string
-	{htmlEscaper, "No changes", "No changes"},
-	{htmlEscaper, "I <3 escaping & stuff", "I &lt;3 escaping &amp; stuff"},
-	{htmlEscaper, "&&&", "&amp;&amp;&amp;"},
+	// str converts 0xff to "\xff". This isn't just string(b) since that converts to UTF-8.
+	str := func(b byte) string {
+		return string([]byte{b})
+	}
+	var s []string
 
-	// generic
-	{replacer, "fooaaabar", "foo3[aaa]b1[a]r"},
-	{replacer, "long, longerst, longer", "short, most long, medium"},
-	{replacer, "XiX", "YiY"},
+	// inc maps "\x00"->"\x01", ..., "a"->"b", "b"->"c", ..., "\xff"->"\x00".
+	s = nil
+	for i := 0; i < 256; i++ {
+		s = append(s, str(byte(i)), str(byte(i+1)))
+	}
+	inc := NewReplacer(s...)
 
-	// byte->byte
-	{capitalLetters, "brad", "BrAd"},
-	{capitalLetters, Repeat("a", (32<<10)+123), Repeat("A", (32<<10)+123)},
+	// Test cases with 1-byte old strings, 1-byte new strings.
+	testCases = append(testCases,
+		testCase{capitalLetters, "brad", "BrAd"},
+		testCase{capitalLetters, Repeat("a", (32<<10)+123), Repeat("A", (32<<10)+123)},
+		testCase{capitalLetters, "", ""},
 
-	// hitting "" special case
-	{blankToXReplacer, "oo", "XOXOX"},
-}
+		testCase{inc, "brad", "csbe"},
+		testCase{inc, "\x00\xff", "\x01\x00"},
+		testCase{inc, "", ""},
 
-func TestReplacer(t *testing.T) {
-	for i, tt := range ReplacerTests {
-		if s := tt.r.Replace(tt.in); s != tt.out {
-			t.Errorf("%d. Replace(%q) = %q, want %q", i, tt.in, s, tt.out)
+		testCase{NewReplacer("a", "1", "a", "2"), "brad", "br1d"},
+	)
+
+	// repeat maps "a"->"a", "b"->"bb", "c"->"ccc", ...
+	s = nil
+	for i := 0; i < 256; i++ {
+		n := i + 1 - 'a'
+		if n < 1 {
+			n = 1
+		}
+		s = append(s, str(byte(i)), Repeat(str(byte(i)), n))
+	}
+	repeat := NewReplacer(s...)
+
+	// Test cases with 1-byte old strings, variable length new strings.
+	testCases = append(testCases,
+		testCase{htmlEscaper, "No changes", "No changes"},
+		testCase{htmlEscaper, "I <3 escaping & stuff", "I &lt;3 escaping &amp; stuff"},
+		testCase{htmlEscaper, "&&&", "&amp;&amp;&amp;"},
+		testCase{htmlEscaper, "", ""},
+
+		testCase{repeat, "brad", "bbrrrrrrrrrrrrrrrrrradddd"},
+		testCase{repeat, "abba", "abbbba"},
+		testCase{repeat, "", ""},
+
+		testCase{NewReplacer("a", "11", "a", "22"), "brad", "br11d"},
+	)
+
+	// The remaining test cases have variable length old strings.
+
+	testCases = append(testCases,
+		testCase{htmlUnescaper, "&amp;amp;", "&amp;"},
+		testCase{htmlUnescaper, "&lt;b&gt;HTML&apos;s neat&lt;/b&gt;", "<b>HTML's neat</b>"},
+		testCase{htmlUnescaper, "", ""},
+
+		testCase{NewReplacer("a", "1", "a", "2", "xxx", "xxx"), "brad", "br1d"},
+
+		testCase{NewReplacer("a", "1", "aa", "2", "aaa", "3"), "aaaa", "1111"},
+
+		testCase{NewReplacer("aaa", "3", "aa", "2", "a", "1"), "aaaa", "31"},
+	)
+
+	// gen1 has multiple old strings of variable length. There is no
+	// overall non-empty common prefix, but some pairwise common prefixes.
+	gen1 := NewReplacer(
+		"aaa", "3[aaa]",
+		"aa", "2[aa]",
+		"a", "1[a]",
+		"i", "i",
+		"longerst", "most long",
+		"longer", "medium",
+		"long", "short",
+		"xx", "xx",
+		"x", "X",
+		"X", "Y",
+		"Y", "Z",
+	)
+	testCases = append(testCases,
+		testCase{gen1, "fooaaabar", "foo3[aaa]b1[a]r"},
+		testCase{gen1, "long, longerst, longer", "short, most long, medium"},
+		testCase{gen1, "xxxxx", "xxxxX"},
+		testCase{gen1, "XiX", "YiY"},
+		testCase{gen1, "", ""},
+	)
+
+	// gen2 has multiple old strings with no pairwise common prefix.
+	gen2 := NewReplacer(
+		"roses", "red",
+		"violets", "blue",
+		"sugar", "sweet",
+	)
+	testCases = append(testCases,
+		testCase{gen2, "roses are red, violets are blue...", "red are red, blue are blue..."},
+		testCase{gen2, "", ""},
+	)
+
+	// gen3 has multiple old strings with an overall common prefix.
+	gen3 := NewReplacer(
+		"abracadabra", "poof",
+		"abracadabrakazam", "splat",
+		"abraham", "lincoln",
+		"abrasion", "scrape",
+		"abraham", "isaac",
+	)
+	testCases = append(testCases,
+		testCase{gen3, "abracadabrakazam abraham", "poofkazam lincoln"},
+		testCase{gen3, "abrasion abracad", "scrape abracad"},
+		testCase{gen3, "abba abram abrasive", "abba abram abrasive"},
+		testCase{gen3, "", ""},
+	)
+
+	// foo{1,2,3,4} have multiple old strings with an overall common prefix
+	// and 1- or 2- byte extensions from the common prefix.
+	foo1 := NewReplacer(
+		"foo1", "A",
+		"foo2", "B",
+		"foo3", "C",
+	)
+	foo2 := NewReplacer(
+		"foo1", "A",
+		"foo2", "B",
+		"foo31", "C",
+		"foo32", "D",
+	)
+	foo3 := NewReplacer(
+		"foo11", "A",
+		"foo12", "B",
+		"foo31", "C",
+		"foo32", "D",
+	)
+	foo4 := NewReplacer(
+		"foo12", "B",
+		"foo32", "D",
+	)
+	testCases = append(testCases,
+		testCase{foo1, "fofoofoo12foo32oo", "fofooA2C2oo"},
+		testCase{foo1, "", ""},
+
+		testCase{foo2, "fofoofoo12foo32oo", "fofooA2Doo"},
+		testCase{foo2, "", ""},
+
+		testCase{foo3, "fofoofoo12foo32oo", "fofooBDoo"},
+		testCase{foo3, "", ""},
+
+		testCase{foo4, "fofoofoo12foo32oo", "fofooBDoo"},
+		testCase{foo4, "", ""},
+	)
+
+	// genAll maps "\x00\x01\x02...\xfe\xff" to "[all]", amongst other things.
+	allBytes := make([]byte, 256)
+	for i := range allBytes {
+		allBytes[i] = byte(i)
+	}
+	allString := string(allBytes)
+	genAll := NewReplacer(
+		allString, "[all]",
+		"\xff", "[ff]",
+		"\x00", "[00]",
+	)
+	testCases = append(testCases,
+		testCase{genAll, allString, "[all]"},
+		testCase{genAll, "a\xff" + allString + "\x00", "a[ff][all][00]"},
+		testCase{genAll, "", ""},
+	)
+
+	// Test cases with empty old strings.
+
+	blankToX1 := NewReplacer("", "X")
+	blankToX2 := NewReplacer("", "X", "", "")
+	blankHighPriority := NewReplacer("", "X", "o", "O")
+	blankLowPriority := NewReplacer("o", "O", "", "X")
+	blankNoOp1 := NewReplacer("", "")
+	blankNoOp2 := NewReplacer("", "", "", "A")
+	blankFoo := NewReplacer("", "X", "foobar", "R", "foobaz", "Z")
+	testCases = append(testCases,
+		testCase{blankToX1, "foo", "XfXoXoX"},
+		testCase{blankToX1, "", "X"},
+
+		testCase{blankToX2, "foo", "XfXoXoX"},
+		testCase{blankToX2, "", "X"},
+
+		testCase{blankHighPriority, "oo", "XOXOX"},
+		testCase{blankHighPriority, "ii", "XiXiX"},
+		testCase{blankHighPriority, "oiio", "XOXiXiXOX"},
+		testCase{blankHighPriority, "iooi", "XiXOXOXiX"},
+		testCase{blankHighPriority, "", "X"},
+
+		testCase{blankLowPriority, "oo", "OOX"},
+		testCase{blankLowPriority, "ii", "XiXiX"},
+		testCase{blankLowPriority, "oiio", "OXiXiOX"},
+		testCase{blankLowPriority, "iooi", "XiOOXiX"},
+		testCase{blankLowPriority, "", "X"},
+
+		testCase{blankNoOp1, "foo", "foo"},
+		testCase{blankNoOp1, "", ""},
+
+		testCase{blankNoOp2, "foo", "foo"},
+		testCase{blankNoOp2, "", ""},
+
+		testCase{blankFoo, "foobarfoobaz", "XRXZX"},
+		testCase{blankFoo, "foobar-foobaz", "XRX-XZX"},
+		testCase{blankFoo, "", "X"},
+	)
+
+	// single string replacer
+
+	abcMatcher := NewReplacer("abc", "[match]")
+
+	testCases = append(testCases,
+		testCase{abcMatcher, "", ""},
+		testCase{abcMatcher, "ab", "ab"},
+		testCase{abcMatcher, "abcd", "[match]d"},
+		testCase{abcMatcher, "cabcabcdabca", "c[match][match]d[match]a"},
+	)
+
+	// No-arg test cases.
+
+	nop := NewReplacer()
+	testCases = append(testCases,
+		testCase{nop, "abc", "abc"},
+		testCase{nop, "", ""},
+	)
+
+	// Run the test cases.
+
+	for i, tc := range testCases {
+		if s := tc.r.Replace(tc.in); s != tc.out {
+			t.Errorf("%d. Replace(%q) = %q, want %q", i, tc.in, s, tc.out)
 		}
 		var buf bytes.Buffer
-		n, err := tt.r.WriteString(&buf, tt.in)
+		n, err := tc.r.WriteString(&buf, tc.in)
 		if err != nil {
 			t.Errorf("%d. WriteString: %v", i, err)
 			continue
 		}
 		got := buf.String()
-		if got != tt.out {
-			t.Errorf("%d. WriteString(%q) wrote %q, want %q", i, tt.in, got, tt.out)
+		if got != tc.out {
+			t.Errorf("%d. WriteString(%q) wrote %q, want %q", i, tc.in, got, tc.out)
 			continue
 		}
-		if n != len(tt.out) {
+		if n != len(tc.out) {
 			t.Errorf("%d. WriteString(%q) wrote correct string but reported %d bytes; want %d (%q)",
-				i, tt.in, n, len(tt.out), tt.out)
+				i, tc.in, n, len(tc.out), tc.out)
 		}
 	}
 }
 
-// pickAlgorithmTest is a test that verifies that given input for a
-// Replacer that we pick the correct algorithm.
-type pickAlgorithmTest struct {
-	r    *Replacer
-	want string // name of algorithm
+// TestPickAlgorithm tests that NewReplacer picks the correct algorithm.
+func TestPickAlgorithm(t *testing.T) {
+	testCases := []struct {
+		r    *Replacer
+		want string
+	}{
+		{capitalLetters, "*strings.byteReplacer"},
+		{htmlEscaper, "*strings.byteStringReplacer"},
+		{NewReplacer("12", "123"), "*strings.singleStringReplacer"},
+		{NewReplacer("1", "12"), "*strings.byteStringReplacer"},
+		{NewReplacer("", "X"), "*strings.genericReplacer"},
+		{NewReplacer("a", "1", "b", "12", "cde", "123"), "*strings.genericReplacer"},
+	}
+	for i, tc := range testCases {
+		got := fmt.Sprintf("%T", tc.r.Replacer())
+		if got != tc.want {
+			t.Errorf("%d. algorithm = %s, want %s", i, got, tc.want)
+		}
+	}
 }
 
-var pickAlgorithmTests = []pickAlgorithmTest{
-	{capitalLetters, "*strings.byteReplacer"},
-	{NewReplacer("12", "123"), "*strings.genericReplacer"},
-	{NewReplacer("1", "12"), "*strings.byteStringReplacer"},
-	{htmlEscaper, "*strings.byteStringReplacer"},
-}
+// TestGenericTrieBuilding verifies the structure of the generated trie. There
+// is one node per line, and the key ending with the current line is in the
+// trie if it ends with a "+".
+func TestGenericTrieBuilding(t *testing.T) {
+	testCases := []struct{ in, out string }{
+		{"abc;abdef;abdefgh;xx;xy;z", `-
+			a-
+			.b-
+			..c+
+			..d-
+			...ef+
+			.....gh+
+			x-
+			.x+
+			.y+
+			z+
+			`},
+		{"abracadabra;abracadabrakazam;abraham;abrasion", `-
+			a-
+			.bra-
+			....c-
+			.....adabra+
+			...........kazam+
+			....h-
+			.....am+
+			....s-
+			.....ion+
+			`},
+		{"aaa;aa;a;i;longerst;longer;long;xx;x;X;Y", `-
+			X+
+			Y+
+			a+
+			.a+
+			..a+
+			i+
+			l-
+			.ong+
+			....er+
+			......st+
+			x+
+			.x+
+			`},
+		{"foo;;foo;foo1", `+
+			f-
+			.oo+
+			...1+
+			`},
+	}
 
-func TestPickAlgorithm(t *testing.T) {
-	for i, tt := range pickAlgorithmTests {
-		got := fmt.Sprintf("%T", tt.r.Replacer())
-		if got != tt.want {
-			t.Errorf("%d. algorithm = %s, want %s", i, got, tt.want)
+	for _, tc := range testCases {
+		keys := Split(tc.in, ";")
+		args := make([]string, len(keys)*2)
+		for i, key := range keys {
+			args[i*2] = key
+		}
+
+		got := NewReplacer(args...).PrintTrie()
+		// Remove tabs from tc.out
+		wantbuf := make([]byte, 0, len(tc.out))
+		for i := 0; i < len(tc.out); i++ {
+			if tc.out[i] != '\t' {
+				wantbuf = append(wantbuf, tc.out[i])
+			}
+		}
+		want := string(wantbuf)
+
+		if got != want {
+			t.Errorf("PrintTrie(%q)\ngot\n%swant\n%s", tc.in, got, want)
 		}
 	}
 }
 
-func BenchmarkGenericMatch(b *testing.B) {
+func BenchmarkGenericNoMatch(b *testing.B) {
 	str := Repeat("A", 100) + Repeat("B", 100)
 	generic := NewReplacer("a", "A", "b", "B", "12", "123") // varying lengths forces generic
 	for i := 0; i < b.N; i++ {
@@ -113,6 +398,42 @@ func BenchmarkGenericMatch(b *testing.B) {
 	}
 }
 
+func BenchmarkGenericMatch1(b *testing.B) {
+	str := Repeat("a", 100) + Repeat("b", 100)
+	generic := NewReplacer("a", "A", "b", "B", "12", "123")
+	for i := 0; i < b.N; i++ {
+		generic.Replace(str)
+	}
+}
+
+func BenchmarkGenericMatch2(b *testing.B) {
+	str := Repeat("It&apos;s &lt;b&gt;HTML&lt;/b&gt;!", 100)
+	for i := 0; i < b.N; i++ {
+		htmlUnescaper.Replace(str)
+	}
+}
+
+func benchmarkSingleString(b *testing.B, pattern, text string) {
+	r := NewReplacer(pattern, "[match]")
+	b.SetBytes(int64(len(text)))
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		r.Replace(text)
+	}
+}
+
+func BenchmarkSingleMaxSkipping(b *testing.B) {
+	benchmarkSingleString(b, Repeat("b", 25), Repeat("a", 10000))
+}
+
+func BenchmarkSingleLongSuffixFail(b *testing.B) {
+	benchmarkSingleString(b, "b"+Repeat("a", 500), Repeat("a", 1002))
+}
+
+func BenchmarkSingleMatch(b *testing.B) {
+	benchmarkSingleString(b, "abcdef", Repeat("abcdefghijklmno", 1000))
+}
+
 func BenchmarkByteByteNoMatch(b *testing.B) {
 	str := Repeat("A", 100) + Repeat("B", 100)
 	for i := 0; i < b.N; i++ {
@@ -144,7 +465,7 @@ func BenchmarkHTMLEscapeNew(b *testing.B) {
 func BenchmarkHTMLEscapeOld(b *testing.B) {
 	str := "I <3 to escape HTML & other text too."
 	for i := 0; i < b.N; i++ {
-		oldhtmlEscape(str)
+		oldHTMLEscape(str)
 	}
 }
 
diff --git a/libgo/go/strings/search.go b/libgo/go/strings/search.go
new file mode 100644
index 00000000000..f77c879c572
--- /dev/null
+++ b/libgo/go/strings/search.go
@@ -0,0 +1,124 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package strings
+
+// stringFinder efficiently finds strings in a source text. It's implemented
+// using the Boyer-Moore string search algorithm:
+// http://en.wikipedia.org/wiki/Boyer-Moore_string_search_algorithm
+// http://www.cs.utexas.edu/~moore/publications/fstrpos.pdf (note: this aged
+// document uses 1-based indexing)
+type stringFinder struct {
+	// pattern is the string that we are searching for in the text.
+	pattern string
+
+	// badCharSkip[b] contains the distance between the last byte of pattern
+	// and the rightmost occurrence of b in pattern. If b is not in pattern,
+	// badCharSkip[b] is len(pattern).
+	//
+	// Whenever a mismatch is found with byte b in the text, we can safely
+	// shift the matching frame at least badCharSkip[b] until the next time
+	// the matching char could be in alignment.
+	badCharSkip [256]int
+
+	// goodSuffixSkip[i] defines how far we can shift the matching frame given
+	// that the suffix pattern[i+1:] matches, but the byte pattern[i] does
+	// not. There are two cases to consider:
+	//
+	// 1. The matched suffix occurs elsewhere in pattern (with a different
+	// byte preceding it that we might possibly match). In this case, we can
+	// shift the matching frame to align with the next suffix chunk. For
+	// example, the pattern "mississi" has the suffix "issi" next occurring
+	// (in right-to-left order) at index 1, so goodSuffixSkip[3] ==
+	// shift+len(suffix) == 3+4 == 7.
+	//
+	// 2. If the matched suffix does not occur elsewhere in pattern, then the
+	// matching frame may share part of its prefix with the end of the
+	// matching suffix. In this case, goodSuffixSkip[i] will contain how far
+	// to shift the frame to align this portion of the prefix to the
+	// suffix. For example, in the pattern "abcxxxabc", when the first
+	// mismatch from the back is found to be in position 3, the matching
+	// suffix "xxabc" is not found elsewhere in the pattern. However, its
+	// rightmost "abc" (at position 6) is a prefix of the whole pattern, so
+	// goodSuffixSkip[3] == shift+len(suffix) == 6+5 == 11.
+	goodSuffixSkip []int
+}
+
+func makeStringFinder(pattern string) *stringFinder {
+	f := &stringFinder{
+		pattern:        pattern,
+		goodSuffixSkip: make([]int, len(pattern)),
+	}
+	// last is the index of the last character in the pattern.
+	last := len(pattern) - 1
+
+	// Build bad character table.
+	// Bytes not in the pattern can skip one pattern's length.
+	for i := range f.badCharSkip {
+		f.badCharSkip[i] = len(pattern)
+	}
+	// The loop condition is < instead of <= so that the last byte does not
+	// have a zero distance to itself. Finding this byte out of place implies
+	// that it is not in the last position.
+	for i := 0; i < last; i++ {
+		f.badCharSkip[pattern[i]] = last - i
+	}
+
+	// Build good suffix table.
+	// First pass: set each value to the next index which starts a prefix of
+	// pattern.
+	lastPrefix := last
+	for i := last; i >= 0; i-- {
+		if HasPrefix(pattern, pattern[i+1:]) {
+			lastPrefix = i + 1
+		}
+		// lastPrefix is the shift, and (last-i) is len(suffix).
+		f.goodSuffixSkip[i] = lastPrefix + last - i
+	}
+	// Second pass: find repeats of pattern's suffix starting from the front.
+	for i := 0; i < last; i++ {
+		lenSuffix := longestCommonSuffix(pattern, pattern[1:i+1])
+		if pattern[i-lenSuffix] != pattern[last-lenSuffix] {
+			// (last-i) is the shift, and lenSuffix is len(suffix).
+			f.goodSuffixSkip[last-lenSuffix] = lenSuffix + last - i
+		}
+	}
+
+	return f
+}
+
+func longestCommonSuffix(a, b string) (i int) {
+	for ; i < len(a) && i < len(b); i++ {
+		if a[len(a)-1-i] != b[len(b)-1-i] {
+			break
+		}
+	}
+	return
+}
+
+// next returns the index in text of the first occurrence of the pattern. If
+// the pattern is not found, it returns -1.
+func (f *stringFinder) next(text string) int {
+	i := len(f.pattern) - 1
+	for i < len(text) {
+		// Compare backwards from the end until the first unmatching character.
+		j := len(f.pattern) - 1
+		for j >= 0 && text[i] == f.pattern[j] {
+			i--
+			j--
+		}
+		if j < 0 {
+			return i + 1 // match
+		}
+		i += max(f.badCharSkip[text[i]], f.goodSuffixSkip[j])
+	}
+	return -1
+}
+
+func max(a, b int) int {
+	if a > b {
+		return a
+	}
+	return b
+}
diff --git a/libgo/go/strings/search_test.go b/libgo/go/strings/search_test.go
new file mode 100644
index 00000000000..966c05e65ad
--- /dev/null
+++ b/libgo/go/strings/search_test.go
@@ -0,0 +1,90 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package strings_test
+
+import (
+	"reflect"
+	. "strings"
+	"testing"
+)
+
+func TestFinderNext(t *testing.T) {
+	testCases := []struct {
+		pat, text string
+		index     int
+	}{
+		{"", "", 0},
+		{"", "abc", 0},
+		{"abc", "", -1},
+		{"abc", "abc", 0},
+		{"d", "abcdefg", 3},
+		{"nan", "banana", 2},
+		{"pan", "anpanman", 2},
+		{"nnaaman", "anpanmanam", -1},
+		{"abcd", "abc", -1},
+		{"abcd", "bcd", -1},
+		{"bcd", "abcd", 1},
+		{"abc", "acca", -1},
+		{"aa", "aaa", 0},
+		{"baa", "aaaaa", -1},
+		{"at that", "which finally halts.  at that point", 22},
+	}
+
+	for _, tc := range testCases {
+		got := StringFind(tc.pat, tc.text)
+		want := tc.index
+		if got != want {
+			t.Errorf("stringFind(%q, %q) got %d, want %d\n", tc.pat, tc.text, got, want)
+		}
+	}
+}
+
+func TestFinderCreation(t *testing.T) {
+	testCases := []struct {
+		pattern string
+		bad     [256]int
+		suf     []int
+	}{
+		{
+			"abc",
+			[256]int{'a': 2, 'b': 1, 'c': 3},
+			[]int{5, 4, 1},
+		},
+		{
+			"mississi",
+			[256]int{'i': 3, 'm': 7, 's': 1},
+			[]int{15, 14, 13, 7, 11, 10, 7, 1},
+		},
+		// From http://www.cs.utexas.edu/~moore/publications/fstrpos.pdf
+		{
+			"abcxxxabc",
+			[256]int{'a': 2, 'b': 1, 'c': 6, 'x': 3},
+			[]int{14, 13, 12, 11, 10, 9, 11, 10, 1},
+		},
+		{
+			"abyxcdeyx",
+			[256]int{'a': 8, 'b': 7, 'c': 4, 'd': 3, 'e': 2, 'y': 1, 'x': 5},
+			[]int{17, 16, 15, 14, 13, 12, 7, 10, 1},
+		},
+	}
+
+	for _, tc := range testCases {
+		bad, good := DumpTables(tc.pattern)
+
+		for i, got := range bad {
+			want := tc.bad[i]
+			if want == 0 {
+				want = len(tc.pattern)
+			}
+			if got != want {
+				t.Errorf("boyerMoore(%q) bad['%c']: got %d want %d", tc.pattern, i, got, want)
+			}
+		}
+
+		if !reflect.DeepEqual(good, tc.suf) {
+			t.Errorf("boyerMoore(%q) got %v want %v", tc.pattern, good, tc.suf)
+		}
+	}
+}
diff --git a/libgo/go/strings/strings_test.go b/libgo/go/strings/strings_test.go
index 54046d68aa0..c271e48ab24 100644
--- a/libgo/go/strings/strings_test.go
+++ b/libgo/go/strings/strings_test.go
@@ -7,6 +7,7 @@ package strings_test
 import (
 	"bytes"
 	"io"
+	"math/rand"
 	"reflect"
 	. "strings"
 	"testing"
@@ -311,6 +312,13 @@ var FieldsFuncTests = []FieldsTest{
 }
 
 func TestFieldsFunc(t *testing.T) {
+	for _, tt := range fieldstests {
+		a := FieldsFunc(tt.s, unicode.IsSpace)
+		if !eq(a, tt.a) {
+			t.Errorf("FieldsFunc(%q, unicode.IsSpace) = %v; want %v", tt.s, a, tt.a)
+			continue
+		}
+	}
 	pred := func(c rune) bool { return c == 'X' }
 	for _, tt := range FieldsFuncTests {
 		a := FieldsFunc(tt.s, pred)
@@ -984,3 +992,39 @@ func TestEqualFold(t *testing.T) {
 		}
 	}
 }
+
+var makeFieldsInput = func() string {
+	x := make([]byte, 1<<20)
+	// Input is ~10% space, ~10% 2-byte UTF-8, rest ASCII non-space. 
+	for i := range x {
+		switch rand.Intn(10) {
+		case 0:
+			x[i] = ' '
+		case 1:
+			if i > 0 && x[i-1] == 'x' {
+				copy(x[i-1:], "χ")
+				break
+			}
+			fallthrough
+		default:
+			x[i] = 'x'
+		}
+	}
+	return string(x)
+}
+
+var fieldsInput = makeFieldsInput()
+
+func BenchmarkFields(b *testing.B) {
+	b.SetBytes(int64(len(fieldsInput)))
+	for i := 0; i < b.N; i++ {
+		Fields(fieldsInput)
+	}
+}
+
+func BenchmarkFieldsFunc(b *testing.B) {
+	b.SetBytes(int64(len(fieldsInput)))
+	for i := 0; i < b.N; i++ {
+		FieldsFunc(fieldsInput, unicode.IsSpace)
+	}
+}
author	ian <ian@138bc75d-0d04-0410-961f-82ee72b054a4>	2012-10-23 04:31:11 +0000
committer	ian <ian@138bc75d-0d04-0410-961f-82ee72b054a4>	2012-10-23 04:31:11 +0000
commit	fb08d0057f91d420b6f48c112264fc87dc91b532 (patch)
tree	46bb86f514fbf6bad82da48e69a18fb09d878834 /libgo/go/strings
parent	f507227a181bb31fa87d23a082485f99f3ef9183 (diff)
download	ppe42-gcc-fb08d0057f91d420b6f48c112264fc87dc91b532.tar.gz ppe42-gcc-fb08d0057f91d420b6f48c112264fc87dc91b532.zip