summaryrefslogtreecommitdiffstats
path: root/libgo/go/exp/norm/composition.go
diff options
context:
space:
mode:
Diffstat (limited to 'libgo/go/exp/norm/composition.go')
-rw-r--r--libgo/go/exp/norm/composition.go115
1 files changed, 57 insertions, 58 deletions
diff --git a/libgo/go/exp/norm/composition.go b/libgo/go/exp/norm/composition.go
index b2d2abaf63b..1d722230d6f 100644
--- a/libgo/go/exp/norm/composition.go
+++ b/libgo/go/exp/norm/composition.go
@@ -7,27 +7,46 @@ package norm
import "utf8"
const (
- maxCombiningChars = 30 + 2 // +2 to hold CGJ and Hangul overflow.
+ maxCombiningChars = 30
+ maxBufferSize = maxCombiningChars + 2 // +1 to hold starter +1 to hold CGJ
maxBackRunes = maxCombiningChars - 1
maxNFCExpansion = 3 // NFC(0x1D160)
maxNFKCExpansion = 18 // NFKC(0xFDFA)
- maxRuneSizeInDecomp = 4
- // Need to multiply by 2 as we don't reuse byte buffer space for recombining.
- maxByteBufferSize = 2 * maxRuneSizeInDecomp * maxCombiningChars // 256
+ maxByteBufferSize = utf8.UTFMax * maxBufferSize // 128
)
// reorderBuffer is used to normalize a single segment. Characters inserted with
-// insert() are decomposed and reordered based on CCC. The compose() method can
+// insert are decomposed and reordered based on CCC. The compose method can
// be used to recombine characters. Note that the byte buffer does not hold
// the UTF-8 characters in order. Only the rune array is maintained in sorted
-// order. flush() writes the resulting segment to a byte array.
+// order. flush writes the resulting segment to a byte array.
type reorderBuffer struct {
- rune [maxCombiningChars]runeInfo // Per character info.
- byte [maxByteBufferSize]byte // UTF-8 buffer. Referenced by runeInfo.pos.
- nrune int // Number of runeInfos.
- nbyte uint8 // Number or bytes.
+ rune [maxBufferSize]runeInfo // Per character info.
+ byte [maxByteBufferSize]byte // UTF-8 buffer. Referenced by runeInfo.pos.
+ nrune int // Number of runeInfos.
+ nbyte uint8 // Number or bytes.
f formInfo
+
+ src input
+ nsrc int
+ srcBytes inputBytes
+ srcString inputString
+ tmpBytes inputBytes
+}
+
+func (rb *reorderBuffer) init(f Form, src []byte) {
+ rb.f = *formTable[f]
+ rb.srcBytes = inputBytes(src)
+ rb.src = &rb.srcBytes
+ rb.nsrc = len(src)
+}
+
+func (rb *reorderBuffer) initString(f Form, src string) {
+ rb.f = *formTable[f]
+ rb.srcString = inputString(src)
+ rb.src = &rb.srcString
+ rb.nsrc = len(src)
}
// reset discards all characters from the buffer.
@@ -49,10 +68,10 @@ func (rb *reorderBuffer) flush(out []byte) []byte {
// insertOrdered inserts a rune in the buffer, ordered by Canonical Combining Class.
// It returns false if the buffer is not large enough to hold the rune.
-// It is used internally by insert.
+// It is used internally by insert and insertString only.
func (rb *reorderBuffer) insertOrdered(info runeInfo) bool {
n := rb.nrune
- if n >= maxCombiningChars {
+ if n >= maxCombiningChars+1 {
return false
}
b := rb.rune[:]
@@ -68,7 +87,7 @@ func (rb *reorderBuffer) insertOrdered(info runeInfo) bool {
}
rb.nrune += 1
pos := uint8(rb.nbyte)
- rb.nbyte += info.size
+ rb.nbyte += utf8.UTFMax
info.pos = pos
b[n] = info
return true
@@ -76,53 +95,32 @@ func (rb *reorderBuffer) insertOrdered(info runeInfo) bool {
// insert inserts the given rune in the buffer ordered by CCC.
// It returns true if the buffer was large enough to hold the decomposed rune.
-func (rb *reorderBuffer) insert(src []byte, info runeInfo) bool {
- if info.size == 3 && isHangul(src) {
- rune, _ := utf8.DecodeRune(src)
- return rb.decomposeHangul(uint32(rune))
- }
- pos := rb.nbyte
- if info.flags.hasDecomposition() {
- dcomp := rb.f.decompose(src)
- for i := 0; i < len(dcomp); i += int(info.size) {
- info = rb.f.info(dcomp[i:])
- if !rb.insertOrdered(info) {
- return false
- }
- }
- copy(rb.byte[pos:], dcomp)
- } else {
- if !rb.insertOrdered(info) {
- return false
+func (rb *reorderBuffer) insert(src input, i int, info runeInfo) bool {
+ if info.size == 3 {
+ if rune := src.hangul(i); rune != 0 {
+ return rb.decomposeHangul(uint32(rune))
}
- copy(rb.byte[pos:], src[:info.size])
}
- return true
-}
-
-// insertString inserts the given rune in the buffer ordered by CCC.
-// It returns true if the buffer was large enough to hold the decomposed rune.
-func (rb *reorderBuffer) insertString(src string, info runeInfo) bool {
- if info.size == 3 && isHangulString(src) {
- rune, _ := utf8.DecodeRuneInString(src)
- return rb.decomposeHangul(uint32(rune))
- }
- pos := rb.nbyte
- dcomp := rb.f.decomposeString(src)
- dn := len(dcomp)
- if dn != 0 {
- for i := 0; i < dn; i += int(info.size) {
- info = rb.f.info(dcomp[i:])
+ if info.flags.hasDecomposition() {
+ dcomp := rb.f.decompose(src, i)
+ rb.tmpBytes = inputBytes(dcomp)
+ for i := 0; i < len(dcomp); {
+ info = rb.f.info(&rb.tmpBytes, i)
+ pos := rb.nbyte
if !rb.insertOrdered(info) {
return false
}
+ end := i + int(info.size)
+ copy(rb.byte[pos:], dcomp[i:end])
+ i = end
}
- copy(rb.byte[pos:], dcomp)
} else {
+ // insertOrder changes nbyte
+ pos := rb.nbyte
if !rb.insertOrdered(info) {
return false
}
- copy(rb.byte[pos:], src[:info.size])
+ src.copySlice(rb.byte[pos:], i, i+int(info.size))
}
return true
}
@@ -131,17 +129,16 @@ func (rb *reorderBuffer) insertString(src string, info runeInfo) bool {
func (rb *reorderBuffer) appendRune(rune uint32) {
bn := rb.nbyte
sz := utf8.EncodeRune(rb.byte[bn:], int(rune))
- rb.nbyte += uint8(sz)
+ rb.nbyte += utf8.UTFMax
rb.rune[rb.nrune] = runeInfo{bn, uint8(sz), 0, 0}
rb.nrune++
}
// assignRune sets a rune at position pos. It is used for Hangul and recomposition.
func (rb *reorderBuffer) assignRune(pos int, rune uint32) {
- bn := rb.nbyte
+ bn := rb.rune[pos].pos
sz := utf8.EncodeRune(rb.byte[bn:], int(rune))
rb.rune[pos] = runeInfo{bn, uint8(sz), 0, 0}
- rb.nbyte += uint8(sz)
}
// runeAt returns the rune at position n. It is used for Hangul and recomposition.
@@ -259,11 +256,10 @@ func (rb *reorderBuffer) decomposeHangul(rune uint32) bool {
// combineHangul algorithmically combines Jamo character components into Hangul.
// See http://unicode.org/reports/tr15/#Hangul for details on combining Hangul.
-func (rb *reorderBuffer) combineHangul() {
- k := 1
+func (rb *reorderBuffer) combineHangul(s, i, k int) {
b := rb.rune[:]
bn := rb.nrune
- for s, i := 0, 1; i < bn; i++ {
+ for ; i < bn; i++ {
cccB := b[k-1].ccc
cccC := b[i].ccc
if cccB == 0 {
@@ -305,14 +301,17 @@ func (rb *reorderBuffer) compose() {
// blocked from S if and only if there is some character B between S
// and C, and either B is a starter or it has the same or higher
// combining class as C."
+ bn := rb.nrune
+ if bn == 0 {
+ return
+ }
k := 1
b := rb.rune[:]
- bn := rb.nrune
for s, i := 0, 1; i < bn; i++ {
if isJamoVT(rb.bytesAt(i)) {
// Redo from start in Hangul mode. Necessary to support
// U+320E..U+321E in NFKC mode.
- rb.combineHangul()
+ rb.combineHangul(s, i, k)
return
}
ii := b[i]
OpenPOWER on IntegriCloud