summary | refs | log | tree | commit | diff | stats
path: root/libgo/go/html/doc.go
diff options
context:
space:
mode:
Diffstat (limited to 'libgo/go/html/doc.go')
-rw-r--r-- libgo/go/html/doc.go 31
1 files changed, 25 insertions, 6 deletions
diff --git a/libgo/go/html/doc.go b/libgo/go/html/doc.go
index 9f5d478b42c..c5338d0781d 100644
--- a/libgo/go/html/doc.go
+++ b/libgo/go/html/doc.go
@@ -15,7 +15,7 @@ which parses the next token and returns its type, or an error:
for {
tt := z.Next()
- if tt == html.Error {
+ if tt == html.ErrorToken {
// ...
return ...
}
@@ -34,7 +34,7 @@ Entities (such as "&lt;") are unescaped, tag names and attribute keys are
lower-cased, and attributes are collected into a []Attribute. For example:
for {
- if z.Next() == html.Error {
+ if z.Next() == html.ErrorToken {
// Returning os.EOF indicates success.
return z.Error()
}
@@ -49,15 +49,15 @@ call to Next. For example, to extract an HTML page's anchor text:
for {
tt := z.Next()
switch tt {
- case Error:
+ case ErrorToken:
return z.Error()
- case Text:
+ case TextToken:
if depth > 0 {
// emitBytes should copy the []byte it receives,
// if it doesn't process it immediately.
emitBytes(z.Text())
}
- case StartTag, EndTag:
+ case StartTagToken, EndTagToken:
tn, _ := z.TagName()
if len(tn) == 1 && tn[0] == 'a' {
if tt == StartTagToken {
@@ -69,6 +69,26 @@ call to Next. For example, to extract an HTML page's anchor text:
}
}
+Parsing is done by calling Parse with an io.Reader, which returns the root of
+the parse tree (the document element) as a *Node. It is the caller's
+responsibility to ensure that the Reader provides UTF-8 encoded HTML. For
+example, to process each anchor node in depth-first order:
+
+ doc, err := html.Parse(r)
+ if err != nil {
+ // ...
+ }
+ var f func(*html.Node)
+ f = func(n *html.Node) {
+ if n.Type == html.ElementNode && n.Data == "a" {
+ // Do something with n...
+ }
+ for _, c := range n.Child {
+ f(c)
+ }
+ }
+ f(doc)
+
The relevant specifications include:
http://www.whatwg.org/specs/web-apps/current-work/multipage/syntax.html and
http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html
@@ -82,6 +102,5 @@ package html
// node. Specification compliance is verified by checking expected and actual
// outputs over a test suite rather than aiming for algorithmic fidelity.
-// TODO(nigeltao): Implement a parser, not just a tokenizer.
// TODO(nigeltao): Does a DOM API belong in this package or a separate one?
// TODO(nigeltao): How does parsing interact with a JavaScript engine?
OpenPOWER on IntegriCloud