Skip to content

Commit a81bf86

Browse files
committed
init
1 parent 25d5562 commit a81bf86

File tree

3 files changed

+115
-0
lines changed

3 files changed

+115
-0
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
work/src/github.com/*
2+
work/src/golang.org/*
3+
work/pkg

work/src/main/jin10_spider.go

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
package main
2+
3+
import (
	"errors"
	"fmt"
	"io/ioutil"
	"log"
	"net/http"
	"net/url"
	"regexp"
	"strings"
	"time"

	"github.com/PuerkitoBio/goquery"
)
15+
16+
// Jin10 holds the state of one scrape of the jin10.com front page.
//
// Only jin10_page is read or written by the methods in this file; the
// exported fields carry JSON names but are not populated by the visible code.
type Jin10 struct {
	// jin10_page is the raw HTML of the most recently fetched page. It is
	// unexported and additionally tagged "-", so it never appears in JSON.
	jin10_page string `json:"-"`

	CodeType string `json:"codeType"`
	CreateAt int64  `json:"createAt"`
	Channels []int  `json:"channels"`
	Content  string `json:"content"`
}
23+
24+
func (j Jin10) getByProxy(url_addr, proxy_addr string) (*http.Response, error) {
25+
request, err := http.NewRequest("GET", url_addr, nil)
26+
if err != nil {
27+
return nil, err
28+
}
29+
proxy, err := url.Parse(proxy_addr)
30+
if err != nil {
31+
return nil, err
32+
}
33+
client := &http.Client{
34+
Transport: &http.Transport{
35+
Proxy: http.ProxyURL(proxy),
36+
},
37+
}
38+
return client.Do(request)
39+
}
40+
41+
func (j *Jin10) getPage() {
42+
proxy := "http://123.59.83.131:23128"
43+
url := "http://jin10.com/"
44+
resp, err := j.getByProxy(url, proxy)
45+
if err != nil {
46+
log.Println(err)
47+
return
48+
}
49+
body, err := ioutil.ReadAll(resp.Body)
50+
defer resp.Body.Close()
51+
j.jin10_page = string(body)
52+
err = j.matchResult()
53+
if err != nil {
54+
log.Println(err)
55+
return
56+
}
57+
}
58+
59+
func (j *Jin10) matchResult() error {
60+
doc, err := goquery.NewDocumentFromReader(strings.NewReader(j.jin10_page))
61+
if err != nil {
62+
return err
63+
}
64+
firstEle, err := doc.Find("#newslist table").Eq(0).Html()
65+
if err != nil {
66+
return err
67+
}
68+
regExp, err := regexp.Compile("\\<td align=\"left\" valign=\"middle\" id=\"content_[0-9]+\"\\>(.+)?\\</td\\>")
69+
if err != nil {
70+
return err
71+
}
72+
var result string
73+
f := regExp.FindStringSubmatch(firstEle)
74+
if len(f) == 0 {
75+
ID, hasID := doc.Find("#newslist .newsline").Eq(0).Attr("id")
76+
if !hasID {
77+
errors.New("dom match failed.")
78+
}
79+
result = doc.Find("#content_" + ID).Text()
80+
} else {
81+
result = f[1]
82+
}
83+
fmt.Println(result)
84+
return nil
85+
}
86+
87+
func main() {
88+
var j = &Jin10{}
89+
j.getPage()
90+
}

work/src/main/test.go

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
package main
2+
3+
import (
4+
"fmt"
5+
"html"
6+
"log"
7+
"net/http"
8+
)
9+
10+
// func responseHandler(w http.ResponseWriter, r *http.Request) {
11+
// if r.Method == "GET" {
12+
// fmt.Println(r.URL.Path)
13+
// }
14+
// }
15+
16+
// main serves a small echo endpoint on port 8899: a request to /bar is
// answered with the HTTP method followed by the quoted, HTML-escaped URL path.
func main() {
	echo := func(w http.ResponseWriter, r *http.Request) {
		escapedPath := html.EscapeString(r.URL.Path)
		fmt.Fprintf(w, "%s %q", r.Method, escapedPath)
	}
	http.HandleFunc("/bar", echo)

	// ListenAndServe always returns a non-nil error, so reaching the next
	// line means the server failed or stopped.
	err := http.ListenAndServe(":8899", nil)
	log.Fatal(err)
}

0 commit comments

Comments
 (0)