golang meta2tsv_goquery.go
Posted
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了golang meta2tsv_goquery.go相关的知识,希望对你有一定的参考价值。
package main
import (
"bufio"
"fmt"
"github.com/PuerkitoBio/goquery"
"log"
"os"
"runtime"
"sync"
"time"
)
// Result holds the metadata extracted from a single HTML page: the text of
// its <title> element and the content of its "keywords" and "description"
// <meta> tags.
type Result struct {
	Title, Keywords, Description string
}
func GetPage(url string) {
var d Result
doc, err := goquery.NewDocument(url)
if err != nil {
log.Print("url scrapping failed")
}
d.Title = doc.Find("title").Text()
doc.Find("meta").Each(func(i int, s *goquery.Selection) {
if name, _ := s.Attr("name"); name == "keywords" {
keywords, _ := s.Attr("content")
d.Keywords = keywords
}
})
doc.Find("meta").Each(func(i int, s *goquery.Selection) {
if name, _ := s.Attr("name"); name == "description" {
description, _ := s.Attr("content")
d.Description = description
}
})
log.Printf("%s\t%s\t%s\t%s\n", url, d.Title, d.Keywords, d.Description)
fmt.Printf("%s\t%s\t%s\t%s\n", url, d.Title, d.Keywords, d.Description)
}
// File2Array reads the file at filePath and returns its lines, without line
// terminators, in file order.
//
// If the file cannot be opened, a message is written to standard error and
// the process exits with status 1. If scanning stops with an error (for
// example a line longer than the scanner's token limit), the error is
// reported on standard error and the lines read so far are returned.
func File2Array(filePath string) []string {
	f, err := os.Open(filePath)
	if err != nil {
		fmt.Fprintf(os.Stderr, "File %s could not read: %v\n", filePath, err)
		os.Exit(1)
	}
	defer f.Close()

	lines := make([]string, 0, 450)
	scanner := bufio.NewScanner(f)
	for scanner.Scan() {
		lines = append(lines, scanner.Text())
	}
	// Report the scanner's own error; the os.Open error is always nil here.
	if serr := scanner.Err(); serr != nil {
		fmt.Fprintf(os.Stderr, "File %s scan error: %v\n", filePath, serr)
	}
	return lines
}
// execLoop calls GetPage once for every entry of lines, running at most
// runtime.NumCPU() calls concurrently, and returns after all of them have
// finished.
func execLoop(lines []string) {
	cpus := runtime.NumCPU()
	runtime.GOMAXPROCS(cpus)

	var wg sync.WaitGroup
	// Counting semaphore: its capacity is the maximum number of in-flight
	// GetPage calls.
	semaphore := make(chan struct{}, cpus)
	for _, url := range lines {
		// Take a slot before spawning so that at most cpus goroutines exist
		// at any time, rather than one parked goroutine per URL.
		semaphore <- struct{}{}
		wg.Add(1)
		go func(url2 string) {
			defer wg.Done()
			// Release the slot on every exit path from this goroutine.
			defer func() { <-semaphore }()
			GetPage(url2)
		}(url)
	}
	wg.Wait()
}
// main reads the URL list from url_list.txt, processes every entry through
// execLoop, and logs how many seconds the processing took. Reading the file
// is not included in the measured time.
func main() {
	urls := File2Array("url_list.txt")

	begin := time.Now()
	execLoop(urls)
	elapsed := time.Since(begin)

	log.Printf("%f seconds\n", elapsed.Seconds())
}
Golang 学习之路
Golang基础
Golang基础之包概念
Golang基础之数据类型
Golang基础之流程控制
Golang基础之数组
Golang基础之切片
Golang基础之map
Golang基础之函数
Golang基础之指针
Golang基础之结构体
Golang基础之接口
Golang Web开发
Golang 其它
算法
以上是关于golang meta2tsv_goquery.go的主要内容,如果未能解决你的问题,请参考以下文章:
Golang入门到项目实战 第一个golang应用
golang编译androidso无法加载
golang如何打印内存内容
Golang入门到项目实战 golang匿名函数
json [Golang] golang #golang #snippets中有用的片段
golang使用成本高