golang meta2tsv_goquery.go

Posted

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了golang meta2tsv_goquery.go相关的知识,希望对你有一定的参考价值。

package main

import (
	"bufio"
	"fmt"
	"github.com/PuerkitoBio/goquery"
    "log"
	"os"
	"runtime"
	"sync"
	"time"
)

// Result holds the metadata GetPage extracts from a single page: the text of
// its <title> element and the content attributes of its "keywords" and
// "description" <meta> tags.
type Result struct {
	Title, Keywords, Description string
}

func GetPage(url string) {
	var d Result
	doc, err := goquery.NewDocument(url)
	if err != nil {
		log.Print("url scrapping failed")
	}
	d.Title = doc.Find("title").Text()
	doc.Find("meta").Each(func(i int, s *goquery.Selection) {
		if name, _ := s.Attr("name"); name == "keywords" {
			keywords, _ := s.Attr("content")
			d.Keywords = keywords
		}
	})
	doc.Find("meta").Each(func(i int, s *goquery.Selection) {
		if name, _ := s.Attr("name"); name == "description" {
			description, _ := s.Attr("content")
			d.Description = description
		}
	})
	log.Printf("%s\t%s\t%s\t%s\n", url, d.Title, d.Keywords, d.Description)
	fmt.Printf("%s\t%s\t%s\t%s\n", url, d.Title, d.Keywords, d.Description)
}

// File2Array reads the file at filePath and returns its lines in order, with
// line terminators removed.
//
// If the file cannot be opened, a message is written to standard error and
// the process exits with status 1. If scanning fails part-way (for example on
// a line longer than bufio.Scanner's token limit), the scan error is reported
// on standard error and the lines read so far are returned.
func File2Array(filePath string) []string {
	f, err := os.Open(filePath)
	if err != nil {
		fmt.Fprintf(os.Stderr, "File %s could not read: %v\n", filePath, err)
		os.Exit(1)
	}
	defer f.Close()

	// 450 is only an initial capacity; append grows the slice as needed.
	lines := make([]string, 0, 450)
	scanner := bufio.NewScanner(f)
	for scanner.Scan() {
		lines = append(lines, scanner.Text())
	}
	// Report the scanner's own error: the os.Open error above is always nil
	// by the time execution reaches this point.
	if err := scanner.Err(); err != nil {
		fmt.Fprintf(os.Stderr, "File %s scan error: %v\n", filePath, err)
	}
	return lines
}

// execLoop calls GetPage for every entry of lines, running at most
// runtime.NumCPU() calls concurrently, and returns once all of them have
// finished.
func execLoop(lines []string) {
	// runtime.GOMAXPROCS is deliberately not set here: the Go runtime already
	// uses all available CPUs by default, and forcing it would override a
	// limit the user configured through the environment.
	limit := runtime.NumCPU()

	// Counting semaphore: one slot per permitted in-flight GetPage call.
	semaphore := make(chan struct{}, limit)
	var wg sync.WaitGroup
	for _, url := range lines {
		// Take a slot before spawning, so the number of live goroutines is
		// bounded by limit rather than by len(lines).
		semaphore <- struct{}{}
		wg.Add(1)
		go func(url2 string) {
			defer wg.Done()
			defer func() { <-semaphore }()
			GetPage(url2)
		}(url)
	}
	wg.Wait()
}

// main loads the URL list from url_list.txt, runs execLoop over it, and logs
// how many seconds the processing took. Reading the list is not included in
// the measured time.
func main() {
	urls := File2Array("url_list.txt")

	began := time.Now()
	execLoop(urls)
	elapsed := time.Since(began)

	log.Printf("%f seconds\n", elapsed.Seconds())
}

Golang 学习之路

Golang基础

Golang基础之包概念
Golang基础之数据类型
Golang基础之流程控制
Golang基础之数组
Golang基础之切片
Golang基础之map
Golang基础之函数
Golang基础之指针
Golang基础之结构体
Golang基础之接口

Golang Web开发

Golang 其它

算法

以上是关于golang meta2tsv_goquery.go的主要内容,如果未能解决你的问题,请参考以下文章：

Golang入门到项目实战 第一个golang应用

golang编译androidso无法加载

golang如何打印内存内容

Golang入门到项目实战 golang匿名函数

json [Golang] golang #golang #snippets中有用的片段

golang使用成本高