node爬虫通过高德接口爬取地址的经纬度
Posted 牛老师讲GIS
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了node爬虫通过高德接口爬取地址的经纬度相关的知识,希望对你有一定的参考价值。
概述
通过地址获取经纬度,在GIS中我们称之为地理编码;与之对应的是通过经纬度获取地址,被称为逆地理编码。不论是地理编码还是逆地理编码,在我们实际的工作、学习中都会有很多的应用场景。本文讲述的是在node环境下,通过高德API实现经纬度数据的获取。
效果
实现
在本示例中,首先将地址数据导入到数据库中,同时将获取到的经纬度数据存储在数据库中。实现代码如下:
const pgConfig = require('./model/pgConfig');
const pg = require('pg');
const pool = new pg.Pool(pgConfig);
let request = require("./utils/request");
const chalk = require('chalk');
// API keys applied for on the AMap (高德) open platform; one of them is picked
// at random for every geocoding request.
const aKey = {
  0: '申请的key1',
  1: '申请的key2',
  2: '申请的key3',
};
// AMap geocoding (address -> coordinates) endpoint.
const aUrl = 'https://restapi.amap.com/v3/geocode/geo';
/**
 * Return a random integer.
 *
 * - no arguments:  an integer in [80, 200] (the defaults)
 * - one argument:  an integer in [1, minNum]
 * - two arguments: an integer in [minNum, maxNum]
 *
 * @param {number} [minNum=80] lower bound (or the upper bound when it is the only argument)
 * @param {number} [maxNum=200] upper bound, inclusive
 * @return {number}
 */
function getRandom(minNum = 80, maxNum = 200) {
  // `arguments.length` counts only explicitly passed arguments, so it is the
  // one way to tell the single-argument form apart from "defaults applied".
  if (arguments.length === 1) {
    return Math.floor(Math.random() * minNum) + 1;
  }
  // Math.floor instead of parseInt: parseInt stringifies its input first, and
  // a float below 1e-6 stringifies as e.g. "3e-8", which would parse as 3.
  return Math.floor(Math.random() * (maxNum - minNum + 1)) + minNum;
}
/**
 * Fetch a URL through the shared `request` helper.
 *
 * @param {string} url address to fetch
 * @return {Promise<unknown>} settles exactly as the promise returned by
 *   `request(url)` does; a synchronous throw from `request` becomes a rejection
 */
function getJson(url) {
  // Resolving with the helper's own promise adopts its outcome, so a failed
  // request rejects here instead of leaving the caller waiting forever.
  return new Promise((resolve) => {
    resolve(request(url));
  });
}
/**
 * Geocode an address through the AMap geocoding endpoint.
 *
 * @param {string} address free-form address text
 * @return {Promise<Object|undefined>} the first entry of the response's
 *   `geocodes` array, or `undefined` when the response contains none or the
 *   request / JSON parsing fails (the error is logged, not rethrown)
 */
async function getAddressLonLat(address) {
  try {
    // Pick one of the configured keys at random; the index range follows the
    // number of keys so adding a key needs no change here.
    const keys = Object.values(aKey);
    const key = keys[getRandom(0, keys.length - 1)];
    // NOTE(review): the address is not percent-encoded here because the
    // request helper shown in this article applies encodeURI to the whole URL;
    // an address containing '&' or '#' would still cut the query short.
    const urlFull = `${aUrl}?address=${address}&key=${key}`;
    const body = await getJson(urlFull);
    const { geocodes } = JSON.parse(body.toString());
    if (Array.isArray(geocodes) && geocodes.length > 0) {
      return geocodes[0];
    }
  } catch (e) {
    console.debug(e);
  }
  return undefined;
}
/**
 * Geocode every row of `table` whose `lonlat` column is empty, write the
 * coordinates back in batches, and exit the process when done.
 *
 * For each row the address is `reg_location` + `company_name`; a random pause
 * of `getRandom()` milliseconds precedes every lookup.
 *
 * `table` is interpolated into the SQL text (an identifier cannot be bound as
 * a query parameter), so it must come from trusted code, never from user input.
 *
 * @param {string} table name of the table to process
 * @return {Promise<void>} settles after the last batch; on success the process
 *   is terminated with exit code 0 before the promise is observed
 */
async function startSpider(table) {
  // Rows processed between two progress lines / batch updates.
  const BATCH_SIZE = 20;

  const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  // Write the buffered { id, lonlat } pairs back with a single UPDATE.
  const flush = async (pending) => {
    if (pending.length === 0) {
      return;
    }
    const values = [];
    const whenClauses = [];
    const idParams = [];
    for (const { id, lonlat } of pending) {
      values.push(id, lonlat);
      const idParam = `$${values.length - 1}`;
      const lonlatParam = `$${values.length}`;
      whenClauses.push(`WHEN ${idParam} THEN ${lonlatParam}`);
      idParams.push(idParam);
    }
    // Values are bound as parameters rather than concatenated into the SQL.
    const sqlUpdate = `UPDATE ${table}
      SET lonlat = CASE id
        ${whenClauses.join(' ')}
      END
      WHERE id IN (${idParams.join(',')});`;
    try {
      await pool.query(sqlUpdate, values);
    } catch (err) {
      console.error(`${table}----------数据库更新错误`, err);
    }
  };

  // Look up one row's address and return its "lng,lat" string, if any.
  const geocodeRow = async (id) => {
    const sql = `select reg_location as loc, company_name as name
      from ${table} where id = $1;`;
    const res = await pool.query(sql, [id]);
    const row = res.rows[0];
    if (!row) {
      return undefined;
    }
    const location = await getAddressLonLat([row.loc, row.name].join(''));
    // getAddressLonLat yields a geocode entry whose `location` field carries
    // the coordinate text; accept a plain string as well so only the
    // coordinates (never "[object Object]") reach the database.
    const lonlat = typeof location === 'string' ? location : location && location.location;
    return typeof lonlat === 'string' && lonlat !== '' ? lonlat : undefined;
  };

  console.time(table);

  let rows;
  try {
    const countSql = `select id from ${table} where lonlat = '' order by id;`;
    ({ rows } = await pool.query(countSql));
  } catch (err) {
    console.error('数据库查询错误', err);
    return;
  }

  const count = rows.length;
  console.log(chalk.red(`----------开始处理${table},共${count}条记录----------`));

  let pending = [];
  for (let index = 0; index < count; index++) {
    await sleep(getRandom());
    const { id } = rows[index];
    try {
      const lonlat = await geocodeRow(id);
      if (lonlat) {
        pending.push({ id, lonlat });
      }
    } catch (err) {
      // A failing row must not stall the whole run; log it and move on.
      console.error('数据库查询错误', err);
    }

    const processed = index + 1;
    if (processed % BATCH_SIZE === 0 || processed === count) {
      console.log(`${table}:----------${processed}`);
      await flush(pending);
      // Start a fresh buffer so later batches do not rewrite earlier rows.
      pending = [];
    }
  }

  console.timeEnd(table);
  console.log(chalk.green(`----------${table}结束处理----------`));
  process.exit(0); // finished normally
}
// Entry point: check that the database is reachable, then start crawling.
pool.connect((isError, _client, release) => {
  if (isError) {
    console.error(chalk.red('数据库连接错误'));
    // Without a connection every later query would fail as well.
    return;
  }
  // The checked-out client only served as a connectivity probe; hand it back
  // so it does not stay reserved for the lifetime of the process.
  release();
  startSpider('company_2006');
});
request
代码如下:
const request = require("request");
/**
 * Promise wrapper around the callback-style `request` library.
 *
 * @param {string} url target URL; it is passed through `encodeURI` before use
 * @param {Object} [options={}] extra `request` options; they override the
 *   defaults below (shallow merge, so a caller-supplied `headers` replaces the
 *   default headers as a whole). The object itself is not modified.
 * @return {Promise<*>} resolves with the response body when the status code is
 *   200; rejects with the transport error, or with an Error for any other status
 * @throws {Error} synchronously when the encoded url is an empty string
 */
function handleRequestByPromise(url, options = {}) {
  const op = Object.assign(
    {
      method: "GET",
      // null keeps the body as raw bytes; callers decode it themselves
      encoding: null,
      // `request` reads the `headers` option; a `header` key is ignored.
      headers: {
        "User-Agent":
          "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (Khtml, like Gecko) Chrome/71.0.3578.98 Safari/537.36",
        // NOTE(review): this Referer names an unrelated site — confirm it is still wanted.
        Referer: "https://www.meituri.com"
      }
    },
    options,
    // Applied last so the `url` argument always wins, as before.
    { url: encodeURI(url) }
  );
  if (op.url === "") {
    throw new Error("请求的url地址不正确");
  }
  return new Promise((resolve, reject) => {
    request(op, (err, response, body) => {
      if (err) {
        reject(err);
        return;
      }
      if (response && response.statusCode === 200) {
        resolve(body);
      } else {
        reject(new Error(`请求${url}失败!`));
      }
    });
  });
}
module.exports = handleRequestByPromise
pgConfig
代码如下:
// Connection settings for the PostgreSQL pool (passed to `new pg.Pool(...)`).
// The string values are placeholders to be replaced with real credentials.
const config = {
  host: 'ip',
  user: 'user',
  database: 'database',
  password: 'password',
  port: 5432,
  // Pool tuning
  max: 40, // maximum number of connections in the pool
  idleTimeoutMillis: 3000, // maximum idle time of a connection: 3 s
};
module.exports = config;
说明:
- aKey设置了多个,是为了防止账户被封;
- 在获取经纬度数据的时候设置了80-200ms的随机延迟,也是为了防止账户被封。
以上是关于node爬虫通过高德接口爬取地址的经纬度的主要内容,如果未能解决你的问题,请参考以下文章