前程无忧

Posted 2021-01-03 lxh777

tags:

篇首语：本文由小常识网(cha138.com)小编为大家整理，主要介绍了前程无忧相关的知识，希望对你有一定的参考价值。

# -*- coding: utf-8 -*-
import scrapy
import re
from zhaopin_project.items import LagouItem

class QianchengwuyouSpider(scrapy.Spider):
    """Crawl 51job search-result pages and scrape each linked job posting.

    The crawl runs in three steps:

    1. ``parse`` schedules every numbered search-result page.
    2. ``parse_detail`` follows the job links found on a result page.
    3. ``parse_list`` turns one job posting page into a ``LagouItem``.
    """

    name = 'qianchengwuyou'
    allowed_domains = ['51job.com']
    start_urls = ['http://51job.com/']

    # Search-result URL; ``{}`` is replaced by the 1-based page number.
    SEARCH_URL_TEMPLATE = 'https://search.51job.com/list/010000,000000,0000,32,9,99,%2B,2,{}.html'
    # Last result page requested (pages 1..1619, same span as range(1, 1620)).
    LAST_PAGE = 1619

    def parse(self, response):
        """Yield one request per search-result page.

        The content of ``response`` (the start URL) is not inspected; it only
        serves as the trigger for scheduling the result pages.
        """
        for page in range(1, self.LAST_PAGE + 1):
            yield scrapy.Request(
                self.SEARCH_URL_TEMPLATE.format(page),
                callback=self.parse_detail,
            )

    def parse_detail(self, response):
        """Yield one request per job link found on a search-result page."""
        links = response.xpath('//div[@class="el"]/p/span/a/@href').extract()
        for link in links:
            # urljoin leaves absolute URLs untouched and resolves relative
            # ones, so a relative href no longer makes Request raise and
            # abort the remaining links of this page.
            yield scrapy.Request(response.urljoin(link), callback=self.parse_list)

    def parse_list(self, response):
        """Yield the item scraped from one job posting page, if any.

        Pages that cannot be parsed are skipped, as before, but the failure
        is now logged instead of being silently discarded.
        """
        try:
            item = self._build_item(response)
        except Exception:
            # Callback boundary: keep the crawl going, but leave a traceback.
            self.logger.exception('Failed to parse job posting %s', response.url)
            return
        if item is not None:
            yield item

    def _build_item(self, response):
        """Extract the posting fields from ``response``.

        Returns:
            A populated ``LagouItem``, or ``None`` when the page does not
            contain the expected summary line.
        """
        # Job title
        title = response.xpath('//div[@class="cn"]/h1/text()').extract_first()
        # Monthly salary
        salary = response.xpath('//div[@class="cn"]/strong/text()').extract_first()

        # The summary line packs several fields into one title attribute.
        # [^"]* stops at the attribute's closing quote, so the capture cannot
        # run on to a later `">` on the same line.
        summary = re.search(r'<p class="msg ltype" title="([^"]*)">', response.text)
        if summary is None:
            self.logger.warning('No summary line on %s; skipping', response.url)
            return None
        parts = summary.group(1).split('&nbsp;&nbsp;|&nbsp;&nbsp;')
        if len(parts) < 3:
            # Fewer fields than the layout handled below; such pages were
            # dropped by the original code as well.
            self.logger.warning(
                'Unexpected summary line on %s: %r; skipping', response.url, parts
            )
            return None

        # Location
        position = parts[0]
        # Experience requirement
        jingyan = parts[1]
        # Education requirement: a four-field summary is treated as carrying
        # no education entry (assumption inherited from the original logic).
        xueli = '学历不限' if len(parts) == 4 else parts[2]
        # Publication time (last field of the summary line)
        shijian = parts[-1]
        # Source site
        fabu = '前程无忧'
        # Job description
        job_bt = ''.join(
            response.xpath('//div[@class="tBorderTop_box"]/div/p/text()').extract()
        )

        item = LagouItem()
        item['title'] = title
        item['salary'] = salary
        item['position'] = position
        item['jingyan'] = jingyan
        item['xueli'] = xueli
        item['shijian'] = shijian
        item['fabu'] = fabu
        item['job_bt'] = job_bt
        return item

以上是关于前程无忧的主要内容，如果未能解决你的问题，请参考以下文章：