热词顶会分析

Posted zl00

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了热词顶会分析相关的知识,希望对你有一定的参考价值。

from lxml import etree

from pymysql import connect

from jieba.analyse import *

import requests

class CVPR:
    """Persist scraped paper records into the ``CVPR`` MySQL table."""

    def saveContent_list(self, title, zhaiyao, guanjian, lianjie):
        """Insert one paper row into the ``CVPR`` table and commit it.

        Args:
            title: Paper title.
            zhaiyao: Abstract text.
            guanjian: Keywords extracted from the abstract.
            lianjie: Link to the paper.

        Raises:
            Any error raised by pymysql while connecting, executing or
            committing is propagated to the caller.
        """
        # NOTE(review): credentials are hard-coded; consider moving them to
        # configuration before this is used outside a local experiment.
        # Keyword arguments are used because newer PyMySQL releases no longer
        # accept positional connection parameters.
        con = connect(
            host="localhost",
            user="root",
            password="a3685371",
            database="pachong",
        )
        try:
            with con.cursor() as cursor:
                # Parameterized insert; the driver escapes every value.
                cursor.execute(
                    "insert into CVPR values(%s,%s,%s,%s)",
                    (title, zhaiyao, guanjian, lianjie),
                )
            con.commit()
        finally:
            # Close the connection even when the insert or commit fails.
            con.close()

# Browser-like User-Agent sent with every request.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (Khtml, like Gecko) Chrome/78.0.3904.108 Safari/537.36"
}

url = "http://openaccess.thecvf.com/CVPR2019.py"

# Defined for optional use; it is not passed to requests.get below.
proxies = {
    "http": "http://211.147.226.4",
    "https": "http://122.200.90.12",
}

cvpr = CVPR()

response = requests.get(url, headers=headers)
html_str = etree.HTML(response.content.decode())

# Relative links to each paper's detail page.
hrefs = html_str.xpath("//div[@id='content']/dl/dt/a/@href")

for href in hrefs:
    href = "http://openaccess.thecvf.com/{0}".format(href)
    response2 = requests.get(href, headers=headers)
    html_str = etree.HTML(response2.content.decode())

    title = html_str.xpath("//div[@id='content']/dl/dd//div[@id='papertitle']/text()")
    lianjie = html_str.xpath("//div[@id='content']/dl/dd//a/@href")
    zhaiyao = html_str.xpath("//div[@id='content']/dl/dd//div[@id='abstract']/text()")

    # xpath() returns lists; skip pages where an expected element is missing
    # instead of failing on an empty list.
    if not (title and lianjie and zhaiyao):
        print("存入失败")
        continue

    # Store scalar strings rather than the raw result lists.
    title = title[0].strip()
    zhaiyao = zhaiyao[0].strip()
    lianjie = lianjie[0]

    # Top five keywords of the abstract, joined with single spaces because
    # the Java side splits the stored keyword column on " ".
    keyword = " ".join(
        word for word, _weight in extract_tags(zhaiyao, topK=5, withWeight=True)
    )

    try:
        cvpr.saveContent_list(title, zhaiyao, keyword, lianjie)
        print("存入成功")
    except Exception as exc:
        # Keep scraping the remaining papers, but show why this one failed.
        print("存入失败", exc)

 

 

<%@ page language="java" contentType="text/html; charset=UTF-8"
    pageEncoding="UTF-8"%>
<%@ taglib uri="http://java.sun.com/jsp/jstl/core" prefix="c"%>
<%--
  Keyword word cloud plus a table of paper links.
  The cloud data is fetched from PaperServlet_ as a JSON array of {name, value};
  the table rows come from the request attribute "list".
  Clicking a word navigates to ClickServlet with the word in the "geunjian" parameter.
--%>
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>Insert title here</title>
<link rel="stylesheet" href="css/bootstrap.min.css" type="text/css" />
<script src="js/jquery-1.11.3.min.js" type="text/javascript"></script>
<script type="text/javascript" src="js/echarts.min.js"></script>
<script type="text/javascript" src="js/china.js"></script>
<script src="js/bootstrap.min.js" type="text/javascript"></script>
<%-- NOTE(review): a second ECharts build is loaded after js/echarts.min.js; confirm both are needed. --%>
<script src='https://cdn.bootcss.com/echarts/3.7.0/echarts.simple.js'></script>
<script src='js/echarts-wordcloud.js'></script>
</head>
<body>
<div id="main" style="width: 100%;height: 400px"></div>
<div>
  <table class="table" style="width: 100%;align-content: center;" >
    <tr>
      <th align="center">论文连接</th>
    </tr>
    <c:forEach var="item" items="${list}">
      <tr>
        <%-- c:out escapes the stored values so they cannot inject markup. --%>
        <td><a href="<c:out value='${item.lianjie}'/>"><c:out value="${item.title}"/></a></td>
      </tr>
    </c:forEach>
  </table>
</div>
<script>
  var chart = echarts.init(document.getElementById('main'));

  // Stays an empty array when the request fails, so the loop below is safe.
  var dt = [];
  $.ajax({
    url : "PaperServlet_",
    // Synchronous on purpose: the chart option below is built right after this call.
    async : false,
    type : "POST",
    success : function(data) {
      dt = data || [];
    },
    error : function() {
      alert("请求失败");
    },
    dataType : "json"
  });

  // Copy only the fields the word cloud needs.
  var mydata = [];
  for (var i = 0; i < dt.length; i++) {
    mydata.push({
      name : dt[i].name,
      value : dt[i].value
    });
  }

  var option = {
    tooltip: {},
    series: [ {
      type: 'wordCloud',
      gridSize: 2,
      sizeRange: [20, 50],
      rotationRange: [-90, 90],
      shape: 'pentagon',
      width: 600,
      height: 300,
      drawOutOfBound: true,
      textStyle: {
        normal: {
          // Random colour per word, each channel capped at 160.
          color: function () {
            return 'rgb(' + [
              Math.round(Math.random() * 160),
              Math.round(Math.random() * 160),
              Math.round(Math.random() * 160)
            ].join(',') + ')';
          }
        },
        emphasis: {
          shadowBlur: 10,
          shadowColor: '#333'
        }
      },
      data: mydata
    } ]
  };

  chart.setOption(option);

  chart.on('click', function (params) {
    // Encode the keyword so spaces, '&' or non-ASCII text survive the query string.
    var url = "ClickServlet?geunjian=" + encodeURIComponent(params.name);
    window.location.href = url;
  });

  // Call resize through the chart so it keeps its receiver.
  window.onresize = function () {
    chart.resize();
  };
</script>
</body>
</html>

 

package com.me.servlet;

 

import java.io.IOException;

import java.sql.SQLException;

import java.util.ArrayList;

import java.util.List;

 

import javax.servlet.ServletException;

import javax.servlet.annotation.WebServlet;

import javax.servlet.http.HttpServlet;

import javax.servlet.http.HttpServletRequest;

import javax.servlet.http.HttpServletResponse;

 

import com.google.gson.Gson;

import com.me.dao.LWDao;

import com.me.domain.LunWen;

import com.me.domain.Tu;

 

@WebServlet("/PaperServlet_")

public class PaperServlet_ extends HttpServlet {

    private static final long serialVersionUID = 1L;

       

 

    public PaperServlet_() {

        super();

    }

 

 

    protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {

        response.setHeader("content-type", "text/html;charset=UTF-8");

 

        response.setCharacterEncoding("UTF-8");

 

        LWDao dao = new LWDao();

        List<LunWen> list = new ArrayList<LunWen>();

        List<Tu> list_tu = new ArrayList<Tu>();

        String [] str = new String[10000];

        String [] str_ = new String[10000];

        int [] b = new int[10000];

        int num = 0;

        int length1 = 0;

        try {

            list = dao.search_();

        } catch (SQLException e) {

            e.printStackTrace();

        }

        for(int i=0;i<list.size();i++) {

            if(list.get(i).getLianjie()!=null) {

                String ss = list.get(i).getLianjie().substring(6,list.get(i).getLianjie().length());

                list.get(i).setLianjie("http://openaccess.thecvf.com/"+ss);

            }

            String[] split = list.get(i).getGuanjian().split(" ");

            for(int j=0;j<split.length;j++) {

                str[num++] = split[j];

            }

        }

        for(int k=0;k<num;k++) {

            b[k]=0;

        }

        str_[0]=str[0];

        int tt=1;

        Boolean rt=true;

        for(int i=1;i<num;i++) {

            rt=false;

            for(int j=0;j<tt;j++) {

                if(str[i].equals(str_[j])) {

                    rt=true;

                    break;

                }

            }

            if(!rt) {

                str_[tt]=str[i];

                tt++;

            }

        }

        length1=tt;

        for(int i=0;i<length1;i++) {

            for(int j=0;j<num;j++) {

                if(str_[i].equals(str[j])) {

                    b[i]++;

                }

            }

        }

        int t3=0;

        int t2=0;

        String sr="";

        for(int i=0;i<length1-1;i++) {

            t3=i;

            for(int j=i+1;j<length1;j++) {

                if(b[t3]<b[j]) {

                    t3=j;

                }

            }

           if(t3!=i) {

               t2=b[i];

               b[i]=b[t3];

               b[t3]=t2;

               sr=str_[i];

               str_[i]=str_[t3];

               str_[t3]=sr;

           }

        }

        for(int i=0;i<100;i++) {

            Tu tu = new Tu();

            tu.name=str_[i];

            tu.value= b[i];

            list_tu.add(tu);

        }

        

        Gson gson = new Gson();

        String json = gson.toJson(list_tu);

        response.getWriter().write(json);

    }

    

 

    protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {

        // TODO Auto-generated method stub

        doGet(request, response);

    }

 

}

 

package com.me.domain;

/**
 * One word-cloud entry: a keyword and how many times it occurs.
 * PaperServlet_ fills these and serializes them with Gson, so the field names
 * are the JSON keys the page reads.
 */
public class Tu {

    // Keyword text.
    public String name;

    // Occurrence count of the keyword.
    public int value;

}

 

LWDao dao = new LWDao();
List<LunWen> list = new ArrayList<LunWen>();
List<Tu> list_tu = new ArrayList<Tu>();
try {
    list = dao.search_();
} catch (SQLException e) {
    e.printStackTrace();
}

// Split each paper's keyword string into words and count them.
// A map replaces the fixed 10000-slot arrays, so any number of words fits;
// LinkedHashMap keeps the order in which words were first seen.
java.util.Map<String, Integer> counts = new java.util.LinkedHashMap<String, Integer>();
for (int i = 0; i < list.size(); i++) {
    String guanjian = list.get(i).getGuanjian();
    if (guanjian == null) {
        continue; // rows without keywords contribute nothing
    }
    String[] split = guanjian.split(" ");
    for (int j = 0; j < split.length; j++) {
        if (split[j].isEmpty()) {
            continue; // consecutive spaces produce empty tokens
        }
        Integer seen = counts.get(split[j]);
        counts.put(split[j], seen == null ? 1 : seen + 1);
    }
}

// Sort by descending count; the sort is stable, so ties keep first-seen order.
List<java.util.Map.Entry<String, Integer>> ranked =
        new ArrayList<java.util.Map.Entry<String, Integer>>(counts.entrySet());
java.util.Collections.sort(ranked, new java.util.Comparator<java.util.Map.Entry<String, Integer>>() {
    public int compare(java.util.Map.Entry<String, Integer> a, java.util.Map.Entry<String, Integer> b) {
        return b.getValue().compareTo(a.getValue());
    }
});

// Wrap the top 100 (or fewer, when there are fewer distinct words) as Tu objects.
int limit = Math.min(100, ranked.size());
for (int i = 0; i < limit; i++) {
    Tu tu = new Tu();
    tu.name = ranked.get(i).getKey();
    tu.value = ranked.get(i).getValue();
    list_tu.add(tu);
}

 

package com.me.servlet;

 

import java.io.IOException;

import java.sql.SQLException;

import java.util.ArrayList;

import java.util.List;

 

import javax.servlet.ServletException;

import javax.servlet.annotation.WebServlet;

import javax.servlet.http.HttpServlet;

import javax.servlet.http.HttpServletRequest;

import javax.servlet.http.HttpServletResponse;

 

import com.me.dao.LWDao;

import com.me.domain.LunWen;

import com.me.domain.Tu;

 

/**

 * Servlet implementation class LunServlet

 */

@WebServlet("/LunServlet")

public class LunServlet extends HttpServlet {

    private static final long serialVersionUID = 1L;

       

    /**

     * @see HttpServlet#HttpServlet()

     */

    public LunServlet() {

        super();

        // TODO Auto-generated constructor stub

    }

 

    /**

     * @see HttpServlet#doGet(HttpServletRequest request, HttpServletResponse response)

     */

    protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {

        response.setHeader("content-type", "text/html;charset=UTF-8");

 

        response.setCharacterEncoding("UTF-8");

 

        LWDao dao = new LWDao();

        List<LunWen> list = new ArrayList<LunWen>();

        try {

            list = dao.search_();

        } catch (SQLException e) {

            e.printStackTrace();

        }

        for(int i=0;i<list.size();i++) {

            if(list.get(i).getLianjie()!=null) {

                String ss = list.get(i).getLianjie().substring(6,list.get(i).getLianjie().length());

                list.get(i).setLianjie("http://openaccess.thecvf.com/"+ss);

            }

            

        }

        

        request.setAttribute("list",list);

        request.getRequestDispatcher("lw.jsp").forward(request, response);

    }

 

    /**

     * @see HttpServlet#doPost(HttpServletRequest request, HttpServletResponse response)

     */

    protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {

        // TODO Auto-generated method stub

        doGet(request, response);

    }

    

    

 

}

 

package com.me.domain;

 

/**
 * Bean for one row of the paper table: title, abstract, keywords and link.
 * All properties are plain strings and start out as {@code null}.
 */
public class LunWen {

    private String title;
    private String zhaiyao;
    private String guanjian;
    private String lianjie;

    // ---- readers ----

    /** @return the paper title */
    public String getTitle() {
        return this.title;
    }

    /** @return the abstract text */
    public String getZhaiyao() {
        return this.zhaiyao;
    }

    /** @return the keyword string */
    public String getGuanjian() {
        return this.guanjian;
    }

    /** @return the paper link */
    public String getLianjie() {
        return this.lianjie;
    }

    // ---- writers ----

    /** @param title the paper title to store */
    public void setTitle(String title) {
        this.title = title;
    }

    /** @param zhaiyao the abstract text to store */
    public void setZhaiyao(String zhaiyao) {
        this.zhaiyao = zhaiyao;
    }

    /** @param guanjian the keyword string to store */
    public void setGuanjian(String guanjian) {
        this.guanjian = guanjian;
    }

    /** @param lianjie the paper link to store */
    public void setLianjie(String lianjie) {
        this.lianjie = lianjie;
    }
}

 

package com.me.servlet;

 

import java.io.IOException;

import java.sql.SQLException;

import java.util.ArrayList;

import java.util.List;

 

import javax.servlet.ServletException;

import javax.servlet.annotation.WebServlet;

import javax.servlet.http.HttpServlet;

import javax.servlet.http.HttpServletRequest;

import javax.servlet.http.HttpServletResponse;

 

import com.me.dao.LWDao;

import com.me.domain.LunWen;

 

/**

 * Servlet implementation class ClickServlet

 */

@WebServlet("/ClickServlet")

public class ClickServlet extends HttpServlet {

    private static final long serialVersionUID = 1L;

    LWDao dao = new LWDao();

    

    public ClickServlet() {

        super();

        // TODO Auto-generated constructor stub

    }

 

    protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {

        String geunjian = request.getParameter("geunjian");

        System.out.println(geunjian);

        List<LunWen> guan = new ArrayList<LunWen>();

        try {

            guan = dao.login(geunjian);

        } catch (SQLException e) {

            e.printStackTrace();

        }

        for(int i=0;i<guan.size();i++) {

            if(guan.get(i).getLianjie()!=null) {

                String ss = guan.get(i).getLianjie().substring(6,guan.get(i).getLianjie().length());

                guan.get(i).setLianjie("http://openaccess.thecvf.com/"+ss);

            }

            

        }

        request.setAttribute("list", guan);

        System.out.println(guan.size());

        request.getRequestDispatcher("lw.jsp").forward(request, response);

    }

 

    /**

     * @see HttpServlet#doPost(HttpServletRequest request, HttpServletResponse response)

     */

    protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {

        // TODO Auto-generated method stub

        doGet(request, response);

    }

 

}

 

package com.me.dao;

 

import java.sql.SQLException;

import java.util.List;

 

import org.apache.commons.dbutils.QueryRunner;

import org.apache.commons.dbutils.handlers.BeanListHandler;

 

import com.me.domain.LunWen;

import com.me.utils.DBUtils;

 

/**

 * @author 王正帅

 * @date: 2020414日 下午8:21:38

 *

 */

public class LWDao {

    public List<LunWen> search_() throws SQLException {

        QueryRunner qr = new QueryRunner(DBUtils.getDataSource());

        String sql = "select * from cvpr";

        List<LunWen> query = qr.query(sql, new BeanListHandler<LunWen>(LunWen.class));

        return query;

    }

    public List<LunWen> login(String guanjien) throws SQLException {

        QueryRunner qr = new QueryRunner(DBUtils.getDataSource());

        String sql = "select * from cvpr where guanjian like "+"‘%"+guanjien+"%‘";

        System.out.println(sql);

        List<LunWen> user01 = qr.query(sql, new BeanListHandler<LunWen>(LunWen.class));

        return user01;

    }

}

技术图片

 

 技术图片

 

以上是关于热词顶会分析的主要内容,如果未能解决你的问题,请参考以下文章

原型设计(顶会热词统计)

顶会热词统计

顶会热词统计

cvpr顶会热词爬取

08顶会热词统计-补

6月20日 顶会热词统计