ruby Crawler::UrlParser规范

Posted

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了ruby Crawler :: UrlParser规范相关的知识,希望对你有一定的参考价值。

require "rails_helper"

# Spec for Crawler::UrlParser.parse.
#
# Verifies that a bare URL string (no explicit scheme) is decomposed into
# a hash of its parts: url_scheme, host, path, fragment and query_strings.
# The scheme is expected to default to 'http' and the path to '/'.
RSpec.describe Crawler::UrlParser, type: :service do
  describe ".parse" do
    # Shared subject: each context supplies its own `let(:url)`.
    subject { described_class.parse url }

    context "normal url" do
      # Plain host with neither fragment nor query string.
      let(:url) { 'www.my-example.url.com' }
      let(:url_attributes) do
        {
          url_scheme: 'http',
          host: 'www.my-example.url.com',
          path: '/',
          fragment: nil,
          query_strings: nil
        }
      end

      it { is_expected.to eq(url_attributes) }
    end

    context "with fragments" do
      # Hash-bang style fragment; the parser keeps the leading '#'.
      let(:url) { 'www.my-example.url.com/#!/foo/bar' }
      let(:url_attributes) do
        {
          url_scheme: 'http',
          host: 'www.my-example.url.com',
          path: '/',
          fragment: '#!/foo/bar',
          query_strings: nil
        }
      end

      it { is_expected.to eq(url_attributes) }
    end

    context "with query strings" do
      # Query string only; returned verbatim without the leading '?'.
      let(:url) { 'www.my-example.url.com/?foo=bar&bar=foo' }
      let(:url_attributes) do
        {
          url_scheme: 'http',
          host: 'www.my-example.url.com',
          path: '/',
          fragment: nil,
          query_strings: 'foo=bar&bar=foo'
        }
      end

      it { is_expected.to eq(url_attributes) }
    end

    context "complex with query and fragments" do
      # Fragment and query string combined; both parts must be split out.
      let(:url) { 'www.my-example.url.com/#!/foo/bar?foo=bar&bar=foo' }
      let(:url_attributes) do
        {
          url_scheme: 'http',
          host: 'www.my-example.url.com',
          path: '/',
          fragment: '#!/foo/bar',
          query_strings: 'foo=bar&bar=foo'
        }
      end

      it { is_expected.to eq(url_attributes) }
    end
  end
end

以上是关于ruby Crawler::UrlParser规范的主要内容,如果未能解决你的问题,请参考以下文章

ruby crawler_connection.rb

ruby psy_crawler.rb

ruby share_crawler.rb

ruby crawler.rb

ruby share_crawler.rb

ruby 基于Poltergeist(PhantomJS)的Web Crawler Helper类。使用Capybara作为构建webcrawler的框架非常方便