ruby 存储前先解析短URL。短网址用于微博;你永远不应该真的长期保存它们。

Posted

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了“ruby 存储前先解析短URL。短网址用于微博;你永远不应该真的长期保存它们”相关的知识,希望对你有一定的参考价值。

# Author: Mislav Marohnić
# License: MIT http://mislav.mit-license.org

require 'uri'
require 'net/https'

# Public: Service that resolves URLs to their final destination.
#
# Examples
#
#   res = UrlResolver::resolve(url)
#
#   if res.dead?
#     abort "dead link"
#   elsif res.failed?
#     abort res.failed_reason
#   elsif res.changed?
#     puts "-> #{res.final_url} (#{res.num_redirects} redirects)"
#   else
#     warn "URL is direct"
#     p res.response_code
#     p res.response_headers
#   end
class UrlResolver
  # Internal: HTTP status codes treated as redirects. The Location header of
  # a response with one of these codes is followed as the next hop.
  REDIRECT_CODES = [301, 302, 303, 307, 308].freeze

  # Public: Resolve a URL
  #
  # url          - String or URI
  # http_adapter - a HTTP adapter to make requests with
  #                (default: HttpAdapter.new)
  # limit        - Integer maximum number of redirects to follow
  #                (default: 5)
  #
  # Returns a Resolution.
  def self.resolve url, http_adapter = HttpAdapter.new, limit = 5
    new(url, http_adapter).resolve(limit)
  end

  attr_reader :url, :http_adapter

  # Public: Set up a resolver for a single URL.
  #
  # url          - String or URI; Strings are parsed into a URI
  # http_adapter - object responding to get_connection, create_request and
  #                perform_request (see HttpAdapter)
  def initialize url, http_adapter
    @url = normalize_url(url)
    @http_adapter = http_adapter
  end

  # Public: Perform URL resolution
  #
  # limit - Integer representing the maximum number of redirects
  #         (default: 5)
  #
  # All exceptions are caught and available as Resolution#error.
  #
  # Returns a Resolution.
  def resolve limit = 5
    resolution = Resolution.new url
    begin
      resolve_url(url, limit) do |new_url, response|
        resolution.final_url = new_url
        resolution.response  = response
        resolution.num_requests += 1
      end
    rescue => error
      # Errors that carry their own response (HttpError, TooManyRedirects)
      # were raised after the response was recorded. For any other error the
      # recorded response belongs to an earlier hop, so it is discarded.
      resolution.response = nil unless error.respond_to? :response
      resolution.error = error
    end
    resolution
  end

  # Public: The result of a URL resolution.
  class Resolution
    attr_reader :original_url
    attr_accessor :final_url, :num_requests, :response, :error

    def initialize url
      @final_url = @original_url = url
      @num_requests = 0
      @response = @error = nil
    end

    # Public: Number of redirects that were followed. Never negative, even
    # when the very first request raised before any response was recorded.
    #
    # Returns an Integer.
    def num_redirects
      [num_requests - 1, 0].max
    end

    # Public: Numeric status of the last recorded response.
    #
    # Returns an Integer; 500 when there is no response.
    def response_code
      response ? response.code.to_i : 500
    end

    # Public: Headers of the last recorded response.
    #
    # Returns a Hash; empty when there is no response.
    def response_headers
      response ? response.to_hash : {}
    end

    # Public: Whether resolution ended with an error.
    #
    # Returns true or false.
    def failed?
      !error.nil?
    end

    # Public: Message of the error that ended resolution.
    #
    # Returns a String, or nil when resolution did not fail.
    def failed_reason
      error && error.message
    end

    # Public: Whether the final URL differs from the one originally given.
    def changed?
      original_url != final_url
    end

    # Public: Whether resolution ended on an error that reports not_found?
    # (HttpError does so for 404 and 410).
    def dead?
      error.respond_to?(:not_found?) && error.not_found?
    end
  end

  # Public: Raised when the redirect limit is exhausted. Carries the last
  # redirect response.
  class TooManyRedirects < StandardError
    attr_reader :response
    def initialize(msg, response)
      super(msg)
      @response = response
    end
  end

  # Public: Raised for 4xx/5xx responses and for redirects that lack a
  # Location header. Carries the requested URL and the response.
  class HttpError < StandardError
    attr_reader :request_url, :response
    def initialize(msg, request_url, response)
      super(msg)
      @request_url, @response = request_url, response
    end

    # Public: True when the response status is 404 or 410.
    def not_found?
      response_code == 404 || response_code == 410
    end

    def response_code
      response.code.to_i
    end
  end

  # Internal: Coerce the argument to a URI-like object. Anything that already
  # responds to :host is returned untouched; everything else is parsed from
  # its String form.
  def normalize_url url
    url.respond_to?(:host) ? url : URI(url.to_s)
  end

  # Internal: Request url and follow redirects recursively.
  #
  # url     - URI to request
  # limit   - Integer number of redirects that may still be followed
  # referer - URI of the previous hop, sent as the Referer header (optional)
  # block   - called with (url, response) after every request
  #
  # Returns the final URI.
  # Raises HttpError on a 4xx/5xx response or a redirect without Location.
  # Raises TooManyRedirects when a redirect arrives and limit is below 1.
  def resolve_url url, limit, referer = nil, &block
    response = request url, referer
    yield url, response if block_given?
    case response.code.to_i
    when 400...600
      raise HttpError.new(
        "server returned #{response.code} #{response.message}",
        url, response)
    when *REDIRECT_CODES
      raise TooManyRedirects.new("redirect limit exceeded", response) if limit < 1
      new_location = redirect_target url, response
      resolve_url(new_location, limit - 1, url, &block)
    else
      url
    end
  end

  # Internal: Work out the next hop from a redirect response.
  #
  # url      - URI that was just requested (base for relative locations)
  # response - redirect response; its 'location' header is read
  #
  # Returns a URI.
  # Raises HttpError when the Location header is missing or blank.
  def redirect_target url, response
    location = response['location'].to_s.strip
    if location.empty?
      raise HttpError.new(
        "server returned #{response.code} without a Location header",
        url, response)
    end
    target = normalize_url location
    # A relative Location such as "/path" has no host of its own; resolve it
    # against the URL that produced the redirect.
    return target unless target.relative? && url.respond_to?(:merge)
    url.merge(target)
  end

  # Internal: Perform a single request through the HTTP adapter.
  #
  # url     - URI to request
  # referer - URI sent as the Referer header when given
  #
  # Returns whatever the adapter's perform_request returns.
  def request url, referer = nil
    connection = http_adapter.get_connection(url)
    headers = referer ? {'referer' => referer.to_s} : {}
    request = http_adapter.create_request(url, headers)
    http_adapter.perform_request(connection, request)
  end

  # Internal: HTTP adapter for Net::HTTP to use for URL resolution.
  class HttpAdapter
    # Timeouts passed to Net::HTTP, kept short so lookups cannot block long.
    OPEN_TIMEOUT = 1.5
    READ_TIMEOUT = 2

    # Build an unstarted Net::HTTP connection for url. TLS with peer
    # verification is enabled when the scheme is https.
    def get_connection url
      http = Net::HTTP.new url.host, url.port
      http.use_ssl = url.scheme == 'https'
      if http.use_ssl?
        http.verify_mode = OpenSSL::SSL::VERIFY_PEER
        http.cert_store  = cert_store
      end
      http.open_timeout = OPEN_TIMEOUT
      http.read_timeout = READ_TIMEOUT
      http
    end

    # Certificate store loaded from OpenSSL's default paths.
    def cert_store
      store = OpenSSL::X509::Store.new
      store.set_default_paths
      store
    end

    # Build a HEAD request for url's request URI with the given headers.
    def create_request url, headers = {}
      Net::HTTP::Head.new url.request_uri, headers
    end

    # Start the connection, send the request and return the response.
    def perform_request connection, request
      connection.start do |http|
        http.request request
      end
    end
  end
end

### END implementation; begin tests ###

# Self-test: runs only when this file is executed directly. All HTTP traffic
# is replaced by canned responses, so no network access is needed.
if $0 == __FILE__
  require 'test/unit'

  class UrlResolverTest < Test::Unit::TestCase
    def setup
      @http = TestHttpAdapter.new
    end

    # Resolve url through the stubbed adapter.
    def resolve url
      UrlResolver.new(url, @http).resolve
    end

    # Adapter whose perform_request answers from a queue of expectations
    # instead of talking to the network.
    class TestHttpAdapter < UrlResolver::HttpAdapter
      def initialize
        super
        @expectations = []
      end

      # Queue a handler for an upcoming request.
      #
      # expectation - callable taking (connection, request) (optional)
      # block       - used as the handler when no expectation is given
      #
      # The block is captured explicitly with &block because Proc.new without
      # a block raises ArgumentError on Ruby 3.0 and later.
      def expect_request expectation = nil, &block
        handler = expectation || block
        raise ArgumentError, "expectation or block required" unless handler
        @expectations << handler
      end

      # Answer with the oldest queued expectation. The last remaining
      # expectation is kept and reused for every further request.
      def perform_request connection, request
        response = @expectations.first.call(connection, request)
        @expectations.shift if @expectations.size > 1
        response
      end
    end

    # Minimal stand-in for a Net::HTTP response.
    class MockResponse < Struct.new(:code, :message, :headers)
      def [](name) headers[name] end
      alias to_hash headers
    end

    # Build a MockResponse; 4xx codes get the message 'Not Found', everything
    # else 'OK'.
    def mock_response code, headers = {}
      message = (400...500) === code ? 'Not Found' : 'OK'
      MockResponse.new(code.to_s, message, headers)
    end

    def test_direct_url
      @http.expect_request do |http, request|
        assert_equal 'disney.com', http.address
        assert !http.use_ssl?
        assert_equal '/pluto', request.path
        mock_response 200
      end
      resolution = resolve 'http://disney.com/pluto'
      assert !resolution.failed?, "expected not to have failed"
      assert !resolution.changed?, "expected not to have changed"
      assert resolution.final_url.respond_to?(:host)
      assert_equal 'http://disney.com/pluto', resolution.final_url.to_s
      assert_equal 0, resolution.num_redirects
    end

    def test_failed_resolve
      @http.expect_request do |http, request|
        raise "boom!"
      end
      resolution = resolve 'http://disney.com'
      assert resolution.failed?
      assert_equal "boom!", resolution.failed_reason
    end

    def test_endless_redirect
      @http.expect_request do |http, request|
        mock_response 301, 'location' => 'http://disney.com'
      end
      resolution = resolve 'http://t.co/short'
      assert resolution.failed?, "expected to have failed"
      assert_equal "redirect limit exceeded", resolution.failed_reason
      assert_equal 5, resolution.num_redirects
      assert_equal 'http://disney.com', resolution.final_url.to_s
    end

    def test_normal_redirect
      @http.expect_request do |http, request|
        assert_equal 't.co', http.address
        mock_response 301, 'location' => 'http://disney.com/pluto'
      end
      @http.expect_request do |http, request|
        assert_equal 'disney.com', http.address
        assert_equal '/pluto', request.path
        mock_response 200
      end
      resolution = resolve 'http://t.co/short'
      assert !resolution.failed?, "expected not to have failed"
      assert resolution.changed?, "expected to have changed"
      assert_equal 'http://disney.com/pluto', resolution.final_url.to_s
      assert_equal 1, resolution.num_redirects
    end

    def test_redirect_to_dead_url
      @http.expect_request do |http, request|
        mock_response 301, 'location' => 'http://disney.com/pluto'
      end
      @http.expect_request do |http, request|
        mock_response 404
      end
      resolution = resolve 'http://t.co/short'
      assert resolution.failed?, "expected to have failed"
      assert resolution.dead?, "expected to be dead"
      assert_equal "server returned 404 Not Found", resolution.failed_reason
      assert resolution.changed?, "expected to have changed"
      assert_equal 'http://disney.com/pluto', resolution.final_url.to_s
      assert_equal 1, resolution.num_redirects
    end

    def test_multiple_redirects
      @http.expect_request do |http, request|
        mock_response 301, 'location' => 'http://disney.com/pluto'
      end
      @http.expect_request do |http, request|
        assert_equal '/pluto', request.path
        mock_response 301, 'location' => 'http://disney.com'
      end
      @http.expect_request do |http, request|
        assert_equal '/', request.path
        mock_response 200
      end
      resolution = resolve 'http://t.co/short'
      assert !resolution.failed?, "expected not to have failed"
      assert resolution.changed?, "expected to have changed"
      assert_equal 'http://disney.com', resolution.final_url.to_s
      assert_equal 2, resolution.num_redirects
    end

    def test_ssl
      @http.expect_request do |http, request|
        mock_response 301, 'location' => 'https://disney.com/pluto'
      end
      @http.expect_request do |http, request|
        assert http.use_ssl?, "expected to use SSL"
        assert_equal 443, http.port
        assert_equal '/pluto', request.path
        mock_response 200
      end
      resolution = resolve 'http://t.co/short'
      assert_equal 'https://disney.com/pluto', resolution.final_url.to_s
    end

    def test_referer
      @http.expect_request do |http, request|
        assert_nil request['referer'], "expected blank referer"
        mock_response 301, 'location' => 'http://disney.com/pluto'
      end
      @http.expect_request do |http, request|
        assert_equal 'http://t.co/short', request['referer']
        mock_response 200
      end
      resolution = resolve 'http://t.co/short'
      assert_equal 'http://disney.com/pluto', resolution.final_url.to_s
    end

    def test_response
      @http.expect_request do |http, request|
        mock_response 200, 'ETag' => 'hi!'
      end
      resolution = resolve 'http://disney.com/pluto'
      assert_equal '200', resolution.response.code
      assert_equal 200, resolution.response_code
      assert_equal({'ETag' => 'hi!'}, resolution.response_headers)
    end

    def test_failed_response
      @http.expect_request do |http, request|
        mock_response 503
      end
      resolution = resolve 'http://disney.com/pluto'
      assert_equal '503', resolution.response.code
      assert_equal 503, resolution.response_code
    end

    def test_exception_response
      @http.expect_request do |http, request|
        mock_response 301, 'location' => 'http://disney.com'
      end
      @http.expect_request do |http, request|
        raise "boom!"
      end
      resolution = resolve 'http://disney.com/pluto'
      assert_equal 500, resolution.response_code
      assert_equal({}, resolution.response_headers)
      assert_nil resolution.response
    end
  end
end
require 'net/http'

# WARNING do not use this; it works but is very limited
#
# Issues a single GET for url and follows at most one 301 hop.
#
# url - String or URI to look up
#
# Returns the Location header value when the response code is '301',
# otherwise url converted to a String.
def resolve url
  response = Net::HTTP.get_response URI(url)
  return url.to_s unless response.code == '301'
  response['location']
end

# Why the above method sucks:
# - doesn't handle multiple redirects
# - uses HTTP GET instead of HEAD (slower, wasted bandwidth)
# - no HTTP error and Ruby exception handling
# - no HTTPS support
# - no strict timeouts (lookups can block for too long)

以上是关于“ruby 存储前先解析短URL。短网址用于微博;你永远不应该真的长期保存它们”的主要内容,如果未能解决你的问题,请参考以下文章:

京东短网址高可用提升最佳实践

新浪微博短网址生成_最新t.cn短链接api分享

防护恶意网址用短网址欺骗的对策

长链接转短链接(短网址)

2020最新新浪短网址API接口分享(附腾讯url.cn短网址API接口)

short url短链接原理