Ruby：将 JBIG2 编码的图像转换为 PDF —— https://github.com/agl/jbig2enc/blob/master/pdf.py 的 Ruby 移植版

Posted

tags:

篇首语：本文由小常识网(cha138.com)小编为大家整理，主要介绍了用 Ruby 将 JBIG2 编码的图像转换为 PDF（https://github.com/agl/jbig2enc/blob/master/pdf.py 的 Ruby 移植版）的相关知识，希望对你有一定的参考价值。

#!/usr/bin/env ruby

# Copyright 2017 Mahmood S. Zargar
# Author: mahmood@gmail.com (Mahmood S. Zargar)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# This is a Ruby port of "pdf.py" published under the same license
# Copyright 2006 Google Inc.
# Author: agl@imperialviolet.org (Adam Langley)

# JBIG2 Encoder
# https://github.com/agl/jbig2enc

# Fallback resolution, in dots per inch.
# NOTE(review): this is a top-level local variable, and Ruby method bodies
# (`def`) do not see top-level locals -- confirm that any method reading
# `dpi` can actually resolve it.
dpi = 72

# A minimal PDF dictionary: a set of name/value entries that can serialize
# itself in PDF "<< /Key value >>" syntax.
class Dict
  # The underlying Hash of entries; keys are stored without the leading slash.
  attr_accessor :d

  # values - initial entries; they are copied into a fresh Hash, so the
  #          caller's Hash is not shared with this Dict.
  def initialize(values = {})
    @d = {}.merge(values)
  end

  # Renders every entry, in iteration order, as "/Key value" followed by a
  # newline, wrapped in "<< " and ">>\n".
  def to_s
    body = @d.map { |key, value| ('/%s ' % key) + value.to_s + "\n" }
    '<< ' + body.join + ">>\n"
  end
end

# Next PDF object number to hand out. Every Obj.new consumes one number,
# so objects are numbered 1, 2, 3, ... in creation order.
$global_next_id = 1

# A numbered PDF indirect object: a dictionary plus an optional stream body.
class Obj
  # Object number assigned at construction time.
  attr_accessor :id
  # The object's dictionary (a Dict).
  attr_accessor :d

  # d      - Hash of dictionary entries for this object.
  # stream - optional String with the raw stream data; when given, a
  #          'Length' entry is added to the dictionary.
  def initialize(d = {}, stream = nil)
    # /Length must be the number of BYTES in the stream, so use bytesize
    # rather than size (which counts characters for non-binary strings).
    # merge builds a new Hash, so the caller's Hash is left untouched and
    # may even be frozen.
    entries = stream.nil? ? d : d.merge('Length' => stream.bytesize.to_s)
    @d = Dict.new(entries)
    @stream = stream
    @id = $global_next_id
    $global_next_id += 1
  end

  # Serializes the object body: the dictionary, then the stream wrapped in
  # "stream"/"endstream" when one is present, then "endobj". The leading
  # "N 0 obj" line is not emitted here.
  def to_s
    parts = [@d.to_s]
    parts << "stream\n" << @stream << "\nendstream\n" unless @stream.nil?
    parts << "endobj\n"
    parts.join
  end
end

# An in-memory PDF document: an ordered list of objects that can be
# serialized, together with a cross-reference table and trailer, into the
# text of a PDF file.
class Doc
  def initialize
    @objs = []
    @pages = []
  end

  # Appends +o+ to the document body and returns it.
  def add_object(o)
    @objs << o
    o
  end

  # Records +o+ as a page, appends it to the document body and returns it.
  def add_page(o)
    @pages << o
    add_object(o)
  end

  # Serializes the document: header, each object preceded by its
  # "N 0 obj" line, the xref table, the trailer and the startxref pointer.
  #
  # The xref table is written as a single subsection starting at object 0
  # with one entry per object in insertion order, and the trailer names
  # object 1 as /Root, so objects are expected to have been added in id
  # order starting from 1.
  def to_s
    chunks = []
    position = 0 # byte offset at which the next chunk will start
    offsets = []

    # Chunks are joined with "\n" at the end, hence the +1. Offsets in the
    # xref table are byte positions, so measure with bytesize, not size.
    emit = lambda do |chunk|
      chunks << chunk
      position += chunk.bytesize + 1
    end

    emit.call('%PDF-1.4')

    @objs.each do |o|
      offsets << position
      emit.call('%i 0 obj' % o.id)
      emit.call(o.to_s)
    end
    xrefstart = position

    entry_count = offsets.size + 1 # +1 for the free entry of object 0
    chunks << 'xref'
    chunks << '0 %i' % entry_count
    chunks << '0000000000 65535 f '
    offsets.each { |offset| chunks << '%010i 00000 n ' % offset }
    chunks << ''
    chunks << 'trailer'
    chunks << "<< /Size %i\n/Root 1 0 R >>" % entry_count
    chunks << 'startxref'
    chunks << xrefstart.to_s
    chunks << '%%EOF'
    chunks.join("\n")
  end
end

# Formats an indirect reference ("N 0 R") to the PDF object numbered +x+.
def ref(x)
  format('%i 0 R', x)
end

# Builds a PDF that wraps JBIG2-encoded page files and returns it as a String.
#
# symboltable - path of the shared symbol table file; it is stored as a
#               stream object and referenced by every page image as
#               /JBIG2Globals.
# pagefiles   - paths of the per-page files; they are processed in sorted
#               order. The caller's array is not modified.
#
# Page files that cannot be read, or that are too short to hold the
# dimensions read from bytes 11..26, are reported on $stderr and skipped.
# An unreadable symbol table still raises.
def jbig2pdf(symboltable='output.sym', pagefiles=Dir['output.[0-9]*'])
  # Resolution assumed when a page header stores 0 for a resolution. Kept
  # as a local because top-level locals are not visible inside a method.
  default_dpi = 72

  # The catalog hard-codes references to objects 2 and 3, pages name
  # '3 0 R' as their parent, and the document is expected to start at
  # object 1, so numbering restarts for every document built.
  $global_next_id = 1

  doc = Doc.new
  doc.add_object(Obj.new({'Type' => '/Catalog', 'Outlines' => ref(2), 'Pages' => ref(3)}))
  doc.add_object(Obj.new({'Type' => '/Outlines', 'Count' => '0'}))
  pages = doc.add_object(Obj.new({'Type' => '/Pages'}))
  symd = doc.add_object(Obj.new({}, File.read(symboltable, mode: 'rb')))
  page_objs = []

  pagefiles.sort.each do |path|
    begin
      contents = File.read(path, mode: 'rb')
    rescue IOError, SystemCallError
      # Missing or unreadable files raise Errno::* (SystemCallError), which
      # is not a subclass of IOError, so both are rescued here.
      $stderr.puts("error reading page file %s\n" % path)
      next
    end

    # Four big-endian 32-bit integers at bytes 11..26.
    header = contents.byteslice(11, 16)
    if header.nil? || header.bytesize < 16
      $stderr.puts("page file %s is too short, skipping\n" % path)
      next
    end
    width, height, xres, yres = header.unpack('NNNN')

    xres = default_dpi if xres == 0
    yres = default_dpi if yres == 0

    # Page size in PDF units (72 per inch).
    page_width = (width.to_f * 72) / xres
    page_height = (height.to_f * 72) / yres

    xobj = Obj.new({'Type' => '/XObject', 'Subtype' => '/Image',
                    'Width' => width.to_s, 'Height' => height.to_s,
                    'ColorSpace' => '/DeviceGray', 'BitsPerComponent' => '1',
                    'Filter' => '/JBIG2Decode',
                    'DecodeParms' => ' << /JBIG2Globals %i 0 R >>' % symd.id},
                   contents)
    # Content stream: scale the image to the page size and draw it.
    drawing = Obj.new({}, 'q %f 0 0 %f 0 0 cm /Im1 Do Q' % [page_width, page_height])
    resources = Obj.new({'ProcSet' => '[/PDF /ImageB]',
                         'XObject' => '<< /Im1 %i 0 R >>' % xobj.id})
    page = Obj.new({'Type' => '/Page', 'Parent' => '3 0 R',
                    'MediaBox' => '[ 0 0 %f %f ]' % [page_width, page_height],
                    'Contents' => ref(drawing.id),
                    'Resources' => ref(resources.id)})
    [xobj, drawing, resources, page].each { |obj| doc.add_object(obj) }
    page_objs << page
  end

  # Filled in once, after all pages are known; also set when no page could
  # be loaded so the /Pages object always has its Count and Kids entries.
  pages.d.d['Count'] = page_objs.size.to_s
  pages.d.d['Kids'] = '[' + page_objs.map { |obj| ref(obj.id) }.join(' ') + ']'

  doc.to_s
end

# Prints +msg+ and a one-line usage summary to $stderr, then terminates the
# process with a failure exit status.
def usage(msg)
  program = 'pdf.rb'
  $stderr.puts("#{program}: #{msg}\n")
  $stderr.puts("Usage: #{program} [file_basename] > out.pdf\n")
  exit(false)
end

########## Main ##########

if __FILE__ == $0

  # With no argument the files are expected under the basename "output";
  # with one argument that argument is the basename. usage exits the
  # process, so the else branch never yields a value.
  basename =
    case ARGV.size
    when 0 then 'output'
    when 1 then ARGV[0]
    else usage("Wrong number of arguments!")
    end

  sym = basename + '.sym'
  pages = Dir[basename + '.[0-9]*']

  # File.exists? was deprecated for years and removed in Ruby 3.2;
  # File.exist? works on every Ruby version.
  usage("Symbol table %s not found!" % sym) unless File.exist?(sym)
  usage("No pages found!") if pages.empty?

  # The PDF contains raw binary stream data, so make sure stdout does not
  # apply any newline or encoding translation.
  $stdout.binmode
  print(jbig2pdf(sym, pages))

end

以上是关于用 Ruby 将 JBIG2 编码的图像转换为 PDF（https://github.com/agl/jbig2enc/blob/master/pdf.py 的 Ruby 移植版）的主要内容。如果未能解决你的问题，请参考以下文章：

转换为PNG时,Apache PDFBox删除水平线

将图像类型 PDF 转换为启用 OCR 的 PDF

在 Javascript 中将 PDF 转换为 Base64 编码的字符串

怎么用ABBYY将PDF转换为JPEG图像

怎么用ABBYY将PDF转换为JPEG图像

如何将图像转换为PDF?