Erlang io:将二进制文件格式化为十六进制

Posted

技术标签:

【中文标题】Erlang io:将二进制文件格式化为十六进制【英文标题】:Erlang io:formatting a binary to hex 【发布时间】:2011-04-15 15:43:33 【问题描述】:

我可以格式化一个 Erlang 二进制数据(binary),使其每个字节都以十六进制输出吗?例如:

> io:format(???, [<<255, 16>>]).
<<FF, 10>>

我在 io:format 的文档中没有看到明显的方法,但也许只是我漏看了?先把二进制数据转换为列表、再逐个格式化其中的元素,效率太低了。

【问题讨论】:

【参考方案1】:

不,没有这样的格式化选项,但您可以执行以下操作:

%% Render every byte as two zero-padded, upper-case hex digits ("~2.16.0B")
%% and print the concatenated digits between "<<" and ">>".
io:format("<<~s>>~n", [[io_lib:format("~2.16.0B",[X]) || <<X:8>> <= <<255,16>> ]]).

如果您需要,还有更快的解决方案。

%% Converts a binary into its upper-case hexadecimal text form (also a
%% binary), e.g. <<255,16>> becomes <<"FF10">>.
-module(bin_to_hex).

%% Request native (HiPE) compilation at the highest optimisation level.
%% NOTE: HiPE was removed in OTP 24, so on current releases these options
%% no longer produce native code and can be dropped.
-compile([native, {hipe, [o3]}]).

%% hex/1 is a single tuple lookup; inline it at every ?H(X) use.
-compile({inline, [hex/1]}).

-export([bin_to_hex/1]).

%% Bit-syntax segment: the two ASCII hex digits of byte X packed in 16 bits.
-define(H(X), (hex(X)):16).

%% Returns the hexadecimal encoding of B; the result is twice as long as B.
-spec bin_to_hex(binary()) -> binary().
bin_to_hex(B) when is_binary(B) ->
  bin_to_hex(B, <<>>).

%% Consumes leading bytes one at a time until the remaining length is a
%% multiple of 8, then hands over to the unrolled loop bin_to_hex_/2.
-spec bin_to_hex(binary(), binary()) -> binary().
bin_to_hex(<<>>, Acc) -> Acc;
bin_to_hex(Bin, Acc) when byte_size(Bin) band 7 =:= 0 ->
  bin_to_hex_(Bin, Acc);
bin_to_hex(<<X:8, Rest/binary>>, Acc) ->
  bin_to_hex(Rest, <<Acc/binary, ?H(X)>>).

%% Unrolled loop: encodes 8 input bytes (16 output bytes) per iteration.
%% Only called with a binary whose size is a multiple of 8.
-spec bin_to_hex_(binary(), binary()) -> binary().
bin_to_hex_(<<>>, Acc) -> Acc;
bin_to_hex_(<<A:8, B:8, C:8, D:8, E:8, F:8, G:8, H:8, Rest/binary>>, Acc) ->
  bin_to_hex_(
    Rest,
    <<Acc/binary,
      ?H(A), ?H(B), ?H(C), ?H(D), ?H(E), ?H(F), ?H(G), ?H(H)>>).

%% Maps a byte value 0..255 to a 16-bit integer whose high byte is the ASCII
%% code of the first hex digit and whose low byte is that of the second,
%% e.g. hex(255) =:= 16#4646 ("FF"). The table is a tuple indexed by X+1
%% because element/2 is 1-based.
-spec hex(byte()) -> 0..16#FFFF.
hex(X) ->
  element(
    X+1,
    {16#3030, 16#3031, 16#3032, 16#3033, 16#3034, 16#3035, 16#3036, 16#3037,
     16#3038, 16#3039, 16#3041, 16#3042, 16#3043, 16#3044, 16#3045, 16#3046,
     16#3130, 16#3131, 16#3132, 16#3133, 16#3134, 16#3135, 16#3136, 16#3137,
     16#3138, 16#3139, 16#3141, 16#3142, 16#3143, 16#3144, 16#3145, 16#3146,
     16#3230, 16#3231, 16#3232, 16#3233, 16#3234, 16#3235, 16#3236, 16#3237,
     16#3238, 16#3239, 16#3241, 16#3242, 16#3243, 16#3244, 16#3245, 16#3246,
     16#3330, 16#3331, 16#3332, 16#3333, 16#3334, 16#3335, 16#3336, 16#3337,
     16#3338, 16#3339, 16#3341, 16#3342, 16#3343, 16#3344, 16#3345, 16#3346,
     16#3430, 16#3431, 16#3432, 16#3433, 16#3434, 16#3435, 16#3436, 16#3437,
     16#3438, 16#3439, 16#3441, 16#3442, 16#3443, 16#3444, 16#3445, 16#3446,
     16#3530, 16#3531, 16#3532, 16#3533, 16#3534, 16#3535, 16#3536, 16#3537,
     16#3538, 16#3539, 16#3541, 16#3542, 16#3543, 16#3544, 16#3545, 16#3546,
     16#3630, 16#3631, 16#3632, 16#3633, 16#3634, 16#3635, 16#3636, 16#3637,
     16#3638, 16#3639, 16#3641, 16#3642, 16#3643, 16#3644, 16#3645, 16#3646,
     16#3730, 16#3731, 16#3732, 16#3733, 16#3734, 16#3735, 16#3736, 16#3737,
     16#3738, 16#3739, 16#3741, 16#3742, 16#3743, 16#3744, 16#3745, 16#3746,
     16#3830, 16#3831, 16#3832, 16#3833, 16#3834, 16#3835, 16#3836, 16#3837,
     16#3838, 16#3839, 16#3841, 16#3842, 16#3843, 16#3844, 16#3845, 16#3846,
     16#3930, 16#3931, 16#3932, 16#3933, 16#3934, 16#3935, 16#3936, 16#3937,
     16#3938, 16#3939, 16#3941, 16#3942, 16#3943, 16#3944, 16#3945, 16#3946,
     16#4130, 16#4131, 16#4132, 16#4133, 16#4134, 16#4135, 16#4136, 16#4137,
     16#4138, 16#4139, 16#4141, 16#4142, 16#4143, 16#4144, 16#4145, 16#4146,
     16#4230, 16#4231, 16#4232, 16#4233, 16#4234, 16#4235, 16#4236, 16#4237,
     16#4238, 16#4239, 16#4241, 16#4242, 16#4243, 16#4244, 16#4245, 16#4246,
     16#4330, 16#4331, 16#4332, 16#4333, 16#4334, 16#4335, 16#4336, 16#4337,
     16#4338, 16#4339, 16#4341, 16#4342, 16#4343, 16#4344, 16#4345, 16#4346,
     16#4430, 16#4431, 16#4432, 16#4433, 16#4434, 16#4435, 16#4436, 16#4437,
     16#4438, 16#4439, 16#4441, 16#4442, 16#4443, 16#4444, 16#4445, 16#4446,
     16#4530, 16#4531, 16#4532, 16#4533, 16#4534, 16#4535, 16#4536, 16#4537,
     16#4538, 16#4539, 16#4541, 16#4542, 16#4543, 16#4544, 16#4545, 16#4546,
     16#4630, 16#4631, 16#4632, 16#4633, 16#4634, 16#4635, 16#4636, 16#4637,
     16#4638, 16#4639, 16#4641, 16#4642, 16#4643, 16#4644, 16#4645, 16#4646}).

在我的笔记本(i5 CPU M 520 @ 2.40GHz)上用 10MB 的数据块测试时,它的吞吐量约为 90MB/s。不过这里的优化已经做到了极致。如果使用 16 位查找表,还可以达到 97MB/s,但那样的代码太疯狂、也太长,不适合贴在这里。

【讨论】:

无需循环:Bits = bit_size(Bin), << Int:Bits >> = Bin, io:format("~.16B\n", [Int])【参考方案2】:

你可以这样做: [ hd(erlang:integer_to_list(Nibble, 16)) || << Nibble:4 >> <= Binary ]

这将返回一个包含二进制十六进制数字的列表(字符串)。虽然我怀疑此操作的效率是否会对系统的运行时产生任何影响,但您也可以让这个bin_to_hex 函数返回一个更易于构造的iolist,并且无论如何在输出时都会被展平。以下函数返回一个带有您提供的格式示例的 iolist:

%% Formats Bin as an iolist of the form "<< FF, 10 >>": every byte becomes two
%% upper-case hex digits and bytes are separated by ", ".
%% The result is an iolist, not a flat string; I/O functions accept it as is.
bin_to_hex(<<>>) ->
    %% No last byte to split off: without this clause JoinableLength would be
    %% -1 and the binary match below would fail with badmatch.
    [ "<< ", " >>" ];
bin_to_hex(Bin) when is_binary(Bin) ->
    %% Every byte except the last is followed by a separator, so the last
    %% byte is split off and emitted on its own.
    JoinableLength = byte_size(Bin) - 1,
    << Bytes:JoinableLength/binary, LastNibble1:4, LastNibble2:4 >> = Bin,
    [ "<< ",
      [ [ erlang:integer_to_list(Nibble1, 16), erlang:integer_to_list(Nibble2, 16), ", " ]
        || << Nibble1:4, Nibble2:4 >> <= Bytes ],
      erlang:integer_to_list(LastNibble1, 16),
      erlang:integer_to_list(LastNibble2, 16),
      " >>" ].

它有点难看,但只会遍历二进制数据一次,并且不需要再遍历输出列表(否则我会用 string:join 来插入 ", " 分隔符)。如果这个函数不在某个进程的内部循环中(我很难相信它会成为你的瓶颈),那么你或许应该使用效率稍低但更直观的代码,例如:

%% Formats Bin as a flat string of the form "<< FF, 10 >>".
%% Simpler than the single-pass variant: builds one hex string per byte and
%% joins them with ", ".
bin_to_hex(Bin) when is_binary(Bin) ->
    %% Bind each byte as a 1-byte binary (not an integer) so that it matches
    %% the binary pattern in byte_to_hex/1.
    HexBytes = [byte_to_hex(Byte) || <<Byte:1/binary>> <= Bin],
    "<< " ++ string:join(HexBytes, ", ") ++ " >>".

%% Converts a 1-byte binary to its two upper-case hex digits, e.g. <<10>> -> "0A".
byte_to_hex(<< N1:4, N2:4 >>) ->
    erlang:integer_to_list(N1, 16) ++ erlang:integer_to_list(N2, 16).

【讨论】:

【参考方案3】:
%% Returns the upper-case hex digits of Bin as a flat string, two digits per
%% byte. Converting nibble by nibble keeps the leading zero of bytes below
%% 16#10 (e.g. <<10>> -> "0A"), which integer_to_list/2 on a whole byte drops.
bin_to_hex_list(Bin) when is_binary(Bin) ->
  [hd(integer_to_list(Nibble, 16)) || <<Nibble:4>> <= Bin].

【讨论】:

这不做任何零填充,所以结果不正确。如果一个字节的前 4 位为 0,那么当需要两位时,它只会为该字节生成一位。【参考方案4】:

改进@hairyhum

这会处理零填充 << <<Y>> ||<<X:4>> <= Id, Y <- integer_to_list(X,16)>>

逆向变换 <<<<Z>> || <<X:8,Y:8>> <= Id,Z <- [binary_to_integer(<<X,Y>>,16)]>>, %%hex to binary

【讨论】:

【参考方案5】:

这已经有一段时间没有看到任何行动了,但所有先前的解决方案似乎都过于复杂。 对我来说,这似乎要简单得多:

%% Map each 4-bit nibble of Bin to a lower-case hex character code
%% (48 = $0, 87 = $a - 10); the result is a list of character codes.
[begin if N < 10 -> 48 + N; true -> 87 + N end end || <<N:4>> <= Bin]

如果你喜欢它扩展一点:

%% Walk Bin four bits at a time and turn every nibble N into the character
%% code of its lower-case hex digit; the result is a list of character codes.
[begin
    if
        N < 10 ->
            48 + N; % 48 = $0
        true ->
            87 + N  % 87 = ($a - 10)
    end
end || <<N:4>> <= Bin]

【讨论】:

外层begin...end的目的是什么?【参考方案6】:

这是我使用的另一个简短而快速的版本:

%% Encodes Bin as a binary of lower-case hex digits, two digits per byte.
%% A binary is always a whole number of bytes, so walking it one nibble at a
%% time yields the same digits in the same order as walking it byte-wise.
hexlify(Bin) when is_binary(Bin) ->
    << <<(hex(Nibble))>> || <<Nibble:4>> <= Bin >>.

%% Character code of the lower-case hex digit for a nibble value (0..15).
hex(Nibble) when Nibble >= 10 -> Nibble - 10 + $a;
hex(Nibble) -> Nibble + $0.

【讨论】:

【参考方案7】:

如果您更希望得到二进制字符串,而不是 Erlang 默认的列表字符串,可以使用二进制推导式(binary comprehension)语法,就像我在生成 sha1 的代码中所做的那样:

1> << << if N >= 10 -> N -10 + $a;
1>          true    -> N     + $0 end >>
1>    || <<N:4>> <= crypto:hash(sha, "hello world") >>.
<<"2aae6c35c94fcfb415dbe95f408b9ce91ee846ed">>

与 python binascii.b2a_hex 相同:

>>> binascii.b2a_hex(sha.new('hello world').digest())
'2aae6c35c94fcfb415dbe95f408b9ce91ee846ed'

【讨论】:

【参考方案8】:

从 OTP24 开始,有

1> binary:encode_hex(<<1,2,3,4,5,6,255>>).
<<"010203040506FF">>

【讨论】:

以上是关于Erlang io:将二进制文件格式化为十六进制的主要内容,如果未能解决你的问题,请参考以下文章

如何将十进制属性格式化为货币?

PHP:将任何浮点数格式化为十进制扩展

如何将String / char格式化为十六进制值正确

将二进制文件传递给erlang nif

将excel列格式化为十进制从c#导出

在 JavaScript 中将十六进制数字格式化为短 UUID