可以在 OCaml 中编码类型之间的二元函数吗？

Posted 2023-03-31

技术标签:

【中文标题】可以在 OCaml 中编码类型之间的二元函数吗？【英文标题】：Can one encode binary functions between types in OCaml? 【发布时间】：2017-05-03 23:56:48 【问题描述】：

我想知道是否可以在 OCaml 中构建类似于多重调度的东西。为此，我尝试为多方法的输入签名创建一个显式类型。举个例子，我定义了一个数字类型

(* Number type indexed by the type of the value it carries:
   [I] wraps an int, [F] wraps a float. *)
type _ num =
| I : int -> int num
| F : float -> float num

现在我想要一个函数 add 对 'a num 和 'b num 求和：如果 'a 和 'b 都是 int，则返回 int num；如果其中至少一个是 float，则返回 float num。此外，类型系统应该知道输出将使用哪个构造函数，即在函数调用处应该能静态地知道输出的类型，例如 int num。

这可能吗？到目前为止，我只能管理签名 type a b. a num * b num -> a num 的函数，因此（更一般的）浮点数总是必须作为第一个参数提供。必须禁止 int num * float num 的情况，从而导致非详尽的模式匹配和运行时异常。

似乎需要像type a b. a num * b num -> c(a,b) num 这样的签名，其中c 是一个包含类型提升规则的类型函数。我不认为 OCaml 有这个。开放类型或对象是否能够捕捉到这一点？我不是在寻找类型之间最通用的函数，只要我能明确列出少数输入类型组合和对应的输出类型就足够了。

【问题讨论】：

什么是'a t、'b t？什么是“'a 和 'b 整数”？这是一个错字，我编辑了这个问题希望澄清 【参考方案1】：

您所询问的具体情况可以使用 GADT 和多态变体很好地解决。请参阅此代码底部对 M.add 的调用：

(* Type-level tags used as indices: [whole] contains only `Integer,
   [general] extends [whole] with `Float. *)
type whole = [ `Integer ]
type general = [ whole | `Float ]

(* [I] wraps an int and is indexed by an open row that contains at least
   [whole]; [F] wraps a float and is indexed by exactly [general]. *)
type _ num =
  | I : int -> [> whole ] num
  | F : float -> general num

(* Addition over [num] in which both arguments and the result share a single
   index, plus projections back to plain ints and floats. *)
module M :
sig
  (* Both arguments and the result share the index ['a], which is bounded
     above by [general]. *)
  val add : ([< general ] as 'a) num -> 'a num -> 'a num

  (* Extracts the int from a value indexed by [whole]. *)
  val to_int : whole num -> int
  (* Converts a value indexed by [general] to a float. *)
  val to_float : general num -> float
end =
struct
  let add : type a. a num -> a num -> a num = fun a b ->
    match a, b with
    | I n, I m -> I (n + m)
    | F n, I m -> F (n +. float_of_int m)
    (* Can't allow the typechecker to see an I pattern first. *)
    (* Here the second argument is an F; the first argument is matched again
       to decide whether it needs converting to float. *)
    | _,   F m ->
      match a with
      | I n -> F (float_of_int n +. m)
      | F n -> F (n +. m)

  (* The single-constructor pattern relies on the [whole] index annotation. *)
  let to_int : whole num -> int = fun (I n) -> n

  (* Ints are converted with [float_of_int]; floats are returned as is. *)
  let to_float = function
    | I n -> float_of_int n
    | F n -> n
end

(* Usage: add each combination of operands, project the sum back to a plain
   number and print it. *)
let () =
  Printf.printf "%i\n" (M.to_int (M.add (I 1) (I 2)));
  Printf.printf "%f\n" (M.to_float (M.add (I 1) (F 2.)));
  Printf.printf "%f\n" (M.to_float (M.add (F 1.) (I 2)));
  Printf.printf "%f\n" (M.to_float (M.add (F 1.) (F 2.)))

运行后依次打印 3、3.000000、3.000000、3.000000。

您不能将上述任何一个 to_float 更改为 to_int：静态已知只有两个 I 相加才会得到 I。但是，您可以将 to_int 更改为 to_float（并相应调整 printf）。这些操作很容易组合，并且能够传播类型信息。

嵌套 match 表达式这种别扭的写法，我打算到邮件列表上去询问。我以前从未见过这种做法。

一般类型函数

AFAIK，在当前 OCaml 中求值一般类型函数的唯一方法需要用户提供见证，即一些额外的类型和值信息。这可以通过多种方式完成，例如将参数包装在额外的构造函数中（参见 @mookid 的回答）、使用一等模块（也在下一节讨论）、提供一小组可供选择的抽象值（由它们实现真正的操作，包装器再分派给这些值）。下面的示例使用第二个 GADT 对有限关系进行编码：

(* Number type indexed by the type of the wrapped value. *)
type _ num =
  | I : int -> int num
  | F : float -> float num

(* Witnesses: each constructor relates two argument indices to the index of
   the result. *)
type (_, _, _) promotion =
  | II : (int, int, int) promotion
  | IF : (int, float, float) promotion
  | FI : (float, int, float) promotion
  | FF : (float, float, float) promotion

(* Addition driven by an explicit promotion witness. *)
module M : sig
  val add : ('a, 'b, 'c) promotion -> 'a num -> 'b num -> 'c num
end = struct
  (* Dispatch on the witness first; once it is known, the constructors of
     both operands are fixed, so each inner match has a single case. *)
  let add : type a b c. (a, b, c) promotion -> a num -> b num -> c num =
   fun witness lhs rhs ->
    match witness with
    | II -> (match lhs, rhs with I x, I y -> I (x + y))
    | IF -> (match lhs, rhs with I x, F y -> F (float_of_int x +. y))
    | FI -> (match lhs, rhs with F x, I y -> F (x +. float_of_int y))
    | FF -> (match lhs, rhs with F x, F y -> F (x +. y))
end

(* Usage: the witness is passed explicitly, and the result is unwrapped with
   a single-constructor pattern. The lambdas are parenthesised so that each
   statement is a separate pipeline; an unparenthesised [fun] would extend
   over the [;] and take in the following statement. *)
let () =
  M.add II (I 1) (I 2)  |> (fun (I n) -> n) |> Printf.printf "%i\n";
  M.add IF (I 1) (F 2.) |> (fun (F n) -> n) |> Printf.printf "%f\n"

这里，类型函数为 ('a, 'b, 'c) promotion，其中 'a、'b 是参数，'c 是结果。不幸的是，你必须向 add 传入 promotion 的实例才能确定 'c，即下面这样的写法（AFAIK）行不通：

(* Hypothetical sketch: [result] tries to project the third parameter of a
   [promotion] type through a constraint. *)
type 'p result = 'c
  constraint 'p = (_, _, 'c) promotion

(* Hypothetical signature for [add] that takes no witness argument. *)
val add : 'a num -> 'b num -> ('a, 'b, _) promotion result num

尽管由于 GADT，'c 完全由 'a 和 'b 决定，编译器仍然认为上面的签名基本上只是

(* In this signature the result index ['c] is not tied to ['a] or ['b]. *)
val add : 'a num -> 'b num -> 'c num

与直接写四个函数相比，见证并不会带来太多好处，只是操作集（add、multiply 等）与参数/结果类型组合可以基本相互正交；类型可以更好看，使用和实现也会稍微容易一些。

编辑：实际上可以删除 I 和 F 构造函数，即

(* Variant of [add] that takes plain values; the witness relates the two
   argument types to the result type ['c]. *)
val add : ('a, 'b, 'c) promotion -> 'a -> 'b -> 'c

这使得使用更加简单：

Printf.printf "%f\n" (M.add IF 1 2.)

但是，在这两种情况下，这都不像 GADT+多态变体解决方案那样可组合，因为永远不会推断出见证。

未来的 OCaml：模块化隐式

如果你的见证是一等模块，编译器可以借助模块化隐式自动为你选择它。你可以在 4.02.1+modular-implicits-ber 开关中试用。第一个示例只是将上一个示例中的 GADT 见证包装在模块中，让编译器为你选择它们：

(* Module-level packaging of a promotion witness: three abstract types and a
   [promotion] value relating them. *)
module type PROMOTION =
sig
  type a
  type b
  type c
  val promotion : (a, b, c) promotion
end

(* Implicit instance for int and int giving int; wraps the [II] witness. *)
implicit module Promote_int_int =
struct
  type a = int
  type b = int
  type c = int
  let promotion = II
end

(* Implicit instance for int and float giving float; wraps the [IF]
   witness. *)
implicit module Promote_int_float =
struct
  type a = int
  type b = float
  type c = float
  let promotion = IF
end

(* Two more like the above. *)

(* Wrapper around [M.add] that receives the promotion witness through an
   implicit module parameter [P] instead of an explicit argument.
   Modular-implicits syntax: the implicit parameter is written in braces,
   both in the signature and in the definition. *)
module M' :
sig
  val add : {P : PROMOTION} -> P.a num -> P.b num -> P.c num
end =
struct
  let add {P : PROMOTION} = M.add P.promotion
end

(* Usage: no witness is written at the call site. The lambdas are
   parenthesised so that each statement is a separate pipeline; an
   unparenthesised [fun] would extend over the [;] and take in the following
   statement. *)
let () =
  M'.add (I 1) (I 2)  |> (fun (I n) -> n) |> Printf.printf "%i\n";
  M'.add (I 1) (F 2.) |> (fun (F n) -> n) |> Printf.printf "%f\n"

使用模块化隐式，您还可以简单地添加未标记的浮点数和整数。这个例子对应于调度到一个函数“witness”：

(* Signature of an addition instance: two argument types, a result type and
   the [add] function between them. *)
module type PROMOTING_ADD =
sig
  type a
  type b
  type c
  val add : a -> b -> c
end

(* Implicit instance adding two ints with integer addition. *)
implicit module Add_int_int =
struct
  type a = int
  type b = int
  type c = int
  let add a b = a + b
end

(* Implicit instance adding an int to a float; the int is converted with
   [float_of_int] first. *)
implicit module Add_int_float =
struct
  type a = int
  type b = float
  type c = float
  let add a b = (float_of_int a) +. b
end

(* Two more. *)

(* Addition on untagged values: [add] forwards to the [add] of the implicit
   module parameter [P]. Modular-implicits syntax: the implicit parameter is
   written in braces, both in the signature and in the definition. *)
module M'' :
sig
  val add : {P : PROMOTING_ADD} -> P.a -> P.b -> P.c
end =
struct
  let add {P : PROMOTING_ADD} = P.add
end

(* Usage: plain ints and floats are passed directly, and no PROMOTING_ADD
   module is written at the call site. *)
let () =
  M''.add 1 2  |> Printf.printf "%i\n";
  M''.add 1 2. |> Printf.printf "%f\n"

【讨论】：

非常好！在当前的 OCaml 中可悲的是冗长，但在类型方面仍然令人印象深刻。在模块化隐式方面，这很好地说明了它们将/将如何使用。谢谢！修改了add 函数，使match 具有详尽性检查。很好，答案的第一部分是我没有成功做的事情。我缺少的是使用开放变体键入I : int -> [> whole ] num。不确定对于更复杂的 int 和 float 子类型关系的可扩展性如何，但我会尝试。是的，这需要一些努力才能解决。这是对类型系统的某种严重滥用 :) 我不希望它通常有用，但也许你可以想出一些疯狂而优雅的东西:) 我正在寻找类似于您的第一个解决方案的东西。太棒了！【参考方案2】：

从 4.04.0 版本开始，OCaml 没有办法以这种方式编码类型级别的依赖关系。打字规则必须更简单。

一种选择是为此使用简单的变体类型，将所有内容包装成一个（可能很大）类型并匹配：

(* A number that is either an int or a float, with no type-level index. *)
type vnum =
  | Int of int
  | Float of float

(* [add_vnum a b] sums two numbers. Two ints give an [Int]; every other
   combination gives a [Float], converting the int operand with
   [float_of_int]. *)
let add_vnum a b =
  match a with
  | Int x -> (
    match b with
    | Int y -> Int (x + y)
    | Float y -> Float (float_of_int x +. y))
  | Float x -> (
    match b with
    | Int y -> Float (float_of_int y +. x)
    | Float y -> Float (x +. y))

另一种方法是将输入值限制为具有匹配类型：

(* Number type indexed by the type of the wrapped value. *)
type _ gnum =
  | I : int -> int gnum
  | F : float -> float gnum

(* [add_gnum x y] sums two numbers that share the same index. Once the
   constructor of [x] is known, [y] can only have that same constructor, so
   each inner match has a single case. *)
let add_gnum : type a. a gnum -> a gnum -> a gnum =
 fun x y ->
  match x with
  | I lhs -> (match y with I rhs -> I (lhs + rhs))
  | F lhs -> (match y with F rhs -> F (lhs +. rhs))

最后，输入值之一的类型可用于约束返回值的类型。在此示例中，返回值将始终与第二个参数具有相同的类型：

(* Number type indexed by the type of the wrapped value. *)
type _ gnum =
  | I : int -> int gnum
  | F : float -> float gnum

(* [add_gnum' x y] sums two numbers; the result has the index of [y].
   The first operand is converted to the representation of the second:
   [float_of_int] when [y] is a float, [int_of_float] when [y] is an int. *)
let add_gnum' : type a b. a gnum -> b gnum -> b gnum =
 fun x y ->
  match y with
  | I rhs ->
    let lhs : int = match x with I i -> i | F f -> int_of_float f in
    I (lhs + rhs)
  | F rhs ->
    let lhs : float = match x with I i -> float_of_int i | F f -> f in
    F (lhs +. rhs)

【讨论】：

谢谢！第一个选项是我尝试过的；我缺少的是输出只是完整的变体，而不是特定的构造函数。第二个选项给出了特定的输出，但我实际上想提升类型以允许 int + float。最后一个想法我也想到了一点，但实际上 F f, I i case 没有我想要的语义。我正在考虑将其丢弃，从而导致可能的运行时匹配失败。真可惜。我在考虑开放变体来合并子类型，但到目前为止没有成功。像type i = [> `Int] 之类的东西可以包含一个int，即int、float、一个有理数。然后浮点类型将是 type f = [`Float| `Int] 明确包括它是 int 的可能性。那么促销必须看起来像：add: ('a, 'b) -> ['a|'b] ，其中'a 和'b 将是i 或f。到目前为止，这还没有奏效。接受答案，因为使用开放变体进行子类型化似乎也不允许以所需的方式进行类型算术。即使'a t 被定义为已知为多态变体，似乎也无法获得'a t -> 'b t -> ['a t|'b t] 形式的函数类型。 @hcarty 看我的回答 :)【参考方案3】：

一种选择是使用带有参数元组的子类型，它允许重用一些代码（这就是使用子类型的原因）：

(* Argument pairs whose two components have the same type. *)
type intpair = [ `int_int of int * int ]
type floatpair = [ `float_float of float * float ]

(* Result of an addition: an int or a float. *)
type num = [ `int of int | `float of float ]

(* Every supported combination of argument types. *)
type pair =
  [ intpair
  | floatpair
  | `int_float of int * float
  | `float_int of float * int ]

(* One adder per argument combination; each accepts exactly one tag. *)
let plus_int_int (`int_int (a, b)) = `int (a + b)
let plus_float_float (`float_float (a, b)) = `float (a +. b)
let plus_int_float (`int_float (a, b)) = `float (float_of_int a +. b)
let plus_float_int (`float_int (a, b)) = `float (a +. float_of_int b)

(* [plus operands] forwards the tagged pair to the adder for its tag. *)
let plus (operands : pair) : num =
  match operands with
  | #intpair as p -> plus_int_int p
  | #floatpair as p -> plus_float_float p
  | `int_float _ as p -> plus_int_float p
  | `float_int _ as p -> plus_float_int p

现在，如果您想要静态保证，则需要使用 GADT：

(* Number type indexed by the type of the wrapped value. *)
type 'a num =
  | Int : int -> int num
  | Float : float -> float num

(* A pair of operands, indexed by the type of their sum. *)
type 'a binop =
  | Intpair : (int * int) -> int binop
  | Int_Float : (int * float) -> float binop
  | Float_Int : (float * int) -> float binop
  | Floatpair : (float * float) -> float binop

(* [plus op] adds the two operands packed in [op]; the index of the result
   is the index of [op]. *)
let plus (type a) (op : a binop) : a num =
  match op with
  | Intpair (lhs, rhs) -> Int (lhs + rhs)
  | Int_Float (lhs, rhs) -> Float (float_of_int lhs +. rhs)
  | Float_Int (lhs, rhs) -> Float (lhs +. float_of_int rhs)
  | Floatpair (lhs, rhs) -> Float (lhs +. rhs)

【讨论】：

不错！ plus 函数给出一个类型化的输出。但是，必须明确说明输入对类型。 @antron 的答案的第一个版本就是这样做的，所以我接受了。还要注意，这个接口在逻辑上等同于我使用单独的帮助程序 GADT (('a, 'b, 'c) promotion) 给出的示例，除了使用单独的第三个参数保证不会分配（尽管您可能必须将构造函数包装在预先声明的值中（let ii = II），尚未检查）。另一方面，将两个参数包装在构造函数中，就像这里一样，将分配，除非优化器可以证明它没有必要。

以上是关于“可以在 OCaml 中编码类型之间的二元函数吗？”的主要内容，如果未能解决你的问题，请参考以下文章