集群从CDH 5.16升级到CDP 7.1后，笔者使用的阿里云DataPhin中台也随之升级了版本，之前的UDF变得不太好用。其中某些UDF的主要用途，是让肤浅的SQL Boy们看不到Hive表中某些机密字段的真实信息，防止出现机密信息泄露。为此，笔者重新编写了UDF函数，实现编码、解码、加密、解密，经测试在USDP集群的Apache Hive中可行。USDP的稳定性似乎比阿里云的中台还要好一些……





import org.apache.hadoop.hive.ql.exec.UDF;

import java.nio.charset.StandardCharsets;

 * @program: HiveUDF
 * @description: 使用base64重新编码string
 * @author: zhiyong
 * @create: 2022-08-04 22:48
public class base64code1 extends UDF 
    public String evaluate(String input)
        return java.util.Base64.getEncoder().encodeToString(input.getBytes(StandardCharsets.UTF_8));





package org.apache.hadoop.hive.ql.udf;

import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;

 * UDFAscii.
@Description(name = "ascii",
    value = "_FUNC_(str) - returns the numeric value of the first character"
    + " of str",
    extended = "Returns 0 if str is empty or NULL if str is NULL\\n"
    + "Example:\\n"
    + "  > SELECT _FUNC_('222') FROM src LIMIT 1;"
    + "  50\\n"
    + "  > SELECT _FUNC_(2) FROM src LIMIT 1;\\n" + "  50")
public class UDFAscii extends UDF 
  private final IntWritable result = new IntWritable();

  public IntWritable evaluate(Text s) 
    if (s == null) 
      return null;

    if (s.getLength() > 0) 

    return result;




package org.apache.hadoop.hive.ql.exec;

import org.apache.hadoop.hive.ql.udf.UDFType;

 * A User-defined function (UDF) for use with Hive.
 * <p>
 * New UDF classes need to inherit from this UDF class (or from @link
 * org.apache.hadoop.hive.ql.udf.generic.GenericUDF GenericUDF which provides more flexibility at
 * the cost of more complexity).
 * <p>
 * Requirements for all classes extending this UDF are:
 * <ul>
 * <li>Implement one or more methods named @code evaluate which will be called by Hive (the exact
 * way in which Hive resolves the method to call can be configured by setting a custom @link
 * UDFMethodResolver). The following are some examples:
 * <ul>
 * <li>@code public int evaluate();</li>
 * <li>@code public int evaluate(int a);</li>
 * <li>@code public double evaluate(int a, double b);</li>
 * <li>@code public String evaluate(String a, int b, Text c);</li>
 * <li>@code public Text evaluate(String a);</li>
 * <li>@code public String evaluate(List<Integer> a); (Note that Hive Arrays are represented as
 * @link java.util.List Lists in Hive.
 * So an @code ARRAY<int> column would be passed in as a @code List<Integer>.)</li>
 * </ul>
 * </li>
 * <li>@code evaluate should never be a void method. However it can return @code null if
 * needed.
 * <li>Return types as well as method arguments can be either Java primitives or the corresponding
 * @link org.apache.hadoop.io.Writable Writable class.</li>
 * </ul>
 * One instance of this class will be instantiated per JVM and it will not be called concurrently.
 * @see Description
 * @see UDFType
 * @deprecated use @link org.apache.hadoop.hive.ql.udf.generic.GenericUDF
@UDFType(deterministic = true)
public class UDF 

   * The resolver to use for method resolution.
  private UDFMethodResolver rslv;

   * The constructor.
  public UDF() 
    rslv = new DefaultUDFMethodResolver(this.getClass());

   * The constructor with user-provided @link UDFMethodResolver.
  protected UDF(UDFMethodResolver rslv) 
    this.rslv = rslv;

   * Sets the resolver.
   * @param rslv The method resolver to use for method resolution.
  public void setResolver(UDFMethodResolver rslv) 
    this.rslv = rslv;

   * Get the method resolver.
  public UDFMethodResolver getResolver() 
    return rslv;

   * This can be overridden to include JARs required by this UDF.
   * @see org.apache.hadoop.hive.ql.udf.generic.GenericUDF#getRequiredJars()
   *      GenericUDF.getRequiredJars()
   * @return an array of paths to files to include, @code null by default.
  public String[] getRequiredJars() 
    return null;

   * This can be overridden to include files required by this UDF.
   * @see org.apache.hadoop.hive.ql.udf.generic.GenericUDF#getRequiredFiles()
   *      GenericUDF.getRequiredFiles()
   * @return an array of paths to files to include, @code null by default.
  public String[] getRequiredFiles() 
    return null;


package org.apache.hadoop.hive.ql.exec;

import java.lang.reflect.Method;
import java.util.List;

import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;

 * The UDF Method resolver interface. A user can plugin a resolver to their UDF
 * by implementing the functions in this interface. Note that the resolver is
 * stored in the UDF class as an instance variable. We did not use a static
 * variable because many resolvers maintain the class of the enclosing UDF as
 * state and are called from a base class e.g. UDFBaseCompare. This makes it
 * very easy to write UDFs that want to do resolution similar to the comparison
 * operators. Such UDFs just need to extend UDFBaseCompare and do not have to
 * care about the UDFMethodResolver interface. Same is true for UDFs that want
 * to do resolution similar to that done by the numeric operators. Such UDFs
 * simply have to extend UDFBaseNumericOp class. For the default resolution the
 * UDF implementation simply needs to extend the UDF class.
public interface UDFMethodResolver 

   * Gets the evaluate method for the UDF given the parameter types.
   * @param argClasses
   *          The list of the argument types that need to matched with the
   *          evaluate function signature.
  Method getEvalMethod(List<TypeInfo> argClasses) throws UDFArgumentException;




package org.apache.hadoop.hive.ql.exec;

import java.lang.reflect.Method;
import java.util.List;

import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;

 * The default UDF Method resolver. This resolver is used for resolving the UDF
 * method that is to be used for evaluation given the list of the argument
 * types. The getEvalMethod goes through all the evaluate methods and returns
 * the one that matches the argument signature or is the closest match. Closest
 * match is defined as the one that requires the least number of arguments to be
 * converted. In case more than one matches are found, the method throws an
 * ambiguous method exception.
public class DefaultUDFMethodResolver implements UDFMethodResolver 

   * The class of the UDF.
  private final Class<? extends UDF> udfClass;

   * Constructor. This constructor sets the resolver to be used for comparison
   * operators. See @link UDFMethodResolver
  public DefaultUDFMethodResolver(Class<? extends UDF> udfClass) 
    this.udfClass = udfClass;

   * Gets the evaluate method for the UDF given the parameter types.
   * @param argClasses
   *          The list of the argument types that need to matched with the
   *          evaluate function signature.
  public Method getEvalMethod(List<TypeInfo> argClasses) throws UDFArgumentException 
    return FunctionRegistry.getMethodInternal(udfClass, "evaluate", false,


package org.apache.hadoop.hive.ql.exec;

public final class FunctionRegistry 

   * This method is shared between UDFRegistry and UDAFRegistry. methodName will
   * be "evaluate" for UDFRegistry, and "aggregate"/"evaluate"/"evaluatePartial"
   * for UDAFRegistry.
   * @throws UDFArgumentException
  public static <T> Method getMethodInternal(Class<? extends T> udfClass,
      String methodName, boolean exact, List<TypeInfo> argumentClasses)
      throws UDFArgumentException 

    List<Method> mlist = new ArrayList<Method>();

    for (Method m : udfClass.getMethods()) 
      if (m.getName().equals(methodName)) 

    return getMethodInternal(udfClass, mlist, exact, argumentClasses);



package java.lang;

public final class Class<T> implements java.io.Serializable,
     * Returns an array containing @code Method objects reflecting all the
     * public methods of the class or interface represented by this @code
     * Class object, including those declared by the class or interface and
     * those inherited from superclasses and superinterfaces.
     * <p> If this @code Class object represents a type that has multiple
     * public methods with the same name and parameter types, but different
     * return types, then the returned array has a @code Method object for
     * each such method.
     * <p> If this @code Class object represents a type with a class
     * initialization method @code <clinit>, then the returned array does
     * <em>not</em> have a corresponding @code Method object.
     * <p> If this @code Class object represents an array type, then the
     * returned array has a @code Method object for each of the public
     * methods inherited by the array type from @code Object. It does not
     * contain a @code Method object for @code clone().
     * <p> If this @code Class object represents an interface then the
     * returned array does not contain any implicitly declared methods from
     * @code Object. Therefore, if no methods are explicitly declared in
     * this interface or any of its superinterfaces then the returned array
     * has length 0. (Note that a @code Class object which represents a class
     * always has public methods, inherited from @code Object.)
     * <p> If this @code Class object represents a primitive type or void,
     * then the returned array has length 0.
     * <p> Static methods declared in superinterfaces of the class or interface
     * represented by this @code Class object are not considered members of
     * the class or interface.
     * <p> The elements in the returned array are not sorted and are not in any
     * particular order.
     * @return the array of @code Method objects representing the
     *         public methods of this class
     * @throws SecurityException
     *         If a security manager, <i>s</i>, is present and
     *         the caller's class loader is not the same as or an
     *         ancestor of the class loader for the current class and
     *         invocation of @link SecurityManager#checkPackageAccess
     *         s.checkPackageAccess() denies access to the package
     *         of this class.
     * @jls 8.2 Class Members
     * @jls 8.4 Method Declarations
     * @since JDK1.1
    public Method[] getMethods() throws SecurityException 
        checkMemberAccess(Member.PUBLIC, Reflection.getCallerClass(), true);
        return copyMethods(privateGetPublicMethods());


package java.lang;

 * Thrown by the security manager to indicate a security violation.
 * @author  unascribed
 * @see     java.lang.SecurityManager
 * @since   JDK1.0
public class SecurityException extends RuntimeException 




package com.zhiyong.hiveUDF;

import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;

 * @program: HiveUDF
 * @description: 测试UDF
 * @author: zhiyong
 * @create: 2022-08-05 00:10
public class base64code2 extends GenericUDF 
    public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException 
        return null;

    public Object evaluate(DeferredObject[] arguments) throws HiveException 
        return null;

    public String getDisplayString(String[] children) 
        return null;



// Source code recreated from a .class file by IntelliJ IDEA
// (powered by FernFlower decompiler)

package org.apache.hadoop.hive.serde2.objectinspector;

import org.apache.hadoop.hive.common.classification.InterfaceAudience.Public;
import org.apache.hadoop.hive.common.classification.InterfaceStability.Stable;

public interface ObjectInspector extends Cloneable 
    String getTypeName();

    ObjectInspector.Category getCategory();

    public static enum Category 

        private Category() 






package org.apache.hadoop.hive.ql.udf.generic;

 * A Generic User-defined function (GenericUDF) for the use with Hive.
 * New GenericUDF classes need to inherit from this GenericUDF class.
 * The GenericUDF are superior to normal UDFs in the following ways: 1. It can
 * accept arguments of complex types, and return complex types. 2. It can accept
 * variable length of arguments. 3. It can accept an infinite number of function
 * signature - for example, it's easy to write a GenericUDF that accepts
 * array<int>, array<array<int>> and so on (arbitrary levels of nesting). 4. It
 * can do short-circuit evaluations using DeferedObject.
@UDFType(deterministic = true)
public abstract class GenericUDF implements Closeable 



至于为什么会找到这2个类，其实笔者是根据Hive的Error Log堆栈找到的……



package org.apache.hadoop.hive.ql.udf.generic;

 * GenericUDFBridge encapsulates UDF to provide the same interface as
 * GenericUDF.
 * Note that GenericUDFBridge implements Serializable because the name of the
 * UDF class needs to be serialized with the plan.
public class GenericUDFBridge extends GenericUDF implements Serializable 
  private static final long serialVersionUID = 4994861742809511113L;

   * The name of the UDF.
  private String udfName;

   * Whether the UDF is an operator or not. This controls how the display string
   * is generated.
  private boolean isOperator;

   * The underlying UDF class Name.
  private String udfClassName;

   * The underlying method of the UDF class.
  private transient Method udfMethod;

   * Helper to convert the parameters before passing to udfMethod.
  private transient ConversionHelper conversionHelper;
   * The actual udf object.
  private transient UDF udf;
   * The non-deferred real arguments for method invocation.
  private transient Object[] realArguments;

  private transient UdfWhitelistChecker udfChecker;

   * Create a new GenericUDFBridge object.
   * @param udfName
   *          The name of the corresponding udf.
   * @param isOperator true for operators
   * @param udfClassName java class name of UDF
  public GenericUDFBridge(String udfName, boolean isOperator,
      String udfClassName) 
    this.udfName = udfName;
    this.isOperator = isOperator;
    this.udfClassName = udfClassName;
  // For Java serialization only
  public GenericUDFBridge() 

  public void setUdfName(String udfName) 
    this.udfName = udfName;

  public String getUdfName() 
    return udfName;

  public String getUdfClassName() 
    return udfClassName;

  public void setUdfClassName(String udfClassName) 
    this.udfClassName = udfClassName;

  public boolean isOperator() 
    return isOperator;

  public void setOperator(boolean isOperator) 
    this.isOperator = isOperator;

  public Class<? extends UDF> getUdfClass() 
      return getUdfClassInternal();
     catch (ClassNotFoundException e) 
      throw new RuntimeException(e);

  /** Gets the UDF class and checks it against the whitelist, if any. */
  private Class<? extends UDF> getUdfClassInternal()
      throws ClassNotFoundException 
    Class<? extends UDF> clazz = (Class<? extends UDF>) Class.forName(
        udfClassName, true, Utilities.getSessionSpecifiedClassLoader());
    if (udfChecker != null && !udfChecker.isUdfAllowed(clazz)) 
      throw new SecurityException("UDF " + clazz.getCanonicalName() + " is not allowed");
    return clazz;

  public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException 

      udf = (UDF)getUdfClassInternal().newInstance(

以上是关于“使用Java继承UDF类或GenericUDF类给Hive3.1.2编写UDF，实现编码、解码、加密、解密，并运行在USDP大数据集群”的主要内容；如果未能解决你的问题，请参考以下文章。


