stanford coreNLP CRFClassifier 模型加载和序列化
Posted 一休Q_Q
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了stanford coreNLP CRFClassifier 模型加载和序列化相关的知识,希望对你有一定的参考价值。
源代码位置:edu.stanford.nlp.ie.crf.CRFClassifier(Stanford CoreNLP)
模型加载
loadClassifier(String loadPath, Properties props)
/** * Loads a classifier from the file, classpath resource, or URL specified by loadPath. If loadPath ends in * .gz, uses a GZIPInputStream. */ //seg here ,ner here public void loadClassifier(String loadPath, Properties props) throws ClassCastException, IOException, ClassNotFoundException InputStream is = IOUtils.getInputStreamFromURLOrClasspathOrFileSystem(loadPath); Timing t = new Timing(); loadClassifier(is, props); is.close(); t.done(log, "Loading classifier from " + loadPath);
loadClassifier(ObjectInputStream ois, Properties props)
/** * Loads a classifier from the specified InputStream. This version works * quietly (unless VERBOSE is true). If props is non-null then any properties * it specifies override those in the serialized file. However, only some * properties are sensible to change (you shouldn't change how features are * defined). * <p> * <i>Note:</i> This method does not close the ObjectInputStream. (But earlier * versions of the code used to, so beware....) */ @Override @SuppressWarnings( "unchecked" ) // can't have right types in deserialization //seg here,ner here public void loadClassifier(ObjectInputStream ois, Properties props) throws ClassCastException, IOException, ClassNotFoundException Object o = ois.readObject(); // TODO: when we next break serialization, get rid of this fork and only read the List<Index> (i.e., keep first case) if (o instanceof List) labelIndices = (List<Index<CRFLabel>>) o; else Index<CRFLabel>[] indexArray = (Index<CRFLabel>[]) o; labelIndices = new ArrayList<>(indexArray.length); Collections.addAll(labelIndices, indexArray); classIndex = (Index<String>) ois.readObject(); featureIndex = (Index<String>) ois.readObject(); flags = (SeqClassifierFlags) ois.readObject(); if (flags.useEmbedding) embeddings = (Map<String, double[]>) ois.readObject(); Object featureFactory = ois.readObject(); if (featureFactory instanceof List) featureFactories = ErasureUtils.uncheckedCast(featureFactories); // int i = 0; // for (FeatureFactory ff : featureFactories) // XXXX // System.err.println("List FF #" + i + ": " + ((NERFeatureFactory) ff).describeDistsimLexicon()); // XXXX // i++; // else if (featureFactory instanceof FeatureFactory) featureFactories = Generics.newArrayList(); featureFactories.add((FeatureFactory) featureFactory); // System.err.println(((NERFeatureFactory) featureFactory).describeDistsimLexicon()); // XXXX else if (featureFactory instanceof Integer) // this is the current format (2014) since writing list didn't work (see note in serializeClassifier). 
int size = (Integer) featureFactory; featureFactories = Generics.newArrayList(size); for (int i = 0; i < size; ++i) featureFactory = ois.readObject(); if (!(featureFactory instanceof FeatureFactory)) throw new RuntimeIOException("Should have FeatureFactory but got " + featureFactory.getClass()); // System.err.println("FF #" + i + ": " + ((NERFeatureFactory) featureFactory).describeDistsimLexicon()); // XXXX featureFactories.add((FeatureFactory) featureFactory); // log.info("properties passed into CRF's loadClassifier are:" + props); if (props != null) flags.setProperties(props, false); windowSize = ois.readInt(); weights = (double[][]) ois.readObject(); // WordShapeClassifier.setKnownLowerCaseWords((Set) ois.readObject()); Set<String> lcWords = (Set<String>) ois.readObject(); if (lcWords instanceof MaxSizeConcurrentHashSet) knownLCWords = (MaxSizeConcurrentHashSet<String>) lcWords; else knownLCWords = new MaxSizeConcurrentHashSet<>(lcWords); reinit(); if (flags.labelDictionaryCutoff > 0) labelDictionary = (LabelDictionary) ois.readObject(); if (VERBOSE) log.info("windowSize=" + windowSize); log.info("flags=\\n" + flags);
模型序列化
/** * Serialize the classifier to the given ObjectOutputStream. * <br> * (Since the classifier is a processor, we don't want to serialize the * whole classifier but just the data that represents a classifier model.) */ @Override public void serializeClassifier(ObjectOutputStream oos) try oos.writeObject(labelIndices); oos.writeObject(classIndex); oos.writeObject(featureIndex); oos.writeObject(flags); if (flags.useEmbedding) oos.writeObject(embeddings); // For some reason, writing out the array of FeatureFactory // objects doesn't seem to work. The resulting classifier // doesn't have the lexicon (distsim object) correctly saved. So now custom write the list oos.writeObject(featureFactories.size()); for (FeatureFactory ff : featureFactories) oos.writeObject(ff); oos.writeInt(windowSize); oos.writeObject(weights); // oos.writeObject(WordShapeClassifier.getKnownLowerCaseWords()); oos.writeObject(knownLCWords); if (labelDictionary != null) oos.writeObject(labelDictionary); catch (IOException e) throw new RuntimeIOException(e);
以上是关于stanford coreNLP CRFClassifier 模型加载和序列化的主要内容,如果未能解决你的问题,请参考以下文章
转载Stanford CoreNLP Typed Dependencies