storm经典例子的wordcount的实现

Posted

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了storm经典例子的wordcount的实现相关的知识,希望对你有一定的参考价值。

    storm有个经典的例子wordcount,其实这几乎可以说是大数据的经典例子了,mapreduce也会有这个例子。但是storm给的例子包里的WordCountTopology用到了python的调用,直接用eclipse跑起来的话会报错,这里做了个小改动:把通过ShellBolt调用python脚本的SplitSentence换成了纯Java实现的BaseBasicBolt。

    1、WordCountTopology.java

package storm.starter;


import backtype.storm.Config;

import backtype.storm.LocalCluster;

import backtype.storm.StormSubmitter;

import backtype.storm.task.ShellBolt;

import backtype.storm.topology.BasicOutputCollector;

import backtype.storm.topology.FailedException;

import backtype.storm.topology.IRichBolt;

import backtype.storm.topology.OutputFieldsDeclarer;

import backtype.storm.topology.TopologyBuilder;

import backtype.storm.topology.base.BaseBasicBolt;

import backtype.storm.tuple.Fields;

import backtype.storm.tuple.Tuple;

import backtype.storm.tuple.Values;

import storm.starter.spout.RandomSentenceSpout;


import java.util.HashMap;

import java.util.Map;


/**

 * This topology demonstrates Storm's stream groupings; the sentence splitter is implemented in Java instead of the multilang (Python) ShellBolt.

 */

public class WordCountTopology {

//  public static class SplitSentence extends ShellBolt implements IRichBolt {

//

//    public SplitSentence() {

//      super("python", "splitsentence.py");

//    }

//

//    @Override

//    public void declareOutputFields(OutputFieldsDeclarer declarer) {

//      declarer.declare(new Fields("word"));

//    }

//

//    @Override

//    public Map<String, Object> getComponentConfiguration() {

//      return null;

//    }

//  }


  public static class SplitSentence extends BaseBasicBolt {

String patton ;

    public SplitSentence(String patton) {

      this.patton = patton;

    }


    @Override

    public void declareOutputFields(OutputFieldsDeclarer declarer) {

      declarer.declare(new Fields("word"));

    }

    

    @Override

    public void execute(Tuple tuple, BasicOutputCollector collector) {

try {

String sen = tuple.getString(0);

if(sen != null)

{

for(String word : sen.split(patton))

{

collector.emit(new Values(word));

}

}

} catch (Exception e) {

throw new FailedException("split fail!");

}

    }

  }

  public static class WordCount extends BaseBasicBolt {

    Map<String, Integer> counts = new HashMap<String, Integer>();


    @Override

    public void execute(Tuple tuple, BasicOutputCollector collector) {

//      String word = tuple.getString(0);

      String word = tuple.getStringByField("word");

      Integer count = counts.get(word);

      if (count == null)

        count = 0;

      count++;

      counts.put(word, count);

 //     collector.emit(new Values(word, count));

      System.err.println("word="+word+";  word_count="+count);

    }


    @Override

    public void declareOutputFields(OutputFieldsDeclarer declarer) {

 //     declarer.declare(new Fields("word", "count"));

    }

  }


  public static void main(String[] args) throws Exception {


    TopologyBuilder builder = new TopologyBuilder();


    builder.setSpout("spout", new RandomSentenceSpout(), 1);


    builder.setBolt("split", new SplitSentence(" "), 1)

    .shuffleGrouping("spout");

    

    builder.setBolt("count", new WordCount(), 1)

    .fieldsGrouping("split", new Fields("word"));


    Config conf = new Config();

    conf.setDebug(false);



    if (args != null && args.length > 0) {

      conf.setNumWorkers(3);


      StormSubmitter.submitTopology(args[0], conf, builder.createTopology());

    }

    else {

      conf.setMaxTaskParallelism(3);


      LocalCluster cluster = new LocalCluster();

      cluster.submitTopology("word-count", conf, builder.createTopology());


      Thread.sleep(5000);


      cluster.shutdown();

    }

  }

}

2、spout的实现类RandomSentenceSpout.java

package storm.starter.spout;


import backtype.storm.spout.SpoutOutputCollector;

import backtype.storm.task.TopologyContext;

import backtype.storm.topology.OutputFieldsDeclarer;

import backtype.storm.topology.base.BaseRichSpout;

import backtype.storm.tuple.Fields;

import backtype.storm.tuple.Values;

import backtype.storm.utils.Utils;


import java.util.Map;

import java.util.Random;


public class RandomSentenceSpout extends BaseRichSpout {

  SpoutOutputCollector _collector;

  Random _rand;

  int _num;



  @Override

  public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {

    _collector = collector;

    _rand = new Random();

  }


  @Override

  public void nextTuple() {

    Utils.sleep(100);

    String[] sentences = new String[]{ "the cow jumped over the moon", "an apple a day keeps the doctor away",

        "four score and seven years ago", "snow white and the seven dwarfs", "i am at two with nature" };

    int num = _rand.nextInt(sentences.length);

    String sentence = sentences[num];

    _num++;

    System.err.println("Spout__batch_num:"+_num+"___Random number is :"+num+" Emit Sentence is :"+sentence);

    _collector.emit(new Values(sentence));

  }


  @Override

  public void ack(Object id) {

  }


  @Override

  public void fail(Object id) {

  }


  @Override

  public void declareOutputFields(OutputFieldsDeclarer declarer) {

    declarer.declare(new Fields("word"));

  }


}

3、bolt的实现类(SplitSentence和WordCount)以静态嵌套类的形式定义在WordCountTopology类里。


以上是关于storm经典例子的wordcount的实现的主要内容,如果未能解决你的问题,请参考以下文章

Storm WordCount

Storm学习笔记Hello WordCount - 单机模式

Storm之路-WordCount-实例

Storm常用操作命令及WordCount

storm实战之WordCount

Storm入门WordCount示例