storm-starter项目概述
Posted 大数据和人工智能躺过的坑
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了storm-starter项目概述相关的知识,希望对你有一定的参考价值。
不多说,直接上干货!
这是书籍《从零开始学Storm》赵必厦 2014年出版的配套代码!
storm-starter项目包含使用storm的各种各样的例子。项目托管在GitHub上面,其网址为: http://github.com/nathanmarz/storm-starter
或者
storm-starter项目的包结构:
storm-starter项目的拓扑结构:
新建maven项目的方式
以“新建Maven项目的方式”导入storm-starter项目的步骤如下:
1、新建一个Maven项目,项目名称可以随意,如storm-starter。
2、把storm-starter项目根目录的src\jvm目录中的全部文件复制到Maven项目的src/main/java目录下。
storm-starter-master\src\jvm\storm\starter下的BasicDRPCTopology.java
package storm.starter; import backtype.storm.Config; import backtype.storm.LocalCluster; import backtype.storm.LocalDRPC; import backtype.storm.StormSubmitter; import backtype.storm.drpc.LinearDRPCTopologyBuilder; import backtype.storm.topology.BasicOutputCollector; import backtype.storm.topology.OutputFieldsDeclarer; import backtype.storm.topology.base.BaseBasicBolt; import backtype.storm.tuple.Fields; import backtype.storm.tuple.Tuple; import backtype.storm.tuple.Values; /** * This topology is a basic example of doing distributed RPC on top of Storm. It implements a function that appends a * "!" to any string you send the DRPC function. * <p/> * See https://github.com/nathanmarz/storm/wiki/Distributed-RPC for more information on doing distributed RPC on top of * Storm. */ public class BasicDRPCTopology { public static class ExclaimBolt extends BaseBasicBolt { @Override public void execute(Tuple tuple, BasicOutputCollector collector) { String input = tuple.getString(1); collector.emit(new Values(tuple.getValue(0), input + "!")); } @Override public void declareOutputFields(OutputFieldsDeclarer declarer) { declarer.declare(new Fields("id", "result")); } } public static void main(String[] args) throws Exception { LinearDRPCTopologyBuilder builder = new LinearDRPCTopologyBuilder("exclamation"); builder.addBolt(new ExclaimBolt(), 3); Config conf = new Config(); if (args == null || args.length == 0) { LocalDRPC drpc = new LocalDRPC(); LocalCluster cluster = new LocalCluster(); cluster.submitTopology("drpc-demo", conf, builder.createLocalTopology(drpc)); for (String word : new String[]{ "hello", "goodbye" }) { System.out.println("Result for \\"" + word + "\\": " + drpc.execute("exclamation", word)); } cluster.shutdown(); drpc.shutdown(); } else { conf.setNumWorkers(3); StormSubmitter.submitTopology(args[0], conf, builder.createRemoteTopology()); } } }
storm-starter-master\src\jvm\storm\starter下的ExclamationTopology.java
package storm.starter; import backtype.storm.Config; import backtype.storm.LocalCluster; import backtype.storm.StormSubmitter; import backtype.storm.task.OutputCollector; import backtype.storm.task.TopologyContext; import backtype.storm.testing.TestWordSpout; import backtype.storm.topology.OutputFieldsDeclarer; import backtype.storm.topology.TopologyBuilder; import backtype.storm.topology.base.BaseRichBolt; import backtype.storm.tuple.Fields; import backtype.storm.tuple.Tuple; import backtype.storm.tuple.Values; import backtype.storm.utils.Utils; import java.util.Map; /** * This is a basic example of a Storm topology. */ public class ExclamationTopology { public static class ExclamationBolt extends BaseRichBolt { OutputCollector _collector; @Override public void prepare(Map conf, TopologyContext context, OutputCollector collector) { _collector = collector; } @Override public void execute(Tuple tuple) { _collector.emit(tuple, new Values(tuple.getString(0) + "!!!")); _collector.ack(tuple); } @Override public void declareOutputFields(OutputFieldsDeclarer declarer) { declarer.declare(new Fields("word")); } } public static void main(String[] args) throws Exception { TopologyBuilder builder = new TopologyBuilder(); builder.setSpout("word", new TestWordSpout(), 10); builder.setBolt("exclaim1", new ExclamationBolt(), 3).shuffleGrouping("word"); builder.setBolt("exclaim2", new ExclamationBolt(), 2).shuffleGrouping("exclaim1"); Config conf = new Config(); conf.setDebug(true); if (args != null && args.length > 0) { conf.setNumWorkers(3); StormSubmitter.submitTopology(args[0], conf, builder.createTopology()); } else { LocalCluster cluster = new LocalCluster(); cluster.submitTopology("test", conf, builder.createTopology()); Utils.sleep(10000); cluster.killTopology("test"); cluster.shutdown(); } } }
storm-starter-master\src\jvm\storm\starter下的ManualDRPC.java
package storm.starter; import backtype.storm.Config; import backtype.storm.LocalCluster; import backtype.storm.LocalDRPC; import backtype.storm.drpc.DRPCSpout; import backtype.storm.drpc.ReturnResults; import backtype.storm.topology.BasicOutputCollector; import backtype.storm.topology.OutputFieldsDeclarer; import backtype.storm.topology.TopologyBuilder; import backtype.storm.topology.base.BaseBasicBolt; import backtype.storm.tuple.Fields; import backtype.storm.tuple.Tuple; import backtype.storm.tuple.Values; public class ManualDRPC { public static class ExclamationBolt extends BaseBasicBolt { @Override public void declareOutputFields(OutputFieldsDeclarer declarer) { declarer.declare(new Fields("result", "return-info")); } @Override public void execute(Tuple tuple, BasicOutputCollector collector) { String arg = tuple.getString(0); Object retInfo = tuple.getValue(1); collector.emit(new Values(arg + "!!!", retInfo)); } } public static void main(String[] args) { TopologyBuilder builder = new TopologyBuilder(); LocalDRPC drpc = new LocalDRPC(); DRPCSpout spout = new DRPCSpout("exclamation", drpc); builder.setSpout("drpc", spout); builder.setBolt("exclaim", new ExclamationBolt(), 3).shuffleGrouping("drpc"); builder.setBolt("return", new ReturnResults(), 3).shuffleGrouping("exclaim"); LocalCluster cluster = new LocalCluster(); Config conf = new Config(); cluster.submitTopology("exclaim", conf, builder.createTopology()); System.out.println(drpc.execute("exclamation", "aaa")); System.out.println(drpc.execute("exclamation", "bbb")); } }
storm-starter-master\src\jvm\storm\starter下的PrintSampleStream.java
/* // to use this example, uncomment the twitter4j dependency information in the project.clj, // uncomment storm.starter.spout.TwitterSampleSpout, and uncomment this class package storm.starter; import storm.starter.spout.TwitterSampleSpout; import backtype.storm.Config; import backtype.storm.LocalCluster; import backtype.storm.topology.TopologyBuilder; import backtype.storm.utils.Utils; import storm.starter.bolt.PrinterBolt; public class PrintSampleStream { public static void main(String[] args) { String username = args[0]; String pwd = args[1]; TopologyBuilder builder = new TopologyBuilder(); builder.setSpout("spout", new TwitterSampleSpout(username, pwd)); builder.setBolt("print", new PrinterBolt()) .shuffleGrouping("spout"); Config conf = new Config(); LocalCluster cluster = new LocalCluster(); cluster.submitTopology("test", conf, builder.createTopology()); Utils.sleep(10000); cluster.shutdown(); } } */
storm-starter-master\src\jvm\storm\starter下的ReachTopology.java
package storm.starter; import backtype.storm.Config; import backtype.storm.LocalCluster; import backtype.storm.LocalDRPC; import backtype.storm.StormSubmitter; import backtype.storm.coordination.BatchOutputCollector; import backtype.storm.drpc.LinearDRPCTopologyBuilder; import backtype.storm.task.TopologyContext; import backtype.storm.topology.BasicOutputCollector; import backtype.storm.topology.OutputFieldsDeclarer; import backtype.storm.topology.base.BaseBasicBolt; import backtype.storm.topology.base.BaseBatchBolt; import backtype.storm.tuple.Fields; import backtype.storm.tuple.Tuple; import backtype.storm.tuple.Values; import java.util.*; /** * This is a good example of doing complex Distributed RPC on top of Storm. This program creates a topology that can * compute the reach for any URL on Twitter in realtime by parallelizing the whole computation. * <p/> * Reach is the number of unique people exposed to a URL on Twitter. To compute reach, you have to get all the people * who tweeted the URL, get all the followers of all those people, unique that set of followers, and then count the * unique set. It\'s an intense computation that can involve thousands of database calls and tens of millions of follower * records. * <p/> * This Storm topology does every piece of that computation in parallel, turning what would be a computation that takes * minutes on a single machine into one that takes just a couple seconds. * <p/> * For the purposes of demonstration, this topology replaces the use of actual DBs with in-memory hashmaps. * <p/> * See https://github.com/nathanmarz/storm/wiki/Distributed-RPC for more information on Distributed RPC. 
*/ public class ReachTopology { public static Map<String, List<String>> TWEETERS_DB = new HashMap<String, List<String>>() {{ put("foo.com/blog/1", Arrays.asList("sally", "bob", "tim", "george", "nathan")); put("engineering.twitter.com/blog/5", Arrays.asList("adam", "david", "sally", "nathan")); put("tech.backtype.com/blog/123", Arrays.asList("tim", "mike", "john")); }}; public static Map<String, List<String>> FOLLOWERS_DB = new HashMap<String, List<String>>() {{ put("sally", Arrays.asList("bob", "tim", "alice", "adam", "jim", "chris", "jai")); put("bob", Arrays.asList("sally", "nathan", "jim", "mary", "david", "vivian")); put("tim", Arrays.asList("alex")); put("nathan", Arrays.asList("sally", "bob", "adam", "harry", "chris", "vivian", "emily", "jordan")); put("adam", Arrays.asList("david", "carissa")); put("mike", Arrays.asList("john", "bob")); put("john", Arrays.asList("alice", "nathan", "jim", "mike", "bob")); }}; public static class GetTweeters extends BaseBasicBolt { @Override public void execute(Tuple tuple, BasicOutputCollector collector) { Object id = tuple.getValue(0); String url = tuple.getString(1); List<String> tweeters = TWEETERS_DB.get(url); if (tweeters != null) { for (String tweeter : tweeters) { collector.emit(new Values(id, tweeter)); } } } @Override public void declareOutputFields(OutputFieldsDeclarer declarer) { declarer.declare(new Fields("id", "tweeter")); } } public static class GetFollowers extends BaseBasicBolt { @Override public void execute(Tuple tuple, BasicOutputCollector collector) { Object id = tuple.getValue(0); String tweeter = tuple.getString(1); List<String> followers = FOLLOWERS_DB.get(tweeter); if (followers != null) { for (String follower : followers) { collector.emit(new Values(id, follower)); } } } @Override public void declareOutputFields(OutputFieldsDeclarer declarer) { declarer.declare(new Fields("id", "follower")); } } public static class PartialUniquer extends BaseBatchBolt { BatchOutputCollector _collector; Object _id; 
Set<String> _followers = new HashSet<String>(); @Override public void prepare(Map conf, TopologyContext context, BatchOutputCollector collector, Object id) { _collector = collector; _id = id; } @Override public void execute(Tuple tuple) { _followers.add(tuple.getString(1)); } @Override public void finishBatch() { _collector.emit(new Values(_id, _followers.size())); } @Override public void declareOutputFields(OutputFieldsDeclarer declarer) { declarer.declare(new Fields("id", "partial-count")); } } public static class CountAggregator extends BaseBatchBolt { BatchOutputCollector _collector; Object _id; int _count = 0; @Override public void prepare(Map conf, TopologyContext context, BatchOutputCollector collector, Object id) { _collector = collector; _id = id; } @Override public void execute(Tuple tuple) { _count += tuple.getInteger(1); } @Override public void finishBatch() { _collector.emit(new Values(_id, _count)); } @Override public void declareOutputFields(OutputFieldsDeclarer declarer) { declarer.declare(new Fields("id", "reach")); } } public static LinearDRPCTopologyBuilder construct() { LinearDRPCTopologyBuilder builder = new LinearDRPCTopologyBuilder("reach"); builder.addBolt(new GetTweeters(), 4); builder.addBolt(new GetFollowers(), 12).shuffleGrouping(); builder.addBolt(new PartialUniquer(), 6).fieldsGrouping(new Fields("id", "follower")); builder.addBolt(new CountAggregator(), 3).fieldsGrouping(new Fields("id")); return builder; } public static void main(String[] args) throws Exception { LinearDRPCTopologyBuilder builder = construct(); Config conf = new Config(); if (args == null || args.length == 0) { conf.setMaxTaskParallelism(3); LocalDRPC drpc = new LocalDRPC(); LocalCluster cluster = new LocalCluster(); cluster.submitTopology("reach-drpc", conf, builder.createLocalTopology(drpc)); String[] urlsToTry = new String[]{ "foo.com/blog/1", "engineering.twitter.com/blog/5", "notaurl.com" }; for (String url : urlsToTry) { System.out.println("Reach of " + url + 
": " + drpc.execute("reach", url)); } cluster.shutdown(); drpc.shutdown(); } else { conf.setNumWorkers(6); StormSubmitter.submitTopology(args[0], conf, builder.createRemoteTopology()); } } }
storm-starter-master\src\jvm\storm\starter下的RollingTopWords.java
package storm.starter; import backtype.storm.Config; import backtype.storm.testing.TestWordSpout; import backtype.storm.topology.TopologyBuilder; import backtype.storm.tuple.Fields; import storm.starter.bolt.IntermediateRankingsBolt; import storm.starter.bolt.RollingCountBolt; import storm.starter.bolt.TotalRankingsBolt; import storm.starter.util.StormRunner; /** * This topology does a continuous computation of the top N words that the topology has seen in terms of cardinality. * The top N computation is done in a completely scalable way, and a similar approach could be used to compute things * like trending topics or trending images on Twitter. */ public class RollingTopWords { private static final int DEFAULT_RUNTIME_IN_SECONDS = 60; private static final int TOP_N = 5; private final TopologyBuilder builder; private final String topologyName; private final Config topologyConfig; private final int runtimeInSeconds; public RollingTopWords() throws InterruptedException { builder = new TopologyBuilder(); topologyName = "slidingWindowCounts"; topologyConfig = createTopologyConfiguration(); runtimeInSeconds = DEFAULT_RUNTIME_IN_SECONDS; wireTopology(); } private static Config createTopologyConfiguration() { Config conf = new Config(); conf.setDebug(true); return conf; } private void wireTopology() throws InterruptedException { String spoutId = "wordGenerator"; String counterId = "counter"; String intermediateRankerId = "intermediateRanker"; String totalRankerId = "finalRanker"; builder.setSpout(spoutId, new TestWordSpout(), 5); builder.setBolt(counterId, new RollingCountBolt(9, 3), 4).fieldsGrouping(spoutId, new Fields("word")); builder.setBolt(intermediateRankerId, new IntermediateRankingsBolt(TOP_N), 4).fieldsGrouping(counterId, new Fields( "obj")); builder.setBolt(totalRankerId, new TotalRankingsBolt(TOP_N)).globalGrouping(intermediateRankerId); } public void run() throws InterruptedException { StormRunner.runTopologyLocally(builder.createTopology(), topologyName, 
topologyConfig, runtimeInSeconds); } public static void main(String[] args) throws Exception { new RollingTopWords().run(); } }
storm-starter-master\src\jvm\storm\starter下的SingleJoinExample.java
package storm.starter; import backtype.storm.Config; import backtype.storm.LocalCluster; import backtype.storm.testing.FeederSpout; import backtype.storm.topology.TopologyBuilder; import backtype.storm.tuple.Fields; import backtype.storm.tuple.Values; import backtype.storm.utils.Utils; import storm.starter.bolt.SingleJoinBolt; public class SingleJoinExample { public static void main(String[] args) { FeederSpout genderSpout = new FeederSpout(new Fields("id", "gender")); FeederSpout ageSpout = new FeederSpout(new Fields("id", "age")); TopologyBuilder builder = new TopologyBuilder(); builder.setSpout("gender", genderSpout); builder.setSpout("age", ageSpout); builder.setBolt("join", new SingleJoinBolt(new Fields("gender", "age"))).fieldsGrouping("gender", new Fields("id")) .fieldsGrouping("age", new Fields("id")); Config conf = new Config(); conf.setDebug(true); LocalCluster cluster = new LocalCluster(); cluster.submitTopology("join-example", conf, builder.createTopology()); for (int i = 0; i < 10; i++) { String gender; if (i % 2 == 0) { gender = "male"; } else { gender = "female"; } genderSpout.feed(new Values(i, gender)); } for (int i = 9; i >= 0; i--) { ageSpout.feed(new Values(i, i + 20)); } Utils.sleep(2000); cluster.shutdown(); } }
storm-starter-master\src\jvm\storm\starter下的TransactionalGlobalCount.java
package storm.starter;
import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.coordination.BatchOutputCollector;
import backtype.storm.task.TopologyContext;
(原文代码在此处被截断,TransactionalGlobalCount.java 的其余部分缺失。)
以上是关于storm-starter项目概述的主要内容,如果未能解决你的问题,请参考以下文章