// 网络内容爬取、文件操作、KMP匹配 — Web content crawling, file operations, and KMP matching
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.io.*;
import java.util.Scanner;

/**
 * Downloads the textual content of a web page.
 *
 * <p>The page is returned as one string with all line terminators removed,
 * because the lines read from the response are concatenated directly.
 */
class GethtmlData {
    /** User-Agent header sent with every request. */
    private static final String USER_AGENT =
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_5) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11";

    /** The address used by the most recent fetch. */
    private String url;

    /** The {@link URL} object built from {@link #url} by the most recent fetch. */
    URL readurl;

    /** Creates a fetcher; no state is initialised until a page is requested. */
    GethtmlData() {
    }

    /**
     * Completes an address that lacks a scheme by prepending {@code "https://"}.
     *
     * <p>An address that already starts with a URI scheme followed by
     * {@code "://"} (for example {@code http://}, {@code HTTPS://} or
     * {@code ftp://}) is returned unchanged. Inputs of any length are accepted.
     *
     * @param pt the address to complete; must not be {@code null}
     * @return {@code pt} itself when it already carries a scheme, otherwise
     *         {@code "https://" + pt}
     */
    String bq(String pt) {
        if (hasScheme(pt)) {
            return pt;
        }
        return "https://" + pt;
    }

    /**
     * Fetches a page and decodes it as GBK.
     *
     * @param pt    the address to fetch
     * @param check when {@code true} the address is first passed through {@link #bq(String)}
     * @return the page content with line terminators removed
     * @throws IOException if the address is malformed, the connection fails,
     *                     or the charset is not supported by the runtime
     */
    String GetUrl(String pt, boolean check) throws IOException {
        return fetch(pt, check, "GBK");
    }

    /**
     * Fetches a page and decodes it as UTF-8.
     *
     * @param pt    the address to fetch
     * @param check when {@code true} the address is first passed through {@link #bq(String)}
     * @return the page content with line terminators removed
     * @throws IOException if the address is malformed or the connection fails
     */
    String GetUrl_utf(String pt, boolean check) throws IOException {
        return fetch(pt, check, "UTF-8");
    }

    /** Shared implementation of the two public fetch methods. */
    private String fetch(String pt, boolean check, String charsetName) throws IOException {
        if (check) {
            pt = bq(pt);
        }
        url = pt;
        readurl = new URL(url);
        URLConnection connection = readurl.openConnection();
        connection.setRequestProperty("User-Agent", USER_AGENT);
        connection.connect();

        StringBuilder webData = new StringBuilder();
        // The raw stream is declared separately so it is closed even when the
        // reader cannot be created (e.g. unsupported charset).
        try (InputStream raw = connection.getInputStream();
             BufferedReader in = new BufferedReader(new InputStreamReader(raw, charsetName))) {
            String line;
            while ((line = in.readLine()) != null) {
                webData.append(line);
            }
        }
        return webData.toString();
    }

    /** Returns whether {@code s} starts with a URI scheme followed by "://". */
    private static boolean hasScheme(String s) {
        int sep = s.indexOf("://");
        if (sep <= 0 || !isAsciiLetter(s.charAt(0))) {
            return false;
        }
        for (int i = 1; i < sep; i++) {
            char c = s.charAt(i);
            boolean schemeChar = isAsciiLetter(c) || (c >= '0' && c <= '9')
                    || c == '+' || c == '-' || c == '.';
            if (!schemeChar) {
                return false;
            }
        }
        return true;
    }

    /** Returns whether {@code c} is an ASCII letter. */
    private static boolean isAsciiLetter(char c) {
        return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
    }
}

/**
 * Small helper that opens a text file inside a directory and prints to it.
 *
 * <p>Typical use: {@link #OpenFile(String, String)}, any number of
 * {@link #print(Object)} / {@link #println(Object)} calls, then
 * {@link #CloseFile()}.
 */
class WFILE {
    /** Underlying file writer; {@code null} while no file is open. */
    FileWriter fp;

    /** Print wrapper around {@link #fp}; {@code null} while no file is open. */
    PrintWriter outfp;

    /**
     * Opens file {@code f} inside directory {@code m} for writing, creating the
     * directory (including missing parents) and the file when necessary.
     * Existing file content is truncated.
     *
     * @param m directory path
     * @param f file name inside that directory
     * @return {@code true} when the file is ready for printing, {@code false}
     *         when an argument is {@code null} or the directory or file could
     *         not be created or opened
     */
    boolean OpenFile(String m, String f) {
        if (m == null || f == null) {
            return false;
        }
        File catalogue = new File(m);
        try {
            if (!catalogue.exists() && !catalogue.mkdirs()) {
                return false;
            }
        } catch (SecurityException e) {
            System.out.println("在构建目录时异常,目录名为: " + m);
            e.printStackTrace();
            return false;
        }

        // Resolve the child through File so the platform separator is used
        // instead of a hard-coded backslash.
        File fe = new File(catalogue, f);
        try {
            if (!fe.exists() && !fe.createNewFile()) {
                return false;
            }
            FileWriter writer = new FileWriter(fe);
            fp = writer;
            outfp = new PrintWriter(writer);
            return true;
        } catch (IOException | SecurityException e) {
            System.out.println("在构建文件时异常");
            e.printStackTrace();
        }
        return false;
    }

    /**
     * Flushes and closes the currently open file.
     *
     * <p>After this call the object holds no open file, so subsequent
     * {@code print}/{@code println} calls return {@code false} until
     * {@link #OpenFile(String, String)} succeeds again.
     *
     * @return {@code false} when no file was open, or when a write or the close
     *         itself failed; {@code true} otherwise
     */
    boolean CloseFile() {
        if (fp == null) {
            return false;
        }
        boolean ok = true;
        try {
            // PrintWriter never throws on write; checkError() flushes and
            // reports whether any earlier write failed.
            if (outfp != null && outfp.checkError()) {
                System.out.println("关闭文件错误");
                ok = false;
            }
            fp.close();
        } catch (IOException e) {
            System.out.println("关闭文件错误");
            e.printStackTrace();
            ok = false;
        } finally {
            fp = null;
            outfp = null;
        }
        return ok;
    }

    /**
     * Prints a value to the open file without a line terminator.
     *
     * @param tdata value to print
     * @return {@code false} when no file is open, {@code true} otherwise
     */
    <T> boolean print(T tdata) {
        if (fp == null || outfp == null) {
            return false;
        }
        outfp.print(tdata);
        return true;
    }

    /**
     * Prints a value to the open file followed by a line terminator.
     *
     * @param tdata value to print
     * @return {@code false} when no file is open, {@code true} otherwise
     */
    <T> boolean println(T tdata) {
        if (fp == null || outfp == null) {
            return false;
        }
        outfp.println(tdata);
        return true;
    }
}

/**
 * Knuth-Morris-Pratt substring search plus extraction of the text that
 * follows each match.
 *
 * <p>Usage: {@link #Set_substring(String)} to choose the pattern,
 * {@link #find_substring(String)} to search a text, then
 * {@link #get_data(char)} to collect the text following each match up to a
 * delimiter. The optional filters set by {@link #Set_min(int)},
 * {@link #set_last(int)} and {@link #Set_begin(char)} apply to the next
 * {@code get_data} call only.
 */
class KMP {
    /** Failure table: {@code nextp[k]} is the longest proper border of the first {@code k} pattern chars. */
    int nextp[];

    /** Pattern length and text length of the most recent calls. */
    int n, m;

    /** Pattern set by {@link #Set_substring(String)}. */
    String pattern;

    /** Text searched by the most recent {@link #find_substring(String)}. */
    String text;

    /** Minimum extraction start index for the next {@link #get_data(char)}; 0 disables the filter. */
    int cc;

    /** Index just after the first match, recorded by {@link #get_data(char)}. */
    int pp;

    /** End index (inclusive) of every match found by {@link #find_substring(String)}. */
    ArrayList<Integer> ans;

    /** Start marker for the next {@link #get_data(char)}; {@code '\0'} disables it. */
    char begin;

    /** Largest match end index considered by the next {@link #get_data(char)}; -1 disables it. */
    int last;

    /** Creates a matcher with no pattern and all extraction filters disabled. */
    KMP() {
        cc = 0;
        pp = 0;
        last = -1;
        begin = '\0';
    }

    /**
     * Sets the pattern and builds its failure table.
     *
     * @param pattern the pattern to search for
     * @return {@code false} (leaving any previous pattern in place) when
     *         {@code pattern} is {@code null}, {@code true} otherwise
     */
    boolean Set_substring(String pattern) {
        if (pattern == null) {
            // Reject before touching any state so pattern, n and nextp stay consistent.
            return false;
        }
        int len = pattern.length();
        int[] table = new int[len + 1]; // zero-initialised; table[0] = table[1] = 0
        int k = 0;
        for (int i = 1; i < len; ++i) {
            while (k > 0 && pattern.charAt(k) != pattern.charAt(i)) {
                k = table[k];
            }
            if (pattern.charAt(k) == pattern.charAt(i)) {
                k++;
            }
            table[i + 1] = k;
        }
        this.pattern = pattern;
        this.n = len;
        this.nextp = table;
        return true;
    }

    /**
     * Searches {@code text} for the current pattern and stores, in
     * {@link #ans}, the index of the last character of every (possibly
     * overlapping) occurrence.
     *
     * <p>If no pattern has been set, {@code ans} is left empty. An empty
     * pattern is reported at every index of the text.
     *
     * @param text the text to search; must not be {@code null}
     */
    void find_substring(String text) {
        ans = new ArrayList<Integer>();
        this.text = text;
        this.m = text.length();
        if (pattern == null) {
            return;
        }
        int j = 0; // number of pattern characters currently matched
        for (int i = 0; i < m; ++i) {
            char c = text.charAt(i);
            // Fall back after a full match (j == n) or on a mismatch.
            while (j > 0 && (j == n || pattern.charAt(j) != c)) {
                j = nextp[j];
            }
            if (j < n && pattern.charAt(j) == c) {
                j++;
            }
            if (j >= n) {
                ans.add(i);
            }
        }
    }

    /**
     * Sets the minimum start index for the next {@link #get_data(char)}:
     * extractions that would start at an index {@code <= t} are skipped.
     *
     * @param t the threshold index
     */
    void Set_min(int t) {
        cc = t;
    }

    /**
     * Limits the next {@link #get_data(char)} to matches ending at or before {@code t}.
     *
     * @param t largest match end index to consider, or -1 for no limit
     */
    void set_last(int t) {
        last = t;
    }

    /**
     * Makes the next {@link #get_data(char)} start each extraction after the
     * first occurrence of {@code t} at or after the match end, instead of
     * directly after the match.
     *
     * @param t the start marker, or {@code '\0'} to disable
     */
    void Set_begin(char t) {
        begin = t;
    }

    /**
     * Collects, for every match found by {@link #find_substring(String)}, the
     * text that follows it up to (not including) the delimiter {@code s}. When
     * the delimiter does not occur, the rest of the text is taken. Empty
     * results are skipped.
     *
     * <p>The filters set by {@link #Set_min(int)}, {@link #set_last(int)} and
     * {@link #Set_begin(char)} are applied and then reset, so they affect this
     * call only.
     *
     * @param s the delimiter that ends each extracted string
     * @return the extracted strings in match order; empty when no search has
     *         been run
     */
    ArrayList<String> get_data(char s) {
        ArrayList<String> zans = new ArrayList<String>();
        if (ans != null && text != null) {
            for (int i = 0; i < ans.size(); i++) {
                int w = ans.get(i);
                if (i == 0) {
                    pp = w + 1;
                }
                if (last != -1 && w > last) {
                    break;
                }
                if (begin != '\0') {
                    int marker = text.indexOf(begin, w);
                    if (marker < 0) {
                        continue;
                    }
                    w = marker;
                }
                int from = w + 1;
                if (from <= cc) {
                    continue;
                }
                int stop = text.indexOf(s, from);
                String piece = (stop < 0) ? text.substring(from) : text.substring(from, stop);
                if (!piece.isEmpty()) {
                    zans.add(piece);
                }
            }
        }
        // One-shot filters: restore defaults for the next call.
        cc = 0;
        begin = '\0';
        last = -1;
        return zans;
    }
}