c#爬虫-1688官网自动以图搜图
Posted dotNET跨平台
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了c#爬虫-1688官网自动以图搜图相关的知识,希望对你有一定的参考价值。
背景
在1688有个功能,就是上传图片,就可以找到类似的商品。如下
网址 :https://www.1688.com/
这时候,我们可以使用程序来代替,大批量的完成图片上传功能。
实现思路
1、找到图片上传接口
post请求,form表单中有signature签名
2、再找sign生成接口,全局搜索一下signature,发现了一个返回signature的接口。
接口链接:https://open-s.1688.com/openservice/ossDataService
这个接口也有一个变动的参数 appKey
全局搜索后在js文件中查看一下
往下找就可以发现appkey的生成了。
通过debug来查看生成规则。
获取加密时间的接口:https://open-s.1688.com/openservice/.htm?
参数:outfmt=json&serviceIds=cbu.searchweb.config.system.currenttime
需要先请求这个接口,获取加密时间。
/// <summary>
/// Requests the "currenttime" value from the 1688 open service and, when the
/// response is HTTP 200 and can be parsed, passes it with the image path to
/// <see cref="getsin"/>. Failures are reported through SetText.
/// </summary>
/// <param name="parhfile">Path of the local image file; forwarded unchanged to getsin.</param>
private void go(string parhfile)
{
    var response = HttpHelper.CreateGetHttpResponse("https://open-s.1688.com/openservice/.htm?serviceIds=cbu.searchweb.config.system.currenttime&outfmt=json", 5000, null, null);
    string retString;
    // using blocks release the stream and the reader even if ReadToEnd throws.
    using (Stream myResponseStream = response.GetResponseStream())
    using (StreamReader myStreamReader = new StreamReader(myResponseStream, Encoding.GetEncoding("utf-8")))
    {
        retString = myStreamReader.ReadToEnd();
    }
    if (response.StatusCode != HttpStatusCode.OK)
    {
        SetText("错误代码:" + response.StatusCode.ToString());
        return;
    }
    // The payload is keyed by the dotted service id; it is renamed to "currenttime"
    // before deserializing into Currenttimemodel.
    var ss = JsonConvert.DeserializeObject<Currenttimemodel>(retString.Replace("cbu.searchweb.config.system.currenttime", "currenttime"));
    if (ss == null || ss.currenttime == null)
    {
        // An empty or unexpected body would otherwise surface as a NullReferenceException.
        SetText("错误:无法解析加密时间接口的返回数据");
        return;
    }
    getsin(ss.currenttime.dataSet.ToString(), parhfile);
}
再把时间和appName 传入 getAppKey。
然后令 e = appName + ";" + t(t 为上一步获取的加密时间)。其中 appName 为 "pc_tusou",它的 base64 编码结果是 “cGNfdHVzb3U=”
经过encode64返回 i ( appkey)
然后通过上面生成sign的接口:https://open-s.1688.com/openservice/ossDataService
传入参数进行请求,就可以返回signature,policy,accessid。
# Query parameters for the signature request (illustrative fragment; `key` and
# `appkey` are defined outside this snippet).
params = {
"appName": key,
# appKey is the Base64 encoding of the UTF-8 bytes of `appkey`.
"appKey": base64.b64encode(appkey.encode("utf-8")),
}
1
2
3
4
/// <summary>
/// Signature endpoint client: builds the appKey from the app name and the
/// time value, requests signature/policy/accessid from ossDataService, then
/// POSTs the image file to the OSS bucket. On a successful upload the
/// resulting image-search URL is shown via SetText3 and saved via Write;
/// failures are reported via SetText.
/// </summary>
/// <param name="dataSet">Time value returned by the currenttime service; combined with the app name to form the appKey.</param>
/// <param name="parhfile">Path of the local image file to upload.</param>
private void getsin(string dataSet, string parhfile)
{
    string appName = "pc_tusou";
    // getAppKey: Base64 of "appName;time".
    string appKey = Base64.EncodeBase64("utf-8", appName + ";" + dataSet.ToString());
    // Base64 text can contain '+', '/' and '=' which are not safe inside a query
    // string, so the value is percent-encoded before being appended.
    var response = HttpHelper.CreateGetHttpResponse("https://open-s.1688.com/openservice/ossDataService?appName=" + appName + "&appKey=" + Uri.EscapeDataString(appKey), 5000, null, null);
    string retString;
    // using blocks release the stream and the reader even if ReadToEnd throws.
    using (Stream myResponseStream = response.GetResponseStream())
    using (StreamReader myStreamReader = new StreamReader(myResponseStream, Encoding.GetEncoding("utf-8")))
    {
        retString = myStreamReader.ReadToEnd();
    }
    if (response.StatusCode != HttpStatusCode.OK)
    {
        SetText("错误代码:" + response.StatusCode.ToString());
        return;
    }
    var ss = JsonConvert.DeserializeObject<Rootsin>(retString);
    if (ss == null || ss.data == null)
    {
        // An empty or unexpected body would otherwise surface as a NullReferenceException.
        SetText("错误:无法解析签名接口的返回数据");
        return;
    }
    // NOTE(review): the object key ends in ".jepg" while the form field "name"
    // uses ".jpeg" - confirm whether that spelling is intentional before changing it.
    string key = "cbuimgsearch/" + Base64.Getimgname() + Base64.GetTimeStamp() + ".jepg";
    var client = new RestClient("https://cbusearch.oss-cn-shanghai.aliyuncs.com/");
    client.Timeout = -1;
    client.UserAgent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36";
    var request = new RestRequest(Method.POST);
    request.AddHeader("Origin", "https://www.1688.com");
    request.AddHeader("Accept", "*/*");
    request.AddHeader("Cache-Control", "no-cache");
    // NOTE(review): hard-coded session cookie copied from a browser session - confirm it is still needed/valid.
    request.AddHeader("cookie", "_samesite_flag_=true; _tb_token_=ee5138b911917; cookie2=163f6e3722351213514df4c9ab9116f6; t=96e8d0ab6d636f19306c429b276db552; __cn_logon__=false; ali_ab=120.253.224.246.1587973275662.6; l=caJGIJNTkgnFkWiGkSYyeKDwPQuOAiFJdcPgDahIhDlFGpKMvULclIQGPBDmDhmDdCsLYIU; na=ijBRbdRXZeKwRcTHilfNHSt+; ");
    // NOTE(review): the header name is "refer", not the standard "Referer" - left as in the original; confirm intent.
    request.AddHeader("refer", "https://www.1688.com/");
    // Form fields of the upload; accessid, policy and signature come from the signature response.
    request.AddParameter("name", Base64.Getname() + ".jpeg");
    request.AddParameter("key", key);
    request.AddParameter("OSSAccessKeyId", ss.data.accessid);
    request.AddParameter("callback", "");
    request.AddParameter("policy", ss.data.policy);
    request.AddParameter("signature", ss.data.signature);
    request.AddParameter("success_action_status", "200");
    request.AddFile("file", parhfile);
    IRestResponse response2 = client.Execute(request);
    Console.WriteLine(response2.Content);
    if (response2.StatusCode != HttpStatusCode.OK)
    {
        SetText("错误代码:" + response2.StatusCode.ToString());
        return;
    }
    // The uploaded object key becomes the imageAddress of the search page URL.
    string picurl = "https://s.1688.com/youyuan/index.htm?tab=imageSearch&imageType=oss&imageAddress=" + key + "&spm=";
    SetText3("\r\n" + picurl);
    Write(picurl);
}
3、数据详情接口
图片上传之后,返回的数据接口:
https://search.1688.com/service/imageSearchOfferResultViewService?
参数:
imageAddress是在上传图片之后返回的值
requestId 初始化参数,可以为空。
整个流程就是这样了,接着构造请求就可以获取数据了。
完整代码
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.IO;
using System.Linq;
using System.Net;
using System.Net.Http;
using System.Net.Http.Headers;
using System.Net.Security;
using System.Security.Cryptography.X509Certificates;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using System.Windows.Forms;
using Newtonsoft.Json;
using Reptiles1688;
using RestSharp;
namespace WindowsFormsApp1
{
public partial class Form1 : Form
{
/// <summary>
/// Creates the form; all setup is delegated to InitializeComponent, which is
/// defined outside this part of the partial class.
/// </summary>
public Form1()
{
    this.InitializeComponent();
}
/// <summary>
/// Click handler: queues <see cref="crawlingWeb"/> on the thread pool, passing
/// the string "test" as its state argument.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button1_Click(object sender, EventArgs e)
{
    WaitCallback work = crawlingWeb;
    ThreadPool.QueueUserWorkItem(work, "test");
}
/// <summary>
/// Saves a string to "data.txt" inside a freshly created, GUID-named folder
/// under the "图片url" directory of the current working directory.
/// </summary>
/// <param name="s">Text to store; encoded with Encoding.Default.</param>
public void Write(string s)
{
    // Path.Combine inserts separators as needed instead of hand-concatenating "\\".
    string path = Path.Combine(System.Environment.CurrentDirectory, "图片url", Guid.NewGuid().ToString());
    // CreateDirectory is a no-op when the directory already exists.
    Directory.CreateDirectory(path);
    // Convert the text to bytes.
    byte[] data = System.Text.Encoding.Default.GetBytes(s);
    // The using block flushes and closes the stream even if the write fails.
    using (FileStream fs = new FileStream(Path.Combine(path, "data.txt"), FileMode.Create))
    {
        fs.Write(data, 0, data.Length);
    }
}
private delegate void SetLabelDelegate(string value);

/// <summary>
/// Prepends <paramref name="value"/> to the text of textBox1. When
/// InvokeRequired is true the call is re-issued through Invoke.
/// </summary>
/// <param name="value">Text to put in front of the current content.</param>
private void SetText(string value)
{
    if (!this.InvokeRequired)
    {
        textBox1.Text = string.Concat(value.ToString(), textBox1.Text);
        return;
    }

    // Re-enter this method through Invoke.
    this.Invoke(new SetLabelDelegate(SetText), value);
}
private delegate void SetLabelDelegate3(string value);

/// <summary>
/// Prepends <paramref name="value"/> to the text of textBox3. When
/// InvokeRequired is true the call is re-issued through Invoke.
/// </summary>
/// <param name="value">Text to put in front of the current content.</param>
private void SetText3(string value)
{
    if (!this.InvokeRequired)
    {
        textBox3.Text = string.Concat(value.ToString(), textBox3.Text);
        return;
    }

    // Re-enter this method through Invoke.
    this.Invoke(new SetLabelDelegate3(SetText3), value);
}
/// <summary>
/// Thread-pool work item: runs 49 rounds, each passing the files 1.jpg, 2.jpg
/// and 3.jpg from D:\pppppppppppppp\ to <see cref="go"/>, sleeping 500 ms after
/// every file, and finally writes " ok" through SetText3.
/// </summary>
/// <param name="data">State object supplied by QueueUserWorkItem; not used.</param>
private void crawlingWeb(object data)
{
    for (int aa = 1; aa < 50; aa++)
    {
        for (int a = 1; a < 4; a++)
        {
            string ss = "D:\\pppppppppppppp\\" + a + ".jpg";
            try
            {
                go(ss);
            }
            catch (Exception ex)
            {
                // An exception escaping a thread-pool work item terminates the whole
                // process, so a failed upload (network error, missing file, ...) is
                // reported here and the loop moves on to the next image.
                SetText("错误:" + ex.Message);
            }
            Thread.Sleep(500);
        }
    }
    SetText3("\r\n ok");
}
/// <summary>
/// Requests the "currenttime" value from the 1688 open service and, when the
/// response is HTTP 200 and can be parsed, passes it with the image path to
/// <see cref="getsin"/>. Failures are reported through SetText.
/// </summary>
/// <param name="parhfile">Path of the local image file; forwarded unchanged to getsin.</param>
private void go(string parhfile)
{
    var response = HttpHelper.CreateGetHttpResponse("https://open-s.1688.com/openservice/.htm?serviceIds=cbu.searchweb.config.system.currenttime&outfmt=json", 5000, null, null);
    string retString;
    // using blocks release the stream and the reader even if ReadToEnd throws.
    using (Stream myResponseStream = response.GetResponseStream())
    using (StreamReader myStreamReader = new StreamReader(myResponseStream, Encoding.GetEncoding("utf-8")))
    {
        retString = myStreamReader.ReadToEnd();
    }
    if (response.StatusCode != HttpStatusCode.OK)
    {
        SetText("错误代码:" + response.StatusCode.ToString());
        return;
    }
    // The payload is keyed by the dotted service id; it is renamed to "currenttime"
    // before deserializing into Currenttimemodel.
    var ss = JsonConvert.DeserializeObject<Currenttimemodel>(retString.Replace("cbu.searchweb.config.system.currenttime", "currenttime"));
    if (ss == null || ss.currenttime == null)
    {
        // An empty or unexpected body would otherwise surface as a NullReferenceException.
        SetText("错误:无法解析加密时间接口的返回数据");
        return;
    }
    getsin(ss.currenttime.dataSet.ToString(), parhfile);
}
/// <summary>
/// Signature endpoint client: builds the appKey from the app name and the
/// time value, requests signature/policy/accessid from ossDataService, then
/// POSTs the image file to the OSS bucket. On a successful upload the
/// resulting image-search URL is shown via SetText3 and saved via Write;
/// failures are reported via SetText.
/// </summary>
/// <param name="dataSet">Time value returned by the currenttime service; combined with the app name to form the appKey.</param>
/// <param name="parhfile">Path of the local image file to upload.</param>
private void getsin(string dataSet, string parhfile)
{
    string appName = "pc_tusou";
    // getAppKey: Base64 of "appName;time".
    string appKey = Base64.EncodeBase64("utf-8", appName + ";" + dataSet.ToString());
    // Base64 text can contain '+', '/' and '=' which are not safe inside a query
    // string, so the value is percent-encoded before being appended.
    var response = HttpHelper.CreateGetHttpResponse("https://open-s.1688.com/openservice/ossDataService?appName=" + appName + "&appKey=" + Uri.EscapeDataString(appKey), 5000, null, null);
    string retString;
    // using blocks release the stream and the reader even if ReadToEnd throws.
    using (Stream myResponseStream = response.GetResponseStream())
    using (StreamReader myStreamReader = new StreamReader(myResponseStream, Encoding.GetEncoding("utf-8")))
    {
        retString = myStreamReader.ReadToEnd();
    }
    if (response.StatusCode != HttpStatusCode.OK)
    {
        SetText("错误代码:" + response.StatusCode.ToString());
        return;
    }
    var ss = JsonConvert.DeserializeObject<Rootsin>(retString);
    if (ss == null || ss.data == null)
    {
        // An empty or unexpected body would otherwise surface as a NullReferenceException.
        SetText("错误:无法解析签名接口的返回数据");
        return;
    }
    // NOTE(review): the object key ends in ".jepg" while the form field "name"
    // uses ".jpeg" - confirm whether that spelling is intentional before changing it.
    string key = "cbuimgsearch/" + Base64.Getimgname() + Base64.GetTimeStamp() + ".jepg";
    var client = new RestClient("https://cbusearch.oss-cn-shanghai.aliyuncs.com/");
    client.Timeout = -1;
    client.UserAgent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36";
    var request = new RestRequest(Method.POST);
    request.AddHeader("Origin", "https://www.1688.com");
    request.AddHeader("Accept", "*/*");
    request.AddHeader("Cache-Control", "no-cache");
    // NOTE(review): hard-coded session cookie copied from a browser session - confirm it is still needed/valid.
    request.AddHeader("cookie", "_samesite_flag_=true; _tb_token_=ee5138b911917; cookie2=163f6e3722351213514df4c9ab9116f6; t=96e8d0ab6d636f19306c429b276db552; __cn_logon__=false; ali_ab=120.253.224.246.1587973275662.6; l=caJGIJNTkgnFkWiGkSYyeKDwPQuOAiFJdcPgDahIhDlFGpKMvULclIQGPBDmDhmDdCsLYIU; na=ijBRbdRXZeKwRcTHilfNHSt+; ");
    // NOTE(review): the header name is "refer", not the standard "Referer" - left as in the original; confirm intent.
    request.AddHeader("refer", "https://www.1688.com/");
    // Form fields of the upload; accessid, policy and signature come from the signature response.
    request.AddParameter("name", Base64.Getname() + ".jpeg");
    request.AddParameter("key", key);
    request.AddParameter("OSSAccessKeyId", ss.data.accessid);
    request.AddParameter("callback", "");
    request.AddParameter("policy", ss.data.policy);
    request.AddParameter("signature", ss.data.signature);
    request.AddParameter("success_action_status", "200");
    request.AddFile("file", parhfile);
    IRestResponse response2 = client.Execute(request);
    Console.WriteLine(response2.Content);
    if (response2.StatusCode != HttpStatusCode.OK)
    {
        SetText("错误代码:" + response2.StatusCode.ToString());
        return;
    }
    // The uploaded object key becomes the imageAddress of the search page URL.
    string picurl = "https://s.1688.com/youyuan/index.htm?tab=imageSearch&imageType=oss&imageAddress=" + key + "&spm=";
    SetText3("\r\n" + picurl);
    Write(picurl);
}
}
}
以上是关于c#爬虫-1688官网自动以图搜图的主要内容,如果未能解决你的问题,请参考以下文章
以图搜图 最佳实践阿里云 Elasticsearch 向量检索4步搭建“以图搜图”搜索引擎,