Using CefSharp (the CEF3 wrapper for C#) to simulate browser operations and scrape the product list for a search keyword on JD.com (Jingdong Mall).
For each product it collects the title, product ID, link, and price.
/// <summary>
/// Form hosting a CefSharp browser that opens the JD home page, searches for
/// <see cref="key"/>, and writes each listed product's link, title, code and
/// price to the debug output.
/// </summary>
public partial class Form1 : Form
{
    /// <summary>JD home page; loading it triggers a new search from the load handler.</summary>
    private const string HomeUrl = "https://www.jd.com/";

    /// <summary>Login page URL; the handler treats landing here as an anti-crawl redirect.</summary>
    private const string LoginUrl = "https://passport.jd.com/uc/login";

    /// <summary>Pause after clicking a sort tab, in milliseconds.</summary>
    private const int SortSettleDelayMs = 2000;

    /// <summary>
    /// Pause after scrolling to the bottom of the page, in milliseconds
    /// (presumably to let lazily loaded items appear - confirm against the live page).
    /// </summary>
    private const int ScrollSettleDelayMs = 3500;

    /// <summary>Embedded Chromium browser control.</summary>
    public ChromiumWebBrowser browser { get; set; }

    /// <summary>Keyword to search for.</summary>
    public string key = "Mobile phone";

    /// <summary>Requested sort order; see <see cref="GetSortTabIndex"/> for the accepted names.</summary>
    private string jdTab = "Comprehensive";

    /// <summary>
    /// Set to 1 once a sort tab has been clicked.
    /// NOTE(review): nothing in this class reads the flag - confirm whether other code depends on it.
    /// </summary>
    private int goTab = 0;

    public Form1()
    {
        InitializeComponent();
    }

    /// <summary>Creates the browser on the JD home page and starts listening for page loads.</summary>
    private void Form1_Load(object sender, EventArgs e)
    {
        browser = new ChromiumWebBrowser(HomeUrl);
        // Hook the handler before the control is added so it is in place before the browser is shown.
        browser.FrameLoadEnd += Web_FrameLoadEnd;
        this.Controls.Add(browser);
    }

    /// <summary>
    /// Drives the crawl: home page -> submit search; login page -> back to home;
    /// search results page -> extract products.
    /// </summary>
    private async void Web_FrameLoadEnd(object sender, FrameLoadEndEventArgs e)
    {
        // An exception escaping an async void handler cannot be observed by any caller,
        // so everything is caught and logged here.
        try
        {
            Debug.WriteLine("Enter page:" + e.Url);

            // The event also fires for sub-frames; only the main frame should drive the crawl.
            if (!e.Frame.IsMain)
            {
                return;
            }

            if (e.Url.Contains(HomeUrl))
            {
                await SubmitSearchAsync();
            }
            else if (e.Url.Contains(LoginUrl))
            {
                // JD anti-crawl redirect: go back to the home page and search again.
                browser.Load(HomeUrl);
            }
            else if (e.Url.Contains("Search?"))
            {
                await ScrapeSearchResultsAsync();
            }
        }
        catch (Exception ex)
        {
            Debug.WriteLine("Web_FrameLoadEnd failed: " + ex);
        }
    }

    /// <summary>Types <see cref="key"/> into the search box and clicks the search button.</summary>
    private async System.Threading.Tasks.Task SubmitSearchAsync()
    {
        await browser.GetMainFrame().EvaluateScriptAsync("$('#key').focus()");
        // The keyword is escaped so quotes or backslashes in it cannot break out of the JS literal.
        await browser.GetMainFrame().EvaluateScriptAsync("$('#key').val('" + EscapeForJsString(key) + "')");
        await browser.GetMainFrame().EvaluateScriptAsync("$('.button').click()");
    }

    /// <summary>
    /// Checks that the search produced results, applies the requested sort, scrolls
    /// to the bottom of the page and logs every product found.
    /// </summary>
    private async System.Threading.Tasks.Task ScrapeSearchResultsAsync()
    {
        var invariant = System.Globalization.CultureInfo.InvariantCulture;

        // Text of the page's "no result" / "error" areas; empty when those elements are absent.
        string searchResult = string.Empty;
        searchResult += await EvaluateToStringAsync("(function(){ return $('.ns-content').text();})()") ?? string.Empty;
        searchResult += await EvaluateToStringAsync("(function(){ return $('.check-error').text();})()") ?? string.Empty;
        Debug.WriteLine("searchResult:" + searchResult);

        // NOTE(review): these marker strings are compared with the page text as-is;
        // confirm they match the language the site actually renders.
        bool keywordRejected = searchResult.Contains("No")
            || searchResult.Contains("Search still")
            || searchResult.Contains("Click to view");
        if (keywordRejected)
        {
            Debug.WriteLine("No word found:" + key);
            return;
        }

        // Select the sort order and record that a sort tab was clicked.
        int sortIndex = GetSortTabIndex(jdTab);
        if (sortIndex >= 0)
        {
            await browser.GetMainFrame().EvaluateScriptAsync(
                "$('.f-sort a')[" + sortIndex.ToString(invariant) + "].click()");
            goTab = 1;
        }

        // Task.Delay instead of Thread.Sleep: waits without blocking the calling thread.
        await System.Threading.Tasks.Task.Delay(SortSettleDelayMs);
        browser.ExecuteScriptAsync("scrollTo(0, document.body.scrollHeight)");
        await System.Threading.Tasks.Task.Delay(ScrollSettleDelayMs);

        // Total number of product entries on the page; stays 0 when the count cannot be read.
        int max;
        string countText = await EvaluateToStringAsync("(function(){ return $('.gl-item').length})()");
        if (!int.TryParse(countText, System.Globalization.NumberStyles.Integer, invariant, out max))
        {
            max = 0;
        }

        // Valid indices are 0..max-1; each item is judged on its own, so one
        // incomplete entry does not suppress the entries that follow it.
        for (int index = 0; index < max; index++)
        {
            string position = index.ToString(invariant);

            string url = await EvaluateToStringAsync(
                "(function(){ return $('.gl-item .p-name a')[" + position + "].href})()");
            if (url == null)
            {
                continue;
            }

            string title = await EvaluateToStringAsync(
                "(function(){ return $('.gl-item .p-name em')[" + position + "].innerText})()");
            if (title == null)
            {
                continue;
            }

            string price = await EvaluateToStringAsync(
                "(function(){ return $('.gl-item .p-price strong')[" + position + "].innerText.replace('¥', '')})()");
            if (price == null)
            {
                continue;
            }

            // Product code = "JD" followed by every digit that occurs in the link.
            string code = "JD" + Regex.Replace(url, @"[^\d]*", "");
            title = title.Replace("\n", "");

            Debug.WriteLine("url:" + url + " title=" + title + " code=" + code + " price=" + price);
        }
    }

    /// <summary>
    /// Evaluates <paramref name="script"/> in the main frame and returns its result as text.
    /// </summary>
    /// <returns>
    /// The result converted with <c>ToString()</c>, or <c>null</c> when the script failed,
    /// returned nothing, or the evaluation threw; failures are written to the debug output.
    /// </returns>
    private async System.Threading.Tasks.Task<string> EvaluateToStringAsync(string script)
    {
        try
        {
            var response = await browser.GetMainFrame().EvaluateScriptAsync(script);
            if (response == null)
            {
                return null;
            }

            if (!response.Success)
            {
                Debug.WriteLine("Script failed: " + response.Message);
                return null;
            }

            return response.Result == null ? null : response.Result.ToString();
        }
        catch (Exception ex)
        {
            // Best effort: a single failed lookup must not abort the whole scan.
            Debug.WriteLine("Script evaluation threw: " + ex.Message);
            return null;
        }
    }

    /// <summary>
    /// Maps a sort name to the position of its link inside <c>.f-sort</c>.
    /// </summary>
    /// <returns>The link index, or -1 when the name is not recognised.</returns>
    private static int GetSortTabIndex(string tab)
    {
        // Trim so that incidental surrounding whitespace does not defeat the match.
        switch ((tab ?? string.Empty).Trim())
        {
            case "Comprehensive":
                return 0;
            case "Sales":
                return 1;
            case "Number of comments":
                return 2;
            case "Price":
                return 4;
            default:
                return -1;
        }
    }

    /// <summary>Escapes <paramref name="value"/> for use inside a single-quoted JavaScript string literal.</summary>
    private static string EscapeForJsString(string value)
    {
        if (string.IsNullOrEmpty(value))
        {
            return string.Empty;
        }

        // Backslash must be handled first so the escapes added afterwards are not doubled.
        return value
            .Replace("\\", "\\\\")
            .Replace("'", "\\'")
            .Replace("\r", "\\r")
            .Replace("\n", "\\n");
    }
}
Fetch results:
/// <summary>
/// Form hosting a CefSharp browser that opens the JD home page, searches for
/// <see cref="key"/>, and writes each listed product's link, title, code and
/// price to the debug output.
/// </summary>
public partial class Form1 : Form
{
    /// <summary>JD home page; loading it triggers a new search from the load handler.</summary>
    private const string HomeUrl = "https://www.jd.com/";

    /// <summary>Login page URL; the handler treats landing here as an anti-crawl redirect.</summary>
    private const string LoginUrl = "https://passport.jd.com/uc/login";

    /// <summary>Pause after clicking a sort tab, in milliseconds.</summary>
    private const int SortSettleDelayMs = 2000;

    /// <summary>
    /// Pause after scrolling to the bottom of the page, in milliseconds
    /// (presumably to let lazily loaded items appear - confirm against the live page).
    /// </summary>
    private const int ScrollSettleDelayMs = 3500;

    /// <summary>Embedded Chromium browser control.</summary>
    public ChromiumWebBrowser browser { get; set; }

    /// <summary>Keyword to search for.</summary>
    public string key = "Mobile phone";

    /// <summary>Requested sort order; see <see cref="GetSortTabIndex"/> for the accepted names.</summary>
    private string jdTab = "Comprehensive";

    /// <summary>
    /// Set to 1 once a sort tab has been clicked.
    /// NOTE(review): nothing in this class reads the flag - confirm whether other code depends on it.
    /// </summary>
    private int goTab = 0;

    public Form1()
    {
        InitializeComponent();
    }

    /// <summary>Creates the browser on the JD home page and starts listening for page loads.</summary>
    private void Form1_Load(object sender, EventArgs e)
    {
        browser = new ChromiumWebBrowser(HomeUrl);
        // Hook the handler before the control is added so it is in place before the browser is shown.
        browser.FrameLoadEnd += Web_FrameLoadEnd;
        this.Controls.Add(browser);
    }

    /// <summary>
    /// Drives the crawl: home page -> submit search; login page -> back to home;
    /// search results page -> extract products.
    /// </summary>
    private async void Web_FrameLoadEnd(object sender, FrameLoadEndEventArgs e)
    {
        // An exception escaping an async void handler cannot be observed by any caller,
        // so everything is caught and logged here.
        try
        {
            Debug.WriteLine("Enter page:" + e.Url);

            // The event also fires for sub-frames; only the main frame should drive the crawl.
            if (!e.Frame.IsMain)
            {
                return;
            }

            if (e.Url.Contains(HomeUrl))
            {
                await SubmitSearchAsync();
            }
            else if (e.Url.Contains(LoginUrl))
            {
                // JD anti-crawl redirect: go back to the home page and search again.
                browser.Load(HomeUrl);
            }
            else if (e.Url.Contains("Search?"))
            {
                await ScrapeSearchResultsAsync();
            }
        }
        catch (Exception ex)
        {
            Debug.WriteLine("Web_FrameLoadEnd failed: " + ex);
        }
    }

    /// <summary>Types <see cref="key"/> into the search box and clicks the search button.</summary>
    private async System.Threading.Tasks.Task SubmitSearchAsync()
    {
        await browser.GetMainFrame().EvaluateScriptAsync("$('#key').focus()");
        // The keyword is escaped so quotes or backslashes in it cannot break out of the JS literal.
        await browser.GetMainFrame().EvaluateScriptAsync("$('#key').val('" + EscapeForJsString(key) + "')");
        await browser.GetMainFrame().EvaluateScriptAsync("$('.button').click()");
    }

    /// <summary>
    /// Checks that the search produced results, applies the requested sort, scrolls
    /// to the bottom of the page and logs every product found.
    /// </summary>
    private async System.Threading.Tasks.Task ScrapeSearchResultsAsync()
    {
        var invariant = System.Globalization.CultureInfo.InvariantCulture;

        // Text of the page's "no result" / "error" areas; empty when those elements are absent.
        string searchResult = string.Empty;
        searchResult += await EvaluateToStringAsync("(function(){ return $('.ns-content').text();})()") ?? string.Empty;
        searchResult += await EvaluateToStringAsync("(function(){ return $('.check-error').text();})()") ?? string.Empty;
        Debug.WriteLine("searchResult:" + searchResult);

        // NOTE(review): these marker strings are compared with the page text as-is;
        // confirm they match the language the site actually renders.
        bool keywordRejected = searchResult.Contains("No")
            || searchResult.Contains("Search still")
            || searchResult.Contains("Click to view");
        if (keywordRejected)
        {
            Debug.WriteLine("没有找到词:" + key);
            return;
        }

        // Select the sort order and record that a sort tab was clicked.
        int sortIndex = GetSortTabIndex(jdTab);
        if (sortIndex >= 0)
        {
            await browser.GetMainFrame().EvaluateScriptAsync(
                "$('.f-sort a')[" + sortIndex.ToString(invariant) + "].click()");
            goTab = 1;
        }

        // Task.Delay instead of Thread.Sleep: waits without blocking the calling thread.
        await System.Threading.Tasks.Task.Delay(SortSettleDelayMs);
        browser.ExecuteScriptAsync("scrollTo(0, document.body.scrollHeight)");
        await System.Threading.Tasks.Task.Delay(ScrollSettleDelayMs);

        // Total number of product entries on the page; stays 0 when the count cannot be read.
        int max;
        string countText = await EvaluateToStringAsync("(function(){ return $('.gl-item').length})()");
        if (!int.TryParse(countText, System.Globalization.NumberStyles.Integer, invariant, out max))
        {
            max = 0;
        }

        // Valid indices are 0..max-1; each item is judged on its own, so one
        // incomplete entry does not suppress the entries that follow it.
        for (int index = 0; index < max; index++)
        {
            string position = index.ToString(invariant);

            // Plain ASCII quotes: typographic quotes are not valid JavaScript string delimiters.
            string url = await EvaluateToStringAsync(
                "(function(){ return $('.gl-item .p-name a')[" + position + "].href})()");
            if (url == null)
            {
                continue;
            }

            string title = await EvaluateToStringAsync(
                "(function(){ return $('.gl-item .p-name em')[" + position + "].innerText})()");
            if (title == null)
            {
                continue;
            }

            string price = await EvaluateToStringAsync(
                "(function(){ return $('.gl-item .p-price strong')[" + position + "].innerText.replace('¥','')})()");
            if (price == null)
            {
                continue;
            }

            // Product code = "JD" followed by every digit that occurs in the link.
            string code = "JD" + Regex.Replace(url, @"[^\d]*", "");
            title = title.Replace("\n", "");

            Debug.WriteLine("url:" + url + " title=" + title + " code=" + code + " price=" + price);
        }
    }

    /// <summary>
    /// Evaluates <paramref name="script"/> in the main frame and returns its result as text.
    /// </summary>
    /// <returns>
    /// The result converted with <c>ToString()</c>, or <c>null</c> when the script failed,
    /// returned nothing, or the evaluation threw; failures are written to the debug output.
    /// </returns>
    private async System.Threading.Tasks.Task<string> EvaluateToStringAsync(string script)
    {
        try
        {
            var response = await browser.GetMainFrame().EvaluateScriptAsync(script);
            if (response == null)
            {
                return null;
            }

            if (!response.Success)
            {
                Debug.WriteLine("Script failed: " + response.Message);
                return null;
            }

            return response.Result == null ? null : response.Result.ToString();
        }
        catch (Exception ex)
        {
            // Best effort: a single failed lookup must not abort the whole scan.
            Debug.WriteLine("Script evaluation threw: " + ex.Message);
            return null;
        }
    }

    /// <summary>
    /// Maps a sort name to the position of its link inside <c>.f-sort</c>.
    /// </summary>
    /// <returns>The link index, or -1 when the name is not recognised.</returns>
    private static int GetSortTabIndex(string tab)
    {
        // Trim so that incidental surrounding whitespace does not defeat the match.
        switch ((tab ?? string.Empty).Trim())
        {
            case "Comprehensive":
                return 0;
            case "Sales":
                return 1;
            case "Number of comments":
                return 2;
            case "Price":
                return 4;
            default:
                return -1;
        }
    }

    /// <summary>Escapes <paramref name="value"/> for use inside a single-quoted JavaScript string literal.</summary>
    private static string EscapeForJsString(string value)
    {
        if (string.IsNullOrEmpty(value))
        {
            return string.Empty;
        }

        // Backslash must be handled first so the escapes added afterwards are not doubled.
        return value
            .Replace("\\", "\\\\")
            .Replace("'", "\\'")
            .Replace("\r", "\\r")
            .Replace("\n", "\\n");
    }
}