c#爬取笔趣阁小说(附源码)
2021/12/7 11:17:31
本文主要是介绍c#爬取笔趣阁小说(附源码),对大家解决编程问题具有一定的参考价值,需要的程序猿们随着小编来一起学习吧!
使用 C# 控制台程序爬取笔趣阁小说,以下为效果图
以下为完整代码
using System;
using System.IO;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;

namespace ConsoleApp3
{
    /// <summary>
    /// Console crawler: asks for a novel name, looks it up through the site's
    /// search page, then walks the "next chapter" links starting at the first
    /// chapter and appends every chapter to a text file named after the book.
    /// </summary>
    class Program
    {
        private const string BaseUrl = "https://www.biqugeu.net/";
        private const string SearchUrlTemplate = "https://www.biqugeu.net/searchbook.php?keyword=<<bookname>>";
        private const string BrowserUserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.45 Safari/537.36";
        private const string ChapterUserAgent = "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0)";

        // Upper bound on consecutive failed requests for a single chapter before giving up.
        private const int MaxRetriesPerChapter = 5;

        // Chapter heading on a chapter page.
        private static readonly Regex ChapterNameRegex = new Regex("<h1>(?<bookname>.*?)</h1>");

        // Navigation bar of a chapter page; captures the href of the "next chapter" link.
        // NOTE(review): the arrows may be written as HTML entities in the real page source - confirm against live HTML.
        private static readonly Regex NextChapterRegex = new Regex("<a href=\"/.*?\" target=\"_top\" class=\"pre\">上一章</a> ← <a href=\"/.*?/\" target=\"_top\" title=\"\" class=\"back\">章节列表</a> → <a href=\"(?<nextChapter>.*?)\" target=\"_top\" class=\"next\"");

        // Inline script variable on a chapter page that holds the book title.
        private static readonly Regex BookTitleRegex = new Regex("booktitle = \"(?<booktitle>.*?)\";");

        // One paragraph of chapter text: everything up to a double line break tag.
        private static readonly Regex ParagraphRegex = new Regex("(?<data>.*?)<br/><br/>");

        // First hit on the search result page (applied after line breaks and tabs are removed).
        private static readonly Regex BookLinkRegex = new Regex("<div class=\"image\">\\s*<a href=\"/(?<bookurl>.*?)\"");

        // First entry after each <dt> heading of the chapter list. Applied after ALL whitespace
        // has been removed from the page, hence "<ahref" without a space.
        private static readonly Regex ChapterListRegex = new Regex("<dt>.*?</dt><dd><ahref=\"/(?<bookfirst>.*?)\">.*?</a></dd>");

        /// <summary>
        /// Program entry point: reads the novel name from the console, resolves the
        /// book page and its first chapter, then downloads chapter by chapter.
        /// </summary>
        /// <param name="args">Command line arguments (unused).</param>
        static void Main(string[] args)
        {
            Console.WriteLine("请输入需要爬取的小说!");
            string novelName = Console.ReadLine();
            if (string.IsNullOrWhiteSpace(novelName))
            {
                Console.WriteLine("没有找到该小说!");
                return;
            }

            string firstChapterUrl;
            try
            {
                string bookUrl = FindBookUrl(novelName);
                if (bookUrl == null)
                {
                    // Stop here: continuing would request a bogus URL and crash later.
                    Console.WriteLine("没有找到该小说!");
                    return;
                }

                firstChapterUrl = FindFirstChapterUrl(bookUrl);
                if (firstChapterUrl == null)
                {
                    Console.WriteLine("没有找到该小说的章节列表!");
                    return;
                }
            }
            catch (WebException we)
            {
                Console.WriteLine(we.Message);
                return;
            }
            catch (IOException ioe)
            {
                Console.WriteLine(ioe.Message);
                return;
            }

            DownloadChapters(firstChapterUrl, novelName);
        }

        /// <summary>
        /// Queries the search page and returns the absolute URL of the first result,
        /// or null when the result page contains no book link.
        /// </summary>
        private static string FindBookUrl(string novelName)
        {
            // Escape the keyword so characters such as '&', '#', '+' or spaces cannot corrupt the query string.
            string searchUrl = SearchUrlTemplate.Replace("<<bookname>>", Uri.EscapeDataString(novelName.Trim()));
            string html = FetchHtml(searchUrl, BrowserUserAgent);
            if (string.IsNullOrEmpty(html))
            {
                return null;
            }

            html = html.Replace("\n", "").Replace("\t", "").Replace("\r", "");
            string relative = BookLinkRegex.Match(html).Groups["bookurl"].Value;
            return relative.Length == 0 ? null : ToAbsoluteUrl(relative);
        }

        /// <summary>
        /// Loads the book page and returns the absolute URL of the first chapter,
        /// or null when the chapter list cannot be located.
        /// </summary>
        private static string FindFirstChapterUrl(string bookUrl)
        {
            string html = FetchHtml(bookUrl, BrowserUserAgent);
            if (string.IsNullOrEmpty(html))
            {
                return null;
            }

            html = html.Replace("\n", "").Replace("\t", "").Replace("\r", "").Replace(" ", "");

            // The original code reads the SECOND <dt> section; guard the index so a page
            // with fewer sections yields "not found" instead of an out-of-range crash.
            MatchCollection sections = ChapterListRegex.Matches(html);
            if (sections.Count < 2)
            {
                return null;
            }

            string relative = sections[1].Groups["bookfirst"].Value;
            return relative.Length == 0 ? null : ToAbsoluteUrl(relative);
        }

        /// <summary>
        /// Follows the "next chapter" links from <paramref name="chapterUrl"/> and appends each
        /// chapter to the book's text file. Stops when the next link no longer contains "html",
        /// when a page comes back empty, or after too many consecutive failed requests.
        /// </summary>
        /// <param name="chapterUrl">Absolute URL of the first chapter to download.</param>
        /// <param name="fallbackTitle">File name to use when the page exposes no book title.</param>
        private static void DownloadChapters(string chapterUrl, string fallbackTitle)
        {
            int failures = 0;
            while (true)
            {
                string html;
                if (!TryFetchHtml(chapterUrl, ChapterUserAgent, out html))
                {
                    failures++;
                    if (failures > MaxRetriesPerChapter)
                    {
                        Console.WriteLine("连续多次请求失败,停止爬取。");
                        return;
                    }

                    Console.WriteLine("远程主机强迫关闭了一个现有的连接,重新爬取当前章节。。。");
                    // Back off a little longer after every failure instead of hammering the host.
                    System.Threading.Thread.Sleep(1000 * failures);
                    continue;
                }

                failures = 0;
                if (string.IsNullOrEmpty(html))
                {
                    // Nothing to parse and no next link to follow.
                    return;
                }

                string nextChapter = NextChapterRegex.Match(html).Groups["nextChapter"].Value;
                string chapterName = ChapterNameRegex.Match(html).Groups["bookname"].Value;
                string bookTitle = BookTitleRegex.Match(html).Groups["booktitle"].Value;
                if (bookTitle.Length == 0)
                {
                    bookTitle = fallbackTitle;
                }

                StringBuilder chapter = new StringBuilder();
                chapter.Append("\r\n").Append(chapterName).Append("\r\n");
                foreach (Match paragraph in ParagraphRegex.Matches(html))
                {
                    chapter.Append(paragraph.Groups["data"].Value.Trim());
                }

                AddBookToTXT(chapter.ToString(), bookTitle);
                Console.WriteLine(chapterName + "-------下载完毕!");

                // The loop ends when the "next" link no longer points at an html page.
                if (!nextChapter.Contains("html"))
                {
                    return;
                }

                chapterUrl = ToAbsoluteUrl(nextChapter);
            }
        }

        /// <summary>
        /// Performs a GET request and returns the response body as text.
        /// </summary>
        /// <exception cref="WebException">The request failed.</exception>
        /// <exception cref="IOException">The connection broke while the body was being read.</exception>
        private static string FetchHtml(string url, string userAgent)
        {
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
            request.Method = "GET";
            request.Accept = "text/html";
            request.UserAgent = userAgent;

            // Dispose the response as well as the reader so the connection is released.
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (Stream body = response.GetResponseStream())
            using (StreamReader reader = new StreamReader(body))
            {
                return reader.ReadToEnd();
            }
        }

        /// <summary>
        /// Same as <see cref="FetchHtml"/> but reports network failures through the return value.
        /// </summary>
        private static bool TryFetchHtml(string url, string userAgent, out string html)
        {
            try
            {
                html = FetchHtml(url, userAgent);
                return true;
            }
            catch (WebException)
            {
                html = null;
                return false;
            }
            catch (IOException)
            {
                html = null;
                return false;
            }
        }

        /// <summary>
        /// Resolves a site-relative link (with or without a leading slash) against the site root.
        /// </summary>
        private static string ToAbsoluteUrl(string relative)
        {
            return new Uri(new Uri(BaseUrl), relative).AbsoluteUri;
        }

        /// <summary>
        /// Replaces every character that is not allowed in a file name with an underscore.
        /// </summary>
        private static string SanitizeFileName(string name)
        {
            if (string.IsNullOrEmpty(name))
            {
                return string.Empty;
            }

            char[] invalid = Path.GetInvalidFileNameChars();
            StringBuilder safe = new StringBuilder(name.Length);
            foreach (char c in name)
            {
                safe.Append(Array.IndexOf(invalid, c) >= 0 ? '_' : c);
            }

            return safe.ToString();
        }

        /// <summary>
        /// Appends content, followed by a line break, to "&lt;book name&gt;.txt" in the
        /// application's base directory. The file is created when it does not exist.
        /// </summary>
        /// <param name="logstring">Content to append.</param>
        /// <param name="pathName">Book name used as the file name; characters that are invalid in file names are replaced with '_'.</param>
        public static void AddBookToTXT(string logstring, string pathName)
        {
            // The name comes from scraped page content, so it must not be able to escape the
            // output directory or contain characters the file system rejects.
            string fileName = SanitizeFileName(pathName) + ".txt";
            string path = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, fileName);
            File.AppendAllText(path, logstring + Environment.NewLine);
        }
    }
}
这篇关于c#爬取笔趣阁小说(附源码)的文章就介绍到这儿,希望我们推荐的文章对大家有所帮助,也希望大家多多支持为之网!
- 2022-03-01沐雪多租宝商城源码从.NetCore3.1升级到.Net6的步骤
- 2024-12-06使用Microsoft.Extensions.AI在.NET中生成嵌入向量
- 2024-11-18微软研究:RAG系统的四个层次提升理解与回答能力
- 2024-11-15C#中怎么从PEM格式的证书中提取公钥?-icode9专业技术文章分享
- 2024-11-14云架构设计——如何用diagrams.net绘制专业的AWS架构图?
- 2024-05-08首个适配Visual Studio平台的国产智能编程助手CodeGeeX正式上线!C#程序员必备效率神器!
- 2024-03-30C#设计模式之十六迭代器模式(Iterator Pattern)【行为型】
- 2024-03-29c# datetime tryparse
- 2024-02-21list find index c#
- 2024-01-24convert toint32 c#