下面代码保存为 bookbao99.xml
规则测试的时候没有问题，
但是正式采集时提示“未能对比两个数组元素”，求解决。我用的是关关采集器 V9.05。
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Collection rule set for https://www.bookbao99.net/ (file name: bookbao99.xml).

  Every rule element has the same four children:
    FilterPattern : optional pattern (empty in almost every rule here)
    Method        : how Pattern is applied ("Match" everywhere except PubVolumeSplit)
    Options       : regex options ("None" or "IgnoreCase")
    Pattern       : a literal value, a URL template, or a regular expression
    RegexName     : repeats the name of the enclosing rule element

  Markup characters inside Pattern are written as &lt; and &gt; so the file is
  well-formed XML; a parser hands the unescaped regex text to the application.
  Save this file as UTF-8, since it contains non-ASCII literals.
-->
<RuleConfigInfo xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema">

  <!-- Site identity: charset, display name and base URL, all given as literals. -->
  <GetSiteCharset>
    <FilterPattern />
    <Method>Match</Method>
    <Options>None</Options>
    <Pattern>utf-8</Pattern>
    <RegexName>GetSiteCharset</RegexName>
  </GetSiteCharset>
  <GetSiteName>
    <FilterPattern />
    <Method>Match</Method>
    <Options>None</Options>
    <Pattern>书包网</Pattern>
    <RegexName>GetSiteName</RegexName>
  </GetSiteName>
  <GetSiteUrl>
    <FilterPattern />
    <Method>Match</Method>
    <Options>None</Options>
    <Pattern>https://www.bookbao99.net/</Pattern>
    <RegexName>GetSiteUrl</RegexName>
  </GetSiteUrl>

  <!-- Captures the content attribute of the og:novel:category meta tag
       (same pattern as SmallSort below). -->
  <LagerSort>
    <FilterPattern />
    <Method>Match</Method>
    <Options>None</Options>
    <Pattern>&lt;meta property="og:novel:category" content="(.+?)"/&gt;</Pattern>
    <RegexName>LagerSort</RegexName>
  </LagerSort>

  <!-- Captures the content attribute of the og:novel:author meta tag. -->
  <NovelAuthor>
    <FilterPattern />
    <Method>Match</Method>
    <Options>None</Options>
    <Pattern>&lt;meta property="og:novel:author" content="(.+?)"/&gt;</Pattern>
    <RegexName>NovelAuthor</RegexName>
  </NovelAuthor>

  <!-- Captures the content attribute of the og:image meta tag. -->
  <NovelCover>
    <FilterPattern />
    <Method>Match</Method>
    <Options>None</Options>
    <Pattern>&lt;meta property="og:image" content="(.+?)"/&gt;</Pattern>
    <RegexName>NovelCover</RegexName>
  </NovelCover>

  <!-- Literal file name, matched case-insensitively. -->
  <NovelDefaultCoverUrl>
    <FilterPattern />
    <Method>Match</Method>
    <Options>IgnoreCase</Options>
    <Pattern>nocover.jpg</Pattern>
    <RegexName>NovelDefaultCoverUrl</RegexName>
  </NovelDefaultCoverUrl>

  <!-- Captures the content attribute of the og:novel:status meta tag. -->
  <NovelDegree>
    <FilterPattern />
    <Method>Match</Method>
    <Options>None</Options>
    <Pattern>&lt;meta property="og:novel:status" content="(.+?)"/&gt;</Pattern>
    <RegexName>NovelDegree</RegexName>
  </NovelDegree>

  <!-- Literal text to look for; identical to the PubContentErr pattern. -->
  <NovelErr>
    <FilterPattern />
    <Method>Match</Method>
    <Options>None</Options>
    <Pattern>未找到您要的页面</Pattern>
    <RegexName>NovelErr</RegexName>
  </NovelErr>

  <!-- No pattern configured. -->
  <NovelInfo_GetNovelPubKey>
    <FilterPattern />
    <Method>Match</Method>
    <Options>None</Options>
    <Pattern />
    <RegexName>NovelInfo_GetNovelPubKey</RegexName>
  </NovelInfo_GetNovelPubKey>

  <!-- Captures everything (including newlines) inside <div class="infocontent">.
       NOTE(review): FilterPattern holds only a line break in the posted rule.
       It may originally have contained markup that the forum rendered away;
       confirm against the author's file. The closing tag is kept at column 0
       so the value stays a single newline. -->
  <NovelIntro>
    <FilterPattern>
</FilterPattern>
    <Method>Match</Method>
    <Options>None</Options>
    <Pattern>&lt;div class="infocontent"&gt;((.|\n)+?)&lt;/div&gt;</Pattern>
    <RegexName>NovelIntro</RegexName>
  </NovelIntro>

  <!-- No pattern configured. -->
  <NovelKeyword>
    <FilterPattern />
    <Method>Match</Method>
    <Options>None</Options>
    <Pattern />
    <RegexName>NovelKeyword</RegexName>
  </NovelKeyword>

  <!-- Group 1 is the path segment between /book/ and .html in a "poptext" link;
       group 2 is the link text. -->
  <NovelList_GetNovelKey>
    <FilterPattern />
    <Method>Match</Method>
    <Options>None</Options>
    <Pattern>《&lt;a class="poptext" href="/book/(.+?).html" target="_blank"&gt;(.+?)&lt;/a&gt;》&lt;/p&gt;</Pattern>
    <RegexName>NovelList_GetNovelKey</RegexName>
  </NovelList_GetNovelKey>

  <!-- Literal URL; same value as GetSiteUrl. -->
  <NovelListUrl>
    <FilterPattern />
    <Method>Match</Method>
    <Options>None</Options>
    <Pattern>https://www.bookbao99.net/</Pattern>
    <RegexName>NovelListUrl</RegexName>
  </NovelListUrl>

  <!-- Captures the content attribute of the og:title meta tag. -->
  <NovelName>
    <FilterPattern />
    <Method>Match</Method>
    <Options>None</Options>
    <Pattern>&lt;meta property="og:title" content="(.+?)"/&gt;</Pattern>
    <RegexName>NovelName</RegexName>
  </NovelName>

  <!-- Search rules: none of the three has a pattern configured. -->
  <NovelSearch_GetNovelKey>
    <FilterPattern />
    <Method>Match</Method>
    <Options>None</Options>
    <Pattern />
    <RegexName>NovelSearch_GetNovelKey</RegexName>
  </NovelSearch_GetNovelKey>
  <NovelSearchData>
    <FilterPattern />
    <Method>Match</Method>
    <Options>None</Options>
    <Pattern />
    <RegexName>NovelSearchData</RegexName>
  </NovelSearchData>
  <NovelSearchUrl>
    <FilterPattern />
    <Method>Match</Method>
    <Options>None</Options>
    <Pattern />
    <RegexName>NovelSearchUrl</RegexName>
  </NovelSearchUrl>

  <!-- URL template with a {NovelKey} placeholder; identical to PubIndexUrl. -->
  <NovelUrl>
    <FilterPattern />
    <Method>Match</Method>
    <Options>None</Options>
    <Pattern>https://www.bookbao99.net/book/{NovelKey}.html</Pattern>
    <RegexName>NovelUrl</RegexName>
  </NovelUrl>

  <!-- Captures the path segment between /views/ and .html of each chapter link.
       PubChapterName below matches the same markup but captures the link text. -->
  <PubChapter_GetChapterKey>
    <FilterPattern />
    <Method>Match</Method>
    <Options>None</Options>
    <Pattern>&lt;li&gt;&lt;a href="/views/(.+?).html" target="_blank"&gt;.+?&lt;/a&gt;&lt;/li&gt;</Pattern>
    <RegexName>PubChapter_GetChapterKey</RegexName>
  </PubChapter_GetChapterKey>
  <PubChapterName>
    <FilterPattern />
    <Method>Match</Method>
    <Options>None</Options>
    <Pattern>&lt;li&gt;&lt;a href="/views/.+?.html" target="_blank"&gt;(.+?)&lt;/a&gt;&lt;/li&gt;</Pattern>
    <RegexName>PubChapterName</RegexName>
  </PubChapterName>

  <!-- No pattern configured. -->
  <PubContent_GetTextKey>
    <FilterPattern />
    <Method>Match</Method>
    <Options>None</Options>
    <Pattern />
    <RegexName>PubContent_GetTextKey</RegexName>
  </PubContent_GetTextKey>

  <!-- Literal text to look for; identical to the NovelErr pattern. -->
  <PubContentErr>
    <FilterPattern />
    <Method>Match</Method>
    <Options>None</Options>
    <Pattern>未找到您要的页面</Pattern>
    <RegexName>PubContentErr</RegexName>
  </PubContentErr>

  <!-- Matches a tag and captures an image path ending in jpg/gif/jpeg/bmp/png
       that follows src= inside an img tag (via the look-behind).
       The optional quote and the excluded characters use the ASCII quotes " and ';
       the posted rule had a typographic right quote in place of ', which cannot
       delimit an HTML attribute. -->
  <PubContentImages>
    <FilterPattern />
    <Method>Match</Method>
    <Options>None</Options>
    <Pattern>&lt;[^&lt;]*((?&lt;=&lt;(?:img|IMG)[^&gt;]*(?:(?:src|SRC)(?:\s*=\s*(?:["']?))))(?:[^\s"'&gt;]*)\.(?:jpg|gif|jpeg|bmp|png|GIF|JPG))[^&gt;]*&gt;</Pattern>
    <RegexName>PubContentImages</RegexName>
  </PubContentImages>

  <!-- No pattern configured for either of the next two rules. -->
  <PubContentPage>
    <FilterPattern />
    <Method>Match</Method>
    <Options>None</Options>
    <Pattern />
    <RegexName>PubContentPage</RegexName>
  </PubContentPage>
  <PubContentReplace>
    <FilterPattern />
    <Method>Match</Method>
    <Options>None</Options>
    <Pattern />
    <RegexName>PubContentReplace</RegexName>
  </PubContentReplace>

  <!-- Captures everything (including newlines) inside <dd id="contents">. -->
  <PubContentText>
    <FilterPattern />
    <Method>Match</Method>
    <Options>None</Options>
    <Pattern>&lt;dd id="contents"&gt;((.|\n)+?)&lt;/dd&gt;</Pattern>
    <RegexName>PubContentText</RegexName>
  </PubContentText>

  <!-- No pattern configured. -->
  <PubContentTitle>
    <FilterPattern />
    <Method>Match</Method>
    <Options>None</Options>
    <Pattern />
    <RegexName>PubContentTitle</RegexName>
  </PubContentTitle>

  <!-- URL template with a {ChapterKey} placeholder.
       NOTE(review): unlike NovelUrl and PubIndexUrl this value has no scheme or
       host. Left unchanged; confirm whether the collector resolves relative URLs. -->
  <PubContentUrl>
    <FilterPattern />
    <Method>Match</Method>
    <Options>None</Options>
    <Pattern>/views/{ChapterKey}.html</Pattern>
    <RegexName>PubContentUrl</RegexName>
  </PubContentUrl>

  <!-- No pattern configured. -->
  <PubCookies>
    <FilterPattern />
    <Method>Match</Method>
    <Options>None</Options>
    <Pattern />
    <RegexName>PubCookies</RegexName>
  </PubCookies>

  <!-- Literal text to look for. -->
  <PubIndexErr>
    <FilterPattern />
    <Method>Match</Method>
    <Options>None</Options>
    <Pattern>出现错误!</Pattern>
    <RegexName>PubIndexErr</RegexName>
  </PubIndexErr>

  <!-- URL template with a {NovelKey} placeholder; identical to NovelUrl. -->
  <PubIndexUrl>
    <FilterPattern />
    <Method>Match</Method>
    <Options>None</Options>
    <Pattern>https://www.bookbao99.net/book/{NovelKey}.html</Pattern>
    <RegexName>PubIndexUrl</RegexName>
  </PubIndexUrl>

  <!-- No pattern configured for either of the next two rules. -->
  <PubTextUrl>
    <FilterPattern />
    <Method>Match</Method>
    <Options>None</Options>
    <Pattern />
    <RegexName>PubTextUrl</RegexName>
  </PubTextUrl>
  <PubVolumeContent>
    <FilterPattern />
    <Method>Match</Method>
    <Options>None</Options>
    <Pattern />
    <RegexName>PubVolumeContent</RegexName>
  </PubVolumeContent>

  <!-- Captures the text inside each dt element, case-insensitively. -->
  <PubVolumeName>
    <FilterPattern />
    <Method>Match</Method>
    <Options>IgnoreCase</Options>
    <Pattern>&lt;dt&gt;(.+?)&lt;/dt&gt;</Pattern>
    <RegexName>PubVolumeName</RegexName>
  </PubVolumeName>

  <!-- The only rule whose Method is not "Match"; its pattern is the opening dt tag.
       NOTE(review): "Spilt" looks like a misspelling of "Split". It is kept
       byte-identical because the values the collector accepts are not visible
       here; confirm before changing. -->
  <PubVolumeSplit>
    <FilterPattern />
    <Method>Spilt</Method>
    <Options>IgnoreCase</Options>
    <Pattern>&lt;dt&gt;</Pattern>
    <RegexName>PubVolumeSplit</RegexName>
  </PubVolumeSplit>

  <!-- No pattern configured for either of the next two rules. -->
  <RuleID>
    <FilterPattern />
    <Method>Match</Method>
    <Options>None</Options>
    <Pattern />
    <RegexName>RuleID</RegexName>
  </RuleID>
  <RuleVersion>
    <FilterPattern />
    <Method>Match</Method>
    <Options>None</Options>
    <Pattern />
    <RegexName>RuleVersion</RegexName>
  </RuleVersion>

  <!-- Captures the content attribute of the og:novel:category meta tag
       (same pattern as LagerSort above). -->
  <SmallSort>
    <FilterPattern />
    <Method>Match</Method>
    <Options>None</Options>
    <Pattern>&lt;meta property="og:novel:category" content="(.+?)"/&gt;</Pattern>
    <RegexName>SmallSort</RegexName>
  </SmallSort>

  <!-- No pattern configured. -->
  <TsContrary>
    <FilterPattern />
    <Method>Match</Method>
    <Options>None</Options>
    <Pattern />
    <RegexName>TsContrary</RegexName>
  </TsContrary>
</RuleConfigInfo>
复制代码
浙江网友:一直用火车头,这玩意比火车头好用么?
香港网友:火车头采集小说太慢了
单线程
未经允许不得转载:美国VPS_搬瓦工CN2 GIA VPS » 关关采集器大佬 分享个才写的采集规则 bookbao99 net