关关采集器大佬 分享个才写的采集规则 bookbao99 net

搬瓦工机场JMS

下面代码保存为 bookbao99.xml
规则测试的时候没有问题
但是正式开采提示 未能对比两个数组元素???求解决 我用的是 关关采集器V9.05

  1. <?xml version="1.0"?>
  2. <RuleConfigInfo xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema">
  3.   <GetSiteCharset>
  4.     <FilterPattern />
  5.     <Method>Match</Method>
  6.     <Options>None</Options>
  7.     <Pattern>utf-8</Pattern>
  8.     <RegexName>GetSiteCharset</RegexName>
  9.   </GetSiteCharset>
  10.   <GetSiteName>
  11.     <FilterPattern />
  12.     <Method>Match</Method>
  13.     <Options>None</Options>
  14.     <Pattern>书包网</Pattern>
  15.     <RegexName>GetSiteName</RegexName>
  16.   </GetSiteName>
  17.   <GetSiteUrl>
  18.     <FilterPattern />
  19.     <Method>Match</Method>
  20.     <Options>None</Options>
  21.     <Pattern>https://www.bookbao99.net/</Pattern>
  22.     <RegexName>GetSiteUrl</RegexName>
  23.   </GetSiteUrl>
  24.   <LagerSort>
  25.     <FilterPattern />
  26.     <Method>Match</Method>
  27.     <Options>None</Options>
  28.     <Pattern><meta property="og:novel:category" content="(.+?)"/></Pattern>
  29.     <RegexName>LagerSort</RegexName>
  30.   </LagerSort>
  31.   <NovelAuthor>
  32.     <FilterPattern />
  33.     <Method>Match</Method>
  34.     <Options>None</Options>
  35.     <Pattern><meta property="og:novel:author" content="(.+?)"/></Pattern>
  36.     <RegexName>NovelAuthor</RegexName>
  37.   </NovelAuthor>
  38.   <NovelCover>
  39.     <FilterPattern />
  40.     <Method>Match</Method>
  41.     <Options>None</Options>
  42.     <Pattern><meta property="og:image" content="(.+?)"/></Pattern>
  43.     <RegexName>NovelCover</RegexName>
  44.   </NovelCover>
  45.   <NovelDefaultCoverUrl>
  46.     <FilterPattern />
  47.     <Method>Match</Method>
  48.     <Options>IgnoreCase</Options>
  49.     <Pattern>nocover.jpg</Pattern>
  50.     <RegexName>NovelDefaultCoverUrl</RegexName>
  51.   </NovelDefaultCoverUrl>
  52.   <NovelDegree>
  53.     <FilterPattern />
  54.     <Method>Match</Method>
  55.     <Options>None</Options>
  56.     <Pattern><meta property="og:novel:status" content="(.+?)"/></Pattern>
  57.     <RegexName>NovelDegree</RegexName>
  58.   </NovelDegree>
  59.   <NovelErr>
  60.     <FilterPattern />
  61.     <Method>Match</Method>
  62.     <Options>None</Options>
  63.     <Pattern>未找到您要的页面</Pattern>
  64.     <RegexName>NovelErr</RegexName>
  65.   </NovelErr>
  66.   <NovelInfo_GetNovelPubKey>
  67.     <FilterPattern />
  68.     <Method>Match</Method>
  69.     <Options>None</Options>
  70.     <Pattern />
  71.     <RegexName>NovelInfo_GetNovelPubKey</RegexName>
  72.   </NovelInfo_GetNovelPubKey>
  73.   <NovelIntro>
  74.     <FilterPattern>&nbsp;
  75. </FilterPattern>
  76.     <Method>Match</Method>
  77.     <Options>None</Options>
  78.     <Pattern><div class="infocontent">((.|\n)+?)</div></Pattern>
  79.     <RegexName>NovelIntro</RegexName>
  80.   </NovelIntro>
  81.   <NovelKeyword>
  82.     <FilterPattern />
  83.     <Method>Match</Method>
  84.     <Options>None</Options>
  85.     <Pattern />
  86.     <RegexName>NovelKeyword</RegexName>
  87.   </NovelKeyword>
  88.   <NovelList_GetNovelKey>
  89.     <FilterPattern />
  90.     <Method>Match</Method>
  91.     <Options>None</Options>
  92.     <Pattern>《<a class="poptext" href="/book/(.+?).html" target="_blank">(.+?)</a>》</p></Pattern>
  93.     <RegexName>NovelList_GetNovelKey</RegexName>
  94.   </NovelList_GetNovelKey>
  95.   <NovelListUrl>
  96.     <FilterPattern />
  97.     <Method>Match</Method>
  98.     <Options>None</Options>
  99.     <Pattern>https://www.bookbao99.net/</Pattern>
  100.     <RegexName>NovelListUrl</RegexName>
  101.   </NovelListUrl>
  102.   <NovelName>
  103.     <FilterPattern />
  104.     <Method>Match</Method>
  105.     <Options>None</Options>
  106.     <Pattern><meta property="og:title" content="(.+?)"/></Pattern>
  107.     <RegexName>NovelName</RegexName>
  108.   </NovelName>
  109.   <NovelSearch_GetNovelKey>
  110.     <FilterPattern />
  111.     <Method>Match</Method>
  112.     <Options>None</Options>
  113.     <Pattern />
  114.     <RegexName>NovelSearch_GetNovelKey</RegexName>
  115.   </NovelSearch_GetNovelKey>
  116.   <NovelSearchData>
  117.     <FilterPattern />
  118.     <Method>Match</Method>
  119.     <Options>None</Options>
  120.     <Pattern />
  121.     <RegexName>NovelSearchData</RegexName>
  122.   </NovelSearchData>
  123.   <NovelSearchUrl>
  124.     <FilterPattern />
  125.     <Method>Match</Method>
  126.     <Options>None</Options>
  127.     <Pattern />
  128.     <RegexName>NovelSearchUrl</RegexName>
  129.   </NovelSearchUrl>
  130.   <NovelUrl>
  131.     <FilterPattern />
  132.     <Method>Match</Method>
  133.     <Options>None</Options>
  134.     <Pattern>https://www.bookbao99.net/book/{NovelKey}.html</Pattern>
  135.     <RegexName>NovelUrl</RegexName>
  136.   </NovelUrl>
  137.   <PubChapter_GetChapterKey>
  138.     <FilterPattern />
  139.     <Method>Match</Method>
  140.     <Options>None</Options>
  141.     <Pattern><li><a href="/views/(.+?).html" target="_blank">.+?</a></li></Pattern>
  142.     <RegexName>PubChapter_GetChapterKey</RegexName>
  143.   </PubChapter_GetChapterKey>
  144.   <PubChapterName>
  145.     <FilterPattern />
  146.     <Method>Match</Method>
  147.     <Options>None</Options>
  148.     <Pattern><li><a href="/views/.+?.html" target="_blank">(.+?)</a></li></Pattern>
  149.     <RegexName>PubChapterName</RegexName>
  150.   </PubChapterName>
  151.   <PubContent_GetTextKey>
  152.     <FilterPattern />
  153.     <Method>Match</Method>
  154.     <Options>None</Options>
  155.     <Pattern />
  156.     <RegexName>PubContent_GetTextKey</RegexName>
  157.   </PubContent_GetTextKey>
  158.   <PubContentErr>
  159.     <FilterPattern />
  160.     <Method>Match</Method>
  161.     <Options>None</Options>
  162.     <Pattern>未找到您要的页面</Pattern>
  163.     <RegexName>PubContentErr</RegexName>
  164.   </PubContentErr>
  165.   <PubContentImages>
  166.     <FilterPattern />
  167.     <Method>Match</Method>
  168.     <Options>None</Options>
  169.     <Pattern><[^<]*((?<=<(?:img|IMG)[^>]*(?:(?:src|SRC)(?:\s*=\s*(?:["’]?))))(?:[^\s"’>]*)\.(?:jpg|gif|jpeg|bmp|png|GIF|JPG))[^>]*></Pattern>
  170.     <RegexName>PubContentImages</RegexName>
  171.   </PubContentImages>
  172.   <PubContentPage>
  173.     <FilterPattern />
  174.     <Method>Match</Method>
  175.     <Options>None</Options>
  176.     <Pattern />
  177.     <RegexName>PubContentPage</RegexName>
  178.   </PubContentPage>
  179.   <PubContentReplace>
  180.     <FilterPattern />
  181.     <Method>Match</Method>
  182.     <Options>None</Options>
  183.     <Pattern />
  184.     <RegexName>PubContentReplace</RegexName>
  185.   </PubContentReplace>
  186.   <PubContentText>
  187.     <FilterPattern />
  188.     <Method>Match</Method>
  189.     <Options>None</Options>
  190.     <Pattern><dd id="contents">((.|\n)+?)</dd></Pattern>
  191.     <RegexName>PubContentText</RegexName>
  192.   </PubContentText>
  193.   <PubContentTitle>
  194.     <FilterPattern />
  195.     <Method>Match</Method>
  196.     <Options>None</Options>
  197.     <Pattern />
  198.     <RegexName>PubContentTitle</RegexName>
  199.   </PubContentTitle>
  200.   <PubContentUrl>
  201.     <FilterPattern />
  202.     <Method>Match</Method>
  203.     <Options>None</Options>
  204.     <Pattern>/views/{ChapterKey}.html</Pattern>
  205.     <RegexName>PubContentUrl</RegexName>
  206.   </PubContentUrl>
  207.   <PubCookies>
  208.     <FilterPattern />
  209.     <Method>Match</Method>
  210.     <Options>None</Options>
  211.     <Pattern />
  212.     <RegexName>PubCookies</RegexName>
  213.   </PubCookies>
  214.   <PubIndexErr>
  215.     <FilterPattern />
  216.     <Method>Match</Method>
  217.     <Options>None</Options>
  218.     <Pattern>出现错误!</Pattern>
  219.     <RegexName>PubIndexErr</RegexName>
  220.   </PubIndexErr>
  221.   <PubIndexUrl>
  222.     <FilterPattern />
  223.     <Method>Match</Method>
  224.     <Options>None</Options>
  225.     <Pattern>https://www.bookbao99.net/book/{NovelKey}.html</Pattern>
  226.     <RegexName>PubIndexUrl</RegexName>
  227.   </PubIndexUrl>
  228.   <PubTextUrl>
  229.     <FilterPattern />
  230.     <Method>Match</Method>
  231.     <Options>None</Options>
  232.     <Pattern />
  233.     <RegexName>PubTextUrl</RegexName>
  234.   </PubTextUrl>
  235.   <PubVolumeContent>
  236.     <FilterPattern />
  237.     <Method>Match</Method>
  238.     <Options>None</Options>
  239.     <Pattern />
  240.     <RegexName>PubVolumeContent</RegexName>
  241.   </PubVolumeContent>
  242.   <PubVolumeName>
  243.     <FilterPattern />
  244.     <Method>Match</Method>
  245.     <Options>IgnoreCase</Options>
  246.     <Pattern><dt>(.+?)</dt></Pattern>
  247.     <RegexName>PubVolumeName</RegexName>
  248.   </PubVolumeName>
  249.   <PubVolumeSplit>
  250.     <FilterPattern />
  251.     <Method>Spilt</Method>
  252.     <Options>IgnoreCase</Options>
  253.     <Pattern><dt></Pattern>
  254.     <RegexName>PubVolumeSplit</RegexName>
  255.   </PubVolumeSplit>
  256.   <RuleID>
  257.     <FilterPattern />
  258.     <Method>Match</Method>
  259.     <Options>None</Options>
  260.     <Pattern />
  261.     <RegexName>RuleID</RegexName>
  262.   </RuleID>
  263.   <RuleVersion>
  264.     <FilterPattern />
  265.     <Method>Match</Method>
  266.     <Options>None</Options>
  267.     <Pattern />
  268.     <RegexName>RuleVersion</RegexName>
  269.   </RuleVersion>
  270.   <SmallSort>
  271.     <FilterPattern />
  272.     <Method>Match</Method>
  273.     <Options>None</Options>
  274.     <Pattern><meta property="og:novel:category" content="(.+?)"/></Pattern>
  275.     <RegexName>SmallSort</RegexName>
  276.   </SmallSort>
  277.   <TsContrary>
  278.     <FilterPattern />
  279.     <Method>Match</Method>
  280.     <Options>None</Options>
  281.     <Pattern />
  282.     <RegexName>TsContrary</RegexName>
  283.   </TsContrary>
  284. </RuleConfigInfo>

复制代码

浙江网友:一直用火车头,这玩意比火车头好用么?
香港网友:火车头采集小说太慢了
单线程

未经允许不得转载:美国VPS_搬瓦工CN2 GIA VPS » 关关采集器大佬 分享个才写的采集规则 bookbao99 net

赞 (0) 打赏

评论 0

  • 昵称 (必填)
  • 邮箱 (必填)
  • 网址

觉得文章有用就打赏一下文章作者

支付宝扫一扫打赏

微信扫一扫打赏