|
|
Configuration
Example configuration:
<?xml version="1.0" encoding="UTF-8"?>
<jbox-configuration>

  <!-- Concrete WebSpider implementation class. -->
  <webSpider class="org.jbox.webSpider.simpleSpider.SimpleSpider">
    <!-- Maximum number of pages to crawl. -->
    <maxPageNum>10</maxPageNum>

    <!-- Starting point(s) of the WebSpider. -->
    <startUrls>
      <property name="URL">http://localhost/</property>
    </startUrls>

    <!-- Rules the WebSpider must obey while crawling the web. -->
    <crawlRules>
      <property name="Rule">http://localhost.*</property>
    </crawlRules>
  </webSpider>

  <cutterBox>
    <!-- Concrete Cutter class to be placed into the CutterBox. -->
    <cutter language="EN" class="org.jbox.textCutter.EN.SimpleENCutter">
      <!-- The Cutter's Unicode range, given as a 2-D array of start/end pairs. -->
      <!-- U+0030..U+0039: ASCII digits 0-9 -->
      <property name="UnicodeScope" start="0x0030" end="0x0039"/>
      <!-- U+0041..U+005A: Latin capital letters A-Z -->
      <property name="UnicodeScope" start="0x0041" end="0x005a"/>
      <!-- U+0061..U+007A: Latin small letters a-z -->
      <property name="UnicodeScope" start="0x0061" end="0x007a"/>
    </cutter>

    <cutter language="CJK" class="org.jbox.textCutter.CJK.SimpleCJKCutter">
      <!-- The Cutter's Unicode range, given as a UnicodeBlock. -->
      <property name="UnicodeBlock">CJK_UNIFIED_IDEOGRAPHS</property>
    </cutter>
  </cutterBox>

  <!-- Selects which concrete IndexWriter to use. -->
  <indexWriter class="org.jbox.indexer.IndexWriterWithTFLOC">
    <!-- Concrete PageHome class. -->
    <property name="PageHome">org.jbox.dao.PageHomeByHibernate</property>
    <!-- Concrete WordHome class. -->
    <property name="WordHome">org.jbox.dao.WordHomeByHibernate</property>
  </indexWriter>

  <!-- Concrete Searcher class. -->
  <searcher class="org.jbox.searcher.simpleSearcher.SimpleSearcher">
    <property name="PageHome">org.jbox.dao.PageHomeByHibernate</property>
    <property name="WordHome">org.jbox.dao.WordHomeByHibernate</property>
  </searcher>

</jbox-configuration>