|
|
Configuration
Example of configuration
<?xml version="1.0" encoding="UTF-8"?>
<jbox-configuration>
  <webSpider class="org.jbox.webSpider.simpleSpider.SimpleSpider">  <!-- selects the concrete WebSpider to use -->
    <maxPageNum>10</maxPageNum>  <!-- how many pages the spider will crawl -->
    <startUrls>  <!-- URLs the spider starts with -->
      <property name="URL">http://localhost/</property>
    </startUrls>
    <crawlRules>  <!-- rules the spider abides by -->
      <property name="Rule">http://localhost.*</property>
    </crawlRules>
  </webSpider>
  <cutterBox>
    <cutter language="EN"
            class="org.jbox.textCutter.EN.SimpleENCutter">  <!-- a cutter to put in the cutterBox -->
      <property name="UnicodeScope" start="0x0030" end="0x0039"/>  <!-- Unicode scope set by integer -->
      <property name="UnicodeScope" start="0x0041" end="0x005a"/>
      <property name="UnicodeScope" start="0x0061" end="0x007a"/>
    </cutter>
    <cutter language="CJK"
            class="org.jbox.textCutter.CJK.SimpleCJKCutter">
      <property name="UnicodeBlock">CJK_UNIFIED_IDEOGRAPHS</property>  <!-- Unicode scope set by UnicodeBlock -->
    </cutter>
  </cutterBox>
  <indexWriter class="org.jbox.indexer.IndexWriterWithTFLOC">  <!-- selects the concrete IndexWriter to use -->
    <property name="PageHome">org.jbox.dao.PageHomeByHibernate</property>  <!-- concrete PageHome to use -->
    <property name="WordHome">org.jbox.dao.WordHomeByHibernate</property>  <!-- concrete WordHome to use -->
  </indexWriter>
  <searcher class="org.jbox.searcher.simpleSearcher.SimpleSearcher">  <!-- selects the concrete Searcher to use -->
    <property name="PageHome">org.jbox.dao.PageHomeByHibernate</property>
    <property name="WordHome">org.jbox.dao.WordHomeByHibernate</property>
  </searcher>
</jbox-configuration>