Configuration

Example configuration file


<?xml version="1.0" encoding="UTF-8"?>
<!--
    Example JBox configuration. It has four sections: webSpider, cutterBox,
    indexWriter and searcher. Each explanatory comment below follows the
    start tag or element it describes.
-->
<jbox-configuration>

    <webSpider class="org.jbox.webSpider.simpleSpider.SimpleSpider">
    <!-- Sets the concrete WebSpider implementation class. -->

        <maxPageNum>10</maxPageNum>
        <!-- Sets the maximum number of pages to crawl. -->

        <startUrls>
        <!-- Sets the starting points of the WebSpider. -->
            <property name="URL">http://localhost/</property>
        </startUrls>

        <crawlRules>
        <!-- Sets the rules the WebSpider must follow while crawling. -->
            <property name="Rule">http://localhost.*</property>
        </crawlRules>

    </webSpider>

    <cutterBox>

        <cutter language="EN" class="org.jbox.textCutter.EN.SimpleENCutter">
        <!-- Sets a concrete Cutter class to be placed in the CutterBox. -->

            <property name="UnicodeScope" start="0x0030" end="0x0039"/>
            <!-- Specifies the Cutter's Unicode range as start/end pairs
                 (a two-dimensional array). -->
            <property name="UnicodeScope" start="0x0041" end="0x005a"/>
            <property name="UnicodeScope" start="0x0061" end="0x007a"/>

        </cutter>

        <cutter language="CJK" class="org.jbox.textCutter.CJK.SimpleCJKCutter">

            <property name="UnicodeBlock">CJK_UNIFIED_IDEOGRAPHS</property>
            <!-- Specifies the Cutter's Unicode range by UnicodeBlock. -->

        </cutter>

    </cutterBox>

    <indexWriter class="org.jbox.indexer.IndexWriterWithTFLOC">
    <!-- Sets the concrete IndexWriter class to use. -->

        <property name="PageHome">org.jbox.dao.PageHomeByHibernate</property>
        <!-- Sets the concrete PageHome class. -->

        <property name="WordHome">org.jbox.dao.WordHomeByHibernate</property>
        <!-- Sets the concrete WordHome class. -->

    </indexWriter>

    <searcher class="org.jbox.searcher.simpleSearcher.SimpleSearcher">
    <!-- Sets the concrete Searcher class. -->

        <property name="PageHome">org.jbox.dao.PageHomeByHibernate</property>
        <property name="WordHome">org.jbox.dao.WordHomeByHibernate</property>

    </searcher>

</jbox-configuration>


Copyright © 2007-2013 YiBin.h.
Licensed under the Apache License, Version 2.0.