Configuration

Example configuration


<?xml version="1.0" encoding="UTF-8"?>
<jbox-configuration>

    <!-- webSpider: the class attribute selects the concrete WebSpider to use. -->
    <webSpider class="org.jbox.webSpider.simpleSpider.SimpleSpider">

        <!-- How many pages the spider will crawl. -->
        <maxPageNum>10</maxPageNum>

        <!-- URLs the spider starts with; one "URL" property per entry. -->
        <startUrls>
            <property name="URL">http://localhost/</property>
        </startUrls>

        <!--
            Rules the spider must abide by; one "Rule" property per entry.
            NOTE(review): the value looks like a regular expression matched
            against candidate URLs; confirm against the spider implementation.
        -->
        <crawlRules>
            <property name="Rule">http://localhost.*</property>
        </crawlRules>

    </webSpider>

    <!-- cutterBox: each cutter element registers one text cutter in the box. -->
    <cutterBox>

        <!--
            Cutter for language "EN". Its Unicode scope is given as numeric
            start/end code points, one "UnicodeScope" property per range:
              U+0030 to U+0039 are the digits 0-9,
              U+0041 to U+005A are the uppercase letters A-Z,
              U+0061 to U+007A are the lowercase letters a-z.
        -->
        <cutter language="EN" class="org.jbox.textCutter.EN.SimpleENCutter">
            <property name="UnicodeScope" start="0x0030" end="0x0039"/>
            <property name="UnicodeScope" start="0x0041" end="0x005a"/>
            <property name="UnicodeScope" start="0x0061" end="0x007a"/>
        </cutter>

        <!--
            Cutter for language "CJK". Its Unicode scope is given by block name
            through a "UnicodeBlock" property.
            NOTE(review): presumably the name of a java.lang.Character.UnicodeBlock
            constant; confirm against the cutter implementation.
        -->
        <cutter language="CJK" class="org.jbox.textCutter.CJK.SimpleCJKCutter">
            <property name="UnicodeBlock">CJK_UNIFIED_IDEOGRAPHS</property>
        </cutter>

    </cutterBox>

    <!-- indexWriter: the class attribute selects the concrete IndexWriter to use. -->
    <indexWriter class="org.jbox.indexer.IndexWriterWithTFLOC">

        <!-- Concrete PageHome to use. -->
        <property name="PageHome">org.jbox.dao.PageHomeByHibernate</property>

        <!-- Concrete WordHome to use. -->
        <property name="WordHome">org.jbox.dao.WordHomeByHibernate</property>

    </indexWriter>

    <!--
        searcher: the class attribute selects the concrete Searcher to use.
        In this example its PageHome and WordHome properties name the same
        classes as the indexWriter section.
    -->
    <searcher class="org.jbox.searcher.simpleSearcher.SimpleSearcher">
        <property name="PageHome">org.jbox.dao.PageHomeByHibernate</property>
        <property name="WordHome">org.jbox.dao.WordHomeByHibernate</property>
    </searcher>

</jbox-configuration>


Copyright © 2007-2013 YiBin.h.
Licensed under the Apache License, Version 2.0.