XQuilla/include/xqilla/fulltext/DefaultTokenizer.hpp
2020-02-17 22:21:23 +01:00

77 lines
2.1 KiB
C++

/*
* Copyright (c) 2001-2008
* DecisionSoft Limited. All rights reserved.
* Copyright (c) 2004-2008
* Oracle. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* $Id$
*/
#ifndef _DEFAULTTOKENIZER_HPP
#define _DEFAULTTOKENIZER_HPP
#include <xqilla/framework/XQillaExport.hpp>
#include <xqilla/fulltext/Tokenizer.hpp>
/**
 * A Tokenizer that exposes two tokenize() entry points, one taking a node
 * and one taking a raw XMLCh string. Both hand back a TokenStream; their
 * definitions live outside this header.
 */
class XQILLA_API DefaultTokenizer : public Tokenizer
{
public:
  /**
   * Immutable record for a single token: the word plus the position,
   * sentence and paragraph numbers supplied at construction.
   */
  class DefaultTokenInfo : public TokenInfo
  {
  public:
    /**
     * Stores the given values verbatim.
     *
     * @param word  token text; only the pointer is kept, the characters are
     *              not copied, so the buffer must outlive this object
     * @param pos   value later returned by getPosition()
     * @param sen   value later returned by getSentence()
     * @param para  value later returned by getParagraph()
     */
    DefaultTokenInfo(const XMLCh *word, unsigned int pos, unsigned int sen, unsigned int para)
      : word_(word),
        position_(pos),
        sentence_(sen),
        paragraph_(para)
    {
    }

    /// @return the word pointer passed to the constructor
    virtual const XMLCh *getWord() const
    {
      return word_;
    }

    /// @return the position value passed to the constructor
    virtual unsigned int getPosition() const
    {
      return position_;
    }

    /// @return the sentence value passed to the constructor
    virtual unsigned int getSentence() const
    {
      return sentence_;
    }

    /// @return the paragraph value passed to the constructor
    virtual unsigned int getParagraph() const
    {
      return paragraph_;
    }

  private:
    const XMLCh *word_;       // borrowed pointer, never freed by this class
    unsigned int position_;
    unsigned int sentence_;
    unsigned int paragraph_;
  };

  /// Constructs a tokenizer; there is no state to initialise.
  DefaultTokenizer() {}

  /**
   * Produces a token stream for a node.
   *
   * @param node     the node to tokenize
   * @param context  dynamic context made available to the implementation
   * @return a stream yielding the tokens
   */
  virtual TokenStream::Ptr tokenize(const Node::Ptr &node, DynamicContext *context) const;

  /**
   * Produces a token stream for a string.
   *
   * @param str  the text to tokenize
   * @param mm   memory manager made available to the implementation
   * @return a stream yielding the tokens
   */
  virtual TokenStream::Ptr tokenize(const XMLCh *str, XPath2MemoryManager *mm) const;

private:
  /**
   * TokenStream used internally by this tokenizer. Only the declaration is
   * given here; the constructor, destructor and next() are defined elsewhere.
   */
  class DefaultTokenStream : public TokenStream
  {
  public:
    /**
     * @param str  text to be split into tokens
     * @param mm   memory manager; NOTE(review): presumably used to allocate
     *             and release string_ - confirm against the definition
     */
    DefaultTokenStream(const XMLCh *str, XPath2MemoryManager *mm);

    virtual ~DefaultTokenStream();

    /// @return the next token's info (end-of-stream convention is set by the definition)
    virtual TokenInfo::Ptr next();

  private:
    // NOTE(review): the roles below are inferred from the member names only;
    // verify against the implementation file.
    XMLCh *string_;               // presumably the buffer being scanned
    XMLCh *current_;              // presumably the scan cursor
    XMLCh *tokenStart_;           // presumably the start of the token in progress

    unsigned int position_;       // running counters, presumably fed into
    unsigned int sentence_;       // the TokenInfo objects that next() returns
    unsigned int paragraph_;

    bool seenEndOfSentence_;
    XPath2MemoryManager *mm_;
  };
};
#endif