// xchmfile.h
// Source listing from the kdegraphics-kde4 package (source-browser page header, kept as a comment).

/***************************************************************************
 *   Copyright (C) 2005 by Georgy Yunaev                                   *
 *   tim@krasnogorsk.ru                                                    *
 *                                                                         *
 *   Copyright (C) 2003  Razvan Cojocaru <razvanco@gmx.net>                *
 *   XML-RPC/Context ID code contributed by Eamon Millman / PCI Geomatics  *
 *   <millman@pcigeomatics.com>                                            *
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 *   This program is distributed in the hope that it will be useful,       *
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
 *   GNU General Public License for more details.                          *
 *                                                                         *
 *   You should have received a copy of the GNU General Public License     *
 *   along with this program; if not, write to the                         *
 *   Free Software Foundation, Inc.,                                       *
 *   51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.             *
 ***************************************************************************/


#ifndef INCLUDE_CHMFILE_H
#define INCLUDE_CHMFILE_H

#include <sys/types.h>

#include <qbytearray.h>
#include <qstring.h>
#include <qmap.h>
#include <qvector.h>
#include <qtextcodec.h>

#include "kchmtextencoding.h"

#include "chm_lib.h"

// Forward declaration: CHMGenerator is named as a friend of CHMFile further down in this header.
class CHMGenerator;

//! One resolved search hit: the title of a page together with its URL.
class KCHMSearchResult
{
      public:
            //! Creates an empty hit; both strings are default-constructed.
            KCHMSearchResult()
            {
            }

            //! Creates a hit from the page title \a t and the page URL \a u.
            KCHMSearchResult ( const QString& t, const QString& u )
                  : title ( t )
                  , url ( u )
            {
            }

            //! Page title.
            QString     title;

            //! Page URL.
            QString     url;
};

//! Intermediate search hit, stored in KCHMSearchProgressResults_t.
//! NOTE(review): titleoff/urloff are presumably offsets that are later resolved
//! into the title and URL strings of a KCHMSearchResult - confirm against the .cpp.
class KCHMSearchProgressResult
{
      public:
            //! Creates an empty hit. Both offsets start at zero so that a
            //! default-constructed element (QVector needs this constructor)
            //! never carries indeterminate values.
            inline KCHMSearchProgressResult() : titleoff(0), urloff(0) {}

            //! Creates a hit with title offset \a t and URL offset \a u; the offsets list starts empty.
            inline KCHMSearchProgressResult( u_int32_t t, u_int32_t u ) : titleoff(t),urloff(u) {}
            
            //! Offsets collected for this hit.
            //! NOTE(review): presumably the word offsets kept for phrase searches - confirm.
            QVector<u_int64_t>            offsets;

            //! Title offset of the hit.
            u_int32_t                           titleoff;

            //! URL offset of the hit.
            u_int32_t                           urloff;
};

//! List of intermediate hits; the type of the results parameter of CHMFile::SearchWord().
typedef QVector<KCHMSearchProgressResult> KCHMSearchProgressResults_t;
//! List of resolved hits; the type of the results parameter of CHMFile::GetSearchResults().
typedef QVector<KCHMSearchResult>               KCHMSearchResults_t;


class KCHMParsedIndexEntry
{
      public:
            KCHMParsedIndexEntry () { m_imagenum = -1; }
            KCHMParsedIndexEntry ( QString name, KCHMParsedIndexEntry * parent = 0, QString url = QString(), int imagenum = -1 ) : m_parent(parent), m_name (name), m_url (url), m_imagenum (imagenum) {}
            
      public:
            KCHMParsedIndexEntry    *     m_parent;
            QString                                   m_name;
            QString                                   m_url;
            int                                       m_imagenum;
};

//! List of parsed entries; the type of the cont parameter of CHMFile::ParseChmIndexFile().
typedef QVector<KCHMParsedIndexEntry>           KCHMParsedIndexEntry_t;
// Forward declaration: this header only uses pointers to QDomDocument.
class QDomDocument;

//! Maximum allowed number of search-returned items.
//! NOTE(review): CHMFile::GetSearchResults() defaults its limit_results to 500,
//! not to this value - confirm which limit is the intended one.
#define MAX_SEARCH_RESULTS 512


//! Mostly a C++ wrapper around the CHMLIB facilities. Concrete class.
00089 class CHMFile
{
    friend class CHMGenerator;
public:
      //! Default constructor.
      CHMFile();
      
      /*!
        \brief This constructor attempts to open the .chm file passed as
        it's argument. If it fails, IsOk() will return false.
        \param archiveName The .chm filename on disk.
       */
      CHMFile(const QString& archiveName);

      //! Destructor. If a file has been successfully opened, it closes it.
      ~CHMFile();

      /*!
        \brief Gets the name of the default page in the archive.
        \return The home page name, with a '/' added in front and
        relative to the root of the archive filesystem. If no .chm
        has been opened, the returned value is "/".
       */
00112       QString HomePage() const { return encodeWithCurrentCodec(m_home); }

      /*!
        \brief Gets name of the .hhc file in the archive that
        could potentially be used to generate content information from.
        \return The topics file name, with a '/' added in front and
        relative to the root of the archive filesystem. If no .chm
        has been opened, the return value is an empty string.
      */
00121       QString TopicsFile() const { return encodeWithCurrentCodec(m_topicsFile); }

      /*!
        \brief Gets the filename of the currently opened .chm file.
        \return The filename of the currently opened archive, relative
        to the root of the filesystem, or the empty string if no
        archive has been opened.
       */
00129       QString ArchiveName() const { return m_filename; }

      /*!
        \brief Gets name of the .hhk file in the archive that
        could potentially be used to generate content information from.
        \return The index file name, with a '/' added in front and
        relative to the root of the archive filesystem. If no .chm
        has been opened, the return value is an empty string.
       */
00138       QString IndexFile() const { return encodeWithCurrentCodec(m_indexFile); }

      /*!
        \brief Gets the name of the opened .chm.
        \return The name of the opened document, or an empty string
        if no .chm has been loaded.
      */
00145       QString Title() const { return encodeWithCurrentCodec(m_title); }

      /*!
        \brief Gets the name of the default font.
        \return The name of the default font or the empty string if
        no default font was selected.
      */
00152       QString DefaultFont() const { return m_font; }

      /*!
        \brief Checks if the last attempt to load a .chm file was
        successful.
        \return true, if the last attempt to load a .chm file was
        successful, false otherwise.
       */
00160       bool IsOk() const { return m_chmFile != NULL; }

      /*!
        \brief Attempts to load a .chm file from disk.
        \param archiveName The .chm filename on disk.
        \return true on success, false on failure.
       */
      bool LoadCHM(const QString& archiveName);

      //! Closes the currently opened .chm, or does nothing if none opened.
      void CloseCHM();

      /*!
        \brief Attempts to fill a QListView by parsing the topics file.
        \param toBuild Pointer to the QListView tree to be filled.
        If the topics file is not available, the
        tree is unmodified. The tree must be empty before passing it
        to this function.
        \return true if it's possible to build the tree, false otherwise.
       */
       bool ParseAndFillTopicsTree(QDomDocument*toBuild);

      /*!
        \brief Attempts to fill a QListView by parsing the index file.
        \param indexlist Pointer to the document to be filled.
        If the index file is not available, the list control
        is unmodified. The document must be empty before passing it
        to this function.
        \return true if it's possible to build the tree, false otherwise.
       */
      // bool ParseAndFillIndex (K3ListView *indexlist);

      /*!
        \brief Fast search using the $FIftiMain file in the .chm.
        \param word The text we're looking for.
        \param wholeWords Are we looking for whole words only?
        \param titlesOnly Are we looking for titles only?
        \param results A string-string hashmap that will hold
        the results in case of successful search. The keys are
        the URLs and the values are the page titles.
        \param phrase_search Indicates that word offset information should be kept.
        \return true if the search found something, false otherwise.
       */
      bool SearchWord (const QString& word, bool wholeWords, bool titlesOnly, KCHMSearchProgressResults_t& results, bool phrase_search);

      /*!
      \brief Finalize the search, resolves all the and generate the results.
      \param tempres Temporary search results from SearchWord.
      \param results A string-string hashmap that will hold the results in case of successful search.
                  The keys are the URLs and the values are the page titles.
      \param limit_results Maximum number of results to return.
      */
      void GetSearchResults ( const KCHMSearchProgressResults_t& tempres, KCHMSearchResults_t& results, unsigned int limit_results = 500 );

      /*!
        \brief Looks up fileName in the archive.
        \param fileName The file name to look up in the archive,
        qualified with '/' standing for the root of the archive
        filesystem.
        \param ui A pointer to CHMLIB specific data about the file.
        The parameter gets filled with useful data if the lookup 
        was successful.
        \return true if the file exists in the archive, false otherwise.
       */
      bool ResolveObject(const QString& fileName, chmUnitInfo *ui);

      /*!
        \brief Retrieves an uncompressed chunk of a file in the .chm.
        \param ui Pointer to a CHMLIB specific data structure obtained
        from a successful call to ResolveObject().
        \param buffer The buffer to place the chunk into.
        \param fileOffset Where does the chunk we want begin in the file?
        \param bufferSize The size of the buffer.
        \return 0 on error, length of chunk retrieved otherwise.
       */
      size_t RetrieveObject(chmUnitInfo *ui, unsigned char *buffer, LONGUINT64 fileOffset, LONGINT64 bufferSize);

      //! Puts in the str parameter the contents of the file referred by ui.
      bool GetFileContentAsString (QString& str, chmUnitInfo *ui);
      
      //! Puts in the str parameter the contents of the file referred by location.
      bool GetFileContentAsString (QString& str, QString location);

      /*! 
       * Puts in the str parameter the contents of the file filename referred by location.
       * If file has not been opened, it is opened, and added into m_chmLoadedFiles.
       */
      bool GetFileContentAsString (QString& str, QString filename, QString location);

      /*! 
       * Enumerate every file in the CHM archive, and store its path into the supplied vector.
       */
      bool enumerateArchive (QVector<QString>& files);
      
      /*! 
       * Find the tree item specified by URL.
       * Used to find the tree item related to the linked page.
       */
    int getPageNum( const QString & str ) const;
    QString getUrlForPage( int p ) const;

      //! Returns true if built-in search tree is present.
00262       bool  isSearchAvailable () { return m_searchAvailable; }
      
      //! Returns current CHM file text encoding.
00265       const KCHMTextEncoding::text_encoding_t * getCurrentEncoding() { return m_currentEncoding; }

      //! Sets the CHM file text encoding.
      bool setCurrentEncoding ( const KCHMTextEncoding::text_encoding_t * enc);

      //! Gets the CHM file pointer. Used when more than one CHM file is loaded (i.e. cross-links in CHM files)
      CHMFile * getCHMfilePointer ( const QString& filename );

      //! Returns the topic from #TOPICS
      QString           getTopicByUrl ( const QString& url );

private:
      //! Parse the HHC or HHS file, and fill the context (asIndex is false) or index (asIndex is true) tree.
      bool  ParseHhcAndFillTree (const QString& file, QDomDocument *tree, bool asIndex);
//    bool  ParseHhcAndFillTree (const QString& file, K3ListView *tree, bool asIndex);
      //! Parse the HHC or HHS file, and fill the data.
      bool ParseChmIndexFile ( const QString& file, bool asIndex, KCHMParsedIndexEntry_t & cont );

      //! Encode the string with the currently selected text codec, if possible. Or return as-is, if not.
00284       inline QString encodeWithCurrentCodec (const QString& str) const
      {
            return (m_textCodec ? m_textCodec->toUnicode (str.toLocal8Bit()) : str);
      }

      //! Encode the string with the currently selected text codec, if possible. Or return as-is, if not.
00290       inline QString encodeWithCurrentCodec (const char * str) const
      {
            return (m_textCodec ? m_textCodec->toUnicode (str) : (QString) str);
      }
      
      //! Helper. Translates from Win32 encodings to generic wxWidgets ones.
      const char * GetFontEncFromCharSet (const QString& font) const;

      //! Helper. Returns the $FIftiMain offset of leaf node or 0.
      u_int32_t GetLeafNodeOffset(const QString& text,
                            u_int32_t initalOffset,
                            u_int32_t buffSize,
                            u_int16_t treeDepth);

      //! Helper. Processes the word location code entries while searching.
      bool ProcessWLC(u_int64_t wlc_count, u_int64_t wlc_size,
                  u_int32_t wlc_offset, unsigned char ds,
                  unsigned char dr, unsigned char cs,
                  unsigned char cr, unsigned char ls,
                  unsigned char lr, KCHMSearchProgressResults_t& results,
                  bool phrase_search);

      //! Looks up as much information as possible from #WINDOWS/#STRINGS.
      bool InfoFromWindows();

      //! Looks up as much information as possible from #SYSTEM.
      bool InfoFromSystem();
      
      //! Sets up textCodec
      void setupTextCodec (const char * name);

      //! Guess used text encoding, using m_detectedLCID and m_font. Set up m_textCodec
      bool guessTextEncoding ();

      //! Change the current CHM encoding
      bool  changeFileEncoding (const char *qtencoding);

      //! Convert the word, so it has an appropriate encoding
      QByteArray convertSearchWord ( const QString &src );

      /*!
       * Helper procedure in TOC parsing, decodes the string between the quotes (first or last) with decoding HTML
       * entities like &iacute;
       */
      inline int findStringInQuotes (const QString& tag, int offset, QString& value, bool firstquote, bool decodeentities);

private:
//    typedef QMap<QString, KCHMMainTreeViewItem*> KCHMTreeUrlMap_t;

      //! Pointer to the chmlib structure
00340       chmFile     *     m_chmFile;
      
      //! Opened file name
00343       QString     m_filename;
      
      //! Home url, got from CHM file
00346       QString     m_home;

      //! Context tree filename. Got from CHM file
00349       QString     m_topicsFile;

      //! Index filename. Got from CHM file
00352       QString     m_indexFile;

      //! Chm Title. Got from CHM file
00355       QString           m_title;

      //! Used by getTreeItem() to find the tree element quickly
00358     QMap <QString, int> m_UrlPage;
    QMap <int,QString> m_PageUrl;

      // Localization stuff
      //! LCID from CHM file, used in encoding detection
00363       short             m_detectedLCID;

      //! font charset from CHM file, used in encoding detection
00366       QString           m_font;

      //! Chosen text codec
00369       QTextCodec  *     m_textCodec;

      //! Current encoding
00372       const KCHMTextEncoding::text_encoding_t * m_currentEncoding;

      typedef QMap<QString, CHMFile *>          chm_loaded_files_t;

      //! Loaded chm files
00377       chm_loaded_files_t      m_chmLoadedFiles;

      //! Map to decode HTML entitles like &acute; based on current encoding
00380       QMap<QString, QString>                          m_entityDecodeMap;

      //! true if /#TOPICS, /#STRINGS, /#URLTBL and  /#URLSTR are resolved, and the members below are valid
00383       bool        m_lookupTablesValid;

      //! pointer to /#TOPICS
00386       chmUnitInfo m_chmTOPICS;

      //! pointer to /#STRINGS
00389       chmUnitInfo m_chmSTRINGS;

      //! pointer to /#URLTBL
00392       chmUnitInfo m_chmURLTBL;

      //! pointer to /#URLSTR
00395       chmUnitInfo m_chmURLSTR;

      //! Indicates whether the built-in search is available. This is true only when m_lookupTablesValid
      //! is true, and m_chmFIftiMain is resolved.
00399       bool              m_searchAvailable;

      //! pointer to /$FIftiMain
00402       chmUnitInfo m_chmFIftiMain;
      
private:
      //! No copy construction allowed.
      CHMFile(const CHMFile&);

      //! No assignments.
      CHMFile& operator=(const CHMFile&);
};


#endif // INCLUDE_CHMFILE_H

// Generated by Doxygen 1.6.0