Foxit PDF SDK
fs_search.h
Go to the documentation of this file.
1 
15 #ifndef FS_SEARCH_H_
16 #define FS_SEARCH_H_
17 
18 #include "common/fs_common.h"
19 #include "pdf/annots/fs_annot.h"
20 #include "pdf/fs_pdfpage.h"
21 #ifndef _FX_NO_XFA_
22 #include "addon/xfa/fs_xfa.h"
23 #endif // #ifndef _FX_NO_XFA_
24 
30 namespace foxit {
34 namespace pdf {
40 {
41  public:
48  virtual bool NeedToCancelNow() = 0;
49 };
50 
52 class TextPageCharInfo FS_FINAL : public Object {
53  public:
59  typedef enum _TextCharFlag {
61  e_Unknown = -1,
63  e_Normal = 0,
69  e_Hyphen = 3,
72  } TextCharFlag;
73 
74 
91  const RectF& char_box, const RectF& char_outbox, const Matrix& matrix) {
92  this->font = font;
93  this->flag = flag;
94  this->font_size = font_size;
95  this->origin_x = origin_x;
96  this->origin_y = origin_y;
97  this->char_box = char_box;
98  this->char_outbox = char_outbox;
99  this->matrix = matrix;
100  }
101 
105  , font_size(0)
106  , origin_x(0)
107  , origin_y(0) {}
108 
114  TextPageCharInfo(const TextPageCharInfo& char_info) {
     // Constructor, with another character information object.
     // Copies all eight fields of char_info (font, flag, font_size, origin_x,
     // origin_y, char_box, char_outbox, matrix) by assignment in the body.
115  this->font = char_info.font;
116  this->flag = char_info.flag;
117  this->font_size = char_info.font_size;
118  this->origin_x = char_info.origin_x;
119  this->origin_y = char_info.origin_y;
120  this->char_box = char_info.char_box;
121  this->char_outbox = char_info.char_outbox;
122  this->matrix = char_info.matrix;
123  }
124 
133  this->font = char_info.font;
134  this->flag = char_info.flag;
135  this->font_size = char_info.font_size;
136  this->origin_x = char_info.origin_x;
137  this->origin_y = char_info.origin_y;
138  this->char_box = char_info.char_box;
139  this->char_outbox = char_info.char_outbox;
140  this->matrix = char_info.matrix;
141  return *this;
142  }
143 
151  bool operator == (const TextPageCharInfo& char_info) const {
     // Equal operator. Returns true only when no field differs from char_info.
     // The float fields (font_size, origin_x, origin_y) count as different when
     // their absolute difference exceeds FLT_EPSILON; font, flag, char_box,
     // char_outbox and matrix are compared with their own operator!=.
152  if (font != char_info.font || flag != char_info.flag || fabs(font_size - char_info.font_size) > FLT_EPSILON ||
153  fabs(origin_x - char_info.origin_x) > FLT_EPSILON || fabs(origin_y - char_info.origin_y) > FLT_EPSILON ||
154  char_box != char_info.char_box || char_outbox != char_info.char_outbox || matrix != char_info.matrix)
155  return false;
156 
157  return true;
158  }
159 
167  bool operator != (const TextPageCharInfo& char_info) const{
     // Not equal operator. Returns true as soon as any field differs from
     // char_info, using the same predicate as operator== (FLT_EPSILON tolerance
     // for font_size, origin_x, origin_y; operator!= for the other fields).
     // NOTE(review): the condition is duplicated from operator==; the two must
     // be kept in sync if a field is added.
168  if (font != char_info.font || flag != char_info.flag || fabs(font_size - char_info.font_size) > FLT_EPSILON ||
169  fabs(origin_x - char_info.origin_x) > FLT_EPSILON || fabs(origin_y - char_info.origin_y) > FLT_EPSILON ||
170  char_box != char_info.char_box || char_outbox != char_info.char_outbox || matrix != char_info.matrix)
171  return true;
172 
173  return false;
174  }
175 
193  void Set(const common::Font& font, TextCharFlag flag, float font_size, float origin_x, float origin_y,
194  const RectF& char_box, const RectF& char_outbox, const Matrix& matrix) {
     // Set value. Overwrites every field of this object with the argument of
     // the same name; `this->` is required because each parameter shadows the
     // member it is assigned to.
195  this->font = font;
196  this->flag = flag;
197  this->font_size = font_size;
198  this->origin_x = origin_x;
199  this->origin_y = origin_y;
200  this->char_box = char_box;
201  this->char_outbox = char_outbox;
202  this->matrix = matrix;
203  }
204 
209 
217 
223  float font_size;
224 
228  float origin_x;
229 
233  float origin_y;
234 
239 
244 
249 };
250 
266 class TextPage FS_FINAL : public Base {
     // Declares the text-content queries available for one PDF page: character
     // count and per-character info, text by range / rectangle / order, and
     // word, rectangle and rotation lookups. Only declarations appear here.
267  public:
     // Enumeration for parsing flags used for text page.
     // The enumerator lines are absent from this listing; the index below names
     // e_ParseTextNormal (line 275), e_ParseTextOutputHyphen (line 277) and
     // e_ParseTextUseStreamOrder (line 279).
273  typedef enum _TextParseFlags {
280  } TextParseFlags;
281 
     // Enumeration for text order flag which is used when getting text content
     // of a PDF page. Enumerator lines are absent from this listing; the index
     // below names e_TextStreamOrder (line 289) and e_TextDisplayOrder (line 291).
287  typedef enum _TextOrderFlag {
292  } TextOrderFlag;
293 
294 
     // Constructor, from a parsed PDF page. flags defaults to e_ParseTextNormal.
302  explicit TextPage(const PDFPage& page, int flags = foxit::pdf::TextPage::e_ParseTextNormal);
303 
     // Destructor.
305  ~TextPage();
     // Constructor, with another text page object.
311  TextPage(const TextPage& other);
     // Assign operator.
319  TextPage& operator = (const TextPage& other);
320 
     // Equal operator.
328  bool operator == (const TextPage& other) const;
     // Not equal operator.
336  bool operator != (const TextPage& other) const;
337 
     // Check whether current object is empty or not.
345  bool IsEmpty() const;
346 
     // Get the count of all the characters.
352  int GetCharCount() const;
353 
     // Get character information of a specific character.
363  TextPageCharInfo GetCharInfo(int char_index);
364 
     // Get all the characters within a range specified by a start index and
     // count. Defaults: start = 0, count = -1.
379  WString GetChars(int start = 0, int count = -1) const;
380 
     // Get the character index at or around a specified position on the page,
     // in PDF coordinate system.
393  int GetIndexAtPos(float x, float y, float tolerance) const;
394 
     // Get the text within a rectangle, in PDF coordinate system.
402  WString GetTextInRect(const RectF& rect) const;
403 
     // Get the page text, in the order selected by flag.
412  WString GetText(TextOrderFlag flag) const;
413 
     // Get the character range of a word at or around a specified position on
     // the page.
428  common::Range GetWordAtPos(float x, float y, float tolerance) const;
429 
     // Count the text rectangles within a range specified by a start index and
     // count. Defaults: start = 0, count = -1.
441  int GetTextRectCount(int start = 0, int count = -1);
442 
     // Get the text rectangle by the index.
452  RectF GetTextRect(int rect_index) const;
453 
     // Get the text trend (as rotation) of a specified rectangle.
464  common::Rotation GetBaselineRotation(int rect_index);
465 
474 
     // Get the character index range of all text rectangles within the
     // specified rectangle region.
482  common::Range GetCharRange(const RectF& rect);
483 
498 
499  // User is strongly recommended NOT to use this method; otherwise unknown situation may occur.
500  explicit TextPage(FS_HANDLE handle = NULL);
501 };
502 
514 class TextSearch FS_FINAL : public Base {
     // Declares a keyword search that can be constructed over a PDF document,
     // an XFA document, a text page or an annotation, configured with a
     // pattern / page range / flags, and stepped with FindNext / FindPrev.
     // Only declarations appear here.
515  public:
     // Enumeration for searching flags.
     // The enumerator lines are absent from this listing; the index below names
     // e_SearchNormal (line 523), e_SearchMatchCase (line 525),
     // e_SearchMatchWholeWord (line 527), e_SearchConsecutive (line 529) and
     // e_SearchNotMatchFullWidth (line 531).
521  typedef enum _SearchFlags {
532  } SearchFlags;
533 
534 
     // Constructor, for a PDF document. cancel defaults to NULL and flags to
     // foxit::pdf::TextPage::e_ParseTextNormal.
550  explicit TextSearch(const PDFDoc& document, SearchCancelCallback* cancel = NULL, int flags = foxit::pdf::TextPage::e_ParseTextNormal);
551 
552  #ifndef _FX_NO_XFA_
553 
     // Constructor, for an XFA document; declared only when _FX_NO_XFA_ is not
     // defined. cancel defaults to NULL.
564  explicit TextSearch(const foxit::addon::xfa::XFADoc& xfa_document, foxit::pdf::SearchCancelCallback* cancel = NULL);
565 #endif // #ifndef _FX_NO_XFA_
566 
     // Constructor, for a text page object.
571  explicit TextSearch(const foxit::pdf::TextPage& text_page);
572 
     // Constructor, for an annotation object.
581  explicit TextSearch(const foxit::pdf::annots::Annot& annot);
582 
     // Destructor.
584  ~TextSearch();
     // Constructor, with another text search object.
590  TextSearch(const TextSearch& other);
     // Assign operator.
598  TextSearch& operator = (const TextSearch& other);
599 
     // Equal operator.
607  bool operator == (const TextSearch& other) const;
     // Not equal operator.
615  bool operator != (const TextSearch& other) const;
616 
     // Check whether current object is empty or not.
624  bool IsEmpty() const;
625 
     // Set keywords to search. is_regex_search defaults to false.
636  bool SetPattern(const wchar_t* key_words, bool is_regex_search = false);
637 
     // Set starting page index.
653  bool SetStartPage(int page_index);
654 
     // Set ending page index.
670  bool SetEndPage(int page_index);
671 
     // Set starting character index, from where the search process is to be
     // started.
693  bool SetStartCharacter(int char_index);
694 
     // Set search flags.
706  bool SetSearchFlags(uint32 search_flags);
707 
     // Search for next matched pattern.
714  bool FindNext();
715 
     // Search for previous matched pattern.
722  bool FindPrev();
723 
     // Get the rectangles of current match pattern.
729  RectFArray GetMatchRects() const;
730 
     // Get the page index, to which current match belongs.
739  int GetMatchPageIndex() const;
740 
747 
758 
769 
     // Get the index of the first character of current match pattern, based on
     // current match page.
776  int GetMatchStartCharIndex() const;
777 
     // Get the index of the last character of current match pattern, based on
     // current match page.
784  int GetMatchEndCharIndex() const;
785 
786  // User is strongly recommended NOT to use this method; otherwise unknown situation may occur.
787  explicit TextSearch(FS_HANDLE handle = NULL);
788 };
789 
798 class TextLink FS_FINAL : public Base{
     // Declares accessors for one text link: its URI and its start / end
     // character indexes. Only declarations appear here.
799  public:
     // Destructor.
801  ~TextLink();
     // Constructor, with another text link object.
807  TextLink(const TextLink& other);
     // Assign operator.
815  TextLink& operator = (const TextLink& other);
816 
     // Equal operator.
824  bool operator == (const TextLink& other) const;
     // Not equal operator.
832  bool operator != (const TextLink& other) const;
833 
     // Check whether current object is empty or not.
841  bool IsEmpty() const;
842 
     // Get the URI of this text link, as a wide string.
851  WString GetURI();
852 
     // Get the start character index of this text link.
     // NOTE(review): presumably an index into the owning text page; confirm.
858  int GetStartCharIndex();
859 
     // Get the end character index of this text link.
865  int GetEndCharIndex();
866 
873  // User is strongly recommended NOT to use this method; otherwise unknown situation may occur.
874  explicit TextLink(FS_HANDLE handle = NULL);
875 
876 };
877 
882 class PageTextLinks FS_FINAL : public Base{
     // Declares access to the text links of a text page: a count and an indexed
     // getter returning TextLink objects. Only declarations appear here.
883  public:
     // Constructor, from a text page object.
889  explicit PageTextLinks(const TextPage& page);
     // Constructor, with another PageTextLinks object.
895  PageTextLinks(const PageTextLinks& other);
     // Assign operator.
903  PageTextLinks& operator = (const PageTextLinks& other);
     // Equal operator.
911  bool operator == (const PageTextLinks& other) const ;
     // Not equal operator.
919  bool operator != (const PageTextLinks& other) const ;
920 
     // Check whether current object is empty or not.
928  bool IsEmpty() const;
     // Destructor.
930  ~PageTextLinks();
931 
     // Get the count of text links.
937  int GetTextLinkCount();
938 
     // Get a text link by index.
     // NOTE(review): valid range is presumably 0 .. GetTextLinkCount()-1; confirm.
947  TextLink GetTextLink(int index);
948 
949  // User is strongly recommended NOT to use this method; otherwise unknown situation may occur.
950  explicit PageTextLinks(FS_HANDLE handle = NULL);
951 };
952 } // namespace pdf
953 } // namespace foxit
954 #endif // FS_SEARCH_H_
955 
foxit::pdf::TextPage::TextParseFlags
TextParseFlags
Enumeration for parsing flags used for text page.
Definition: fs_search.h:273
foxit::pdf::TextSearch::e_SearchMatchCase
If set, match the case of keyword when searching.
Definition: fs_search.h:525
foxit::pdf::TextPage::IsEmpty
bool IsEmpty() const
Check whether current object is empty or not.
foxit::pdf::TextPage::e_TextStreamOrder
If this is set, that means to get text content of a PDF page by the stream order.
Definition: fs_search.h:289
foxit::pdf::TextPageCharInfo::origin_x
float origin_x
The x-coordinate of the origin position.
Definition: fs_search.h:228
foxit::pdf::TextPageCharInfo::char_box
RectF char_box
The glyph bounding box in page space.
Definition: fs_search.h:238
foxit::FS_HANDLE
void * FS_HANDLE
Handle type.
Definition: fs_basictypes.h:214
foxit::pdf::TextPage::e_ParseTextOutputHyphen
Parse the text content of a PDF page with outputting the hyphen on a line feed.
Definition: fs_search.h:277
foxit::pdf::TextPage::GetTextRectArrayByRect
RectFArray GetTextRectArrayByRect(const RectF &rect)
Get the array of all text rectangles within the specified rectangle region.
foxit::pdf::annots::Annot
Definition: fs_annot.h:994
foxit::pdf::TextPageCharInfo::font
common::Font font
A font for character.
Definition: fs_search.h:208
foxit::pdf::TextPage::GetCharCount
int GetCharCount() const
Get the count of all the characters.
foxit::pdf::TextSearch::SetEndPage
bool SetEndPage(int page_index)
Set ending page index.
foxit::pdf::TextSearch::operator!=
bool operator!=(const TextSearch &other) const
Not equal operator.
foxit::Object
CFX_Object Object
Object type.
Definition: fs_basictypes.h:221
foxit::pdf::TextPageCharInfo::flag
TextCharFlag flag
Flags to indicate which properties of textpage character flag are meaningful.
Definition: fs_search.h:216
foxit::pdf::TextSearch::GetMatchStartCharIndex
int GetMatchStartCharIndex() const
Get the index of the first character of current match pattern, based on current match page.
foxit::pdf::TextPageCharInfo::matrix
Matrix matrix
The matrix of the character.
Definition: fs_search.h:248
foxit::pdf::TextPage::operator=
TextPage & operator=(const TextPage &other)
Assign operator.
foxit::pdf::SearchCancelCallback::NeedToCancelNow
virtual bool NeedToCancelNow()=0
A callback function used to check whether to cancel the searching process or not.
foxit::pdf::TextPageCharInfo::origin_y
float origin_y
The y-coordinate of the origin position.
Definition: fs_search.h:233
foxit::pdf::TextPage::GetTextRect
RectF GetTextRect(int rect_index) const
Get the text rectangle by the index.
foxit::pdf::TextPage::e_TextDisplayOrder
If this is set, that means to get text content of a PDF page by the display order.
Definition: fs_search.h:291
foxit::pdf::TextPage
Definition: fs_search.h:266
foxit::pdf::TextPage::GetWordAtPos
common::Range GetWordAtPos(float x, float y, float tolerance) const
Get the character range of a word at or around a specified position on the page, in PDF coordinate system.
foxit::pdf::TextPageCharInfo::e_Hyphen
Character flag: Hyphen.
Definition: fs_search.h:69
fs_common.h
Header file for common definitions and classes.
foxit::pdf::TextSearch::GetMatchRects
RectFArray GetMatchRects() const
Get the rectangles of current match pattern.
CFX_ArrayTemplate< RectF >
foxit::pdf::TextPageCharInfo::e_Generated
Character flag: Generated.
Definition: fs_search.h:65
foxit::pdf::TextPageCharInfo::char_outbox
RectF char_outbox
The typographic (display and printing) bounding box in page space.
Definition: fs_search.h:243
foxit::pdf::TextPage::GetBaselineRotation
common::Rotation GetBaselineRotation(int rect_index)
Get the text trend (as rotation) of a specified rectangle.
foxit::pdf::TextPage::GetCharRange
common::Range GetCharRange(const RectF &rect)
Get the character index range of all text rectangles within the specified rectangle region.
foxit::pdf::TextSearch::GetMatchEndCharIndex
int GetMatchEndCharIndex() const
Get the index of the last character of current match pattern, based on current match page.
foxit::pdf::TextSearch::operator==
bool operator==(const TextSearch &other) const
Equal operator.
foxit::pdf::TextPageCharInfo
Definition: fs_search.h:52
foxit::pdf::TextPage::e_ParseTextUseStreamOrder
Parse the text content of a PDF page by the stream order.
Definition: fs_search.h:279
foxit::pdf::TextPage::GetText
WString GetText(TextOrderFlag flag) const
Get the page text.
foxit::pdf::TextSearch::SetStartCharacter
bool SetStartCharacter(int char_index)
Set starting character index, from where the search process is to be started.
foxit::pdf::TextSearch::FindPrev
bool FindPrev()
Search for previous matched pattern.
foxit::pdf::TextPageCharInfo::TextPageCharInfo
TextPageCharInfo(const TextPageCharInfo &char_info)
Constructor, with another character information object.
Definition: fs_search.h:114
foxit::pdf::TextPageCharInfo::font_size
float font_size
Font size for character.
Definition: fs_search.h:223
fs_xfa.h
Header file for XFA related definitions and functions.
foxit::pdf::TextPageCharInfo::operator=
TextPageCharInfo & operator=(const TextPageCharInfo &char_info)
Assign operator.
Definition: fs_search.h:132
foxit::pdf::TextPageCharInfo::e_UnUnicode
Character flag: UnUnicode.
Definition: fs_search.h:67
foxit::pdf::TextPage::GetChars
WString GetChars(int start=0, int count=-1) const
Get all the characters within a range specified by a start index and count.
foxit::pdf::TextSearch::GetMatchPageIndex
int GetMatchPageIndex() const
Get the page index, to which current match belongs.
foxit::pdf::TextSearch::SetStartPage
bool SetStartPage(int page_index)
Set starting page index.
foxit::pdf::TextPage::GetIndexAtPos
int GetIndexAtPos(float x, float y, float tolerance) const
Get the character index at or around a specified position on the page, in PDF coordinate system.
foxit::common::Rotation
Rotation
Enumeration for rotation.
Definition: fs_common.h:57
foxit::pdf::TextPageCharInfo::e_Normal
Character flag: Normal.
Definition: fs_search.h:63
foxit::pdf::TextSearch::e_SearchMatchWholeWord
If set, match the whole word of keyword when searching.
Definition: fs_search.h:527
foxit::pdf::TextSearch::e_SearchConsecutive
If set, match the key word consecutively when searching. For example, "CC" will be matched twice in "...
Definition: fs_search.h:529
foxit::pdf::TextPageCharInfo::TextCharFlag
TextCharFlag
Enumeration for PDF textpage character flag.
Definition: fs_search.h:59
fs_pdfpage.h
Header file for PDF page related definitions and classes.
foxit
Foxit namespace.
Definition: fs_taggedpdf.h:27
foxit::pdf::TextPage::e_ParseTextNormal
Parse the text content of a PDF page by normalizing characters based on their positions in the PDF pa...
Definition: fs_search.h:275
foxit::pdf::TextPage::TextPage
TextPage(const PDFPage &page, int flags=foxit::pdf::TextPage::e_ParseTextNormal)
Constructor, from a parsed PDF page.
foxit::addon::xfa::XFADoc
Definition: fs_xfa.h:898
foxit::pdf::TextSearch::operator=
TextSearch & operator=(const TextSearch &other)
Assign operator.
foxit::pdf::TextPage::GetTextInRect
WString GetTextInRect(const RectF &rect) const
Get the text within a rectangle, in PDF coordinate system.
foxit::pdf::TextPageCharInfo::TextPageCharInfo
TextPageCharInfo(const common::Font &font, TextCharFlag flag, float font_size, float origin_x, float origin_y, const RectF &char_box, const RectF &char_outbox, const Matrix &matrix)
Constructor, with parameters.
Definition: fs_search.h:90
foxit::pdf::TextSearch::SetPattern
bool SetPattern(const wchar_t *key_words, bool is_regex_search=false)
Set keywords to search.
foxit::pdf::TextSearch::FindNext
bool FindNext()
Search for next matched pattern.
foxit::pdf::TextPageCharInfo::TextPageCharInfo
TextPageCharInfo()
Constructor.
Definition: fs_search.h:103
foxit::pdf::TextSearch::IsEmpty
bool IsEmpty() const
Check whether current object is empty or not.
NULL
#define NULL
The null-pointer value.
Definition: fx_system.h:792
foxit::pdf::TextSearch::e_SearchNormal
No special searching options.
Definition: fs_search.h:523
CFX_FloatRect
Definition: fx_coordinates.h:771
foxit::pdf::TextPageCharInfo::Set
void Set(const common::Font &font, TextCharFlag flag, float font_size, float origin_x, float origin_y, const RectF &char_box, const RectF &char_outbox, const Matrix &matrix)
Set value.
Definition: fs_search.h:193
foxit::pdf::TextSearch::SearchFlags
SearchFlags
Enumeration for searching flags.
Definition: fs_search.h:521
foxit::pdf::TextSearch
Definition: fs_search.h:514
foxit::pdf::PDFDoc
Definition: fs_pdfdoc.h:648
foxit::pdf::PDFPage
Definition: fs_pdfpage.h:412
foxit::pdf::SearchCancelCallback
Definition: fs_search.h:39
foxit::pdf::TextPageCharInfo::e_Unknown
Character flag: Unknown.
Definition: fs_search.h:61
foxit::pdf::TextSearch::TextSearch
TextSearch(const PDFDoc &document, SearchCancelCallback *cancel=0, int flags=foxit::pdf::TextPage::e_ParseTextNormal)
Constructor, for a PDF document.
foxit::pdf::TextPageCharInfo::e_ComboWord
Character flag: ComboWord.
Definition: fs_search.h:71
foxit::pdf::TextSearch::SetSearchFlags
bool SetSearchFlags(uint32 search_flags)
Set search flags.
foxit::pdf::TextPage::~TextPage
~TextPage()
Destructor.
foxit::common::Font
Definition: fs_common.h:1428
CFX_Matrix
Definition: fx_coordinates.h:1076
foxit::pdf::TextSearch::e_SearchNotMatchFullWidth
If set, to ignore full-width characters and treat all characters as standard ASCII or standard-width ...
Definition: fs_search.h:531
fs_annot.h
Header file for annotation related definitions and classes.
foxit::pdf::TextSearch::GetMatchSentenceEndIndex
int GetMatchSentenceEndIndex()
Get the index of the last character of current matched pattern, based on the matched sentence.
foxit::pdf::TextPageCharInfo::operator==
bool operator==(const TextPageCharInfo &char_info) const
Equal operator.
Definition: fs_search.h:151
CFX_WideString
WIDE STRING CLASS.
Definition: fx_string.h:1461
foxit::pdf::TextSearch::GetMatchSentence
WString GetMatchSentence()
Get the sentence that contains current match pattern.
foxit::common::Range
Definition: fs_common.h:1273
foxit::pdf::TextPage::GetTextUnderAnnot
WString GetTextUnderAnnot(annots::Annot &annot) const
Get the page text which intersects with a specified annotation.
foxit::pdf::TextPage::GetTextRectCount
int GetTextRectCount(int start=0, int count=-1)
Count the text rectangles within a range specified by a start index and count.
foxit::uint32
FX_UINT32 uint32
32-bit unsigned integer.
Definition: fs_basictypes.h:196
foxit::pdf::TextPage::operator!=
bool operator!=(const TextPage &other) const
Not equal operator.
foxit::pdf::TextSearch::GetMatchSentenceStartIndex
int GetMatchSentenceStartIndex()
Get the index of the first character of current matched pattern, based on the matched sentence.
foxit::pdf::TextSearch::~TextSearch
~TextSearch()
Destructor.
foxit::Base
Definition: fs_basictypes.h:443
foxit::pdf::TextPage::operator==
bool operator==(const TextPage &other) const
Equal operator.
foxit::pdf::TextPage::TextOrderFlag
TextOrderFlag
Enumeration for text order flag which is used when getting text content of a PDF page.
Definition: fs_search.h:287
foxit::pdf::TextPage::GetCharInfo
TextPageCharInfo GetCharInfo(int char_index)
Get character information of a specific character.
foxit::pdf::TextPageCharInfo::operator!=
bool operator!=(const TextPageCharInfo &char_info) const
Not equal operator.
Definition: fs_search.h:167