Foxit PDF SDK
fs_search.h
Go to the documentation of this file.
1 
15 #ifndef FS_SEARCH_H_
16 #define FS_SEARCH_H_
17 
18 #include "common/fs_common.h"
19 #include "pdf/annots/fs_annot.h"
20 #include "pdf/fs_pdfpage.h"
21 #ifndef _FX_NO_XFA_
22 #include "addon/xfa/fs_xfa.h"
23 #endif
24 
30 namespace foxit {
34 namespace pdf {
// NOTE(review): the class header (listing line 39) is missing from this extract; presumably this is the
// SearchCancelCallback interface that the TextSearch constructors accept -- confirm against the real header.
40 {
41  public:
     // A callback function used to check whether to cancel the searching process or not.
     // Pure virtual: implementers must override it.
48  virtual bool NeedToCancelNow() = 0;
49 };
50 
// Character information object for a text page character: font, flag, font size, origin position,
// bounding boxes and matrix.
// NOTE(review): this is a Doxygen source listing. The leading numbers are listing line numbers, and several
// declaration lines (function signatures, some enum values, some data members) are absent from this extract.
52 class TextPageCharInfo FS_FINAL : public Object {
53  public:
     // Enumeration for PDF textpage character flag.
59  typedef enum _TextCharFlag {
     // Character flag: Unknown.
61  e_Unknown = -1,
     // Character flag: Normal.
63  e_Normal = 0,
     // NOTE(review): the index for this file also lists "Generated" (listing line 65) and "UnUnicode"
     // (listing line 67) flags; their lines are missing from this extract.
     // Character flag: Hyphen.
69  e_Hyphen = 3,
     // NOTE(review): a "ComboWord" flag is indexed at listing line 71 but its line is missing here.
72  } TextCharFlag;
73 
74 
     // Body of the constructor with parameters. The signature (listing line 90) is missing from this extract;
     // per the index it is TextPageCharInfo(const common::Font font, TextCharFlag flag, float font_size,
     // float origin_x, float origin_y, RectF char_box, RectF char_outbox, Matrix matrix).
     // Each parameter is copied into the data member of the same name.
92  this->font = font;
93  this->flag = flag;
94  this->font_size = font_size;
95  this->origin_x = origin_x;
96  this->origin_y = origin_y;
97  this->char_box = char_box;
98  this->char_outbox = char_outbox;
99  this->matrix = matrix;
100  }
101 
     // Tail of the default constructor's initializer list (indexed at listing line 103; the first lines are
     // missing here). The visible part zero-initializes font_size, origin_x and origin_y.
105  , font_size(0)
106  , origin_x(0)
107  , origin_y(0) {}
108 
     // NOTE(review): the destructor is indexed at listing line 110 but its line is missing from this extract.
111 
     // Constructor, with another character information object. Copies every data member from char_info.
117  TextPageCharInfo(const TextPageCharInfo& char_info) {
118  this->font = char_info.font;
119  this->flag = char_info.flag;
120  this->font_size = char_info.font_size;
121  this->origin_x = char_info.origin_x;
122  this->origin_y = char_info.origin_y;
123  this->char_box = char_info.char_box;
124  this->char_outbox = char_info.char_outbox;
125  this->matrix = char_info.matrix;
126  }
127 
     // Body of the assign operator. The signature (listing line 135) is missing from this extract; per the
     // index it is TextPageCharInfo& operator=(const TextPageCharInfo& char_info).
     // Copies every data member from char_info and returns a reference to this object.
136  this->font = char_info.font;
137  this->flag = char_info.flag;
138  this->font_size = char_info.font_size;
139  this->origin_x = char_info.origin_x;
140  this->origin_y = char_info.origin_y;
141  this->char_box = char_info.char_box;
142  this->char_outbox = char_info.char_outbox;
143  this->matrix = char_info.matrix;
144  return *this;
145  }
146 
     // Equal operator. Returns true only when font, flag, char_box, char_outbox and matrix compare equal and
     // the three float members (font_size, origin_x, origin_y) differ by no more than FLT_EPSILON.
     // NOTE(review): fabs/FLT_EPSILON are presumably made available through the included project headers --
     // no <math.h>/<float.h> include is visible in this listing; confirm.
154  bool operator == (const TextPageCharInfo& char_info) const {
155  if (font != char_info.font || flag != char_info.flag || fabs(font_size - char_info.font_size) > FLT_EPSILON ||
156  fabs(origin_x - char_info.origin_x) > FLT_EPSILON || fabs(origin_y - char_info.origin_y) > FLT_EPSILON ||
157  char_box != char_info.char_box || char_outbox != char_info.char_outbox || matrix != char_info.matrix)
158  return false;
159 
160  return true;
161  }
162 
     // Not equal operator. Evaluates the same condition as operator== and returns the opposite result.
170  bool operator != (const TextPageCharInfo& char_info) const{
171  if (font != char_info.font || flag != char_info.flag || fabs(font_size - char_info.font_size) > FLT_EPSILON ||
172  fabs(origin_x - char_info.origin_x) > FLT_EPSILON || fabs(origin_y - char_info.origin_y) > FLT_EPSILON ||
173  char_box != char_info.char_box || char_outbox != char_info.char_outbox || matrix != char_info.matrix)
174  return true;
175 
176  return false;
177  }
178 
     // Body of Set ("Set value."). The signature (listing line 196) is missing from this extract; per the
     // index it is void Set(const common::Font font, TextCharFlag flag, float font_size, float origin_x,
     // float origin_y, RectF char_box, RectF char_outbox, Matrix matrix).
     // Overwrites every data member with the parameter of the same name.
198  this->font = font;
199  this->flag = flag;
200  this->font_size = font_size;
201  this->origin_x = origin_x;
202  this->origin_y = origin_y;
203  this->char_box = char_box;
204  this->char_outbox = char_outbox;
205  this->matrix = matrix;
206  }
207 
     // NOTE(review): "common::Font font" (a font for character) is indexed at listing line 211; line missing here.
212 
     // NOTE(review): "TextCharFlag flag" is indexed at listing line 219; line missing here.
220 
     // Font size for character.
226  float font_size;
227 
     // The x-coordinate of the origin position.
231  float origin_x;
232 
     // The y-coordinate of the origin position.
236  float origin_y;
237 
     // NOTE(review): "RectF char_box" (the glyph bounding box in page space) is indexed at listing line 241;
     // line missing here.
242 
     // NOTE(review): "RectF char_outbox" (the typographic, display and printing, bounding box in page space)
     // is indexed at listing line 246; line missing here.
247 
     // NOTE(review): "Matrix matrix" (the matrix of the character) is indexed at listing line 251; line missing here.
252 };
253 
// Text page object constructed from a PDF page; offers character, text and text-rectangle queries.
// NOTE(review): this is a Doxygen source listing. The leading numbers are listing line numbers; the enum
// values and a few method declarations are missing from this extract.
269 class TextPage FS_FINAL : public Base {
270  public:
     // Enumeration for parsing flags used for text page.
     // NOTE(review): the enumerator lines (indexed at listing lines 278, 280 and 282) are missing here;
     // only e_ParseTextNormal is named in this extract (as the constructor default).
276  typedef enum _TextParseFlags {
283  } TextParseFlags;
284 
     // Enumeration for text order flag which is used when getting text content of a PDF page.
     // NOTE(review): the enumerator lines (indexed at listing lines 292 "stream order" and 294 "display order")
     // are missing here.
290  typedef enum _TextOrderFlag {
295  } TextOrderFlag;
296 
297 
     // Constructor, from a parsed PDF page. flags defaults to e_ParseTextNormal.
305  explicit TextPage(const PDFPage& page, int flags = foxit::pdf::TextPage::e_ParseTextNormal);
306 
     // Destructor.
308  ~TextPage();
     // Copy constructor.
314  TextPage(const TextPage& other);
     // Assign operator.
322  TextPage& operator = (const TextPage& other);
323 
     // Equal operator.
331  bool operator == (const TextPage& other) const;
     // Not equal operator.
339  bool operator != (const TextPage& other) const;
340 
     // Check whether current object is empty or not.
348  bool IsEmpty() const;
349 
     // Get the count of all the characters.
355  int GetCharCount() const;
356 
     // Get character information of a specific character.
366  TextPageCharInfo GetCharInfo(int char_index);
367 
     // Get all the characters within a range specified by a start index and count.
     // NOTE(review): count = -1 presumably means "to the end" -- confirm against the API reference.
382  WString GetChars(int start = 0, int count = -1) const;
383 
     // Get the character index at or around a specified position on the page, in PDF coordinate system.
396  int GetIndexAtPos(float x, float y, float tolerance) const;
397 
     // Get the text within a rectangle, in PDF coordinate system.
405  WString GetTextInRect(const RectF& rect) const;
406 
     // Get the page text, using the given text order flag.
415  WString GetText(TextOrderFlag flag) const;
416 
     // Get the character range of a word at or around a specified position on the page.
431  common::Range GetWordAtPos(float x, float y, float tolerance) const;
432 
     // Count the text rectangles within a range specified by a start index and count.
444  int GetTextRectCount(int start = 0, int count = -1);
445 
     // Get the text rectangle by the index.
455  RectF GetTextRect(int rect_index) const;
456 
     // Get the text trend (as rotation) of a specified rectangle.
467  common::Rotation GetBaselineRotation(int rect_index);
468 
     // NOTE(review): "WString GetTextUnderAnnot(annots::Annot& annot) const" is indexed for this file but its
     // line (listing line 477) is missing from this extract.
477 
     // Get the character index range of all text rectangles within the specified rectangle region.
485  common::Range GetCharRange(const RectF rect);
486 
     // NOTE(review): "RectFArray GetTextRectArrayByRect(const RectF rect)" is indexed for this file but its
     // line (listing line 501) is missing from this extract.
501 
502  // User is strongly recommended NOT to use this method; otherwise unknown situation may occur.
503  explicit TextPage(FS_HANDLE handle = NULL);
504 };
505 
// Text search object. It can be constructed for a PDF document, an XFA document (when XFA support is
// compiled in), a text page, or an annotation; matches are walked with FindNext()/FindPrev().
// NOTE(review): this is a Doxygen source listing. The leading numbers are listing line numbers; the enum
// values and two method declarations are missing from this extract.
517 class TextSearch FS_FINAL : public Base {
518  public:
     // Enumeration for searching flags.
     // NOTE(review): the enumerator lines are missing here. The index describes four values: no special
     // searching options (listing line 526), match case (528), match whole word (530), and match the key
     // word consecutively (532).
524  typedef enum _SearchFlags {
533  } SearchFlags;
534 
535 
     // Constructor, for a PDF document. cancel is an optional cancel callback (defaults to NULL).
548  explicit TextSearch(const PDFDoc& document, SearchCancelCallback* cancel = NULL);
549 
     // The XFA constructor is only declared when _FX_NO_XFA_ is not defined.
550  #ifndef _FX_NO_XFA_
551 
     // Constructor taking an XFA document and an optional cancel callback (defaults to NULL).
562  explicit TextSearch(const foxit::addon::xfa::XFADoc& xfa_document, foxit::pdf::SearchCancelCallback* cancel = NULL);
563 #endif
564 
     // Constructor taking a text page object.
569  explicit TextSearch(const foxit::pdf::TextPage& text_page);
570 
     // Constructor taking an annotation object.
579  explicit TextSearch(const foxit::pdf::annots::Annot& annot);
580 
     // Destructor.
582  ~TextSearch();
     // Copy constructor.
588  TextSearch(const TextSearch& other);
     // Assign operator.
596  TextSearch& operator = (const TextSearch& other);
597 
     // Equal operator.
605  bool operator == (const TextSearch& other) const;
     // Not equal operator.
613  bool operator != (const TextSearch& other) const;
614 
     // Check whether current object is empty or not.
622  bool IsEmpty() const;
623 
     // Set keywords to search.
631  bool SetPattern(const wchar_t* key_words);
632 
     // Set starting page index.
648  bool SetStartPage(int page_index);
649 
     // Set ending page index.
665  bool SetEndPage(int page_index);
666 
     // Set search flags.
     // NOTE(review): presumably a combination of SearchFlags values -- confirm against the API reference.
678  bool SetSearchFlags(uint32 search_flags);
679 
     // Search for next matched pattern.
686  bool FindNext();
687 
     // Search for previous matched pattern.
694  bool FindPrev();
695 
     // Get the rectangles of current match pattern.
701  RectFArray GetMatchRects() const;
702 
     // Get the page index, to which current match belongs.
711  int GetMatchPageIndex() const;
712 
     // NOTE(review): "WString GetMatchSentence()" (get the sentence that contains current match pattern) is
     // indexed for this file but its line (listing line 719) is missing from this extract.
719 
     // NOTE(review): "int GetMatchSentenceStartIndex()" (index of the first character of current match pattern,
     // based on the match sentence) is indexed but its line (listing line 730) is missing from this extract.
730 
     // Get the index of the first character of current match pattern, based on current match page.
737  int GetMatchStartCharIndex() const;
738 
     // Get the index of the last character of current match pattern, based on current match page.
745  int GetMatchEndCharIndex() const;
746 
747  // User is strongly recommended NOT to use this method; otherwise unknown situation may occur.
748  explicit TextSearch(FS_HANDLE handle = NULL);
749 };
750 
// Text link object: exposes a URI and a start/end character index.
// NOTE(review): this is a Doxygen source listing (leading numbers are listing line numbers) and the original
// doc comments are not part of this extract; the member notes below are derived from the declarations only.
759 class TextLink FS_FINAL : public Base{
760  public:
     // Destructor.
762  ~TextLink();
     // Copy constructor.
768  TextLink(const TextLink& other);
     // Assign operator.
776  TextLink& operator = (const TextLink& other);
777 
     // Equal operator.
785  bool operator == (const TextLink& other) const;
     // Not equal operator.
793  bool operator != (const TextLink& other) const;
794 
     // Check whether current object is empty or not.
802  bool IsEmpty() const;
803 
     // Returns the URI of this text link as a wide string.
812  WString GetURI();
813 
     // Returns the start character index of this text link.
     // NOTE(review): presumably an index into the characters of the owning text page -- confirm.
819  int GetStartCharIndex();
820 
     // Returns the end character index of this text link.
     // NOTE(review): whether the end index is inclusive is not visible here -- confirm.
826  int GetEndCharIndex();
827 
834  // User is strongly recommended NOT to use this method; otherwise unknown situation may occur.
835  explicit TextLink(FS_HANDLE handle = NULL);
836 
837 };
838 
// Collection of the text links of a text page: constructed from a TextPage, it reports a link count and
// returns TextLink objects by index.
// NOTE(review): this is a Doxygen source listing (leading numbers are listing line numbers) and the original
// doc comments are not part of this extract; the member notes below are derived from the declarations only.
843 class PageTextLinks FS_FINAL : public Base{
844  public:
     // Constructor, from a text page object.
850  explicit PageTextLinks(const TextPage& page);
     // Copy constructor.
856  PageTextLinks(const PageTextLinks& other);
     // Assign operator.
864  PageTextLinks& operator = (const PageTextLinks& other);
     // Equal operator.
872  bool operator == (const PageTextLinks& other) const ;
     // Not equal operator.
880  bool operator != (const PageTextLinks& other) const ;
881 
     // Check whether current object is empty or not.
889  bool IsEmpty() const;
     // Destructor.
891  ~PageTextLinks();
892 
     // Returns the count of text links.
898  int GetTextLinkCount();
899 
     // Returns the text link at the given index.
     // NOTE(review): presumably valid indexes run from 0 to GetTextLinkCount() - 1 -- confirm.
908  TextLink GetTextLink(int index);
909 
910  // User is strongly recommended NOT to use this method; otherwise unknown situation may occur.
911  explicit PageTextLinks(FS_HANDLE handle = NULL);
912 };
913 } // namespace pdf
914 } // namespace foxit
915 #endif // FS_SEARCH_H_
916 
int GetMatchStartCharIndex() const
Get the index of the first character of current match pattern, based on current match page.
bool operator!=(const TextPageCharInfo &char_info) const
Not equal operator.
Definition: fs_search.h:170
TextOrderFlag
Enumeration for text order flag which is used when getting text content of a PDF page.
Definition: fs_search.h:290
bool IsEmpty() const
Check whether current object is empty or not.
If set, match the case of keyword when searching.
Definition: fs_search.h:528
Definition: fs_common.h:1157
Definition: fs_search.h:517
RectF char_box
The glyph bounding box in page space.
Definition: fs_search.h:241
CFX_Object Object
Object type.
Definition: fs_basictypes.h:219
bool FindPrev()
Search for previous matched pattern.
TextCharFlag
Enumeration for PDF textpage character flag.
Definition: fs_search.h:59
Character flag: Normal.
Definition: fs_search.h:63
bool SetPattern(const wchar_t *key_words)
Set keywords to search.
common::Range GetWordAtPos(float x, float y, float tolerance) const
Get the character range of a word at or around a specified position on the page, in PDF coordinate system.
~TextSearch()
Destructor.
Character flag: Hyphen.
Definition: fs_search.h:69
bool SetStartPage(int page_index)
Set starting page index.
WIDE STRING CLASS.
Definition: fx_string.h:1459
common::Font font
A font for character.
Definition: fs_search.h:211
int GetMatchEndCharIndex() const
Get the index of the last character of current match pattern, based on current match page.
Character flag: Generated.
Definition: fs_search.h:65
RectF char_outbox
The typographic(display and printing) bounding box in page space.
Definition: fs_search.h:246
Definition: fs_pdfdoc.h:347
If set, match the whole word of keyword when searching.
Definition: fs_search.h:530
bool operator==(const TextSearch &other) const
Equal operator.
TextPageCharInfo(const TextPageCharInfo &char_info)
Constructor, with another character information object.
Definition: fs_search.h:117
TextParseFlags
Enumeration for parsing flags used for text page.
Definition: fs_search.h:276
Character flag: UnUnicode.
Definition: fs_search.h:67
If set, match the key word consecutively when searching. For example, "CC" will be matched twice in "...
Definition: fs_search.h:532
~TextPage()
Destructor.
int GetTextRectCount(int start=0, int count=-1)
Count the text rectangles within a range specified by a start index and count.
Definition: fs_xfa.h:889
RectF GetTextRect(int rect_index) const
Get the text rectangle by the index.
bool operator==(const TextPage &other) const
Equal operator.
TextSearch & operator=(const TextSearch &other)
Assign operator.
float font_size
Font size for character.
Definition: fs_search.h:226
Header file for annotation related definitions and classes.
common::Range GetCharRange(const RectF rect)
Get the character index range of all text rectangles within the specified rectangle region.
TextCharFlag flag
Flags to indicate which properties of textpage character flag are meaningful.
Definition: fs_search.h:219
virtual bool NeedToCancelNow()=0
A callback function used to check whether to cancel the searching process or not.
WString GetText(TextOrderFlag flag) const
Get the page text.
int GetCharCount() const
Get the count of all the characters.
bool operator!=(const TextPage &other) const
Not equal operator.
TextPageCharInfo GetCharInfo(int char_index)
Get character information of a specific character.
FX_UINT32 uint32
32-bit unsigned integer.
Definition: fs_basictypes.h:198
Definition: fs_pdfpage.h:313
void * FS_HANDLE
Handle type.
Definition: fs_basictypes.h:216
TextPageCharInfo(const common::Font font, TextCharFlag flag, float font_size, float origin_x, float origin_y, RectF char_box, RectF char_outbox, Matrix matrix)
Constructor, with parameters.
Definition: fs_search.h:90
Header file for common definitions and classes.
int GetMatchPageIndex() const
Get the page index, to which current match belongs.
TextSearch(const PDFDoc &document, SearchCancelCallback *cancel=0)
Constructor, for a PDF document.
void Set(const common::Font font, TextCharFlag flag, float font_size, float origin_x, float origin_y, RectF char_box, RectF char_outbox, Matrix matrix)
Set value.
Definition: fs_search.h:196
If this is set, that means to get text content of a PDF page by the display order.
Definition: fs_search.h:294
Parse the text content of a PDF page by normalizing characters based on their positions in the PDF pa...
Definition: fs_search.h:278
WString GetTextInRect(const RectF &rect) const
Get the text within a rectangle, in PDF coordinate system.
TextPageCharInfo()
Constructor.
Definition: fs_search.h:103
bool IsEmpty() const
Check whether current object is empty or not.
Definition: fs_basictypes.h:375
Character flag: Unknown.
Definition: fs_search.h:61
Header file for XFA related definitions and functions.
Header file for PDF page related definitions and classes.
bool FindNext()
Search for next matched pattern.
~TextPageCharInfo()
Destructor.
Definition: fs_search.h:110
Definition: fs_annot.h:749
common::Rotation GetBaselineRotation(int rect_index)
Get the text trend (as rotation) of a specified rectangle.
int GetIndexAtPos(float x, float y, float tolerance) const
Get the character index at or around a specified position on the page, in PDF coordinate system.
Definition: fs_common.h:1312
TextPage & operator=(const TextPage &other)
Assign operator.
Rotation
Enumeration for rotation.
Definition: fs_common.h:275
Foxit namespace.
Definition: fs_compare.h:27
int GetMatchSentenceStartIndex()
Get the index of the first character of current match pattern, based on the match sentence.
TextPageCharInfo & operator=(const TextPageCharInfo &char_info)
Assign operator.
Definition: fs_search.h:135
Definition: fs_search.h:52
float origin_y
The y-coordinate of the origin position.
Definition: fs_search.h:236
WString GetTextUnderAnnot(annots::Annot &annot) const
Get the page text which intersects with a specified annotation.
Matrix matrix
The matrix of the character.
Definition: fs_search.h:251
WString GetChars(int start=0, int count=-1) const
Get all the characters within a range specified by a start index and count.
#define NULL
The null-pointer value.
Definition: fx_system.h:771
No special searching options.
Definition: fs_search.h:526
Definition: fx_coordinates.h:1056
WString GetMatchSentence()
Get the sentence that contains current match pattern.
Parse the text content of a PDF page by the stream order.
Definition: fs_search.h:282
bool operator==(const TextPageCharInfo &char_info) const
Equal operator.
Definition: fs_search.h:154
SearchFlags
Enumeration for searching flags.
Definition: fs_search.h:524
TextPage(const PDFPage &page, int flags=foxit::pdf::TextPage::e_ParseTextNormal)
Constructor, from a parsed PDF page.
Definition: fs_search.h:269
Definition: fs_search.h:39
If this is set, that means to get text content of a PDF page by the stream order.
Definition: fs_search.h:292
Character flag: ComboWord.
Definition: fs_search.h:71
bool SetEndPage(int page_index)
Set ending page index.
bool operator!=(const TextSearch &other) const
Not equal operator.
bool SetSearchFlags(uint32 search_flags)
Set search flags.
Definition: fx_coordinates.h:766
RectFArray GetTextRectArrayByRect(const RectF rect)
Get the array of all text rectangles within the specified rectangle region.
RectFArray GetMatchRects() const
Get the rectangles of current match pattern.
Parse the text content of a PDF page with outputting the hyphen on a line feed.
Definition: fs_search.h:280
float origin_x
The x-coordinate of the origin position.
Definition: fs_search.h:231