Foxit PDF SDK
fs_search.h
Go to the documentation of this file.
1 
15 #ifndef FS_SEARCH_H_
16 #define FS_SEARCH_H_
17 
18 #include "common/fs_common.h"
19 #include "pdf/annots/fs_annot.h"
20 #include "pdf/fs_pdfpage.h"
21 #ifndef _FX_NO_XFA_
22 #include "addon/xfa/fs_xfa.h"
23 #endif
24 
30 namespace foxit {
34 namespace pdf {
40 {
41  public:
// A callback function used to check whether to cancel the searching process or not.
// Declared pure virtual (= 0), so a concrete callback class has to override it.
// NOTE(review): the enclosing class header (fs_search.h:39) is not shown in this listing;
// presumably this is SearchCancelCallback, the type TextSearch's constructors accept - confirm.
48  virtual bool NeedToCancelNow() = 0;
49 };
50 
// Value class holding the information of one text-page character: font, character flag,
// font size, origin position, glyph/typographic boxes and matrix.
// NOTE(review): this listing omits several source lines of the class (see the notes below);
// the leading numbers are the original fs_search.h line numbers.
52 class TextPageCharInfo FS_FINAL : public Object {
53  public:
    // Enumeration for PDF textpage character flag.
    // NOTE(review): the enumerators on source lines 65, 67 and 71 (listed as "Generated",
    // "UnUnicode" and "ComboWord" in the reference index) are not shown in this listing.
59  typedef enum _TextCharFlag {
    // Character flag: Unknown.
61  e_Unknown = -1,
    // Character flag: Normal.
63  e_Normal = 0,
    // Character flag: Hyphen.
69  e_Hyphen = 3,
72  } TextCharFlag;
73 
74 
    // Constructor, with parameters. Stores each argument in the member of the same name.
    // NOTE(review): the first line of the signature (fs_search.h:90) is not shown in this
    // listing; per the reference index the full parameter list is
    // (font, flag, font_size, origin_x, origin_y, char_box, char_outbox, matrix).
91  const RectF& char_box, const RectF& char_outbox, const Matrix& matrix) {
92  this->font = font;
93  this->flag = flag;
94  this->font_size = font_size;
95  this->origin_x = origin_x;
96  this->origin_y = origin_y;
97  this->char_box = char_box;
98  this->char_outbox = char_outbox;
99  this->matrix = matrix;
100  }
101 
    // Default constructor (declared at fs_search.h:103). The visible part of the
    // initializer list zeroes font_size, origin_x and origin_y.
    // NOTE(review): source lines 103-104 (signature and first initializer) are not shown.
105  , font_size(0)
106  , origin_x(0)
107  , origin_y(0) {}
108 
    // NOTE(review): the destructor (fs_search.h:110 per the reference index) is not shown.
111 
    // Constructor, with another character information object.
    // Copies every member from char_info by assignment in the constructor body.
117  TextPageCharInfo(const TextPageCharInfo& char_info) {
118  this->font = char_info.font;
119  this->flag = char_info.flag;
120  this->font_size = char_info.font_size;
121  this->origin_x = char_info.origin_x;
122  this->origin_y = char_info.origin_y;
123  this->char_box = char_info.char_box;
124  this->char_outbox = char_info.char_outbox;
125  this->matrix = char_info.matrix;
126  }
127 
    // Assign operator (declared at fs_search.h:135; the signature line is not shown here).
    // Copies every member from char_info and returns a reference to this object.
136  this->font = char_info.font;
137  this->flag = char_info.flag;
138  this->font_size = char_info.font_size;
139  this->origin_x = char_info.origin_x;
140  this->origin_y = char_info.origin_y;
141  this->char_box = char_info.char_box;
142  this->char_outbox = char_info.char_outbox;
143  this->matrix = char_info.matrix;
144  return *this;
145  }
146 
    // Equal operator.
    // Returns false as soon as any member differs, true otherwise. The float members
    // (font_size, origin_x, origin_y) count as different only when their absolute
    // difference exceeds FLT_EPSILON; font, flag, char_box, char_outbox and matrix are
    // compared with their own operator !=.
154  bool operator == (const TextPageCharInfo& char_info) const {
155  if (font != char_info.font || flag != char_info.flag || fabs(font_size - char_info.font_size) > FLT_EPSILON ||
156  fabs(origin_x - char_info.origin_x) > FLT_EPSILON || fabs(origin_y - char_info.origin_y) > FLT_EPSILON ||
157  char_box != char_info.char_box || char_outbox != char_info.char_outbox || matrix != char_info.matrix)
158  return false;
159 
160  return true;
161  }
162 
    // Not equal operator.
    // Evaluates the same member-by-member condition as operator == with the two return
    // values swapped, so it is the exact logical negation of operator ==.
170  bool operator != (const TextPageCharInfo& char_info) const{
171  if (font != char_info.font || flag != char_info.flag || fabs(font_size - char_info.font_size) > FLT_EPSILON ||
172  fabs(origin_x - char_info.origin_x) > FLT_EPSILON || fabs(origin_y - char_info.origin_y) > FLT_EPSILON ||
173  char_box != char_info.char_box || char_outbox != char_info.char_outbox || matrix != char_info.matrix)
174  return true;
175 
176  return false;
177  }
178 
    // Set value. Overwrites all eight members with the given arguments; the body is the
    // same sequence of assignments as the parameterised constructor.
196  void Set(const common::Font& font, TextCharFlag flag, float font_size, float origin_x, float origin_y,
197  const RectF& char_box, const RectF& char_outbox, const Matrix& matrix) {
198  this->font = font;
199  this->flag = flag;
200  this->font_size = font_size;
201  this->origin_x = origin_x;
202  this->origin_y = origin_y;
203  this->char_box = char_box;
204  this->char_outbox = char_outbox;
205  this->matrix = matrix;
206  }
207 
    // NOTE(review): the declarations of the members font (fs_search.h:211), flag (219),
    // char_box (241), char_outbox (246) and matrix (251) are referenced by the index below
    // the listing, but their source lines are not shown here.
212 
220 
    // Font size for character.
226  float font_size;
227 
    // The x-coordinate of the origin position.
231  float origin_x;
232 
    // The y-coordinate of the origin position.
236  float origin_y;
237 
242 
247 
252 };
253 
// Text page: built from a parsed PDF page and queried for characters, text, words and
// text rectangles. Only declarations are visible here; the implementations live elsewhere.
// NOTE(review): this listing omits some source lines (enumerators and a few methods);
// the leading numbers are the original fs_search.h line numbers.
269 class TextPage FS_FINAL : public Base {
270  public:
    // Enumeration for parsing flags used for text page.
    // NOTE(review): the enumerator lines (278, 280, 282 per the reference index) are not
    // shown; e_ParseTextNormal, used as a default below, is presumably one of them.
276  typedef enum _TextParseFlags {
283  } TextParseFlags;
284 
    // Enumeration for text order flag which is used when getting text content of a PDF page.
    // NOTE(review): the enumerator lines (292: stream order, 294: display order per the
    // reference index) are not shown.
290  typedef enum _TextOrderFlag {
295  } TextOrderFlag;
296 
297 
    // Constructor, from a parsed PDF page. flags defaults to e_ParseTextNormal.
305  explicit TextPage(const PDFPage& page, int flags = foxit::pdf::TextPage::e_ParseTextNormal);
306 
    // Destructor.
308  ~TextPage();
    // Constructor, with another text page object.
314  TextPage(const TextPage& other);
    // Assign operator.
322  TextPage& operator = (const TextPage& other);
323 
    // Equal operator.
331  bool operator == (const TextPage& other) const;
    // Not equal operator.
339  bool operator != (const TextPage& other) const;
340 
    // Check whether current object is empty or not.
348  bool IsEmpty() const;
349 
    // Get the count of all the characters.
355  int GetCharCount() const;
356 
    // Get character information of a specific character.
366  TextPageCharInfo GetCharInfo(int char_index);
367 
    // Get all the characters within a range specified by a start index and count.
    // Defaults: start = 0, count = -1.
382  WString GetChars(int start = 0, int count = -1) const;
383 
    // Get the character index at or around a specified position on the page, in PDF coordinate system.
396  int GetIndexAtPos(float x, float y, float tolerance) const;
397 
    // Get the text within a rectangle, in PDF coordinate system.
405  WString GetTextInRect(const RectF& rect) const;
406 
    // Get the page text, in the order selected by flag.
415  WString GetText(TextOrderFlag flag) const;
416 
    // Get the character range of a word at or around a specified position on the page.
431  common::Range GetWordAtPos(float x, float y, float tolerance) const;
432 
    // Count the text rectangles within a range specified by a start index and count.
    // Defaults: start = 0, count = -1.
444  int GetTextRectCount(int start = 0, int count = -1);
445 
    // Get the text rectangle by the index.
455  RectF GetTextRect(int rect_index) const;
456 
    // Get the text trend (as rotation) of a specified rectangle.
467  common::Rotation GetBaselineRotation(int rect_index);
468 
    // NOTE(review): the reference index also lists GetTextUnderAnnot(annots::Annot&) const,
    // whose declaration is not shown in this listing - presumably it belongs here.
477 
    // Get the character index range of all text rectangles within the specified rectangle region.
485  common::Range GetCharRange(const RectF& rect);
486 
    // NOTE(review): the reference index also lists GetTextRectArrayByRect(const RectF&),
    // whose declaration is not shown in this listing - presumably it belongs here.
501 
502  // User is strongly recommended NOT to use this method; otherwise unknown situation may occur.
503  explicit TextPage(FS_HANDLE handle = NULL);
504 };
505 
// Text search: configured with a pattern, page range and flags, then stepped with
// FindNext()/FindPrev(); the Get* methods describe the current match.
// Only declarations are visible here; the implementations live elsewhere.
// NOTE(review): this listing omits some source lines (enumerators and a few methods);
// the leading numbers are the original fs_search.h line numbers.
517 class TextSearch FS_FINAL : public Base {
518  public:
    // Enumeration for searching flags.
    // NOTE(review): the enumerator lines are not shown; per the reference index they are
    // 526 (no special options), 528 (match case), 530 (match whole word) and
    // 532 (match the key word consecutively).
524  typedef enum _SearchFlags {
533  } SearchFlags;
534 
535 
    // Constructor, for a PDF document. cancel is an optional cancel callback (default NULL).
548  explicit TextSearch(const PDFDoc& document, SearchCancelCallback* cancel = NULL);
549 
    // The XFA overload below is compiled only when _FX_NO_XFA_ is not defined.
550  #ifndef _FX_NO_XFA_
551 
    // Constructor, for an XFA document. cancel is an optional cancel callback (default NULL).
562  explicit TextSearch(const foxit::addon::xfa::XFADoc& xfa_document, foxit::pdf::SearchCancelCallback* cancel = NULL);
563 #endif
564 
    // Constructor, for a text page object.
569  explicit TextSearch(const foxit::pdf::TextPage& text_page);
570 
    // Constructor, for an annotation object.
579  explicit TextSearch(const foxit::pdf::annots::Annot& annot);
580 
    // Destructor.
582  ~TextSearch();
    // Constructor, with another text search object.
588  TextSearch(const TextSearch& other);
    // Assign operator.
596  TextSearch& operator = (const TextSearch& other);
597 
    // Equal operator.
605  bool operator == (const TextSearch& other) const;
    // Not equal operator.
613  bool operator != (const TextSearch& other) const;
614 
    // Check whether current object is empty or not.
622  bool IsEmpty() const;
623 
    // Set keywords to search.
631  bool SetPattern(const wchar_t* key_words);
632 
    // Set starting page index.
648  bool SetStartPage(int page_index);
649 
    // Set ending page index.
665  bool SetEndPage(int page_index);
666 
    // Set search flags.
    // NOTE(review): presumably a combination of SearchFlags values - confirm.
678  bool SetSearchFlags(uint32 search_flags);
679 
    // Search for next matched pattern.
686  bool FindNext();
687 
    // Search for previous matched pattern.
694  bool FindPrev();
695 
    // Get the rectangles of current match pattern.
701  RectFArray GetMatchRects() const;
702 
    // Get the page index, to which current match belongs.
711  int GetMatchPageIndex() const;
712 
    // NOTE(review): the reference index also lists GetMatchSentence() and
    // GetMatchSentenceStartIndex(), whose declarations are not shown in this listing -
    // presumably they belong on the omitted lines here.
719 
730 
    // Get the index of the first character of current match pattern, based on current match page.
737  int GetMatchStartCharIndex() const;
738 
    // Get the index of the last character of current match pattern, based on current match page.
745  int GetMatchEndCharIndex() const;
746 
747  // User is strongly recommended NOT to use this method; otherwise unknown situation may occur.
748  explicit TextSearch(FS_HANDLE handle = NULL);
749 };
750 
// Text link object, as returned by PageTextLinks::GetTextLink().
// Only declarations are visible here; the implementations live elsewhere.
// NOTE(review): the original doc comments are not part of this listing, so the member
// notes below are derived from the signatures only.
759 class TextLink FS_FINAL : public Base{
760  public:
    // Destructor.
762  ~TextLink();
    // Constructor, with another text link object.
768  TextLink(const TextLink& other);
    // Assign operator.
776  TextLink& operator = (const TextLink& other);
777 
    // Equal operator.
785  bool operator == (const TextLink& other) const;
    // Not equal operator.
793  bool operator != (const TextLink& other) const;
794 
    // Check whether current object is empty or not.
802  bool IsEmpty() const;
803 
    // Returns a wide string; presumably the URI of this text link - confirm.
812  WString GetURI();
813 
    // Presumably the index of the first character of the link text - confirm.
819  int GetStartCharIndex();
820 
    // Presumably the index of the last character of the link text - confirm.
826  int GetEndCharIndex();
827 
834  // User is strongly recommended NOT to use this method; otherwise unknown situation may occur.
835  explicit TextLink(FS_HANDLE handle = NULL);
836 
837 };
838 
// Collection of the text links of one text page: constructed from a TextPage and queried
// by count and index. Only declarations are visible here; the implementations live elsewhere.
// NOTE(review): the original doc comments are not part of this listing, so the member
// notes below are derived from the signatures only.
843 class PageTextLinks FS_FINAL : public Base{
844  public:
    // Constructor, from a text page object.
850  explicit PageTextLinks(const TextPage& page);
    // Constructor, with another page text links object.
856  PageTextLinks(const PageTextLinks& other);
    // Assign operator.
864  PageTextLinks& operator = (const PageTextLinks& other);
    // Equal operator.
872  bool operator == (const PageTextLinks& other) const ;
    // Not equal operator.
880  bool operator != (const PageTextLinks& other) const ;
881 
    // Check whether current object is empty or not.
889  bool IsEmpty() const;
    // Destructor.
891  ~PageTextLinks();
892 
    // Returns the number of text links; presumably for the page given at construction - confirm.
898  int GetTextLinkCount();
899 
    // Returns the text link at the given index.
    // NOTE(review): presumably index ranges from 0 to GetTextLinkCount() - 1 - confirm.
908  TextLink GetTextLink(int index);
909 
910  // User is strongly recommended NOT to use this method; otherwise unknown situation may occur.
911  explicit PageTextLinks(FS_HANDLE handle = NULL);
912 };
913 } // namespace pdf
914 } // namespace foxit
915 #endif // FS_SEARCH_H_
916 
TextParseFlags
Enumeration for parsing flags used for text page.
Definition: fs_search.h:276
If set, match the case of keyword when searching.
Definition: fs_search.h:528
bool IsEmpty() const
Check whether current object is empty or not.
If this is set, that means to get text content of a PDF page by the stream order.
Definition: fs_search.h:292
float origin_x
The x-coordinate of the origin position.
Definition: fs_search.h:231
RectF char_box
The glyph bounding box in page space.
Definition: fs_search.h:241
void * FS_HANDLE
Handle type.
Definition: fs_basictypes.h:216
Parse the text content of a PDF page with outputting the hyphen on a line feed.
Definition: fs_search.h:280
RectFArray GetTextRectArrayByRect(const RectF &rect)
Get the array of all text rectangles within the specified rectangle region.
Definition: fs_annot.h:754
common::Font font
A font for character.
Definition: fs_search.h:211
int GetCharCount() const
Get the count of all the characters.
bool SetEndPage(int page_index)
Set ending page index.
CFX_Object Object
Object type.
Definition: fs_basictypes.h:219
TextCharFlag flag
Flags to indicate which properties of textpage character flag are meaningful.
Definition: fs_search.h:219
int GetMatchStartCharIndex() const
Get the index of the first character of current match pattern, based on current match page.
bool operator !=(const TextPageCharInfo &char_info) const
Not equal operator.
Definition: fs_search.h:170
Matrix matrix
The matrix of the character.
Definition: fs_search.h:251
TextPage & operator=(const TextPage &other)
Assign operator.
virtual bool NeedToCancelNow()=0
A callback function used to check whether to cancel the searching process or not.
float origin_y
The y-coordinate of the origin position.
Definition: fs_search.h:236
RectF GetTextRect(int rect_index) const
Get the text rectangle by the index.
If this is set, that means to get text content of a PDF page by the display order.
Definition: fs_search.h:294
Definition: fs_search.h:269
common::Range GetWordAtPos(float x, float y, float tolerance) const
Get the character range of a word at or around a specified position on the page, in PDF coordinate system.
Character flag: Hyphen.
Definition: fs_search.h:69
Header file for common definitions and classes.
RectFArray GetMatchRects() const
Get the rectangles of current match pattern.
Character flag: Generated.
Definition: fs_search.h:65
~TextPageCharInfo()
Destructor.
Definition: fs_search.h:110
RectF char_outbox
The typographic(display and printing) bounding box in page space.
Definition: fs_search.h:246
bool operator !=(const TextSearch &other) const
Not equal operator.
common::Rotation GetBaselineRotation(int rect_index)
Get the text trend (as rotation) of a specified rectangle.
common::Range GetCharRange(const RectF &rect)
Get the character index range of all text rectangles within the specified rectangle region.
int GetMatchEndCharIndex() const
Get the index of the last character of current match pattern, based on current match page.
bool operator==(const TextSearch &other) const
Equal operator.
Definition: fs_search.h:52
Parse the text content of a PDF page by the stream order.
Definition: fs_search.h:282
WString GetText(TextOrderFlag flag) const
Get the page text.
bool FindPrev()
Search for previous matched pattern.
TextPageCharInfo(const TextPageCharInfo &char_info)
Constructor, with another character information object.
Definition: fs_search.h:117
float font_size
Font size for character.
Definition: fs_search.h:226
Header file for XFA related definitions and functions.
TextPageCharInfo & operator=(const TextPageCharInfo &char_info)
Assign operator.
Definition: fs_search.h:135
Character flag: UnUnicode.
Definition: fs_search.h:67
WString GetChars(int start=0, int count=-1) const
Get all the characters within a range specified by a start index and count.
bool SetPattern(const wchar_t *key_words)
Set keywords to search.
int GetMatchPageIndex() const
Get the page index, to which current match belongs.
TextSearch(const PDFDoc &document, SearchCancelCallback *cancel=0)
Constructor, for a PDF document.
bool SetStartPage(int page_index)
Set starting page index.
int GetIndexAtPos(float x, float y, float tolerance) const
Get the character index at or around a specified position on the page, in PDF coordinate system.
Rotation
Enumeration for rotation.
Definition: fs_common.h:344
Character flag: Normal.
Definition: fs_search.h:63
If set, match the whole word of keyword when searching.
Definition: fs_search.h:530
If set, match the key word consecutively when searching. For example, "CC" will be matched twice in "CCC".
Definition: fs_search.h:532
TextCharFlag
Enumeration for PDF textpage character flag.
Definition: fs_search.h:59
Header file for PDF page related definitions and classes.
Foxit namespace.
Definition: fs_compare.h:27
Parse the text content of a PDF page by normalizing characters based on their positions in the PDF page.
Definition: fs_search.h:278
TextPage(const PDFPage &page, int flags=foxit::pdf::TextPage::e_ParseTextNormal)
Constructor, from a parsed PDF page.
Definition: fs_xfa.h:893
TextSearch & operator=(const TextSearch &other)
Assign operator.
WString GetTextInRect(const RectF &rect) const
Get the text within a rectangle, in PDF coordinate system.
TextPageCharInfo(const common::Font &font, TextCharFlag flag, float font_size, float origin_x, float origin_y, const RectF &char_box, const RectF &char_outbox, const Matrix &matrix)
Constructor, with parameters.
Definition: fs_search.h:90
bool FindNext()
Search for next matched pattern.
TextPageCharInfo()
Constructor.
Definition: fs_search.h:103
bool IsEmpty() const
Check whether current object is empty or not.
#define NULL
The null-pointer value.
Definition: fx_system.h:767
No special searching options.
Definition: fs_search.h:526
Definition: fx_coordinates.h:771
void Set(const common::Font &font, TextCharFlag flag, float font_size, float origin_x, float origin_y, const RectF &char_box, const RectF &char_outbox, const Matrix &matrix)
Set value.
Definition: fs_search.h:196
SearchFlags
Enumeration for searching flags.
Definition: fs_search.h:524
Definition: fs_search.h:517
Definition: fs_pdfdoc.h:389
Definition: fs_pdfpage.h:342
Definition: fs_search.h:39
Character flag: Unknown.
Definition: fs_search.h:61
Character flag: ComboWord.
Definition: fs_search.h:71
bool SetSearchFlags(uint32 search_flags)
Set search flags.
~TextPage()
Destructor.
Definition: fs_common.h:1418
bool operator !=(const TextPage &other) const
Not equal operator.
Definition: fx_coordinates.h:1076
Header file for annotation related definitions and classes.
bool operator==(const TextPageCharInfo &char_info) const
Equal operator.
Definition: fs_search.h:154
WIDE STRING CLASS.
Definition: fx_string.h:1452
WString GetMatchSentence()
Get the sentence that contains current match pattern.
Definition: fs_common.h:1263
WString GetTextUnderAnnot(annots::Annot &annot) const
Get the page text which intersects with a specified annotation.
int GetTextRectCount(int start=0, int count=-1)
Count the text rectangles within a range specified by a start index and count.
FX_UINT32 uint32
32-bit unsigned integer.
Definition: fs_basictypes.h:198
int GetMatchSentenceStartIndex()
Get the index of the first character of current match pattern, based on the match sentence.
~TextSearch()
Destructor.
Definition: fs_basictypes.h:399
bool operator==(const TextPage &other) const
Equal operator.
TextOrderFlag
Enumeration for text order flag which is used when getting text content of a PDF page.
Definition: fs_search.h:290
TextPageCharInfo GetCharInfo(int char_index)
Get character information of a specific character.