Foxit PDF SDK
fs_search.h
Go to the documentation of this file.
1 
15 #ifndef FS_SEARCH_H_
16 #define FS_SEARCH_H_
17 
18 #include "common/fs_common.h"
19 #include "pdf/annots/fs_annot.h"
20 #include "pdf/fs_pdfpage.h"
21 #ifndef _FX_NO_XFA_
22 #include "addon/xfa/fs_xfa.h"
23 #endif // #ifndef _FX_NO_XFA_
24 
30 namespace foxit {
34 namespace pdf {
40 {  // NOTE(review): the declaration line of this class (fs_search.h:39) is missing from this listing; presumably SearchCancelCallback, the callback type taken by the TextSearch constructors - confirm.
41  public:
48  virtual bool NeedToCancelNow() = 0;  // Callback function used to check whether to cancel the searching process or not.
49 };
50 
52 class TextPageCharInfo FS_FINAL : public Object {  // Character information of a text page: font, flag, font size, origin position, bounding boxes and matrix.
53  public:
59  typedef enum _TextCharFlag {  // Enumeration for PDF textpage character flag.
61  e_Unknown = -1,  // Character flag: Unknown.
63  e_Normal = 0,  // Character flag: Normal.
69  e_Hyphen = 3,  // Character flag: Hyphen.
72  } TextCharFlag;  // NOTE(review): the enumerators defined at fs_search.h:65, 67 and 71 (Generated, UnUnicode, ComboWord) are missing from this listing.
73 
74 
91  const RectF& char_box, const RectF& char_outbox, const Matrix& matrix) {  // Constructor, with parameters; its first signature line (fs_search.h:90) is missing from this listing.
92  this->font = font;  // Each argument is copied into the member of the same name.
93  this->flag = flag;
94  this->font_size = font_size;
95  this->origin_x = origin_x;
96  this->origin_y = origin_y;
97  this->char_box = char_box;
98  this->char_outbox = char_outbox;
99  this->matrix = matrix;
100  }
101 
105  , font_size(0)  // Tail of the default constructor's initializer list; the declaration (fs_search.h:103) and the first initializer are missing from this listing.
106  , origin_x(0)
107  , origin_y(0) {}
108 
111 
117  TextPageCharInfo(const TextPageCharInfo& char_info) {  // Constructor, with another character information object; copies every field.
118  this->font = char_info.font;
119  this->flag = char_info.flag;
120  this->font_size = char_info.font_size;
121  this->origin_x = char_info.origin_x;
122  this->origin_y = char_info.origin_y;
123  this->char_box = char_info.char_box;
124  this->char_outbox = char_info.char_outbox;
125  this->matrix = char_info.matrix;
126  }
127 
136  this->font = char_info.font;  // Body of the assign operator; its signature line (fs_search.h:135) is missing from this listing.
137  this->flag = char_info.flag;
138  this->font_size = char_info.font_size;
139  this->origin_x = char_info.origin_x;
140  this->origin_y = char_info.origin_y;
141  this->char_box = char_info.char_box;
142  this->char_outbox = char_info.char_outbox;
143  this->matrix = char_info.matrix;
144  return *this;  // Returns the current object so that assignments can be chained.
145  }
146 
154  bool operator == (const TextPageCharInfo& char_info) const {  // Equal operator.
155  if (font != char_info.font || flag != char_info.flag || fabs(font_size - char_info.font_size) > FLT_EPSILON ||  // Float fields are compared with an absolute tolerance of FLT_EPSILON; all other fields are compared with !=.
156  fabs(origin_x - char_info.origin_x) > FLT_EPSILON || fabs(origin_y - char_info.origin_y) > FLT_EPSILON ||  // NOTE(review): fabs/FLT_EPSILON are not supplied by any #include visible in this listing; presumably they arrive transitively - confirm.
157  char_box != char_info.char_box || char_outbox != char_info.char_outbox || matrix != char_info.matrix)
158  return false;  // Any differing field makes the objects unequal.
159 
160  return true;
161  }
162 
170  bool operator != (const TextPageCharInfo& char_info) const{  // Not equal operator.
171  if (font != char_info.font || flag != char_info.flag || fabs(font_size - char_info.font_size) > FLT_EPSILON ||  // NOTE(review): repeats the condition of operator== with the return values swapped; `return !(*this == char_info);` would keep the two in sync.
172  fabs(origin_x - char_info.origin_x) > FLT_EPSILON || fabs(origin_y - char_info.origin_y) > FLT_EPSILON ||
173  char_box != char_info.char_box || char_outbox != char_info.char_outbox || matrix != char_info.matrix)
174  return true;
175 
176  return false;
177  }
178 
196  void Set(const common::Font& font, TextCharFlag flag, float font_size, float origin_x, float origin_y,  // Set value: overwrites every field with the given arguments.
197  const RectF& char_box, const RectF& char_outbox, const Matrix& matrix) {
198  this->font = font;
199  this->flag = flag;
200  this->font_size = font_size;
201  this->origin_x = origin_x;
202  this->origin_y = origin_y;
203  this->char_box = char_box;
204  this->char_outbox = char_outbox;
205  this->matrix = matrix;
206  }
207 
212 
220 
226  float font_size;  // Font size for character.
227 
231  float origin_x;  // The x-coordinate of the origin position.
232 
236  float origin_y;  // The y-coordinate of the origin position.
237 
242 
247 
252 };  // NOTE(review): the member declarations font (fs_search.h:211), flag (219), char_box (241), char_outbox (246) and matrix (251) are missing from this listing.
253 
269 class TextPage FS_FINAL : public Base {  // Text page object, constructed from a parsed PDF page; gives access to characters, text and text rectangles.
270  public:
276  typedef enum _TextParseFlags {  // Enumeration for parsing flags used for text page.
283  } TextParseFlags;  // NOTE(review): the enumerators (fs_search.h:278, 280, 282) are missing from this listing.
284 
290  typedef enum _TextOrderFlag {  // Enumeration for text order flag which is used when getting text content of a PDF page.
295  } TextOrderFlag;  // NOTE(review): the enumerators (fs_search.h:292, 294) are missing from this listing.
296 
297 
305  explicit TextPage(const PDFPage& page, int flags = foxit::pdf::TextPage::e_ParseTextNormal);  // Constructor, from a parsed PDF page; flags defaults to e_ParseTextNormal.
306 
308  ~TextPage();  // Destructor.
314  TextPage(const TextPage& other);  // Constructor, with another text page object.
322  TextPage& operator = (const TextPage& other);  // Assign operator.
323 
331  bool operator == (const TextPage& other) const;  // Equal operator.
339  bool operator != (const TextPage& other) const;  // Not equal operator.
340 
348  bool IsEmpty() const;  // Check whether current object is empty or not.
349 
355  int GetCharCount() const;  // Get the count of all the characters.
356 
366  TextPageCharInfo GetCharInfo(int char_index);  // Get character information of a specific character.
367 
382  WString GetChars(int start = 0, int count = -1) const;  // Get all the characters within a range specified by a start index and count.
383 
396  int GetIndexAtPos(float x, float y, float tolerance) const;  // Get the character index at or around a specified position on the page, in PDF coordinate system.
397 
405  WString GetTextInRect(const RectF& rect) const;  // Get the text within a rectangle, in PDF coordinate system.
406 
415  WString GetText(TextOrderFlag flag) const;  // Get the page text.
416 
431  common::Range GetWordAtPos(float x, float y, float tolerance) const;  // Get the character range of a word at or around a specified position on the page.
432 
444  int GetTextRectCount(int start = 0, int count = -1);  // Count the text rectangles within a range specified by a start index and count.
445 
455  RectF GetTextRect(int rect_index) const;  // Get the text rectangle by the index.
456 
467  common::Rotation GetBaselineRotation(int rect_index);  // Get the text trend (as rotation) of a specified rectangle.
468 
477 
485  common::Range GetCharRange(const RectF& rect);  // Get the character index range of all text rectangles within the specified rectangle region.
486 
501 
502  // Users are strongly advised NOT to use this constructor; otherwise unknown situations may occur.
503  explicit TextPage(FS_HANDLE handle = NULL);
504 };  // NOTE(review): the page index also lists GetTextUnderAnnot and GetTextRectArrayByRect, whose declarations do not appear in this listing; presumably TextPage members - confirm.
505 
517 class TextSearch FS_FINAL : public Base {  // Text search object; can be constructed for a PDF document, an XFA document, a text page or an annotation.
518  public:
524  typedef enum _SearchFlags {  // Enumeration for searching flags.
533  } SearchFlags;  // NOTE(review): the enumerators (fs_search.h:526, 528, 530, 532) are missing from this listing.
534 
535 
551  explicit TextSearch(const PDFDoc& document, SearchCancelCallback* cancel = NULL, int flags = foxit::pdf::TextPage::e_ParseTextNormal);  // Constructor, for a PDF document; cancel is optional and flags defaults to TextPage::e_ParseTextNormal.
552 
553  #ifndef _FX_NO_XFA_
554 
565  explicit TextSearch(const foxit::addon::xfa::XFADoc& xfa_document, foxit::pdf::SearchCancelCallback* cancel = NULL);  // Constructor, for an XFA document; only declared when _FX_NO_XFA_ is not defined.
566 #endif // #ifndef _FX_NO_XFA_
567 
572  explicit TextSearch(const foxit::pdf::TextPage& text_page);  // Constructor, for a text page object.
573 
582  explicit TextSearch(const foxit::pdf::annots::Annot& annot);  // Constructor, for an annotation object.
583 
585  ~TextSearch();  // Destructor.
591  TextSearch(const TextSearch& other);  // Constructor, with another text search object.
599  TextSearch& operator = (const TextSearch& other);  // Assign operator.
600 
608  bool operator == (const TextSearch& other) const;  // Equal operator.
616  bool operator != (const TextSearch& other) const;  // Not equal operator.
617 
625  bool IsEmpty() const;  // Check whether current object is empty or not.
626 
634  bool SetPattern(const wchar_t* key_words);  // Set keywords to search.
635 
651  bool SetStartPage(int page_index);  // Set starting page index.
652 
668  bool SetEndPage(int page_index);  // Set ending page index.
669 
691  bool SetStartCharacter(int char_index);  // Set starting character index, from where the search process is to be started.
692 
704  bool SetSearchFlags(uint32 search_flags);  // Set search flags.
705 
712  bool FindNext();  // Search for next matched pattern.
713 
720  bool FindPrev();  // Search for previous matched pattern.
721 
727  RectFArray GetMatchRects() const;  // Get the rectangles of current match pattern.
728 
737  int GetMatchPageIndex() const;  // Get the page index, to which current match belongs.
738 
745 
756 
767 
774  int GetMatchStartCharIndex() const;  // Get the index of the first character of current match pattern, based on current match page.
775 
782  int GetMatchEndCharIndex() const;  // Get the index of the last character of current match pattern, based on current match page.
783 
784  // Users are strongly advised NOT to use this constructor; otherwise unknown situations may occur.
785  explicit TextSearch(FS_HANDLE handle = NULL);
786 };  // NOTE(review): the page index also lists GetMatchSentence, GetMatchSentenceStartIndex and GetMatchSentenceEndIndex, whose declarations do not appear in this listing; presumably TextSearch members - confirm.
787 
796 class TextLink FS_FINAL : public Base{  // Text link object; PageTextLinks::GetTextLink returns objects of this type. NOTE(review): no description of this class appears in this listing.
797  public:
799  ~TextLink();  // Destructor.
805  TextLink(const TextLink& other);  // Constructor, with another text link object.
813  TextLink& operator = (const TextLink& other);  // Assign operator.
814 
822  bool operator == (const TextLink& other) const;  // Equal operator.
830  bool operator != (const TextLink& other) const;  // Not equal operator.
831 
839  bool IsEmpty() const;  // Check whether current object is empty or not.
840 
849  WString GetURI();  // Returns a wide string; presumably the URI of this link - confirm.
850 
856  int GetStartCharIndex();  // Presumably the index of the first character of the link text - confirm.
857 
863  int GetEndCharIndex();  // Presumably the index of the last character of the link text - confirm.
864 
871  // Users are strongly advised NOT to use this constructor; otherwise unknown situations may occur.
872  explicit TextLink(FS_HANDLE handle = NULL);
873 
874 };
875 
880 class PageTextLinks FS_FINAL : public Base{  // Collection of TextLink objects, constructed from a TextPage. NOTE(review): no description of this class appears in this listing.
881  public:
887  explicit PageTextLinks(const TextPage& page);  // Constructor, from a text page object.
893  PageTextLinks(const PageTextLinks& other);  // Constructor, with another page text links object.
901  PageTextLinks& operator = (const PageTextLinks& other);  // Assign operator.
909  bool operator == (const PageTextLinks& other) const ;  // Equal operator.
917  bool operator != (const PageTextLinks& other) const ;  // Not equal operator.
918 
926  bool IsEmpty() const;  // Check whether current object is empty or not.
928  ~PageTextLinks();  // Destructor.
929 
935  int GetTextLinkCount();  // Presumably the number of text links available through GetTextLink - confirm.
936 
945  TextLink GetTextLink(int index);  // Returns a TextLink by index; presumably index is in [0, GetTextLinkCount()) - confirm.
946 
947  // Users are strongly advised NOT to use this constructor; otherwise unknown situations may occur.
948  explicit PageTextLinks(FS_HANDLE handle = NULL);
949 };
950 } // namespace pdf
951 } // namespace foxit
952 #endif // FS_SEARCH_H_
953 
int GetMatchStartCharIndex() const
Get the index of the first character of current match pattern, based on current match page.
common::Range GetCharRange(const RectF &rect)
Get the character index range of all text rectangles within the specified rectangle region.
TextOrderFlag
Enumeration for text order flag which is used when getting text content of a PDF page.
Definition: fs_search.h:290
bool IsEmpty() const
Check whether current object is empty or not.
If set, match the case of keyword when searching.
Definition: fs_search.h:528
Definition: fs_common.h:1179
TextSearch(const PDFDoc &document, SearchCancelCallback *cancel=0, int flags=foxit::pdf::TextPage::e_ParseTextNormal)
Constructor, for a PDF document.
Definition: fs_search.h:517
RectF char_box
The glyph bounding box in page space.
Definition: fs_search.h:241
CFX_Object Object
Object type.
Definition: fs_basictypes.h:217
bool FindPrev()
Search for previous matched pattern.
TextCharFlag
Enumeration for PDF textpage character flag.
Definition: fs_search.h:59
Character flag: Normal.
Definition: fs_search.h:63
TextPageCharInfo(const common::Font &font, TextCharFlag flag, float font_size, float origin_x, float origin_y, const RectF &char_box, const RectF &char_outbox, const Matrix &matrix)
Constructor, with parameters.
Definition: fs_search.h:90
RectFArray GetTextRectArrayByRect(const RectF &rect)
Get the array of all text rectangles within the specified rectangle region.
bool SetPattern(const wchar_t *key_words)
Set keywords to search.
common::Range GetWordAtPos(float x, float y, float tolerance) const
Get the character range of a word at or around a specified position on the page, in PDF coordinate system.
~TextSearch()
Destructor.
Character flag: Hyphen.
Definition: fs_search.h:69
bool SetStartPage(int page_index)
Set starting page index.
WIDE STRING CLASS.
Definition: fx_string.h:1452
common::Font font
A font for character.
Definition: fs_search.h:211
int GetMatchEndCharIndex() const
Get the index of the last character of current match pattern, based on current match page.
Character flag: Generated.
Definition: fs_search.h:65
RectF char_outbox
The typographic (display and printing) bounding box in page space.
Definition: fs_search.h:246
Definition: fs_pdfdoc.h:508
bool operator !=(const TextPageCharInfo &char_info) const
Not equal operator.
Definition: fs_search.h:170
If set, match the whole word of keyword when searching.
Definition: fs_search.h:530
bool operator==(const TextSearch &other) const
Equal operator.
TextPageCharInfo(const TextPageCharInfo &char_info)
Constructor, with another character information object.
Definition: fs_search.h:117
bool SetStartCharacter(int char_index)
Set starting character index, from where the search process is to be started.
TextParseFlags
Enumeration for parsing flags used for text page.
Definition: fs_search.h:276
Character flag: UnUnicode.
Definition: fs_search.h:67
If set, match the key word consecutively when searching. For example, "CC" will be matched twice in "...
Definition: fs_search.h:532
~TextPage()
Destructor.
int GetTextRectCount(int start=0, int count=-1)
Count the text rectangles within a range specified by a start index and count.
Definition: fs_xfa.h:897
bool operator !=(const TextSearch &other) const
Not equal operator.
RectF GetTextRect(int rect_index) const
Get the text rectangle by the index.
bool operator==(const TextPage &other) const
Equal operator.
TextSearch & operator=(const TextSearch &other)
Assign operator.
float font_size
Font size for character.
Definition: fs_search.h:226
Header file for annotation related definitions and classes.
TextCharFlag flag
Flags to indicate which properties of textpage character flag are meaningful.
Definition: fs_search.h:219
virtual bool NeedToCancelNow()=0
A callback function used to check whether to cancel the searching process or not.
WString GetText(TextOrderFlag flag) const
Get the page text.
int GetCharCount() const
Get the count of all the characters.
TextPageCharInfo GetCharInfo(int char_index)
Get character information of a specific character.
FX_UINT32 uint32
32-bit unsigned integer.
Definition: fs_basictypes.h:196
Definition: fs_pdfpage.h:411
void * FS_HANDLE
Handle type.
Definition: fs_basictypes.h:214
Header file for common definitions and classes.
int GetMatchPageIndex() const
Get the page index, to which current match belongs.
If this is set, that means to get text content of a PDF page by the display order.
Definition: fs_search.h:294
Parse the text content of a PDF page by normalizing characters based on their positions in the PDF page.
Definition: fs_search.h:278
WString GetTextInRect(const RectF &rect) const
Get the text within a rectangle, in PDF coordinate system.
TextPageCharInfo()
Constructor.
Definition: fs_search.h:103
bool IsEmpty() const
Check whether current object is empty or not.
Definition: fs_basictypes.h:407
Character flag: Unknown.
Definition: fs_search.h:61
Header file for XFA related definitions and functions.
Header file for PDF page related definitions and classes.
bool FindNext()
Search for next matched pattern.
~TextPageCharInfo()
Destructor.
Definition: fs_search.h:110
Definition: fs_annot.h:964
common::Rotation GetBaselineRotation(int rect_index)
Get the text trend (as rotation) of a specified rectangle.
int GetIndexAtPos(float x, float y, float tolerance) const
Get the character index at or around a specified position on the page, in PDF coordinate system.
Definition: fs_common.h:1334
TextPage & operator=(const TextPage &other)
Assign operator.
Rotation
Enumeration for rotation.
Definition: fs_common.h:57
Foxit namespace.
Definition: fs_taggedpdf.h:27
int GetMatchSentenceStartIndex()
Get the index of the first character of current matched pattern, based on the matched sentence.
TextPageCharInfo & operator=(const TextPageCharInfo &char_info)
Assign operator.
Definition: fs_search.h:135
Definition: fs_search.h:52
float origin_y
The y-coordinate of the origin position.
Definition: fs_search.h:236
WString GetTextUnderAnnot(annots::Annot &annot) const
Get the page text which intersects with a specified annotation.
Matrix matrix
The matrix of the character.
Definition: fs_search.h:251
WString GetChars(int start=0, int count=-1) const
Get all the characters within a range specified by a start index and count.
bool operator !=(const TextPage &other) const
Not equal operator.
#define NULL
The null-pointer value.
Definition: fx_system.h:780
No special searching options.
Definition: fs_search.h:526
Definition: fx_coordinates.h:1076
WString GetMatchSentence()
Get the sentence that contains current match pattern.
void Set(const common::Font &font, TextCharFlag flag, float font_size, float origin_x, float origin_y, const RectF &char_box, const RectF &char_outbox, const Matrix &matrix)
Set value.
Definition: fs_search.h:196
int GetMatchSentenceEndIndex()
Get the index of the last character of current matched pattern, based on the matched sentence.
Parse the text content of a PDF page by the stream order.
Definition: fs_search.h:282
bool operator==(const TextPageCharInfo &char_info) const
Equal operator.
Definition: fs_search.h:154
SearchFlags
Enumeration for searching flags.
Definition: fs_search.h:524
TextPage(const PDFPage &page, int flags=foxit::pdf::TextPage::e_ParseTextNormal)
Constructor, from a parsed PDF page.
Definition: fs_search.h:269
Definition: fs_search.h:39
If this is set, that means to get text content of a PDF page by the stream order.
Definition: fs_search.h:292
Character flag: ComboWord.
Definition: fs_search.h:71
bool SetEndPage(int page_index)
Set ending page index.
bool SetSearchFlags(uint32 search_flags)
Set search flags.
Definition: fx_coordinates.h:771
RectFArray GetMatchRects() const
Get the rectangles of current match pattern.
Parse the text content of a PDF page with outputting the hyphen on a line feed.
Definition: fs_search.h:280
float origin_x
The x-coordinate of the origin position.
Definition: fs_search.h:231