Foxit PDF SDK
fs_search.h
Go to the documentation of this file.
1 
15 #ifndef FS_SEARCH_H_
16 #define FS_SEARCH_H_
17 
18 #include "common/fs_common.h"
19 #include "pdf/annots/fs_annot.h"
20 #include "pdf/fs_pdfpage.h"
21 #ifndef _FX_NO_XFA_
22 #include "addon/xfa/fs_xfa.h"
23 #endif
24 
30 namespace foxit {
34 namespace pdf {
40 {
41  public:
48  virtual bool NeedToCancelNow() = 0;
49 };
50 
52 class TextPageCharInfo FS_FINAL : public Object {
53  public:
// Enumeration for PDF textpage character flag.
// NOTE(review): listing lines 65, 67 and 71 are absent from this rendering; the
// page's tooltips describe "Generated", "UnUnicode" and "ComboWord" flags at
// those lines - confirm the enumerator names and values against the real header.
59  typedef enum _TextCharFlag {
// Character flag: Unknown.
61  e_Unknown = -1,
// Character flag: Normal.
63  e_Normal = 0,
// Character flag: Hyphen.
69  e_Hyphen = 3,
72  } TextCharFlag;
73 
74 
91  const RectF& char_box, const RectF& char_outbox, const Matrix& matrix) {
92  this->font = font;
93  this->flag = flag;
94  this->font_size = font_size;
95  this->origin_x = origin_x;
96  this->origin_y = origin_y;
97  this->char_box = char_box;
98  this->char_outbox = char_outbox;
99  this->matrix = matrix;
100  }
101 
105  , font_size(0)
106  , origin_x(0)
107  , origin_y(0) {}
108 
111 
// Constructor, with another character information object.
// Copies all eight fields (font, flag, font_size, origin_x, origin_y, char_box,
// char_outbox, matrix) from char_info using plain member assignment in the
// constructor body rather than a member-initializer list.
117  TextPageCharInfo(const TextPageCharInfo& char_info) {
118  this->font = char_info.font;
119  this->flag = char_info.flag;
120  this->font_size = char_info.font_size;
121  this->origin_x = char_info.origin_x;
122  this->origin_y = char_info.origin_y;
123  this->char_box = char_info.char_box;
124  this->char_outbox = char_info.char_outbox;
125  this->matrix = char_info.matrix;
126  }
127 
136  this->font = char_info.font;
137  this->flag = char_info.flag;
138  this->font_size = char_info.font_size;
139  this->origin_x = char_info.origin_x;
140  this->origin_y = char_info.origin_y;
141  this->char_box = char_info.char_box;
142  this->char_outbox = char_info.char_outbox;
143  this->matrix = char_info.matrix;
144  return *this;
145  }
146 
// Equal operator.
// Returns true only when font, flag, char_box, char_outbox and matrix compare
// equal (via their own != operators) and font_size, origin_x and origin_y each
// differ from the other object's value by no more than FLT_EPSILON.
// The float tolerance is absolute (not scaled to the magnitude of the values).
// NOTE(review): fabs / FLT_EPSILON are not included by this header directly;
// presumably they arrive through common/fs_common.h - confirm.
154  bool operator == (const TextPageCharInfo& char_info) const {
155  if (font != char_info.font || flag != char_info.flag || fabs(font_size - char_info.font_size) > FLT_EPSILON ||
156  fabs(origin_x - char_info.origin_x) > FLT_EPSILON || fabs(origin_y - char_info.origin_y) > FLT_EPSILON ||
157  char_box != char_info.char_box || char_outbox != char_info.char_outbox || matrix != char_info.matrix)
158  return false;
159 
160  return true;
161  }
162 
// Not equal operator.
// Returns true when any of font, flag, char_box, char_outbox or matrix differs,
// or when font_size, origin_x or origin_y differs from the other object's value
// by more than FLT_EPSILON; returns false otherwise.
// NOTE(review): the condition is a verbatim copy of the one in operator ==, so
// the two must be edited together to stay exact opposites.
170  bool operator != (const TextPageCharInfo& char_info) const{
171  if (font != char_info.font || flag != char_info.flag || fabs(font_size - char_info.font_size) > FLT_EPSILON ||
172  fabs(origin_x - char_info.origin_x) > FLT_EPSILON || fabs(origin_y - char_info.origin_y) > FLT_EPSILON ||
173  char_box != char_info.char_box || char_outbox != char_info.char_outbox || matrix != char_info.matrix)
174  return true;
175 
176  return false;
177  }
178 
// Set value.
// Overwrites every field of this object with the corresponding argument.
//   font         A font for character.
//   flag         Character flag (a TextCharFlag value).
//   font_size    Font size for character.
//   origin_x     The x-coordinate of the origin position.
//   origin_y     The y-coordinate of the origin position.
//   char_box     The glyph bounding box in page space.
//   char_outbox  The typographic (display and printing) bounding box in page space.
//   matrix       The matrix of the character.
// Each parameter shadows the member of the same name, hence the explicit this->.
196  void Set(const common::Font& font, TextCharFlag flag, float font_size, float origin_x, float origin_y,
197  const RectF& char_box, const RectF& char_outbox, const Matrix& matrix) {
198  this->font = font;
199  this->flag = flag;
200  this->font_size = font_size;
201  this->origin_x = origin_x;
202  this->origin_y = origin_y;
203  this->char_box = char_box;
204  this->char_outbox = char_outbox;
205  this->matrix = matrix;
206  }
207 
212 
220 
226  float font_size;
227 
231  float origin_x;
232 
236  float origin_y;
237 
242 
247 
252 };
253 
// Text page of a PDF page: constructed from a parsed PDF page and used to query
// its characters, text rectangles and words (see the member declarations below).
// NOTE(review): several listing lines (enumerators, and listing lines 476 / 500)
// are absent from this rendering; the page's tooltips mention GetTextUnderAnnot
// and GetTextRectArrayByRect, which presumably belong there - confirm against
// the real header.
269 class TextPage FS_FINAL : public Base {
270  public:
// Enumeration for parsing flags used for text page.
// (Enumerator lines are not present in this listing.)
276  typedef enum _TextParseFlags {
283  } TextParseFlags;
284 
// Enumeration for text order flag which is used when getting text content of a PDF page.
// (Enumerator lines are not present in this listing.)
290  typedef enum _TextOrderFlag {
295  } TextOrderFlag;
296 
297 
// Constructor, from a parsed PDF page. flags defaults to e_ParseTextNormal.
305  explicit TextPage(const PDFPage& page, int flags = foxit::pdf::TextPage::e_ParseTextNormal);
306 
// Destructor.
308  ~TextPage();
// Constructor, with another text page object.
314  TextPage(const TextPage& other);
// Assign operator.
322  TextPage& operator = (const TextPage& other);
323 
// Equal operator.
331  bool operator == (const TextPage& other) const;
// Not equal operator.
339  bool operator != (const TextPage& other) const;
340 
// Check whether current object is empty or not.
348  bool IsEmpty() const;
349 
// Get the count of all the characters.
355  int GetCharCount() const;
356 
// Get character information of a specific character.
366  TextPageCharInfo GetCharInfo(int char_index);
367 
// Get all the characters within a range specified by a start index and count.
// Defaults: start = 0, count = -1.
382  WString GetChars(int start = 0, int count = -1) const;
383 
// Get the character index at or around a specified position on the page, in PDF coordinate system.
396  int GetIndexAtPos(float x, float y, float tolerance) const;
397 
// Get the text within a rectangle, in PDF coordinate system.
405  WString GetTextInRect(const RectF& rect) const;
406 
// Get the page text, in the order selected by flag.
415  WString GetText(TextOrderFlag flag) const;
416 
// Get the character range of a word at or around a specified position on the page.
431  common::Range GetWordAtPos(float x, float y, float tolerance) const;
432 
// Count the text rectangles within a range specified by a start index and count.
// Defaults: start = 0, count = -1.
444  int GetTextRectCount(int start = 0, int count = -1);
445 
// Get the text rectangle by the index.
455  RectF GetTextRect(int rect_index) const;
456 
// Get the text trend (as rotation) of a specified rectangle.
467  common::Rotation GetBaselineRotation(int rect_index);
468 
477 
// Get the character index range of all text rectangles within the specified rectangle region.
485  common::Range GetCharRange(const RectF& rect);
486 
501 
502  // Users are strongly advised NOT to use this constructor; otherwise unexpected behavior may occur.
503  explicit TextPage(FS_HANDLE handle = NULL);
504 };
505 
// Text search object: can be constructed for a PDF document, an XFA document
// (when _FX_NO_XFA_ is not defined), a text page or an annotation, then driven
// with SetPattern / FindNext / FindPrev and queried through the GetMatch* getters.
// NOTE(review): listing lines 745 and 756 are absent from this rendering; the
// page's tooltips mention GetMatchSentenceStartIndex and GetMatchSentence, which
// presumably belong there - confirm against the real header.
517 class TextSearch FS_FINAL : public Base {
518  public:
// Enumeration for searching flags.
// (Enumerator lines are not present in this listing.)
524  typedef enum _SearchFlags {
533  } SearchFlags;
534 
535 
// Constructor, for a PDF document. cancel defaults to NULL and flags to
// TextPage::e_ParseTextNormal.
551  explicit TextSearch(const PDFDoc& document, SearchCancelCallback* cancel = NULL, int flags = foxit::pdf::TextPage::e_ParseTextNormal);
552 
553  #ifndef _FX_NO_XFA_
554 
// Constructor taking an XFA document; only declared when _FX_NO_XFA_ is not defined.
565  explicit TextSearch(const foxit::addon::xfa::XFADoc& xfa_document, foxit::pdf::SearchCancelCallback* cancel = NULL);
566 #endif
567 
// Constructor taking a text page object.
572  explicit TextSearch(const foxit::pdf::TextPage& text_page);
573 
// Constructor taking an annotation object.
582  explicit TextSearch(const foxit::pdf::annots::Annot& annot);
583 
// Destructor.
585  ~TextSearch();
// Constructor, with another text search object.
591  TextSearch(const TextSearch& other);
// Assign operator.
599  TextSearch& operator = (const TextSearch& other);
600 
// Equal operator.
608  bool operator == (const TextSearch& other) const;
// Not equal operator.
616  bool operator != (const TextSearch& other) const;
617 
// Check whether current object is empty or not.
625  bool IsEmpty() const;
626 
// Set keywords to search.
634  bool SetPattern(const wchar_t* key_words);
635 
// Set starting page index.
651  bool SetStartPage(int page_index);
652 
// Set ending page index.
668  bool SetEndPage(int page_index);
669 
// Set starting character index, from where the search process is to be started.
691  bool SetStartCharacter(int char_index);
692 
// Set search flags.
704  bool SetSearchFlags(uint32 search_flags);
705 
// Search for next matched pattern.
712  bool FindNext();
713 
// Search for previous matched pattern.
720  bool FindPrev();
721 
// Get the rectangles of current match pattern.
727  RectFArray GetMatchRects() const;
728 
// Get the page index, to which current match belongs.
737  int GetMatchPageIndex() const;
738 
745 
756 
// Get the index of the first character of current match pattern, based on current match page.
763  int GetMatchStartCharIndex() const;
764 
// Get the index of the last character of current match pattern, based on current match page.
771  int GetMatchEndCharIndex() const;
772 
773  // Users are strongly advised NOT to use this constructor; otherwise unexpected behavior may occur.
774  explicit TextSearch(FS_HANDLE handle = NULL);
775 };
776 
// A single text link; instances are returned by PageTextLinks::GetTextLink.
// NOTE(review): this listing carries no brief descriptions for the members of
// this class, so the notes below restate only what the declarations show;
// listing lines 853-859 are absent and may have held a further member - confirm
// against the real header.
785 class TextLink FS_FINAL : public Base{
786  public:
// Destructor.
788  ~TextLink();
// Copy constructor.
794  TextLink(const TextLink& other);
// Copy-assignment operator.
802  TextLink& operator = (const TextLink& other);
803 
// Equality comparison with another TextLink.
811  bool operator == (const TextLink& other) const;
// Inequality comparison with another TextLink.
819  bool operator != (const TextLink& other) const;
820 
// Check whether current object is empty or not.
828  bool IsEmpty() const;
829 
// Returns the URI of this link as a wide string.
838  WString GetURI();
839 
// Presumably the index of the first character covered by this link - TODO confirm.
845  int GetStartCharIndex();
846 
// Presumably the index of the last character covered by this link - TODO confirm.
852  int GetEndCharIndex();
853 
860  // Users are strongly advised NOT to use this constructor; otherwise unexpected behavior may occur.
861  explicit TextLink(FS_HANDLE handle = NULL);
862 
863 };
864 
// Collection of the text links associated with a text page: built from a
// TextPage and enumerated through GetTextLinkCount / GetTextLink.
// NOTE(review): this listing carries no brief descriptions for the members of
// this class, so the notes below restate only what the declarations show.
869 class PageTextLinks FS_FINAL : public Base{
870  public:
// Constructor taking a text page object.
876  explicit PageTextLinks(const TextPage& page);
// Copy constructor.
882  PageTextLinks(const PageTextLinks& other);
// Copy-assignment operator.
890  PageTextLinks& operator = (const PageTextLinks& other);
// Equality comparison with another PageTextLinks.
898  bool operator == (const PageTextLinks& other) const ;
// Inequality comparison with another PageTextLinks.
906  bool operator != (const PageTextLinks& other) const ;
907 
// Check whether current object is empty or not.
915  bool IsEmpty() const;
// Destructor.
917  ~PageTextLinks();
918 
// Returns the number of text links.
924  int GetTextLinkCount();
925 
// Returns the text link at the given index (presumably 0-based and below
// GetTextLinkCount() - TODO confirm).
934  TextLink GetTextLink(int index);
935 
936  // Users are strongly advised NOT to use this constructor; otherwise unexpected behavior may occur.
937  explicit PageTextLinks(FS_HANDLE handle = NULL);
938 };
939 } // namespace pdf
940 } // namespace foxit
941 #endif // FS_SEARCH_H_
942 
int GetMatchStartCharIndex() const
Get the index of the first character of current match pattern, based on current match page.
common::Range GetCharRange(const RectF &rect)
Get the character index range of all text rectangles within the specified rectangle region.
TextOrderFlag
Enumeration for text order flag which is used when getting text content of a PDF page.
Definition: fs_search.h:290
bool IsEmpty() const
Check whether current object is empty or not.
If set, match the case of keyword when searching.
Definition: fs_search.h:528
Definition: fs_common.h:991
TextSearch(const PDFDoc &document, SearchCancelCallback *cancel=0, int flags=foxit::pdf::TextPage::e_ParseTextNormal)
Constructor, for a PDF document.
Definition: fs_search.h:517
RectF char_box
The glyph bounding box in page space.
Definition: fs_search.h:241
CFX_Object Object
Object type.
Definition: fs_basictypes.h:217
bool FindPrev()
Search for previous matched pattern.
TextCharFlag
Enumeration for PDF textpage character flag.
Definition: fs_search.h:59
Character flag: Normal.
Definition: fs_search.h:63
TextPageCharInfo(const common::Font &font, TextCharFlag flag, float font_size, float origin_x, float origin_y, const RectF &char_box, const RectF &char_outbox, const Matrix &matrix)
Constructor, with parameters.
Definition: fs_search.h:90
RectFArray GetTextRectArrayByRect(const RectF &rect)
Get the array of all text rectangles within the specified rectangle region.
bool SetPattern(const wchar_t *key_words)
Set keywords to search.
common::Range GetWordAtPos(float x, float y, float tolerance) const
Get the character range of a word at or around a specified position on the page, in PDF coordinate system.
~TextSearch()
Destructor.
Character flag: Hyphen.
Definition: fs_search.h:69
bool SetStartPage(int page_index)
Set starting page index.
WIDE STRING CLASS.
Definition: fx_string.h:1452
common::Font font
A font for character.
Definition: fs_search.h:211
int GetMatchEndCharIndex() const
Get the index of the last character of current match pattern, based on current match page.
Character flag: Generated.
Definition: fs_search.h:65
RectF char_outbox
The typographic (display and printing) bounding box in page space.
Definition: fs_search.h:246
Definition: fs_pdfdoc.h:389
bool operator !=(const TextPageCharInfo &char_info) const
Not equal operator.
Definition: fs_search.h:170
If set, match the whole word of keyword when searching.
Definition: fs_search.h:530
bool operator==(const TextSearch &other) const
Equal operator.
TextPageCharInfo(const TextPageCharInfo &char_info)
Constructor, with another character information object.
Definition: fs_search.h:117
bool SetStartCharacter(int char_index)
Set starting character index, from where the search process is to be started.
TextParseFlags
Enumeration for parsing flags used for text page.
Definition: fs_search.h:276
Character flag: UnUnicode.
Definition: fs_search.h:67
If set, match the key word consecutively when searching. For example, "CC" will be matched twice in "...
Definition: fs_search.h:532
~TextPage()
Destructor.
int GetTextRectCount(int start=0, int count=-1)
Count the text rectangles within a range specified by a start index and count.
Definition: fs_xfa.h:897
bool operator !=(const TextSearch &other) const
Not equal operator.
RectF GetTextRect(int rect_index) const
Get the text rectangle by the index.
bool operator==(const TextPage &other) const
Equal operator.
TextSearch & operator=(const TextSearch &other)
Assign operator.
float font_size
Font size for character.
Definition: fs_search.h:226
Header file for annotation related definitions and classes.
TextCharFlag flag
Flags to indicate which properties of textpage character flag are meaningful.
Definition: fs_search.h:219
virtual bool NeedToCancelNow()=0
A callback function used to check whether to cancel the searching process or not.
WString GetText(TextOrderFlag flag) const
Get the page text.
int GetCharCount() const
Get the count of all the characters.
TextPageCharInfo GetCharInfo(int char_index)
Get character information of a specific character.
FX_UINT32 uint32
32-bit unsigned integer.
Definition: fs_basictypes.h:196
Definition: fs_pdfpage.h:367
void * FS_HANDLE
Handle type.
Definition: fs_basictypes.h:214
Header file for common definitions and classes.
int GetMatchPageIndex() const
Get the page index, to which current match belongs.
If this is set, that means to get text content of a PDF page by the display order.
Definition: fs_search.h:294
Parse the text content of a PDF page by normalizing characters based on their positions in the PDF page.
Definition: fs_search.h:278
WString GetTextInRect(const RectF &rect) const
Get the text within a rectangle, in PDF coordinate system.
TextPageCharInfo()
Constructor.
Definition: fs_search.h:103
bool IsEmpty() const
Check whether current object is empty or not.
Definition: fs_basictypes.h:397
Character flag: Unknown.
Definition: fs_search.h:61
Header file for XFA related definitions and functions.
Header file for PDF page related definitions and classes.
bool FindNext()
Search for next matched pattern.
~TextPageCharInfo()
Destructor.
Definition: fs_search.h:110
Definition: fs_annot.h:965
common::Rotation GetBaselineRotation(int rect_index)
Get the text trend (as rotation) of a specified rectangle.
int GetIndexAtPos(float x, float y, float tolerance) const
Get the character index at or around a specified position on the page, in PDF coordinate system.
Definition: fs_common.h:1146
TextPage & operator=(const TextPage &other)
Assign operator.
Rotation
Enumeration for rotation.
Definition: fs_common.h:56
Foxit namespace.
Definition: fs_compare.h:27
int GetMatchSentenceStartIndex()
Get the index of the first character of current match pattern, based on the match sentence.
TextPageCharInfo & operator=(const TextPageCharInfo &char_info)
Assign operator.
Definition: fs_search.h:135
Definition: fs_search.h:52
float origin_y
The y-coordinate of the origin position.
Definition: fs_search.h:236
WString GetTextUnderAnnot(annots::Annot &annot) const
Get the page text which intersects with a specified annotation.
Matrix matrix
The matrix of the character.
Definition: fs_search.h:251
WString GetChars(int start=0, int count=-1) const
Get all the characters within a range specified by a start index and count.
bool operator !=(const TextPage &other) const
Not equal operator.
#define NULL
The null-pointer value.
Definition: fx_system.h:767
No special searching options.
Definition: fs_search.h:526
Definition: fx_coordinates.h:1076
WString GetMatchSentence()
Get the sentence that contains current match pattern.
void Set(const common::Font &font, TextCharFlag flag, float font_size, float origin_x, float origin_y, const RectF &char_box, const RectF &char_outbox, const Matrix &matrix)
Set value.
Definition: fs_search.h:196
Parse the text content of a PDF page by the stream order.
Definition: fs_search.h:282
bool operator==(const TextPageCharInfo &char_info) const
Equal operator.
Definition: fs_search.h:154
SearchFlags
Enumeration for searching flags.
Definition: fs_search.h:524
TextPage(const PDFPage &page, int flags=foxit::pdf::TextPage::e_ParseTextNormal)
Constructor, from a parsed PDF page.
Definition: fs_search.h:269
Definition: fs_search.h:39
If this is set, that means to get text content of a PDF page by the stream order.
Definition: fs_search.h:292
Character flag: ComboWord.
Definition: fs_search.h:71
bool SetEndPage(int page_index)
Set ending page index.
bool SetSearchFlags(uint32 search_flags)
Set search flags.
Definition: fx_coordinates.h:771
RectFArray GetMatchRects() const
Get the rectangles of current match pattern.
Parse the text content of a PDF page with outputting the hyphen on a line feed.
Definition: fs_search.h:280
float origin_x
The x-coordinate of the origin position.
Definition: fs_search.h:231