Foxit PDF SDK
fs_search.h
Go to the documentation of this file.
1 
15 #ifndef FS_SEARCH_H_
16 #define FS_SEARCH_H_
17 
18 #include "common/fs_common.h"
19 #include "pdf/annots/fs_annot.h"
20 #include "pdf/fs_pdfpage.h"
21 #ifndef _FX_NO_XFA_
22 #include "addon/xfa/fs_xfa.h"
23 #endif // #ifndef _FX_NO_XFA_
24 
30 namespace foxit {
34 namespace pdf {
// Abstract callback interface with a single pure virtual cancellation check. A pointer to
// SearchCancelCallback is accepted by the TextSearch constructors declared later in this listing.
// NOTE(review): the class header line (fs_search.h:39) is absent from this listing -- presumably
// `class SearchCancelCallback`; confirm against the original header.
40 {
41  public:
// A callback function used to check whether to cancel the searching process or not.
// Pure virtual, so every concrete callback must implement it.
// NOTE(review): presumably a `true` return requests cancellation -- confirm.
48  virtual bool NeedToCancelNow() = 0;
49 };
50 
// Value type describing one character of a text page: font, character flag, font size, origin
// position, two bounding boxes and a matrix. Every special member below copies member by member.
52 class TextPageCharInfo FS_FINAL : public Object {
53  public:
// Enumeration for PDF textpage character flag.
// NOTE(review): the page also describes "Generated" (fs_search.h:65), "UnUnicode" (:67) and
// "ComboWord" (:71) flags, but those enumerator lines are absent from this listing.
59  typedef enum _TextCharFlag {
// Character flag: Unknown.
61  e_Unknown = -1,
// Character flag: Normal.
63  e_Normal = 0,
// Character flag: Hyphen.
69  e_Hyphen = 3,
72  } TextCharFlag;
73 
74 
// Constructor, with parameters. Each argument is assigned to the member of the same name inside
// the body (no member-initializer list is visible).
// NOTE(review): the first signature line (fs_search.h:90) is absent from this listing; per the
// page it takes (font, flag, font_size, origin_x, origin_y, char_box, char_outbox, matrix).
91  const RectF& char_box, const RectF& char_outbox, const Matrix& matrix) {
92  this->font = font;
93  this->flag = flag;
94  this->font_size = font_size;
95  this->origin_x = origin_x;
96  this->origin_y = origin_y;
97  this->char_box = char_box;
98  this->char_outbox = char_outbox;
99  this->matrix = matrix;
100  }
101 
// Default constructor (tail of its member-initializer list): zeroes font_size, origin_x and
// origin_y.
// NOTE(review): the signature and the first initializers (fs_search.h:103-104) are absent from
// this listing, so how `flag` is initialized cannot be seen here.
105  , font_size(0)
106  , origin_x(0)
107  , origin_y(0) {}
108 
// Constructor, with another character information object. Copies all eight members from
// char_info.
114  TextPageCharInfo(const TextPageCharInfo& char_info) {
115  this->font = char_info.font;
116  this->flag = char_info.flag;
117  this->font_size = char_info.font_size;
118  this->origin_x = char_info.origin_x;
119  this->origin_y = char_info.origin_y;
120  this->char_box = char_info.char_box;
121  this->char_outbox = char_info.char_outbox;
122  this->matrix = char_info.matrix;
123  }
124 
// Assign operator (body only). Copies all eight members from char_info and returns *this.
// There is no self-assignment guard; each member is simply reassigned.
// NOTE(review): the signature line (fs_search.h:132) is absent from this listing; per the page it
// is `TextPageCharInfo& operator=(const TextPageCharInfo& char_info)`.
133  this->font = char_info.font;
134  this->flag = char_info.flag;
135  this->font_size = char_info.font_size;
136  this->origin_x = char_info.origin_x;
137  this->origin_y = char_info.origin_y;
138  this->char_box = char_info.char_box;
139  this->char_outbox = char_info.char_outbox;
140  this->matrix = char_info.matrix;
141  return *this;
142  }
143 
// Equal operator. Returns false as soon as any member differs, true otherwise.
// The three float members count as different only when their absolute difference exceeds
// FLT_EPSILON; font, flag, both boxes and the matrix are compared with their own operator !=.
// NOTE(review): fabs / FLT_EPSILON are presumably made available through the included project
// headers -- no <cmath>/<cfloat> include is visible in this listing.
151  bool operator == (const TextPageCharInfo& char_info) const {
152  if (font != char_info.font || flag != char_info.flag || fabs(font_size - char_info.font_size) > FLT_EPSILON ||
153  fabs(origin_x - char_info.origin_x) > FLT_EPSILON || fabs(origin_y - char_info.origin_y) > FLT_EPSILON ||
154  char_box != char_info.char_box || char_outbox != char_info.char_outbox || matrix != char_info.matrix)
155  return false;
156 
157  return true;
158  }
159 
// Not equal operator. Evaluates the same condition as operator == with the two return values
// swapped, so it yields true exactly when operator == yields false.
167  bool operator != (const TextPageCharInfo& char_info) const{
168  if (font != char_info.font || flag != char_info.flag || fabs(font_size - char_info.font_size) > FLT_EPSILON ||
169  fabs(origin_x - char_info.origin_x) > FLT_EPSILON || fabs(origin_y - char_info.origin_y) > FLT_EPSILON ||
170  char_box != char_info.char_box || char_outbox != char_info.char_outbox || matrix != char_info.matrix)
171  return true;
172 
173  return false;
174  }
175 
// Set value. Overwrites all eight members with the given arguments; performs the same
// assignments as the parameterized constructor body above.
193  void Set(const common::Font& font, TextCharFlag flag, float font_size, float origin_x, float origin_y,
194  const RectF& char_box, const RectF& char_outbox, const Matrix& matrix) {
195  this->font = font;
196  this->flag = flag;
197  this->font_size = font_size;
198  this->origin_x = origin_x;
199  this->origin_y = origin_y;
200  this->char_box = char_box;
201  this->char_outbox = char_outbox;
202  this->matrix = matrix;
203  }
204 
// Data members.
// NOTE(review): the declarations of `font` (fs_search.h:208, "A font for character."), `flag`
// (:216), `char_box` (:238, glyph bounding box in page space), `char_outbox` (:243, typographic
// bounding box in page space) and `matrix` (:248, "The matrix of the character.") are absent
// from this listing; only the three float members remain visible.
209 
217 
// Font size for character.
223  float font_size;
224 
// The x-coordinate of the origin position.
228  float origin_x;
229 
// The y-coordinate of the origin position.
233  float origin_y;
234 
239 
244 
249 };
250 
// Text content of a PDF page: constructed from a PDFPage plus parsing flags, and queried by
// character index, position, rectangle or text rectangle index. Only declarations are visible
// here; the implementations live outside this header.
266 class TextPage FS_FINAL : public Base {
267  public:
// Enumeration for parsing flags used for text page.
// NOTE(review): the enumerator lines (fs_search.h:275 normal, :277 hyphen on line feed,
// :279 stream order) are absent from this listing.
273  typedef enum _TextParseFlags {
280  } TextParseFlags;
281 
// Enumeration for text order flag which is used when getting text content of a PDF page.
// NOTE(review): the enumerator lines (fs_search.h:289 stream order, :291 display order) are
// absent from this listing.
287  typedef enum _TextOrderFlag {
292  } TextOrderFlag;
293 
294 
// Constructor, from a parsed PDF page. `flags` defaults to e_ParseTextNormal.
302  explicit TextPage(const PDFPage& page, int flags = foxit::pdf::TextPage::e_ParseTextNormal);
303 
// Destructor.
305  ~TextPage();
// Constructor, with another text page object.
311  TextPage(const TextPage& other);
// Assign operator.
319  TextPage& operator = (const TextPage& other);
320 
// Equal operator.
328  bool operator == (const TextPage& other) const;
// Not equal operator.
336  bool operator != (const TextPage& other) const;
337 
// Check whether current object is empty or not.
345  bool IsEmpty() const;
346 
// Get the count of all the characters.
352  int GetCharCount() const;
353 
// Get character information of a specific character.
363  TextPageCharInfo GetCharInfo(int char_index);
364 
// Get all the characters within a range specified by a start index and count.
// Defaults are start = 0 and count = -1.
// NOTE(review): count = -1 presumably means "up to the last character" -- confirm.
379  WString GetChars(int start = 0, int count = -1) const;
380 
// Get the character index at or around a specified position on the page, in PDF coordinate
// system.
393  int GetIndexAtPos(float x, float y, float tolerance) const;
394 
// Get the text within a rectangle, in PDF coordinate system.
402  WString GetTextInRect(const RectF& rect) const;
403 
// Get the page text, in the order selected by `flag`.
412  WString GetText(TextOrderFlag flag) const;
413 
// Get the character range of a word at or around a specified position on the page.
428  common::Range GetWordAtPos(float x, float y, float tolerance) const;
429 
// Count the text rectangles within a range specified by a start index and count.
// Unlike most getters of this class, this one is not declared const.
441  int GetTextRectCount(int start = 0, int count = -1);
442 
// Get the text rectangle by the index.
// NOTE(review): rect_index presumably refers to the rectangles counted by the latest
// GetTextRectCount() call -- confirm.
452  RectF GetTextRect(int rect_index) const;
453 
// Get the text trend (as rotation) of a specified rectangle.
464  common::Rotation GetBaselineRotation(int rect_index);
465 
// NOTE(review): the page describes GetTextRectArrayByRect(const RectF&) around here, but its
// declaration line is absent from this listing.
474 
// Get the character index range of all text rectangles within the specified rectangle region.
482  common::Range GetCharRange(const RectF& rect);
483 
// NOTE(review): the page describes GetTextUnderAnnot(annots::Annot&) const around here, but its
// declaration line is absent from this listing.
498 
499  // User is strongly recommended NOT to use this method; otherwise unknown situation may occur.
500  explicit TextPage(FS_HANDLE handle = NULL);
501 };
502 
// Text search object. It can be built over a PDF document, an XFA document (only when
// _FX_NO_XFA_ is not defined), a single text page or an annotation, and is then driven through
// SetPattern / FindNext / FindPrev and the GetMatch* getters. Only declarations are visible here.
514 class TextSearch FS_FINAL : public Base {
515  public:
// Enumeration for searching flags.
// NOTE(review): the enumerator lines (fs_search.h:523 no special options, :525 match case,
// :527 match whole word, :529 match consecutively, :531 ignore full-width characters) are absent
// from this listing.
521  typedef enum _SearchFlags {
532  } SearchFlags;
533 
534 
// Constructor, for a PDF document. `cancel` is optional (defaults to NULL) and `flags` defaults
// to TextPage::e_ParseTextNormal.
// NOTE(review): `flags` is presumably a TextPage::TextParseFlags combination used when parsing
// pages during the search -- confirm.
550  explicit TextSearch(const PDFDoc& document, SearchCancelCallback* cancel = NULL, int flags = foxit::pdf::TextPage::e_ParseTextNormal);
551 
// The XFA overload below is compiled only when _FX_NO_XFA_ is not defined.
552  #ifndef _FX_NO_XFA_
553 
// Constructor, for an XFA document; `cancel` is optional (defaults to NULL).
564  explicit TextSearch(const foxit::addon::xfa::XFADoc& xfa_document, foxit::pdf::SearchCancelCallback* cancel = NULL);
565 #endif // #ifndef _FX_NO_XFA_
566 
// Constructor, for a single text page.
571  explicit TextSearch(const foxit::pdf::TextPage& text_page);
572 
// Constructor, for an annotation.
581  explicit TextSearch(const foxit::pdf::annots::Annot& annot);
582 
// Destructor.
584  ~TextSearch();
// Constructor, with another text search object.
590  TextSearch(const TextSearch& other);
// Assign operator.
598  TextSearch& operator = (const TextSearch& other);
599 
// Equal operator.
607  bool operator == (const TextSearch& other) const;
// Not equal operator.
615  bool operator != (const TextSearch& other) const;
616 
// Check whether current object is empty or not.
624  bool IsEmpty() const;
625 
// Set keywords to search. `is_regex_search` defaults to false.
// NOTE(review): the bool result of this and the other Set* methods presumably reports success --
// confirm.
636  bool SetPattern(const wchar_t* key_words, bool is_regex_search = false);
637 
// Set starting page index.
653  bool SetStartPage(int page_index);
654 
// Set ending page index.
670  bool SetEndPage(int page_index);
671 
// Set starting character index, from where the search process is to be started.
693  bool SetStartCharacter(int char_index);
694 
// Set search flags.
// NOTE(review): search_flags is presumably a combination of SearchFlags values -- confirm.
706  bool SetSearchFlags(uint32 search_flags);
707 
// Search for next matched pattern.
714  bool FindNext();
715 
// Search for previous matched pattern.
722  bool FindPrev();
723 
// Get the rectangles of current match pattern.
729  RectFArray GetMatchRects() const;
730 
// Get the page index, to which current match belongs.
739  int GetMatchPageIndex() const;
740 
// NOTE(review): the page describes GetMatchSentence(), GetMatchSentenceStartIndex() and
// GetMatchSentenceEndIndex() around here, but their declaration lines are absent from this
// listing.
747 
758 
769 
// Get the index of the first character of current match pattern, based on current match page.
776  int GetMatchStartCharIndex() const;
777 
// Get the index of the last character of current match pattern, based on current match page.
784  int GetMatchEndCharIndex() const;
785 
786  // User is strongly recommended NOT to use this method; otherwise unknown situation may occur.
787  explicit TextSearch(FS_HANDLE handle = NULL);
788 };
789 
// Class derived from Base that exposes a URI string plus a start and an end character index.
// Objects of this type are returned by PageTextLinks::GetTextLink (declared below in this
// listing). The only visible constructors are the copy constructor and the handle constructor.
// NOTE(review): presumably represents one hyperlink detected in page text, with indices relative
// to the owning text page -- confirm against the SDK reference.
798 class TextLink FS_FINAL : public Base{
799  public:
// Destructor.
801  ~TextLink();
// Copy constructor.
807  TextLink(const TextLink& other);
// Copy assignment operator.
815  TextLink& operator = (const TextLink& other);
816 
// Equality comparison with another TextLink.
824  bool operator == (const TextLink& other) const;
// Inequality comparison with another TextLink.
832  bool operator != (const TextLink& other) const;
833 
// Check whether current object is empty or not.
841  bool IsEmpty() const;
842 
// Returns the URI as a wide string. Not declared const.
851  WString GetURI();
852 
// Returns the start character index. Not declared const.
858  int GetStartCharIndex();
859 
// Returns the end character index. Not declared const.
// NOTE(review): whether the end index is inclusive is not visible here -- confirm.
865  int GetEndCharIndex();
866 
873  // User is strongly recommended NOT to use this method; otherwise unknown situation may occur.
874  explicit TextLink(FS_HANDLE handle = NULL);
875 
876 };
877 
// Class derived from Base that is constructed from a TextPage and hands out TextLink objects by
// index.
// NOTE(review): presumably the collection of all text links found on that page -- confirm
// against the SDK reference.
882 class PageTextLinks FS_FINAL : public Base{
883  public:
// Constructor, from a text page object.
889  explicit PageTextLinks(const TextPage& page);
// Copy constructor.
895  PageTextLinks(const PageTextLinks& other);
// Copy assignment operator.
903  PageTextLinks& operator = (const PageTextLinks& other);
// Equality comparison with another PageTextLinks.
911  bool operator == (const PageTextLinks& other) const ;
// Inequality comparison with another PageTextLinks.
919  bool operator != (const PageTextLinks& other) const ;
920 
// Check whether current object is empty or not.
928  bool IsEmpty() const;
// Destructor.
930  ~PageTextLinks();
931 
// Returns the number of text links. Not declared const.
937  int GetTextLinkCount();
938 
// Returns the text link at `index`.
// NOTE(review): valid indices are presumably 0 .. GetTextLinkCount() - 1 -- confirm.
947  TextLink GetTextLink(int index);
948 
949  // User is strongly recommended NOT to use this method; otherwise unknown situation may occur.
950  explicit PageTextLinks(FS_HANDLE handle = NULL);
951 };
952 } // namespace pdf
953 } // namespace foxit
954 #endif // FS_SEARCH_H_
955 
int GetMatchStartCharIndex() const
Get the index of the first character of current match pattern, based on current match page.
common::Range GetCharRange(const RectF &rect)
Get the character index range of all text rectangles within the specified rectangle region.
TextOrderFlag
Enumeration for text order flag which is used when getting text content of a PDF page.
Definition: fs_search.h:287
bool IsEmpty() const
Check whether current object is empty or not.
If set, match the case of keyword when searching.
Definition: fs_search.h:525
Definition: fs_common.h:1273
TextSearch(const PDFDoc &document, SearchCancelCallback *cancel=0, int flags=foxit::pdf::TextPage::e_ParseTextNormal)
Constructor, for a PDF document.
Definition: fs_search.h:514
RectF char_box
The glyph bounding box in page space.
Definition: fs_search.h:238
CFX_Object Object
Object type.
Definition: fs_basictypes.h:221
bool FindPrev()
Search for previous matched pattern.
TextCharFlag
Enumeration for PDF textpage character flag.
Definition: fs_search.h:59
Character flag: Normal.
Definition: fs_search.h:63
TextPageCharInfo(const common::Font &font, TextCharFlag flag, float font_size, float origin_x, float origin_y, const RectF &char_box, const RectF &char_outbox, const Matrix &matrix)
Constructor, with parameters.
Definition: fs_search.h:90
RectFArray GetTextRectArrayByRect(const RectF &rect)
Get the array of all text rectangles within the specified rectangle region.
common::Range GetWordAtPos(float x, float y, float tolerance) const
Get the character range of a word at or around a specified position on the page, in PDF coordinate system.
~TextSearch()
Destructor.
Character flag: Hyphen.
Definition: fs_search.h:69
bool SetStartPage(int page_index)
Set starting page index.
WIDE STRING CLASS.
Definition: fx_string.h:1461
common::Font font
A font for character.
Definition: fs_search.h:208
int GetMatchEndCharIndex() const
Get the index of the last character of current match pattern, based on current match page.
Character flag: Generated.
Definition: fs_search.h:65
RectF char_outbox
The typographic (display and printing) bounding box in page space.
Definition: fs_search.h:243
Definition: fs_pdfdoc.h:648
bool operator !=(const TextPageCharInfo &char_info) const
Not equal operator.
Definition: fs_search.h:167
If set, match the whole word of keyword when searching.
Definition: fs_search.h:527
bool operator==(const TextSearch &other) const
Equal operator.
TextPageCharInfo(const TextPageCharInfo &char_info)
Constructor, with another character information object.
Definition: fs_search.h:114
bool SetStartCharacter(int char_index)
Set starting character index, from where the search process is to be started.
TextParseFlags
Enumeration for parsing flags used for text page.
Definition: fs_search.h:273
Character flag: UnUnicode.
Definition: fs_search.h:67
If set, match the key word consecutively when searching. For example, "CC" will be matched twice in "CCC".
Definition: fs_search.h:529
~TextPage()
Destructor.
int GetTextRectCount(int start=0, int count=-1)
Count the text rectangles within a range specified by a start index and count.
Definition: fs_xfa.h:898
bool operator !=(const TextSearch &other) const
Not equal operator.
RectF GetTextRect(int rect_index) const
Get the text rectangle by the index.
bool operator==(const TextPage &other) const
Equal operator.
TextSearch & operator=(const TextSearch &other)
Assign operator.
float font_size
Font size for character.
Definition: fs_search.h:223
Header file for annotation related definitions and classes.
TextCharFlag flag
Flags to indicate which properties of textpage character flag are meaningful.
Definition: fs_search.h:216
virtual bool NeedToCancelNow()=0
A callback function used to check whether to cancel the searching process or not.
WString GetText(TextOrderFlag flag) const
Get the page text.
int GetCharCount() const
Get the count of all the characters.
TextPageCharInfo GetCharInfo(int char_index)
Get character information of a specific character.
FX_UINT32 uint32
32-bit unsigned integer.
Definition: fs_basictypes.h:196
Definition: fs_pdfpage.h:412
void * FS_HANDLE
Handle type.
Definition: fs_basictypes.h:214
Header file for common definitions and classes.
int GetMatchPageIndex() const
Get the page index, to which current match belongs.
If this is set, that means to get text content of a PDF page by the display order.
Definition: fs_search.h:291
Parse the text content of a PDF page by normalizing characters based on their positions in the PDF page.
Definition: fs_search.h:275
WString GetTextInRect(const RectF &rect) const
Get the text within a rectangle, in PDF coordinate system.
TextPageCharInfo()
Constructor.
Definition: fs_search.h:103
bool IsEmpty() const
Check whether current object is empty or not.
Definition: fs_basictypes.h:443
Character flag: Unknown.
Definition: fs_search.h:61
Header file for XFA related definitions and functions.
Header file for PDF page related definitions and classes.
bool FindNext()
Search for next matched pattern.
bool SetPattern(const wchar_t *key_words, bool is_regex_search=false)
Set keywords to search.
Definition: fs_annot.h:994
common::Rotation GetBaselineRotation(int rect_index)
Get the text trend (as rotation) of a specified rectangle.
int GetIndexAtPos(float x, float y, float tolerance) const
Get the character index at or around a specified position on the page, in PDF coordinate system.
Definition: fs_common.h:1428
TextPage & operator=(const TextPage &other)
Assign operator.
Rotation
Enumeration for rotation.
Definition: fs_common.h:57
Foxit namespace.
Definition: fs_pdf3d.h:27
int GetMatchSentenceStartIndex()
Get the index of the first character of current matched pattern, based on the matched sentence.
TextPageCharInfo & operator=(const TextPageCharInfo &char_info)
Assign operator.
Definition: fs_search.h:132
Definition: fs_search.h:52
float origin_y
The y-coordinate of the origin position.
Definition: fs_search.h:233
WString GetTextUnderAnnot(annots::Annot &annot) const
Get the page text which intersects with a specified annotation.
Matrix matrix
The matrix of the character.
Definition: fs_search.h:248
WString GetChars(int start=0, int count=-1) const
Get all the characters within a range specified by a start index and count.
bool operator !=(const TextPage &other) const
Not equal operator.
#define NULL
The null-pointer value.
Definition: fx_system.h:792
No special searching options.
Definition: fs_search.h:523
If set, ignore full-width characters and treat all characters as standard ASCII or standard-width characters.
Definition: fs_search.h:531
Definition: fx_coordinates.h:1076
WString GetMatchSentence()
Get the sentence that contains current match pattern.
void Set(const common::Font &font, TextCharFlag flag, float font_size, float origin_x, float origin_y, const RectF &char_box, const RectF &char_outbox, const Matrix &matrix)
Set value.
Definition: fs_search.h:193
int GetMatchSentenceEndIndex()
Get the index of the last character of current matched pattern, based on the matched sentence.
Parse the text content of a PDF page by the stream order.
Definition: fs_search.h:279
bool operator==(const TextPageCharInfo &char_info) const
Equal operator.
Definition: fs_search.h:151
SearchFlags
Enumeration for searching flags.
Definition: fs_search.h:521
TextPage(const PDFPage &page, int flags=foxit::pdf::TextPage::e_ParseTextNormal)
Constructor, from a parsed PDF page.
Definition: fs_search.h:266
Definition: fs_search.h:39
If this is set, that means to get text content of a PDF page by the stream order.
Definition: fs_search.h:289
Character flag: ComboWord.
Definition: fs_search.h:71
bool SetEndPage(int page_index)
Set ending page index.
bool SetSearchFlags(uint32 search_flags)
Set search flags.
Definition: fx_coordinates.h:771
RectFArray GetMatchRects() const
Get the rectangles of current match pattern.
Parse the text content of a PDF page with outputting the hyphen on a line feed.
Definition: fs_search.h:277
float origin_x
The x-coordinate of the origin position.
Definition: fs_search.h:228