Foxit PDF SDK
fs_search.h
Go to the documentation of this file.
1 
15 #ifndef FS_SEARCH_H_
16 #define FS_SEARCH_H_
17 
18 #include "common/fs_common.h"
19 #include "pdf/annots/fs_annot.h"
20 #include "pdf/fs_pdfpage.h"
21 #ifndef _FX_NO_XFA_
22 #include "addon/xfa/fs_xfa.h"
23 #endif // #ifndef _FX_NO_XFA_
24 
30 namespace foxit {
34 namespace pdf {
// Body of the abstract callback class foxit::pdf::SearchCancelCallback.
// NOTE(review): the `class SearchCancelCallback` header line (listing line 39)
// is missing from this extract; only the class body is visible here.
40 {
41  public:
    // A callback function used to check whether to cancel the searching process or not.
    // Pure virtual, so it must be implemented by a derived class.
    // NOTE(review): which return value means "cancel" is not shown in this
    // extract (presumably true) - confirm against the SDK reference.
48  virtual bool NeedToCancelNow() = 0;
49 };
50 
// Value type describing a single text-page character: its font, character
// flag, font size, origin position, two bounding boxes and matrix.
// NOTE(review): this listing is an extract; several declaration lines are
// missing (see the notes at each gap below).
52 class TextPageCharInfo FS_FINAL : public Object {
53  public:
    // Enumeration for PDF textpage character flag.
    // NOTE(review): the enumerators e_Generated (listing line 65),
    // e_UnUnicode (67) and e_ComboWord (71) are missing from this extract, so
    // their numeric values are not visible here.
59  typedef enum _TextCharFlag {
    // Character flag: Unknown.
61  e_Unknown = -1,
    // Character flag: Normal.
63  e_Normal = 0,
    // Character flag: Hyphen.
69  e_Hyphen = 3,
72  } TextCharFlag;
73 
74 
    // Constructor, with parameters. Copies each argument into the member of
    // the same name.
    // NOTE(review): the first line of the signature (listing line 90:
    // font, flag, font_size, origin_x, origin_y) is missing from this extract.
91  const RectF& char_box, const RectF& char_outbox, const Matrix& matrix) {
92  this->font = font;
93  this->flag = flag;
94  this->font_size = font_size;
95  this->origin_x = origin_x;
96  this->origin_y = origin_y;
97  this->char_box = char_box;
98  this->char_outbox = char_outbox;
99  this->matrix = matrix;
100  }
101 
    // Default constructor (tail of the member-initializer list): font_size,
    // origin_x and origin_y are initialized to 0.
    // NOTE(review): the constructor head and any earlier initializers
    // (listing lines 103-104) are missing, so how `flag` is initialized
    // cannot be confirmed from here.
105  , font_size(0)
106  , origin_x(0)
107  , origin_y(0) {}
108 
    // Constructor, with another character information object.
    // Performs a member-by-member copy of all eight fields.
114  TextPageCharInfo(const TextPageCharInfo& char_info) {
115  this->font = char_info.font;
116  this->flag = char_info.flag;
117  this->font_size = char_info.font_size;
118  this->origin_x = char_info.origin_x;
119  this->origin_y = char_info.origin_y;
120  this->char_box = char_info.char_box;
121  this->char_outbox = char_info.char_outbox;
122  this->matrix = char_info.matrix;
123  }
124 
    // Assign operator: member-by-member copy, returns *this.
    // NOTE(review): the signature line (listing line 132,
    // `TextPageCharInfo& operator=(const TextPageCharInfo& char_info)`) is
    // missing from this extract.
133  this->font = char_info.font;
134  this->flag = char_info.flag;
135  this->font_size = char_info.font_size;
136  this->origin_x = char_info.origin_x;
137  this->origin_y = char_info.origin_y;
138  this->char_box = char_info.char_box;
139  this->char_outbox = char_info.char_outbox;
140  this->matrix = char_info.matrix;
141  return *this;
142  }
143 
    // Equal operator. Returns true only when every field matches; the three
    // float fields are compared with an absolute tolerance of FLT_EPSILON
    // rather than exact equality.
    // NOTE(review): fabs/FLT_EPSILON are not declared by any header included
    // directly in this file - presumably they come in via
    // "common/fs_common.h"; confirm.
151  bool operator == (const TextPageCharInfo& char_info) const {
152  if (font != char_info.font || flag != char_info.flag || fabs(font_size - char_info.font_size) > FLT_EPSILON ||
153  fabs(origin_x - char_info.origin_x) > FLT_EPSILON || fabs(origin_y - char_info.origin_y) > FLT_EPSILON ||
154  char_box != char_info.char_box || char_outbox != char_info.char_outbox || matrix != char_info.matrix)
155  return false;
156 
157  return true;
158  }
159 
    // Not equal operator. Uses the same condition as operator== with the
    // result inverted (the comparison is duplicated, not delegated).
167  bool operator != (const TextPageCharInfo& char_info) const{
168  if (font != char_info.font || flag != char_info.flag || fabs(font_size - char_info.font_size) > FLT_EPSILON ||
169  fabs(origin_x - char_info.origin_x) > FLT_EPSILON || fabs(origin_y - char_info.origin_y) > FLT_EPSILON ||
170  char_box != char_info.char_box || char_outbox != char_info.char_outbox || matrix != char_info.matrix)
171  return true;
172 
173  return false;
174  }
175 
    // Set value. Overwrites all eight fields with the given arguments; same
    // assignments as the parameterized constructor.
193  void Set(const common::Font& font, TextCharFlag flag, float font_size, float origin_x, float origin_y,
194  const RectF& char_box, const RectF& char_outbox, const Matrix& matrix) {
195  this->font = font;
196  this->flag = flag;
197  this->font_size = font_size;
198  this->origin_x = origin_x;
199  this->origin_y = origin_y;
200  this->char_box = char_box;
201  this->char_outbox = char_outbox;
202  this->matrix = matrix;
203  }
204 
    // NOTE(review): the member `common::Font font` (listing line 208,
    // "A font for character.") is missing from this extract.
209 
    // NOTE(review): the member `TextCharFlag flag` (listing line 216) is
    // missing from this extract.
217 
    // Font size for character.
223  float font_size;
224 
    // The x-coordinate of the origin position.
228  float origin_x;
229 
    // The y-coordinate of the origin position.
233  float origin_y;
234 
    // NOTE(review): the member `RectF char_box` (listing line 238, the glyph
    // bounding box in page space) is missing from this extract.
239 
    // NOTE(review): the member `RectF char_outbox` (listing line 243, the
    // typographic bounding box in page space) is missing from this extract.
244 
    // NOTE(review): the member `Matrix matrix` (listing line 248, the matrix
    // of the character) is missing from this extract.
249 };
250 
// Text-page class: declares character access, text extraction and
// text-rectangle queries for a PDF page. Declarations only; the
// implementations are not part of this listing.
266 class TextPage FS_FINAL : public Base {
267  public:
    // Enumeration for parsing flags used for text page.
    // NOTE(review): the enumerators e_ParseTextNormal (listing line 275),
    // e_ParseTextOutputHyphen (277) and e_ParseTextUseStreamOrder (279) are
    // missing from this extract.
273  typedef enum _TextParseFlags {
280  } TextParseFlags;
281 
    // Enumeration for text order flag which is used when getting text content
    // of a PDF page.
    // NOTE(review): the enumerators e_TextStreamOrder (listing line 289) and
    // e_TextDisplayOrder (291) are missing from this extract.
287  typedef enum _TextOrderFlag {
292  } TextOrderFlag;
293 
294 
    // Constructor, from a parsed PDF page. `flags` defaults to
    // e_ParseTextNormal.
302  explicit TextPage(const PDFPage& page, int flags = foxit::pdf::TextPage::e_ParseTextNormal);
303 
    // Destructor.
305  ~TextPage();
    // Copy constructor.
311  TextPage(const TextPage& other);
    // Assign operator.
319  TextPage& operator = (const TextPage& other);
320 
    // Equal operator.
328  bool operator == (const TextPage& other) const;
    // Not equal operator.
336  bool operator != (const TextPage& other) const;
337 
    // Check whether current object is empty or not.
345  bool IsEmpty() const;
346 
    // Get the count of all the characters.
352  int GetCharCount() const;
353 
    // Get character information of a specific character.
363  TextPageCharInfo GetCharInfo(int char_index);
364 
    // Get all the characters within a range specified by a start index and
    // count. Defaults: start = 0, count = -1.
    // NOTE(review): count = -1 presumably means "to the end" - confirm.
379  WString GetChars(int start = 0, int count = -1) const;
380 
    // Get the character index at or around a specified position on the page,
    // in PDF coordinate system.
393  int GetIndexAtPos(float x, float y, float tolerance) const;
394 
    // Get the text within a rectangle, in PDF coordinate system.
402  WString GetTextInRect(const RectF& rect) const;
403 
    // Get the page text, in the order selected by `flag`.
412  WString GetText(TextOrderFlag flag) const;
413 
    // Get the character range of a word at or around a specified position on
    // the page.
428  common::Range GetWordAtPos(float x, float y, float tolerance) const;
429 
    // Count the text rectangles within a range specified by a start index and
    // count. Defaults: start = 0, count = -1.
441  int GetTextRectCount(int start = 0, int count = -1);
442 
    // Get the text rectangle by the index.
452  RectF GetTextRect(int rect_index) const;
453 
    // Get the text trend (as rotation) of a specified rectangle.
464  common::Rotation GetBaselineRotation(int rect_index);
465 
    // NOTE(review): the page's symbol index also lists
    // GetTextUnderAnnot(annots::Annot&) const and
    // GetTextRectArrayByRect(const RectF&); their declarations do not appear
    // in this extract (presumably dropped around listing lines 473 and 497).
474 
    // Get the character index range of all text rectangles within the
    // specified rectangle region.
482  common::Range GetCharRange(const RectF& rect);
483 
498 
499  // User is strongly recommended NOT to use this method; otherwise unknown situation may occur.
500  explicit TextPage(FS_HANDLE handle = NULL);
501 };
502 
// Text-search class: configure a pattern, page range and flags, then step
// through matches with FindNext()/FindPrev() and query the current match.
// Declarations only; the implementations are not part of this listing.
514 class TextSearch FS_FINAL : public Base {
515  public:
    // Enumeration for searching flags.
    // NOTE(review): the enumerators e_SearchNormal (listing line 523),
    // e_SearchMatchCase (525), e_SearchMatchWholeWord (527),
    // e_SearchConsecutive (529) and e_SearchNotMatchFullWidth (531) are
    // missing from this extract.
521  typedef enum _SearchFlags {
532  } SearchFlags;
533 
534 
    // Constructor, for a PDF document. `cancel` is an optional cancel
    // callback (defaults to NULL); `flags` defaults to the TextPage parsing
    // flag e_ParseTextNormal.
550  explicit TextSearch(const PDFDoc& document, SearchCancelCallback* cancel = NULL, int flags = foxit::pdf::TextPage::e_ParseTextNormal);
551 
    // The XFA overload below is only declared when _FX_NO_XFA_ is not defined.
552  #ifndef _FX_NO_XFA_
553 
    // Constructor, for an XFA document, with an optional cancel callback.
564  explicit TextSearch(const foxit::addon::xfa::XFADoc& xfa_document, foxit::pdf::SearchCancelCallback* cancel = NULL);
565 #endif // #ifndef _FX_NO_XFA_
566 
    // Constructor, for a text page.
571  explicit TextSearch(const foxit::pdf::TextPage& text_page);
572 
    // Constructor, for an annotation.
581  explicit TextSearch(const foxit::pdf::annots::Annot& annot);
582 
    // Destructor.
584  ~TextSearch();
    // Copy constructor.
590  TextSearch(const TextSearch& other);
    // Assign operator.
598  TextSearch& operator = (const TextSearch& other);
599 
    // Equal operator.
607  bool operator == (const TextSearch& other) const;
    // Not equal operator.
615  bool operator != (const TextSearch& other) const;
616 
    // Check whether current object is empty or not.
624  bool IsEmpty() const;
625 
    // Set keywords to search.
633  bool SetPattern(const wchar_t* key_words);
634 
    // Set starting page index.
650  bool SetStartPage(int page_index);
651 
    // Set ending page index.
667  bool SetEndPage(int page_index);
668 
    // Set starting character index, from where the search process is to be
    // started.
690  bool SetStartCharacter(int char_index);
691 
    // Set search flags.
    // NOTE(review): presumably a combination of SearchFlags values - confirm.
703  bool SetSearchFlags(uint32 search_flags);
704 
    // Search for next matched pattern.
711  bool FindNext();
712 
    // Search for previous matched pattern.
719  bool FindPrev();
720 
    // Get the rectangles of current match pattern.
726  RectFArray GetMatchRects() const;
727 
    // Get the page index, to which current match belongs.
736  int GetMatchPageIndex() const;
737 
    // NOTE(review): the page's symbol index also lists GetMatchSentence(),
    // GetMatchSentenceStartIndex() and GetMatchSentenceEndIndex(); their
    // declarations do not appear in this extract (presumably dropped around
    // the three gaps below).
744 
755 
766 
    // Get the index of the first character of current match pattern, based on
    // current match page.
773  int GetMatchStartCharIndex() const;
774 
    // Get the index of the last character of current match pattern, based on
    // current match page.
781  int GetMatchEndCharIndex() const;
782 
783  // User is strongly recommended NOT to use this method; otherwise unknown situation may occur.
784  explicit TextSearch(FS_HANDLE handle = NULL);
785 };
786 
// Text-link class: exposes a URI and a start/end character index.
// Declarations only; the implementations are not part of this listing.
// NOTE(review): presumably represents a hyperlink detected in page text
// (instances are returned by PageTextLinks::GetTextLink) - confirm against
// the SDK reference.
795 class TextLink FS_FINAL : public Base{
796  public:
    // Destructor.
798  ~TextLink();
    // Copy constructor.
804  TextLink(const TextLink& other);
    // Assign operator.
812  TextLink& operator = (const TextLink& other);
813 
    // Equal operator.
821  bool operator == (const TextLink& other) const;
    // Not equal operator.
829  bool operator != (const TextLink& other) const;
830 
    // Check whether current object is empty or not.
838  bool IsEmpty() const;
839 
    // Get the URI of this link as a wide string.
    // Note: this getter and the two below are declared non-const, unlike
    // IsEmpty().
848  WString GetURI();
849 
    // Get the start character index.
    // NOTE(review): presumably an index into the owning text page - confirm.
855  int GetStartCharIndex();
856 
    // Get the end character index.
    // NOTE(review): whether the end index is inclusive is not shown here.
862  int GetEndCharIndex();
863 
870  // User is strongly recommended NOT to use this method; otherwise unknown situation may occur.
871  explicit TextLink(FS_HANDLE handle = NULL);
872 
873 };
874 
// Collection of the TextLink objects associated with a TextPage: constructed
// from a text page, then queried by count and index.
// Declarations only; the implementations are not part of this listing.
879 class PageTextLinks FS_FINAL : public Base{
880  public:
    // Constructor, from a text page.
886  explicit PageTextLinks(const TextPage& page);
    // Copy constructor.
892  PageTextLinks(const PageTextLinks& other);
    // Assign operator.
900  PageTextLinks& operator = (const PageTextLinks& other);
    // Equal operator.
908  bool operator == (const PageTextLinks& other) const ;
    // Not equal operator.
916  bool operator != (const PageTextLinks& other) const ;
917 
    // Check whether current object is empty or not.
925  bool IsEmpty() const;
    // Destructor.
927  ~PageTextLinks();
928 
    // Get the count of text links.
934  int GetTextLinkCount();
935 
    // Get a text link by index.
    // NOTE(review): the valid range is presumably
    // 0 .. GetTextLinkCount() - 1 - confirm.
944  TextLink GetTextLink(int index);
945 
946  // User is strongly recommended NOT to use this method; otherwise unknown situation may occur.
947  explicit PageTextLinks(FS_HANDLE handle = NULL);
948 };
949 } // namespace pdf
950 } // namespace foxit
951 #endif // FS_SEARCH_H_
952 
foxit::pdf::TextPage::TextParseFlags
TextParseFlags
Enumeration for parsing flags used for text page.
Definition: fs_search.h:273
foxit::pdf::TextSearch::e_SearchMatchCase
If set, match the case of keyword when searching.
Definition: fs_search.h:525
foxit::pdf::TextPage::IsEmpty
bool IsEmpty() const
Check whether current object is empty or not.
foxit::pdf::TextPage::e_TextStreamOrder
If this is set, that means to get text content of a PDF page by the stream order.
Definition: fs_search.h:289
foxit::pdf::TextPageCharInfo::origin_x
float origin_x
The x-coordinate of the origin position.
Definition: fs_search.h:228
foxit::pdf::TextPageCharInfo::char_box
RectF char_box
The glyph bounding box in page space.
Definition: fs_search.h:238
foxit::FS_HANDLE
void * FS_HANDLE
Handle type.
Definition: fs_basictypes.h:214
foxit::pdf::TextPage::e_ParseTextOutputHyphen
Parse the text content of a PDF page with outputting the hyphen on a line feed.
Definition: fs_search.h:277
foxit::pdf::TextPage::GetTextRectArrayByRect
RectFArray GetTextRectArrayByRect(const RectF &rect)
Get the array of all text rectangles within the specified rectangle region.
foxit::pdf::annots::Annot
Definition: fs_annot.h:996
foxit::pdf::TextPageCharInfo::font
common::Font font
A font for character.
Definition: fs_search.h:208
foxit::pdf::TextPage::GetCharCount
int GetCharCount() const
Get the count of all the characters.
foxit::pdf::TextSearch::SetEndPage
bool SetEndPage(int page_index)
Set ending page index.
foxit::pdf::TextSearch::operator!=
bool operator!=(const TextSearch &other) const
Not equal operator.
foxit::Object
CFX_Object Object
Object type.
Definition: fs_basictypes.h:221
foxit::pdf::TextPageCharInfo::flag
TextCharFlag flag
Flags to indicate which properties of textpage character flag are meaningful.
Definition: fs_search.h:216
foxit::pdf::TextSearch::GetMatchStartCharIndex
int GetMatchStartCharIndex() const
Get the index of the first character of current match pattern, based on current match page.
foxit::pdf::TextPageCharInfo::matrix
Matrix matrix
The matrix of the character.
Definition: fs_search.h:248
foxit::pdf::TextPage::operator=
TextPage & operator=(const TextPage &other)
Assign operator.
foxit::pdf::SearchCancelCallback::NeedToCancelNow
virtual bool NeedToCancelNow()=0
A callback function used to check whether to cancel the searching process or not.
foxit::pdf::TextPageCharInfo::origin_y
float origin_y
The y-coordinate of the origin position.
Definition: fs_search.h:233
foxit::pdf::TextPage::GetTextRect
RectF GetTextRect(int rect_index) const
Get the text rectangle by the index.
foxit::pdf::TextPage::e_TextDisplayOrder
If this is set, that means to get text content of a PDF page by the display order.
Definition: fs_search.h:291
foxit::pdf::TextPage
Definition: fs_search.h:266
foxit::pdf::TextPage::GetWordAtPos
common::Range GetWordAtPos(float x, float y, float tolerance) const
Get the character range of a word at or around a specified position on the page, in PDF coordinate system.
foxit::pdf::TextPageCharInfo::e_Hyphen
Character flag: Hyphen.
Definition: fs_search.h:69
fs_common.h
Header file for common definitions and classes.
foxit::pdf::TextSearch::GetMatchRects
RectFArray GetMatchRects() const
Get the rectangles of current match pattern.
CFX_ArrayTemplate< RectF >
foxit::pdf::TextPageCharInfo::e_Generated
Character flag: Generated.
Definition: fs_search.h:65
foxit::pdf::TextPageCharInfo::char_outbox
RectF char_outbox
The typographic (display and printing) bounding box in page space.
Definition: fs_search.h:243
foxit::pdf::TextPage::GetBaselineRotation
common::Rotation GetBaselineRotation(int rect_index)
Get the text trend (as rotation) of a specified rectangle.
foxit::pdf::TextPage::GetCharRange
common::Range GetCharRange(const RectF &rect)
Get the character index range of all text rectangles within the specified rectangle region.
foxit::pdf::TextSearch::GetMatchEndCharIndex
int GetMatchEndCharIndex() const
Get the index of the last character of current match pattern, based on current match page.
foxit::pdf::TextSearch::operator==
bool operator==(const TextSearch &other) const
Equal operator.
foxit::pdf::TextPageCharInfo
Definition: fs_search.h:52
foxit::pdf::TextPage::e_ParseTextUseStreamOrder
Parse the text content of a PDF page by the stream order.
Definition: fs_search.h:279
foxit::pdf::TextPage::GetText
WString GetText(TextOrderFlag flag) const
Get the page text.
foxit::pdf::TextSearch::SetStartCharacter
bool SetStartCharacter(int char_index)
Set starting character index, from where the search process is to be started.
foxit::pdf::TextSearch::FindPrev
bool FindPrev()
Search for previous matched pattern.
foxit::pdf::TextPageCharInfo::TextPageCharInfo
TextPageCharInfo(const TextPageCharInfo &char_info)
Constructor, with another character information object.
Definition: fs_search.h:114
foxit::pdf::TextPageCharInfo::font_size
float font_size
Font size for character.
Definition: fs_search.h:223
fs_xfa.h
Header file for XFA related definitions and functions.
foxit::pdf::TextPageCharInfo::operator=
TextPageCharInfo & operator=(const TextPageCharInfo &char_info)
Assign operator.
Definition: fs_search.h:132
foxit::pdf::TextPageCharInfo::e_UnUnicode
Character flag: UnUnicode.
Definition: fs_search.h:67
foxit::pdf::TextPage::GetChars
WString GetChars(int start=0, int count=-1) const
Get all the characters within a range specified by a start index and count.
foxit::pdf::TextSearch::SetPattern
bool SetPattern(const wchar_t *key_words)
Set keywords to search.
foxit::pdf::TextSearch::GetMatchPageIndex
int GetMatchPageIndex() const
Get the page index, to which current match belongs.
foxit::pdf::TextSearch::SetStartPage
bool SetStartPage(int page_index)
Set starting page index.
foxit::pdf::TextPage::GetIndexAtPos
int GetIndexAtPos(float x, float y, float tolerance) const
Get the character index at or around a specified position on the page, in PDF coordinate system.
foxit::common::Rotation
Rotation
Enumeration for rotation.
Definition: fs_common.h:57
foxit::pdf::TextPageCharInfo::e_Normal
Character flag: Normal.
Definition: fs_search.h:63
foxit::pdf::TextSearch::e_SearchMatchWholeWord
If set, match the whole word of keyword when searching.
Definition: fs_search.h:527
foxit::pdf::TextSearch::e_SearchConsecutive
If set, match the key word consecutively when searching. For example, "CC" will be matched twice in "CCC".
Definition: fs_search.h:529
foxit::pdf::TextPageCharInfo::TextCharFlag
TextCharFlag
Enumeration for PDF textpage character flag.
Definition: fs_search.h:59
fs_pdfpage.h
Header file for PDF page related definitions and classes.
foxit
Foxit namespace.
Definition: fs_taggedpdf.h:27
foxit::pdf::TextPage::e_ParseTextNormal
Parse the text content of a PDF page by normalizing characters based on their positions in the PDF page.
Definition: fs_search.h:275
foxit::pdf::TextPage::TextPage
TextPage(const PDFPage &page, int flags=foxit::pdf::TextPage::e_ParseTextNormal)
Constructor, from a parsed PDF page.
foxit::addon::xfa::XFADoc
Definition: fs_xfa.h:898
foxit::pdf::TextSearch::operator=
TextSearch & operator=(const TextSearch &other)
Assign operator.
foxit::pdf::TextPage::GetTextInRect
WString GetTextInRect(const RectF &rect) const
Get the text within a rectangle, in PDF coordinate system.
foxit::pdf::TextPageCharInfo::TextPageCharInfo
TextPageCharInfo(const common::Font &font, TextCharFlag flag, float font_size, float origin_x, float origin_y, const RectF &char_box, const RectF &char_outbox, const Matrix &matrix)
Constructor, with parameters.
Definition: fs_search.h:90
foxit::pdf::TextSearch::FindNext
bool FindNext()
Search for next matched pattern.
foxit::pdf::TextPageCharInfo::TextPageCharInfo
TextPageCharInfo()
Constructor.
Definition: fs_search.h:103
foxit::pdf::TextSearch::IsEmpty
bool IsEmpty() const
Check whether current object is empty or not.
NULL
#define NULL
The null-pointer value.
Definition: fx_system.h:780
foxit::pdf::TextSearch::e_SearchNormal
No special searching options.
Definition: fs_search.h:523
CFX_FloatRect
Definition: fx_coordinates.h:771
foxit::pdf::TextPageCharInfo::Set
void Set(const common::Font &font, TextCharFlag flag, float font_size, float origin_x, float origin_y, const RectF &char_box, const RectF &char_outbox, const Matrix &matrix)
Set value.
Definition: fs_search.h:193
foxit::pdf::TextSearch::SearchFlags
SearchFlags
Enumeration for searching flags.
Definition: fs_search.h:521
foxit::pdf::TextSearch
Definition: fs_search.h:514
foxit::pdf::PDFDoc
Definition: fs_pdfdoc.h:610
foxit::pdf::PDFPage
Definition: fs_pdfpage.h:412
foxit::pdf::SearchCancelCallback
Definition: fs_search.h:39
foxit::pdf::TextPageCharInfo::e_Unknown
Character flag: Unknown.
Definition: fs_search.h:61
foxit::pdf::TextSearch::TextSearch
TextSearch(const PDFDoc &document, SearchCancelCallback *cancel=0, int flags=foxit::pdf::TextPage::e_ParseTextNormal)
Constructor, for a PDF document.
foxit::pdf::TextPageCharInfo::e_ComboWord
Character flag: ComboWord.
Definition: fs_search.h:71
foxit::pdf::TextSearch::SetSearchFlags
bool SetSearchFlags(uint32 search_flags)
Set search flags.
foxit::pdf::TextPage::~TextPage
~TextPage()
Destructor.
foxit::common::Font
Definition: fs_common.h:1391
CFX_Matrix
Definition: fx_coordinates.h:1076
foxit::pdf::TextSearch::e_SearchNotMatchFullWidth
If set, to ignore full-width characters and treat all characters as standard ASCII or standard-width ...
Definition: fs_search.h:531
fs_annot.h
Header file for annotation related definitions and classes.
foxit::pdf::TextSearch::GetMatchSentenceEndIndex
int GetMatchSentenceEndIndex()
Get the index of the last character of current matched pattern, based on the matched sentence.
foxit::pdf::TextPageCharInfo::operator==
bool operator==(const TextPageCharInfo &char_info) const
Equal operator.
Definition: fs_search.h:151
CFX_WideString
WIDE STRING CLASS.
Definition: fx_string.h:1452
foxit::pdf::TextSearch::GetMatchSentence
WString GetMatchSentence()
Get the sentence that contains current match pattern.
foxit::common::Range
Definition: fs_common.h:1236
foxit::pdf::TextPage::GetTextUnderAnnot
WString GetTextUnderAnnot(annots::Annot &annot) const
Get the page text which intersects with a specified annotation.
foxit::pdf::TextPage::GetTextRectCount
int GetTextRectCount(int start=0, int count=-1)
Count the text rectangles within a range specified by a start index and count.
foxit::uint32
FX_UINT32 uint32
32-bit unsigned integer.
Definition: fs_basictypes.h:196
foxit::pdf::TextPage::operator!=
bool operator!=(const TextPage &other) const
Not equal operator.
foxit::pdf::TextSearch::GetMatchSentenceStartIndex
int GetMatchSentenceStartIndex()
Get the index of the first character of current matched pattern, based on the matched sentence.
foxit::pdf::TextSearch::~TextSearch
~TextSearch()
Destructor.
foxit::Base
Definition: fs_basictypes.h:427
foxit::pdf::TextPage::operator==
bool operator==(const TextPage &other) const
Equal operator.
foxit::pdf::TextPage::TextOrderFlag
TextOrderFlag
Enumeration for text order flag which is used when getting text content of a PDF page.
Definition: fs_search.h:287
foxit::pdf::TextPage::GetCharInfo
TextPageCharInfo GetCharInfo(int char_index)
Get character information of a specific character.
foxit::pdf::TextPageCharInfo::operator!=
bool operator!=(const TextPageCharInfo &char_info) const
Not equal operator.
Definition: fs_search.h:167