module Pcre:sig..end
Perl Compatibility Regular Expressions
type error =
  | Partial
      (* String only matched the pattern partially *)
  | BadPartial
      (* Pattern contains items that cannot be used together with partial matching. *)
  | BadPattern of string * int
      (* BadPattern (msg, pos): regular expression is malformed. The reason is in msg, the position of the error in the pattern in pos. *)
  | BadUTF8
      (* UTF8 string being matched is invalid *)
  | BadUTF8Offset
      (* Gets raised when a UTF8 string being matched with offset is invalid. *)
  | MatchLimit
      (* Maximum allowed number of match attempts with backtracking or recursion is reached during matching. ALL FUNCTIONS CALLING THE MATCHING ENGINE MAY RAISE IT!!! *)
  | RecursionLimit
  | InternalError of string
      (* InternalError msg: C-library exhibits unknown/undefined behaviour. The reason is in msg. *)
exception Error of error
Exception indicating PCRE errors.
exception Backtrack
Backtrack used in callout functions to force backtracking.
exception Regexp_or of string * error
Regexp_or (pat, error) gets raised for sub-pattern pat by regexp_or
if it failed to compile.
type icflag
Internal representation of compilation flags
type irflag
Internal representation of runtime flags
type cflag = [ `ANCHORED
| `AUTO_CALLOUT
| `CASELESS
| `DOLLAR_ENDONLY
| `DOTALL
| `EXTENDED
| `EXTRA
| `FIRSTLINE
| `MULTILINE
| `NO_AUTO_CAPTURE
| `NO_UTF8_CHECK
| `UNGREEDY
| `UTF8 ]
Compilation flags
val cflags : cflag list -> icflag
cflags cflag_list converts a list of compilation flags to
their internal representation.
val cflag_list : icflag -> cflag list
cflag_list cflags converts internal representation of
compilation flags to a list.
type rflag = [ `ANCHORED | `NOTBOL | `NOTEMPTY | `NOTEOL | `PARTIAL ]
Runtime flags
val rflags : rflag list -> irflag
rflags rflag_list converts a list of runtime flags to
their internal representation.
val rflag_list : irflag -> rflag list
rflag_list rflags converts internal representation of
runtime flags to a list.
val version : string
Version information
Version of the PCRE-C-library
val config_utf8 : bool
Indicates whether UTF8-support is enabled
val config_newline : char
Character used as newline
val config_link_size : int
Number of bytes used for internal linkage of regular expressions
val config_match_limit : int
Default limit for calls to internal matching function
val config_match_limit_recursion : int
Default limit recursion for calls to internal matching function
val config_stackrecurse : bool
Indicates use of stack recursion in matching function
type firstbyte_info = [ `ANCHORED | `Char of char | `Start_only ]
Information on matching of "first chars" in patterns
type study_stat = [ `Not_studied | `Optimal | `Studied ]
Information on the study status of patterns
type regexp
Compiled regular expressions
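val options : regexp -> icflag
options regexp
Returns compilation flags of regexp.
val size : regexp -> int
size regexp
Returns memory size of regexp.
val studysize : regexp -> int
studysize regexp
Returns memory size of study information of regexp.
val capturecount : regexp -> int
capturecount regexp
Returns number of capturing subpatterns in regexp.
val backrefmax : regexp -> int
backrefmax regexp
Returns number of highest backreference in regexp.
val namecount : regexp -> int
namecount regexp
Returns number of named subpatterns in regexp.
val names : regexp -> string array
names regexp
Returns array of names of named substrings in regexp.
val nameentrysize : regexp -> int
nameentrysize regexp
Returns size of longest name of named subpatterns in regexp + 3.
val firstbyte : regexp -> firstbyte_info
firstbyte regexp
Returns firstbyte info on regexp.
val firsttable : regexp -> string option
firsttable regexp
Returns some 256-bit (32-byte) fixed set table in form of a string for
regexp if available, None otherwise.
val lastliteral : regexp -> char option
lastliteral regexp
Returns some last matching character of regexp
if available, None otherwise.
val study_stat : regexp -> study_stat
study_stat regexp
Returns study status of regexp.
val get_stringnumber : regexp -> string -> int
get_stringnumber rex name
Raises Invalid_argument if there is no such named substring.
Returns the index of the named substring name in regular expression rex. This index can then be used with
get_substring.
val get_match_limit : regexp -> int option
get_match_limit rex
Returns some match limit of regular expression rex or None.
val get_match_limit_recursion : regexp -> int option
get_match_limit_recursion rex
Returns some recursion match limit of regular expression rex or None.
type chtables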
Alternative set of char tables for pattern matching
val maketables : unit -> chtables
Generates new set of char tables for the current locale.
val regexp : ?study:bool ->
?limit:int ->
?limit_recursion:int ->
?iflags:icflag ->
?flags:cflag list -> ?chtables:chtables -> string -> regexp
regexp ?study ?limit ?limit_recursion ?iflags ?flags ?chtables pattern
compiles pattern with flags when given, with iflags otherwise, and
with char tables chtables. If study is true, then the resulting regular
expression will be studied. If limit is specified, this sets a limit to
the amount of recursion and backtracking (only lower than the builtin
default!). If this limit is exceeded, MatchLimit will be raised during
matching.
study : default = true
limit : default = no extra limit other than default
limit_recursion : default = no extra limit_recursion other than default
iflags : default = no extra flags
flags : default = ignored
chtables : default = builtin char tables
val regexp_or : ?study:bool ->
?limit:int ->
?limit_recursion:int ->
?iflags:icflag ->
?flags:cflag list ->
?chtables:chtables -> string list -> regexp
regexp_or ?study ?limit ?limit_recursion ?iflags ?flags ?chtables patterns
like Pcre.regexp, but combines patterns as alternatives (or-patterns) into
one regular expression.
val quote : string -> string
quote str
Returns the quoted string of str.
type substrings
Information on substrings after pattern matching
val get_subject : substrings -> string
get_subject substrings
Returns the subject string of substrings.
val num_of_subs : substrings -> int
num_of_subs substrings
Returns number of strings in substrings
(whole match inclusive).
val get_substring : substrings -> int -> string
get_substring substrings n
Raises Invalid_argument if n is not in the range of the number of
substrings.
Raises Not_found if the corresponding subpattern did not capture
a substring.
Returns the nth substring
(0 is whole match) of substrings.
val get_substring_ofs : substrings -> int -> int * int
get_substring_ofs substrings n
Raises Invalid_argument if n is not in the range of the number
of substrings.
Raises Not_found if the corresponding subpattern did not capture
a substring.
Returns the offset tuple of the nth substring of substrings (0 is whole match).
val get_substrings : ?full_match:bool -> substrings -> string array
get_substrings ?full_match substrings
Returns the array of substrings in substrings. It includes the full match at index 0
when full_match is true, the captured substrings only when it
is false. If a subpattern did not capture a substring, the empty
string is returned in the corresponding position instead.
full_match : default = true
val get_opt_substrings : ?full_match:bool -> substrings -> string option array
get_opt_substrings ?full_match substrings
Returns the array of optional substrings in substrings. It includes Some full_match_str
at index 0 when full_match is true, Some captured_substrings
only when it is false. If a subpattern did not capture a substring,
None is returned in the corresponding position instead.
full_match : default = true
val get_named_substring : regexp -> string -> substrings -> string
get_named_substring rex name substrings
Raises Invalid_argument if there is no such named substring.
Raises Not_found if the corresponding subpattern did not capture
a substring.
Returns the named substring name in regular expression rex and substrings.
val get_named_substring_ofs : regexp -> string -> substrings -> int * int
get_named_substring_ofs rex name substrings
Raises Invalid_argument if there is no such named substring.
Raises Not_found if the corresponding subpattern did not capture
a substring.
Returns the offset tuple of the named substring name in regular expression rex and
substrings.
type callout_data = {
  callout_number : int;
      (* Callout number *)
  substrings : substrings;
      (* Substrings matched so far *)
  start_match : int;
      (* Subject start offset of current match attempt *)
  current_position : int;
      (* Subject offset of current match pointer *)
  capture_top : int;
      (* Number of the highest captured substring so far *)
  capture_last : int;
      (* Number of the most recently captured substring *)
  pattern_position : int;
      (* Offset of next match item in pattern string *)
  next_item_length : int;
      (* Length of next match item in pattern string *)
}
type callout = callout_data -> unit
Type of callout functions
Callouts are referred to in patterns as "(?Cn)" where "n" is a
callout_number ranging from 0 to 255. Substrings captured so far
are accessible as usual via substrings. You will have to consider
capture_top and capture_last to know about the current state of
valid substrings.
By raising exception Backtrack within a callout function, the user
can force the pattern matching engine to backtrack to other possible
solutions. Other exceptions will terminate matching immediately
and return control to OCaml.
val pcre_exec : ?iflags:irflag ->
?flags:rflag list ->
?rex:regexp ->
?pat:string -> ?pos:int -> ?callout:callout -> string -> int array
pcre_exec ?iflags ?flags ?rex ?pat ?pos ?callout subj
Raises Not_found if pattern does not match.
Returns an array of offsets that describe the position of matched
subpatterns in the string subj starting at position pos with pattern pat when
given, regular expression rex otherwise. The array also contains
additional workspace needed by the match engine. Uses flags when
given, the precompiled iflags otherwise. Callouts are handled by
callout.
iflags : default = no extra flags
flags : default = ignored
rex : default = matches whitespace
pat : default = ignored
pos : default = 0
callout : default = ignore callouts
val exec : ?iflags:irflag ->
?flags:rflag list ->
?rex:regexp ->
?pat:string -> ?pos:int -> ?callout:callout -> string -> substrings
exec ?iflags ?flags ?rex ?pat ?pos ?callout subj
Raises Not_found if pattern does not match.
Returns substring information on string subj starting at position pos with pattern
pat when given, regular expression rex otherwise. Uses flags
when given, the precompiled iflags otherwise. Callouts are handled
by callout.
iflags : default = no extra flags
flags : default = ignored
rex : default = matches whitespace
pat : default = ignored
pos : default = 0
callout : default = ignore callouts
val exec_all : ?iflags:irflag ->
?flags:rflag list ->
?rex:regexp ->
?pat:string ->
?pos:int -> ?callout:callout -> string -> substrings array
exec_all ?iflags ?flags ?rex ?pat ?pos ?callout subj
Raises Not_found if pattern does not match.
Returns an array of substring information of all matching substrings in
string subj starting at position pos with pattern pat when
given, regular expression rex otherwise. Uses flags when given,
the precompiled iflags otherwise. Callouts are handled by callout.
iflags : default = no extra flags
flags : default = ignored
rex : default = matches whitespace
pat : default = ignored
pos : default = 0
callout : default = ignore callouts
val next_match : ?iflags:irflag ->
?flags:rflag list ->
?rex:regexp ->
?pat:string ->
?pos:int -> ?callout:callout -> substrings -> substrings
next_match ?iflags ?flags ?rex ?pat ?pos ?callout substrs
Raises Not_found if pattern does not match.
Raises Invalid_argument if pos lets matching start outside of
the subject string.
Returns substring information on the match that follows on the last
match denoted by substrs, jumping over pos characters (also
backwards!), using pattern pat when given, regular expression
rex otherwise. Uses flags when given, the precompiled iflags
otherwise. Callouts are handled by callout.
iflags : default = no extra flags
flags : default = ignored
rex : default = matches whitespace
pat : default = ignored
pos : default = 0
callout : default = ignore callouts
val extract : ?iflags:irflag ->
?flags:rflag list ->
?rex:regexp ->
?pat:string ->
?pos:int ->
?full_match:bool -> ?callout:callout -> string -> string array
extract ?iflags ?flags ?rex ?pat ?pos ?full_match ?callout subj
Raises Not_found if pattern does not match.
Returns the array of substrings that match subj starting at
position pos, using pattern pat when given, regular expression
rex otherwise. Uses flags when given, the precompiled iflags
otherwise. It includes the full match at index 0 when full_match is
true, the captured substrings only when it is false. Callouts are
handled by callout. If a subpattern did not capture a substring,
the empty string is returned in the corresponding position instead.
iflags : default = no extra flags
flags : default = ignored
rex : default = matches whitespace
pat : default = ignored
pos : default = 0
full_match : default = true
callout : default = ignore callouts
val extract_opt : ?iflags:irflag ->
?flags:rflag list ->
?rex:regexp ->
?pat:string ->
?pos:int ->
?full_match:bool -> ?callout:callout -> string -> string option array
extract_opt ?iflags ?flags ?rex ?pat ?pos ?full_match ?callout subj
Raises Not_found if pattern does not match.
Returns the array of optional substrings that match subj starting
at position pos, using pattern pat when given, regular expression
rex otherwise. Uses flags when given, the precompiled iflags
otherwise. It includes Some full_match_str at index 0 when
full_match is true, Some captured-substrings only when it is
false. Callouts are handled by callout. If a subpattern did
not capture a substring, None is returned in the corresponding
position instead.
iflags : default = no extra flags
flags : default = ignored
rex : default = matches whitespace
pat : default = ignored
pos : default = 0
full_match : default = true
callout : default = ignore callouts
val extract_all : ?iflags:irflag ->
?flags:rflag list ->
?rex:regexp ->
?pat:string ->
?pos:int ->
?full_match:bool -> ?callout:callout -> string -> string array array
extract_all ?iflags ?flags ?rex ?pat ?pos ?full_match ?callout subj
Raises Not_found if pattern does not match.
Returns an array of arrays of all matching substrings that match
subj starting at position pos, using pattern pat when given,
regular expression rex otherwise. Uses flags when given, the
precompiled iflags otherwise. It includes the full match at index
0 of the extracted string arrays when full_match is true, the
captured substrings only when it is false. Callouts are handled by
callout.
iflags : default = no extra flags
flags : default = ignored
rex : default = matches whitespace
pat : default = ignored
pos : default = 0
full_match : default = true
callout : default = ignore callouts
val extract_all_opt : ?iflags:irflag ->
?flags:rflag list ->
?rex:regexp ->
?pat:string ->
?pos:int ->
?full_match:bool ->
?callout:callout -> string -> string option array array
extract_all_opt
?iflags ?flags ?rex ?pat ?pos ?full_match ?callout subj
Raises Not_found if pattern does not match.
Returns an array of arrays of all optional matching substrings that match
subj starting at position pos, using pattern pat when
given, regular expression rex otherwise. Uses flags when given,
the precompiled iflags otherwise. It includes Some full_match_str
at index 0 of the extracted string arrays when full_match is true,
Some captured_substrings only when it is false. Callouts are
handled by callout. If a subpattern did not capture a substring,
None is returned in the corresponding position instead.
iflags : default = no extra flags
flags : default = ignored
rex : default = matches whitespace
pat : default = ignored
pos : default = 0
full_match : default = true
callout : default = ignore callouts
val pmatch : ?iflags:irflag ->
?flags:rflag list ->
?rex:regexp ->
?pat:string -> ?pos:int -> ?callout:callout -> string -> bool
pmatch ?iflags ?flags ?rex ?pat ?pos ?callout subj
Returns true
if subj is matched by pattern pat when given, regular expression
rex otherwise, starting at position pos. Uses flags when given,
the precompiled iflags otherwise. Callouts are handled by callout.
iflags : default = no extra flags
flags : default = ignored
rex : default = matches whitespace
pat : default = ignored
pos : default = 0
callout : default = ignore callouts
type substitution
Information on substitution patterns
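val subst : string -> substitution
subst str converts the string str representing a
substitution pattern to the internal representation.
The contents of the substitution string str can be normal text
mixed with any of the following (mostly as in PERL):
- $[0-9]+ - a "$" immediately followed by an arbitrary number. "$0" stands for the name of the executable, any other number for the n-th backreference.
- $& - the whole matched pattern
- $` - the text before the match
- $' - the text after the match
- $+ - the last group that matched
- $$ - a single "$"
- $! - delimiter which does not appear in the substitution. Can be used to part "$[0-9]+" from an immediately
following other number.
val replace : ?iflags:irflag ->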
?flags:rflag list ->
?rex:regexp ->
?pat:string ->
?pos:int ->
?itempl:substitution ->
?templ:string -> ?callout:callout -> string -> string
replace ?iflags ?flags ?rex ?pat ?pos ?itempl ?templ ?callout subj
replaces all substrings of subj matching pattern pat when given,
regular expression rex otherwise, starting at position pos with
the substitution string templ when given, itempl otherwise. Uses
flags when given, the precompiled iflags otherwise. Callouts
are handled by callout.
Raises Failure if there are backreferences to nonexistent subpatterns.
iflags : default = no extra flags
flags : default = ignored
rex : default = matches whitespace
pat : default = ignored
pos : default = 0
itempl : default = empty string
templ : default = ignored
callout : default = ignore callouts
val qreplace : ?iflags:irflag ->
?flags:rflag list ->
?rex:regexp ->
?pat:string ->
?pos:int -> ?templ:string -> ?callout:callout -> string -> string
qreplace ?iflags ?flags ?rex ?pat ?pos ?templ ?callout subj
replaces all substrings of subj matching pattern pat when given,
regular expression rex otherwise, starting at position pos
with the string templ. Uses flags when given, the precompiled
iflags otherwise. Callouts are handled by callout.
iflags : default = no extra flags
flags : default = ignored
rex : default = matches whitespace
pat : default = ignored
pos : default = 0
templ : default = ignored
callout : default = ignore callouts
val substitute_substrings : ?iflags:irflag ->
?flags:rflag list ->
?rex:regexp ->
?pat:string ->
?pos:int ->
?callout:callout ->
subst:(substrings -> string) -> string -> string
substitute_substrings ?iflags ?flags ?rex ?pat ?pos ?callout ~subst subj
replaces all substrings of subj matching pattern pat when given,
regular expression rex otherwise, starting at position pos
with the result of function subst applied to the substrings
of the match. Uses flags when given, the precompiled iflags
otherwise. Callouts are handled by callout.
iflags : default = no extra flags
flags : default = ignored
rex : default = matches whitespace
pat : default = ignored
pos : default = 0
callout : default = ignore callouts
val substitute : ?iflags:irflag ->
?flags:rflag list ->
?rex:regexp ->
?pat:string ->
?pos:int ->
?callout:callout -> subst:(string -> string) -> string -> string
substitute ?iflags ?flags ?rex ?pat ?pos ?callout ~subst subj
replaces all substrings of subj matching pattern pat when given,
regular expression rex otherwise, starting at position pos with
the result of function subst applied to the match. Uses flags
when given, the precompiled iflags otherwise. Callouts are handled
by callout.
iflags : default = no extra flags
flags : default = ignored
rex : default = matches whitespace
pat : default = ignored
pos : default = 0
callout : default = ignore callouts
val replace_first : ?iflags:irflag ->
?flags:rflag list ->
?rex:regexp ->
?pat:string ->
?pos:int ->
?itempl:substitution ->
?templ:string -> ?callout:callout -> string -> string
replace_first ?iflags ?flags ?rex ?pat ?pos ?itempl ?templ ?callout subj
replaces the first substring of subj matching pattern pat when
given, regular expression rex otherwise, starting at position
pos with the substitution string templ when given, itempl
otherwise. Uses flags when given, the precompiled iflags
otherwise. Callouts are handled by callout.
Raises Failure if there are backreferences to nonexistent subpatterns.
iflags : default = no extra flags
flags : default = ignored
rex : default = matches whitespace
pat : default = ignored
pos : default = 0
itempl : default = empty string
templ : default = ignored
callout : default = ignore callouts
val qreplace_first : ?iflags:irflag ->
?flags:rflag list ->
?rex:regexp ->
?pat:string ->
?pos:int -> ?templ:string -> ?callout:callout -> string -> string
qreplace_first ?iflags ?flags ?rex ?pat ?pos ?templ ?callout subj
replaces the first substring of subj matching pattern pat when
given, regular expression rex otherwise, starting at position pos
with the string templ. Uses flags when given, the precompiled
iflags otherwise. Callouts are handled by callout.
iflags : default = no extra flags
flags : default = ignored
rex : default = matches whitespace
pat : default = ignored
pos : default = 0
templ : default = ignored
callout : default = ignore callouts
val substitute_substrings_first : ?iflags:irflag ->
?flags:rflag list ->
?rex:regexp ->
?pat:string ->
?pos:int ->
?callout:callout ->
subst:(substrings -> string) -> string -> string
substitute_substrings_first
?iflags ?flags ?rex ?pat ?pos ?callout ~subst subj
replaces the first substring of subj matching pattern pat when
given, regular expression rex otherwise, starting at position
pos with the result of function subst applied to the substrings
of the match. Uses flags when given, the precompiled iflags
otherwise. Callouts are handled by callout.
iflags : default = no extra flags
flags : default = ignored
rex : default = matches whitespace
pat : default = ignored
pos : default = 0
callout : default = ignore callouts
val substitute_first : ?iflags:irflag ->
?flags:rflag list ->
?rex:regexp ->
?pat:string ->
?pos:int ->
?callout:callout -> subst:(string -> string) -> string -> string
substitute_first ?iflags ?flags ?rex ?pat ?pos ?callout ~subst subj
replaces the first substring of subj matching pattern pat when
given, regular expression rex otherwise, starting at position
pos with the result of function subst applied to the match. Uses
flags when given, the precompiled iflags otherwise. Callouts
are handled by callout.
iflags : default = no extra flags
flags : default = ignored
rex : default = matches whitespace
pat : default = ignored
pos : default = 0
callout : default = ignore callouts
val split : ?iflags:irflag ->
?flags:rflag list ->
?rex:regexp ->
?pat:string ->
?pos:int -> ?max:int -> ?callout:callout -> string -> string list
split ?iflags ?flags ?rex ?pat ?pos ?max ?callout subj splits subj
into a list of at most max strings, using as delimiter pattern
pat when given, regular expression rex otherwise, starting at
position pos. Uses flags when given, the precompiled iflags
otherwise. If max is zero, trailing empty fields are stripped. If
it is negative, it is treated as arbitrarily large. If neither pat
nor rex are specified, leading whitespace will be stripped! Should
behave exactly as in PERL. Callouts are handled by callout.
iflags : default = no extra flags
flags : default = ignored
rex : default = matches whitespace
pat : default = ignored
pos : default = 0
max : default = 0
callout : default = ignore callouts
val asplit : ?iflags:irflag ->
?flags:rflag list ->
?rex:regexp ->
?pat:string ->
?pos:int -> ?max:int -> ?callout:callout -> string -> string array
asplit ?iflags ?flags ?rex ?pat ?pos ?max ?callout subj same as
Pcre.split but returns an array instead of a list.
type split_result =
  | Text of string
      (* Text part of split string *)
  | Delim of string
      (* Delimiter part of split string *)
  | Group of int * string
      (* Subgroup of matched delimiter (subgroup_nr, subgroup_str) *)
  | NoGroup
      (* Unmatched subgroup *)
Result of a Pcre.full_split
val full_split : ?iflags:irflag ->
?flags:rflag list ->
?rex:regexp ->
?pat:string ->
?pos:int ->
?max:int -> ?callout:callout -> string -> split_result list
full_split ?iflags ?flags ?rex ?pat ?pos ?max ?callout subj splits
subj into a list of at most max elements of type "split_result",
using as delimiter pattern pat when given, regular expression
rex otherwise, starting at position pos. Uses flags when given,
the precompiled iflags otherwise. If max is zero, trailing empty
fields are stripped. If it is negative, it is treated as arbitrarily
large. Should behave exactly as in PERL. Callouts are handled by
callout.
iflags : default = no extra flags
flags : default = ignored
rex : default = matches whitespace
pat : default = ignored
pos : default = 0
max : default = 0
callout : default = ignore callouts
val foreach_line : ?ic:Stdlib.in_channel -> (string -> unit) -> unit
foreach_line ?ic f applies f to each line in input channel ic until
the end-of-file is reached.
ic : default = stdin
val foreach_file : string list -> (string -> Stdlib.in_channel -> unit) -> unit
foreach_file filenames f opens each file in the list filenames
for input and applies f to each filename and the corresponding
channel. Channels are closed after each operation (even when
exceptions occur - they get reraised afterwards!).
val unsafe_pcre_exec : irflag ->
regexp ->
pos:int ->
subj_start:int -> subj:string -> int array -> callout option -> unit
unsafe_pcre_exec flags rex ~pos ~subj_start ~subj offset_vector.
You should read the C-source to know what happens.
If you do not understand it - don't use this function!
val make_ovector : regexp -> int * int array
make_ovector regexp calculates the tuple (subgroups2, ovector)
which is the number of subgroup offsets and the offset array.