FFmpeg
avstring.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2007 Mans Rullgard
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #ifndef AVUTIL_AVSTRING_H
22 #define AVUTIL_AVSTRING_H
23 
24 #include <stddef.h>
25 #include <stdint.h>
26 #include "attributes.h"
27 #include "version.h"
28 
29 /**
30  * @addtogroup lavu_string
31  * @{
32  */
33 
34 /**
35  * Return non-zero if pfx is a prefix of str. If it is, *ptr is set to
36  * the address of the first character in str after the prefix.
37  *
38  * @param str input string
39  * @param pfx prefix to test
40  * @param ptr updated if the prefix is matched inside str
41  * @return non-zero if the prefix matches, zero otherwise
42  */
43 int av_strstart(const char *str, const char *pfx, const char **ptr);
44 
45 /**
46  * Return non-zero if pfx is a prefix of str independent of case. If
47  * it is, *ptr is set to the address of the first character in str
48  * after the prefix.
49  *
50  * @param str input string
51  * @param pfx prefix to test
52  * @param ptr updated if the prefix is matched inside str
53  * @return non-zero if the prefix matches, zero otherwise
54  */
55 int av_stristart(const char *str, const char *pfx, const char **ptr);
56 
57 /**
58  * Locate the first case-independent occurrence in the string haystack
59  * of the string needle. A zero-length string needle is considered to
60  * match at the start of haystack.
61  *
62  * This function is a case-insensitive version of the standard strstr().
63  *
64  * @param haystack string to search in
65  * @param needle string to search for
66  * @return pointer to the located match within haystack
67  * or a null pointer if no match
68  */
69 char *av_stristr(const char *haystack, const char *needle);
70 
71 /**
72  * Locate the first occurrence of the string needle in the string haystack
73  * where not more than hay_length characters are searched. A zero-length
74  * string needle is considered to match at the start of haystack.
75  *
76  * This function is a length-limited version of the standard strstr().
77  *
78  * @param haystack string to search in
79  * @param needle string to search for
80  * @param hay_length length of string to search in
81  * @return pointer to the located match within haystack
82  * or a null pointer if no match
83  */
84 char *av_strnstr(const char *haystack, const char *needle, size_t hay_length);
85 
86 /**
87  * Copy the string src to dst, but no more than size - 1 bytes, and
88  * null-terminate dst.
89  *
90  * This function is the same as BSD strlcpy().
91  *
92  * @param dst destination buffer
93  * @param src source string
94  * @param size size of destination buffer
95  * @return the length of src
96  *
97  * @warning since the return value is the length of src, src absolutely
98  * _must_ be a properly 0-terminated string, otherwise this will read beyond
99  * the end of the buffer and possibly crash.
100  */
101 size_t av_strlcpy(char *dst, const char *src, size_t size);
102 
103 /**
104  * Append the string src to the string dst, but to a total length of
105  * no more than size - 1 bytes, and null-terminate dst.
106  *
107  * This function is similar to BSD strlcat(), but differs when
108  * size <= strlen(dst).
109  *
110  * @param dst destination buffer
111  * @param src source string
112  * @param size size of destination buffer
113  * @return the total length of src and dst
114  *
115  * @warning since the return value uses the lengths of src and dst, these
116  * absolutely _must_ be properly 0-terminated strings, otherwise this
117  * will read beyond the end of the buffer and possibly crash.
118  */
119 size_t av_strlcat(char *dst, const char *src, size_t size);
120 
121 /**
122  * Append output to a string, according to a format. Never write out of
123  * the destination buffer, and always put a terminating 0 within
124  * the buffer.
125  * @param dst destination buffer (string to which the output is
126  * appended)
127  * @param size total size of the destination buffer
128  * @param fmt printf-compatible format string, specifying how the
129  * following parameters are used
130  * @return the length of the string that would have been generated
131  * if enough space had been available
132  */
133 size_t av_strlcatf(char *dst, size_t size, const char *fmt, ...) av_printf_format(3, 4);
134 
/**
 * Measure the length of a string while examining at most len characters.
 *
 * Counting starts at s[0] and stops at the first zero character or once
 * len characters have been counted, whichever happens first.
 *
 * @param s   the string whose length to count
 * @param len maximum number of characters to check in the string; this
 *            is also the largest value the function can return
 * @return the number of leading non-zero characters in s, capped at len
 */
static inline size_t av_strnlen(const char *s, size_t len)
{
    size_t count = 0;

    while (count < len && s[count]) {
        count++;
    }
    return count;
}
149 
150 /**
151  * Print arguments following specified format into a large enough auto
152  * allocated buffer. It is similar to GNU asprintf().
153  * @param fmt printf-compatible format string, specifying how the
154  * following parameters are used.
155  * @return the allocated string
156  * @note You have to free the string yourself with av_free().
157  */
158 char *av_asprintf(const char *fmt, ...) av_printf_format(1, 2);
159 
160 /**
161  * Unescape the given string until a non escaped terminating char,
162  * and return the token corresponding to the unescaped string.
163  *
164  * The normal \ and ' escaping is supported. Leading and trailing
165  * whitespaces are removed, unless they are escaped with '\' or are
166  * enclosed between ''.
167  *
168  * @param buf the buffer to parse, buf will be updated to point to the
169  * terminating char
170  * @param term a 0-terminated list of terminating chars
171  * @return the malloced unescaped string, which must be av_freed by
172  * the user, NULL in case of allocation failure
173  */
174 char *av_get_token(const char **buf, const char *term);
175 
176 /**
177  * Split the string into several tokens which can be accessed by
178  * successive calls to av_strtok().
179  *
180  * A token is defined as a sequence of characters not belonging to the
181  * set specified in delim.
182  *
183  * On the first call to av_strtok(), s should point to the string to
184  * parse, and the value of saveptr is ignored. In subsequent calls, s
185  * should be NULL, and saveptr should be unchanged since the previous
186  * call.
187  *
188  * This function is similar to strtok_r() defined in POSIX.1.
189  *
190  * @param s the string to parse, may be NULL
191  * @param delim 0-terminated list of token delimiters, must be non-NULL
192  * @param saveptr user-provided pointer which points to stored
193  * information necessary for av_strtok() to continue scanning the same
194  * string. saveptr is updated to point to the next character after the
195  * first delimiter found, or to NULL if the string was terminated
196  * @return the found token, or NULL when no token is found
197  */
198 char *av_strtok(char *s, const char *delim, char **saveptr);
199 
200 /**
201  * Locale-independent conversion of ASCII isdigit.
202  */
203 static inline av_const int av_isdigit(int c)
204 {
205  return c >= '0' && c <= '9';
206 }
207 
208 /**
209  * Locale-independent conversion of ASCII isgraph.
210  */
211 static inline av_const int av_isgraph(int c)
212 {
213  return c > 32 && c < 127;
214 }
215 
216 /**
217  * Locale-independent conversion of ASCII isspace.
218  */
219 static inline av_const int av_isspace(int c)
220 {
221  return c == ' ' || c == '\f' || c == '\n' || c == '\r' || c == '\t' ||
222  c == '\v';
223 }
224 
225 /**
226  * Locale-independent conversion of ASCII characters to uppercase.
227  */
228 static inline av_const int av_toupper(int c)
229 {
230  if (c >= 'a' && c <= 'z')
231  c ^= 0x20;
232  return c;
233 }
234 
235 /**
236  * Locale-independent conversion of ASCII characters to lowercase.
237  */
238 static inline av_const int av_tolower(int c)
239 {
240  if (c >= 'A' && c <= 'Z')
241  c ^= 0x20;
242  return c;
243 }
244 
245 /**
246  * Locale-independent conversion of ASCII isxdigit.
247  */
248 static inline av_const int av_isxdigit(int c)
249 {
250  c = av_tolower(c);
251  return av_isdigit(c) || (c >= 'a' && c <= 'f');
252 }
253 
254 /**
255  * Locale-independent case-insensitive compare.
256  * @note This means only ASCII-range characters are case-insensitive
257  */
258 int av_strcasecmp(const char *a, const char *b);
259 
260 /**
261  * Locale-independent case-insensitive compare.
262  * @note This means only ASCII-range characters are case-insensitive
263  */
264 int av_strncasecmp(const char *a, const char *b, size_t n);
265 
266 /**
267  * Locale-independent string replacement.
268  * @note This means only ASCII-range characters are replaced
269  */
270 char *av_strireplace(const char *str, const char *from, const char *to);
271 
272 /**
273  * Thread safe basename.
274  * @param path the string to parse, on DOS both \ and / are considered separators.
275  * @return pointer to the basename substring.
276  * If path does not contain a slash, the function returns a copy of path.
277  * If path is a NULL pointer or points to an empty string, a pointer
278  * to a string "." is returned.
279  */
280 const char *av_basename(const char *path);
281 
282 /**
283  * Thread safe dirname.
284  * @param path the string to parse, on DOS both \ and / are considered separators.
285  * @return A pointer to a string that's the parent directory of path.
286  * If path is a NULL pointer or points to an empty string, a pointer
287  * to a string "." is returned.
288  * @note the function may modify the contents of the path, so copies should be passed.
289  */
290 const char *av_dirname(char *path);
291 
292 /**
293  * Match instances of a name in a comma-separated list of names.
294  * List entries are checked from the start to the end of the names list,
295  * the first match ends further processing. If an entry prefixed with '-'
296  * matches, then 0 is returned. The "ALL" list entry is considered to
297  * match all names.
298  *
299  * @param name Name to look for.
300  * @param names List of names.
301  * @return 1 on match, 0 otherwise.
302  */
303 int av_match_name(const char *name, const char *names);
304 
305 /**
306  * Append path component to the existing path.
307  * A path separator '/' is placed between them when needed.
308  * The resulting string has to be freed with av_free().
309  * @param path base path
310  * @param component component to be appended
311  * @return new path or NULL on error.
312  */
313 char *av_append_path_component(const char *path, const char *component);
314 
/**
 * Escaping modes selectable through the mode argument of av_escape().
 */
enum AVEscapeMode {
    AV_ESCAPE_MODE_AUTO,      ///< Use auto-selected escaping mode.
    AV_ESCAPE_MODE_BACKSLASH, ///< Use backslash escaping.
    AV_ESCAPE_MODE_QUOTE,     ///< Use single-quote escaping.
    AV_ESCAPE_MODE_XML,       ///< Use XML non-markup character data escaping.
};
321 
322 /**
323  * Consider spaces special and escape them even in the middle of the
324  * string.
325  *
326  * This is equivalent to adding the whitespace characters to the special
327  * characters lists, except it is guaranteed to use the exact same list
328  * of whitespace characters as the rest of libavutil.
329  */
330 #define AV_ESCAPE_FLAG_WHITESPACE (1 << 0)
331 
332 /**
333  * Escape only specified special characters.
334  * Without this flag, escape also any characters that may be considered
335  * special by av_get_token(), such as the single quote.
336  */
337 #define AV_ESCAPE_FLAG_STRICT (1 << 1)
338 
339 /**
340  * Within AV_ESCAPE_MODE_XML, additionally escape single quotes for single
341  * quoted attributes.
342  */
343 #define AV_ESCAPE_FLAG_XML_SINGLE_QUOTES (1 << 2)
344 
345 /**
346  * Within AV_ESCAPE_MODE_XML, additionally escape double quotes for double
347  * quoted attributes.
348  */
349 #define AV_ESCAPE_FLAG_XML_DOUBLE_QUOTES (1 << 3)
350 
351 
352 /**
353  * Escape string in src, and put the escaped string in an allocated
354  * string in *dst, which must be freed with av_free().
355  *
356  * @param dst pointer where an allocated string is put
357  * @param src string to escape, must be non-NULL
358  * @param special_chars string containing the special characters which
359  * need to be escaped, can be NULL
360  * @param mode escape mode to employ, see AV_ESCAPE_MODE_* macros.
361  * Any unknown value for mode will be considered equivalent to
362  * AV_ESCAPE_MODE_BACKSLASH, but this behaviour can change without
363  * notice.
364  * @param flags flags which control how to escape, see AV_ESCAPE_FLAG_ macros
365  * @return the length of the allocated string, or a negative error code in case of error
366  * @see av_bprint_escape()
367  */
369 int av_escape(char **dst, const char *src, const char *special_chars,
370  enum AVEscapeMode mode, int flags);
371 
/* Flags for the flags argument of av_utf8_decode(). */
#define AV_UTF8_FLAG_ACCEPT_INVALID_BIG_CODES          1 ///< accept codepoints over 0x10FFFF
#define AV_UTF8_FLAG_ACCEPT_NON_CHARACTERS             2 ///< accept non-characters - 0xFFFE and 0xFFFF
#define AV_UTF8_FLAG_ACCEPT_SURROGATES                 4 ///< accept UTF-16 surrogates codes
#define AV_UTF8_FLAG_EXCLUDE_XML_INVALID_CONTROL_CODES 8 ///< exclude control codes not accepted by XML

/**
 * Union of all AV_UTF8_FLAG_ACCEPT_* flags.
 *
 * The expansion is parenthesized so that the macro behaves as a single
 * operand: without the parentheses, expressions such as
 * (x & AV_UTF8_FLAG_ACCEPT_ALL) or ~AV_UTF8_FLAG_ACCEPT_ALL would bind
 * only to the first flag because & and ~ have higher precedence than |.
 */
#define AV_UTF8_FLAG_ACCEPT_ALL \
    (AV_UTF8_FLAG_ACCEPT_INVALID_BIG_CODES | \
     AV_UTF8_FLAG_ACCEPT_NON_CHARACTERS    | \
     AV_UTF8_FLAG_ACCEPT_SURROGATES)
379 
380 /**
381  * Read and decode a single UTF-8 code point (character) from the
382  * buffer in *buf, and update *buf to point to the next byte to
383  * decode.
384  *
385  * In case of an invalid byte sequence, the pointer will be updated to
386  * the next byte after the invalid sequence and the function will
387  * return an error code.
388  *
389  * Depending on the specified flags, the function will also fail in
390  * case the decoded code point does not belong to a valid range.
391  *
392  * @note For speed-relevant code a carefully implemented use of
393  * GET_UTF8() may be preferred.
394  *
395  * @param codep pointer used to return the parsed code in case of success.
396  * The value in *codep is set even in case the range check fails.
397  * @param bufp pointer to the address the first byte of the sequence
398  * to decode, updated by the function to point to the
399  * byte next after the decoded sequence
400  * @param buf_end pointer to the end of the buffer, points to the next
401  * byte past the last in the buffer. This is used to
402  * avoid buffer overreads (in case of an unfinished
403  * UTF-8 sequence towards the end of the buffer).
404  * @param flags a collection of AV_UTF8_FLAG_* flags
405  * @return >= 0 in case a sequence was successfully read, a negative
406  * value in case of invalid sequence
407  */
409 int av_utf8_decode(int32_t *codep, const uint8_t **bufp, const uint8_t *buf_end,
410  unsigned int flags);
411 
412 /**
413  * Check if a name is in a list.
414  * @returns 0 if not found, or the 1 based index where it has been found in the
415  * list.
416  */
417 int av_match_list(const char *name, const char *list, char separator);
418 
419 /**
420  * See libc sscanf manual for more information.
421  * Locale-independent sscanf implementation.
422  */
423 int av_sscanf(const char *string, const char *format, ...);
424 
425 /**
426  * @}
427  */
428 
429 #endif /* AVUTIL_AVSTRING_H */
Macro definitions for various function/variable attributes.
#define av_printf_format(fmtpos, attrpos)
Definition: attributes.h:161
#define av_warn_unused_result
Definition: attributes.h:62
#define av_const
Definition: attributes.h:82
unsigned size
allocated memory
Definition: bprint.h:99
unsigned len
length so far
Definition: bprint.h:99
char * str
Buffer to print data progressively.
Definition: bprint.h:99
char * av_asprintf(const char *fmt,...) av_printf_format(1, 2)
Print arguments following specified format into a large enough auto allocated buffer.
size_t av_strlcat(char *dst, const char *src, size_t size)
Append the string src to the string dst, but to a total length of no more than size - 1 bytes,...
static size_t av_strnlen(const char *s, size_t len)
Get the count of continuous non zero chars starting from the beginning.
Definition: avstring.h:142
const char * av_basename(const char *path)
Thread safe basename.
av_warn_unused_result int av_escape(char **dst, const char *src, const char *special_chars, enum AVEscapeMode mode, int flags)
Escape string in src, and put the escaped string in an allocated string in *dst, which must be freed ...
char * av_strtok(char *s, const char *delim, char **saveptr)
Split the string into several tokens which can be accessed by successive calls to av_strtok().
static av_const int av_tolower(int c)
Locale-independent conversion of ASCII characters to lowercase.
Definition: avstring.h:238
char * av_stristr(const char *haystack, const char *needle)
Locate the first case-independent occurrence in the string haystack of the string needle.
int av_strcasecmp(const char *a, const char *b)
Locale-independent case-insensitive compare.
static av_const int av_isdigit(int c)
Locale-independent conversion of ASCII isdigit.
Definition: avstring.h:203
int av_match_name(const char *name, const char *names)
Match instances of a name in a comma-separated list of names.
int av_strstart(const char *str, const char *pfx, const char **ptr)
Return non-zero if pfx is a prefix of str.
static av_const int av_isxdigit(int c)
Locale-independent conversion of ASCII isxdigit.
Definition: avstring.h:248
size_t av_strlcpy(char *dst, const char *src, size_t size)
Copy the string src to dst, but no more than size - 1 bytes, and null-terminate dst.
char * av_append_path_component(const char *path, const char *component)
Append path component to the existing path.
char * av_strnstr(const char *haystack, const char *needle, size_t hay_length)
Locate the first occurrence of the string needle in the string haystack where not more than hay_lengt...
static av_const int av_isgraph(int c)
Locale-independent conversion of ASCII isgraph.
Definition: avstring.h:211
static av_const int av_isspace(int c)
Locale-independent conversion of ASCII isspace.
Definition: avstring.h:219
int av_match_list(const char *name, const char *list, char separator)
Check if a name is in a list.
static av_const int av_toupper(int c)
Locale-independent conversion of ASCII characters to uppercase.
Definition: avstring.h:228
int av_sscanf(const char *string, const char *format,...)
See libc sscanf manual for more information.
const char * av_dirname(char *path)
Thread safe dirname.
av_warn_unused_result int av_utf8_decode(int32_t *codep, const uint8_t **bufp, const uint8_t *buf_end, unsigned int flags)
Read and decode a single UTF-8 code point (character) from the buffer in *buf, and update *buf to poi...
int av_stristart(const char *str, const char *pfx, const char **ptr)
Return non-zero if pfx is a prefix of str independent of case.
char * av_get_token(const char **buf, const char *term)
Unescape the given string until a non escaped terminating char, and return the token corresponding to...
size_t av_strlcatf(char *dst, size_t size, const char *fmt,...) av_printf_format(3, 4)
Append output to a string, according to a format.
int av_strncasecmp(const char *a, const char *b, size_t n)
Locale-independent case-insensitive compare.
AVEscapeMode
Definition: avstring.h:315
char * av_strireplace(const char *str, const char *from, const char *to)
Locale-independent strings replace.
@ AV_ESCAPE_MODE_AUTO
Use auto-selected escaping mode.
Definition: avstring.h:316
@ AV_ESCAPE_MODE_XML
Use XML non-markup character data escaping.
Definition: avstring.h:319
@ AV_ESCAPE_MODE_QUOTE
Use single-quote escaping.
Definition: avstring.h:318
@ AV_ESCAPE_MODE_BACKSLASH
Use backslash escaping.
Definition: avstring.h:317
Libavutil version macros.