mbstowcs -- Convert Multibyte String to Wide-Character String

Format

#include <stdlib.h>
size_t mbstowcs(wchar_t *dest, const char *string, size_t len);

Language Level: ANSI, XPG4
mbstowcs converts the multibyte character string pointed to by string into the wide-character array pointed to by dest. Depending on the encoding scheme used by the code set, the multibyte character string can contain any combination of single-byte or double-byte characters.

The conversion stops after len wide-character elements of dest are filled or after the terminating null character of string is encountered. The terminating null character is converted to a wide character with the value 0; characters that follow it are not processed.

The LC_CTYPE category of the current locale affects the behavior of mbstowcs.

Return Value
If successful, mbstowcs returns the number of characters converted and stored in dest, not counting the terminating null character. The string pointed to by dest ends with a null character unless mbstowcs returns the value len.

If it encounters an invalid multibyte character, mbstowcs returns (size_t)-1. If dest is a null pointer, the value of len is ignored and mbstowcs returns the number of elements required for the converted wide characters.

Example
This example uses mbstowcs to convert the multibyte character string mbs to a wide-character string and store it in wcs. The backslash character encoding is 0x5C; the second byte of some of the double-byte encodings is also 0x5C.

#include <wchar.h>
#include <stdio.h>
#include <stdlib.h>
#include <locale.h>
#define SIZE 24
#if (1 == __TOS_OS2__)
   #define LOCNAME "ja_jp.ibm-932"
#else
   #define LOCNAME "ja_jp.ibm-943"
#endif
/*
 * Locates the backslash (0x5C) in a multibyte string in which the byte
 * 0x5C also occurs as the second byte of a double-byte character.
 * Three scans are shown: a byte-by-byte scan (which reports the 0x5C
 * trailing bytes as backslashes too), a scan that steps by mblen(), and
 * a scan over the wide-character form produced by mbstowcs().
 *
 * Returns 0 on success. Exits with status 1 if the locale LOCNAME cannot
 * be loaded or if a multibyte conversion fails.
 */
int main(void)
{
   char        mbs[] = "\x89\x8C\x89\xE4\x8D\x61\x8B\x41\x89\x5c"
                  "\\" "\x89\x8C\x89\xE4\x8D\x61\x8B\x41\x89\x5c";
   char *p;
   wchar_t     wcs[SIZE], backslash;
   size_t converted;
   int i, len;
   if (NULL == setlocale(LC_ALL, LOCNAME)) {
      printf("Locale \"%s\" could not be loaded\n", LOCNAME);
      exit(1);
   }
   /* It is a mistake to process the multibyte string as single bytes */
   for (i=0; mbs[i] != '\x00'; ++i) {
      if (mbs[i]=='\\') {
         printf("There is a backslash at byte position %d\n",i);
      }
   }
   printf("\n");
   /* One way to parse the string -- using mblen */
   for (i=0, p = mbs; *p != '\x00'; p += len, ++i) {
      len = mblen(p, MB_CUR_MAX);
      if (len <= 0) {
         /* mblen returns -1 for an invalid sequence; using that as the
            stride would move p backwards and the loop would never end. */
         printf("Invalid multibyte character at byte position %d\n",
                (int)(p - mbs));
         exit(1);
      }
      if (len==1 && *p=='\\') {
         printf("There is a backslash at character position %d\n",i);
      }
   }
   printf("\n");
   /* An easier way to parse the string -- using mbstowcs */
   if (mbtowc(&backslash, "\\", MB_CUR_MAX) < 0) {
      /* Without a valid conversion, backslash would be uninitialized. */
      printf("The backslash character could not be converted\n");
      exit(1);
   }
   converted = mbstowcs(wcs, mbs, SIZE);
   if (converted == (size_t)-1) {
      /* wcs holds no usable result after a failed conversion. */
      printf("The multibyte string could not be converted\n");
      exit(1);
   }
   /* Bound the scan by the returned count: when mbstowcs returns SIZE,
      wcs is completely filled and has no terminating null character. */
   for (i=0; i < (int)converted; ++i) {
      if (wcs[i]==backslash) {
         printf("There is a backslash at character position %d\n",i);
      }
   }
   printf("\n");
   return 0;
   /******************************************************************
      The output should be similar to:
      There is a backslash at byte position 9
      There is a backslash at byte position 10
      There is a backslash at byte position 20
      There is a backslash at character position 5
      There is a backslash at character position 5
   ******************************************************************/
}


mblen -- Determine Length of Multibyte Character
mbtowc -- Convert Multibyte Character to Wide Character
mbsrtowcs -- Convert Multibyte String to Wide-Character String
setlocale -- Set Locale
wcrtomb -- Convert Wide Character to Multibyte Character
wcsrtombs -- Convert Wide-Character String to Multibyte String
<locale.h>
<stdlib.h>
<wchar.h>