Using opcFileOpen with Unicode filenames

Coordinator
Jan 7, 2014 at 5:53 PM
I'm working on incorporating libopc into a project on Windows and see a possible snag. I need to be able to pass filenames with Unicode characters in them; however, opcFileOpen() uses fopen() instead of CreateFile or _wfopen(). As far as I know, fopen() only accepts ANSI filenames, whereas I have a UTF-8 filename.

My thought is that I need to patch the code to use _wfopen(), but I wondered if I'm missing something, as I see the filename parameter uses the xmlChar typedef, and that's something I need to study as well.

John
Sep 22, 2016 at 7:21 PM
To handle this use case I took the UTF-16 file name that is passed in as an arg to my Windows application and encoded it as UTF-8.
I then passed the UTF-8 file name into opcContainerOpen.

This change to opc\file.c handles converting the UTF-8 encoded file name back to UTF-16 and uses _wfopen() to open the file:
#ifdef WIN32
#include <Windows.h>
#endif
#include <opc/file.h>
#include <stdio.h>
#include <libxml/xmlmemory.h>
#include <libxml/globals.h>
#include <plib/plib.h>

/*
 * Open `filename` with the given stdio `mode` string (e.g. "rb").
 *
 * On WIN32 the name is treated as UTF-8, converted to UTF-16 and opened with
 * _wfopen(); on other platforms the bytes are passed to fopen() unchanged.
 *
 * Returns the opened stream, or NULL if the name conversion, the temporary
 * allocation or the open itself fails.
 */
static FILE *opcFileOpenWithMode(const xmlChar *filename, const char *mode) {
#ifdef WIN32
    wchar_t wideMode[8];
    wchar_t *wideName;
    FILE *file;
    size_t i;
    int wideLen;

    /* Mode strings are plain ASCII, so widening each character is enough. */
    for (i = 0; mode[i] != '\0' && i + 1 < sizeof(wideMode) / sizeof(wideMode[0]); i++) {
        wideMode[i] = (wchar_t)mode[i];
    }
    wideMode[i] = L'\0';

    /* Sizing call: yields the required length in wchar_t units, terminator
     * included (because the input length is -1), or 0 on failure. */
    wideLen = MultiByteToWideChar(CP_UTF8, 0, (LPCSTR)filename, -1, NULL, 0);
    if (wideLen <= 0) {
        return NULL;
    }

    wideName = (wchar_t *)malloc((size_t)wideLen * sizeof(*wideName));
    if (NULL == wideName) {
        return NULL;
    }

    if (MultiByteToWideChar(CP_UTF8, 0, (LPCSTR)filename, -1, wideName, wideLen) <= 0) {
        free(wideName);
        return NULL;
    }

    file = _wfopen(wideName, wideMode);
    free(wideName); /* the converted name is only needed for the open call */
    return file;
#else
    return fopen((const char *)filename, mode);
#endif
}

/*
 * Open the file named by `filename` according to `flags`.
 *
 * filename: NUL-terminated file name; interpreted as UTF-8 on WIN32.
 * flags:    bit set; only OPC_FILE_WRITE and OPC_FILE_TRUNC are examined here.
 *
 * Without OPC_FILE_WRITE the file is opened "rb". With OPC_FILE_WRITE an
 * existing file is reopened "r+b" (contents kept) unless OPC_FILE_TRUNC is
 * also set; a missing file, or OPC_FILE_TRUNC, results in "w+b" (created or
 * truncated).
 *
 * Returns the FILE* as a void*, or NULL on failure (including a NULL name).
 */
static void *opcFileOpen(const xmlChar *filename, int flags) {
    FILE *file;

    if (NULL == filename) {
        return NULL;
    }

    /* Probe in read mode first; this also tells us whether the file exists. */
    file = opcFileOpenWithMode(filename, "rb");

    if (flags & OPC_FILE_WRITE) {
        const char *mode = "w+b"; /* create a new file, or truncate */

        if (NULL != file) {
            fclose(file); /* close the read handle before reopening */
            if (!(flags & OPC_FILE_TRUNC)) {
                mode = "r+b"; /* keep the existing contents */
            }
        }
        file = opcFileOpenWithMode(filename, mode);
    }

    return file;
}
I have run some tests and it seems to work for my simple use case.

I am just opening a Japanese document and getting the keywords from it.
I can get the Japanese keywords by converting them to UTF-16 when they are returned.

Anything more complex may run into issues; I don't know.