Code Samples

Parts and Input Stream

The following sample program shows how to
  • open an OPC container for reading
  • open the "word/document.xml" stream found in "docx" files.
  • open an input stream and read the entire content.

#include <opc/opc.h>

/*
 * Opens "OOXMLI1.docx" read-only, looks up the "/word/document.xml" part and
 * copies its raw bytes to stdout in 100-byte chunks, followed by a newline.
 *
 * Always returns 0; a missing container, part or stream simply produces no
 * output.
 */
int main( int argc, const char* argv[] )
{
    opcInitLibrary();
    opcContainer *c=opcContainerOpen(_X("OOXMLI1.docx"), OPC_OPEN_READ_ONLY, NULL, NULL);
    if (NULL!=c) {
        opcPart part=opcPartFind(c, _X("/word/document.xml"), NULL, 0);
        if (OPC_PART_INVALID!=part) {
            opcContainerInputStream *stream=opcContainerOpenInputStream(c, part);
            if (NULL!=stream) {
                int ret=0;
                opc_uint8_t buf[100];
                while((ret=opcContainerReadInputStream(stream, buf, sizeof(buf)))>0) {
                    /* Write exactly ret bytes: "%.*s" would stop at the first
                       NUL byte and truncate e.g. UTF-16 encoded parts. */
                    fwrite(buf, sizeof(buf[0]), (size_t)ret, stdout);
                }
                opcContainerCloseInputStream(stream);
                printf("\n");
            }
        }
        opcContainerClose(c, OPC_CLOSE_NOW);
    }
    opcFreeLibrary();
    return 0;
}

Parts and XML Input Streams

The following sample program shows how to
  • open an OPC container for reading
  • open the "word/document.xml" stream found in "docx" files.
  • open an XML Reader and dump all the tags found in "word/document.xml"
#include <opc/opc.h>

/*
 * Prints the local name of the element the reader is positioned on as an
 * opening tag, recurses into every child element, prints every text child
 * verbatim, and finally prints the matching closing tag.
 *
 * reader: MCE text reader positioned on an element.
 */
static void dumpElement(mceTextReader_t *reader) {
    /* xmlTextReaderLocalName() returns a string owned by the caller, so it is
       used directly; duplicating it again would leak the first copy. */
    xmlChar *ln=xmlTextReaderLocalName(reader->reader);
    /* Never hand a NULL pointer to "%s". */
    const char *name=(NULL!=ln)?(const char *)ln:"";
    printf("<%s>\n", name);
    mce_start_attributes(reader) {
    } mce_end_attributes(reader);
    mce_start_children(reader) {
        mce_start_element(reader, NULL, NULL) {
            dumpElement(reader);
        } mce_end_element(reader);
        mce_start_text(reader) {
            printf("%s", xmlTextReaderConstValue(reader->reader));
        } mce_end_text(reader);
    } mce_end_children(reader);
    printf("</%s>\n", name);
    if (NULL!=ln) {
        xmlFree(ln);
    }
}


/*
 * Opens "OOXMLI1.docx" read-only, opens an MCE text reader on
 * "/word/document.xml" and hands the elements matched at document level to
 * dumpElement(). Always returns 0; nothing is printed when the container or
 * the reader cannot be opened.
 */
int main( int argc, const char* argv[] )
{
    opcInitLibrary();
    opcContainer *c=opcContainerOpen(_X("OOXMLI1.docx"), OPC_OPEN_READ_ONLY, NULL, NULL);
    if (NULL!=c) {
        mceTextReader_t reader;
        if (OPC_ERROR_NONE==opcXmlReaderOpen(c, &reader, _X("/word/document.xml"), NULL, 0, 0)) {
            mce_start_document(&reader) {
                /* NULL namespace and NULL local name: presumably matches any element - confirm against the mce macro docs. */
                mce_start_element(&reader, NULL, NULL) {
                    dumpElement(&reader);
                } mce_end_element(&reader);
            } mce_end_document(&reader);
            /* The reader is cleaned up only after a successful open. */
            mceTextReaderCleanup(&reader);
        }
        opcContainerClose(c, OPC_CLOSE_NOW);

    }
    opcFreeLibrary();
    return 0;
}

Dump all text from a part as HTML

The following sample will dump all text from a "docx" file into HTML:
#include <opc/opc.h>

/*
 * Writes the text found below the reader's current element to stdout as HTML.
 *
 *  - w:t elements: their text is written with '<', '>' and '&' escaped.
 *  - w:p elements: the recursive output is wrapped in <p>...</p>.
 *  - any other element: recursed into without extra markup.
 */
static void dumpText(mceTextReader_t *reader) {
    static const char wordNs[]="http://schemas.openxmlformats.org/wordprocessingml/2006/main";
    mce_skip_attributes(reader);
    mce_start_children(reader) {
        mce_start_element(reader, _X(wordNs), _X("t")) {
            mce_skip_attributes(reader);
            mce_start_children(reader) {
                mce_start_text(reader) {
                    const xmlChar *cursor=xmlTextReaderConstValue(reader->reader);
                    while (0!=*cursor) {
                        const xmlChar ch=*cursor;
                        if ('<'==ch) {
                            fputs("&lt;", stdout);
                        } else if ('>'==ch) {
                            fputs("&gt;", stdout);
                        } else if ('&'==ch) {
                            fputs("&amp;", stdout);
                        } else {
                            putc(ch, stdout);
                        }
                        cursor++;
                    }
                } mce_end_text(reader);
            } mce_end_children(reader);
        } mce_end_element(reader);
        mce_start_element(reader, _X(wordNs), _X("p")) {
            fputs("<p>", stdout);
            dumpText(reader);
            fputs("</p>\n", stdout);
        } mce_end_element(reader);
        mce_start_element(reader, NULL, NULL) {
            dumpText(reader);
        } mce_end_element(reader);
    } mce_end_children(reader);
}


/*
 * Opens "OOXMLI1.docx" read-only and writes the text of "/word/document.xml"
 * to stdout as a minimal UTF-8 HTML page: dumpText() produces the body
 * content, this function emits the surrounding html/head/body markup.
 * Always returns 0.
 */
int main( int argc, const char* argv[] )
{
    opcInitLibrary();
    opcContainer *c=opcContainerOpen(_X("OOXMLI1.docx"), OPC_OPEN_READ_ONLY, NULL, NULL);
    if (NULL!=c) {
        mceTextReader_t reader;
        if (OPC_ERROR_NONE==opcXmlReaderOpen(c, &reader, _X("/word/document.xml"), NULL, 0, 0)) {
            mce_start_document(&reader) {
                mce_start_element(&reader, NULL, NULL) {
                    printf("<html>\n");
                    printf("<head>\n");
                    printf("<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n");
                    printf("</head>\n");
                    printf("<body>\n");
                    dumpText(&reader);
                    /* Close the body; the sample used to emit a second opening <body> tag here. */
                    printf("</body>\n");
                    printf("</html>\n");
                } mce_end_element(&reader);
            } mce_end_document(&reader);
            mceTextReaderCleanup(&reader);
        }
        opcContainerClose(c, OPC_CLOSE_NOW);
    }
    opcFreeLibrary();
    return 0;
}

Determine the OPC container type

Here is the correct way to determine whether an OPC container is a Word, Excel or PowerPoint document:
#include <opc/opc.h>

/*
 * Opens "OOXMLI1.docx", follows the container-level "officeDocument"
 * relationship to the main part and prints that part's content type, followed
 * by "WORD", "POWERPOINT" or "EXCEL" when the type is one of the three known
 * main-document content types. Always returns 0.
 */
int main( int argc, const char* argv[] )
{
    /* Known main-part content types and the line printed for each. */
    static const struct {
        const char *contentType;
        const char *label;
    } knownTypes[]={
        { "application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml", "WORD Document\n" },
        { "application/vnd.openxmlformats-officedocument.presentationml.presentation.main+xml", "POWERPOINT Document\n" },
        { "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml", "EXCEL Document\n" }
    };
    opcInitLibrary();
    opcContainer *container=opcContainerOpen(_X("OOXMLI1.docx"), OPC_OPEN_READ_ONLY, NULL, NULL);
    if (NULL!=container) {
        opcRelation docRel=opcRelationFind(container, OPC_PART_INVALID, NULL, _X("http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument"));
        if (OPC_RELATION_INVALID!=docRel) {
            opcPart docPart=opcRelationGetInternalTarget(container, OPC_PART_INVALID, docRel);
            if (OPC_PART_INVALID!=docPart) {
                const xmlChar *type=opcPartGetType(container, docPart);
                unsigned idx;
                printf("Office Document Type: %s\n", type);
                /* Report the first (and only possible) matching entry. */
                for(idx=0;idx<sizeof(knownTypes)/sizeof(knownTypes[0]);idx++) {
                    if (0==xmlStrcmp(type, _X(knownTypes[idx].contentType))) {
                        fputs(knownTypes[idx].label, stdout);
                        break;
                    }
                }
            }
        }
        opcContainerClose(container, OPC_CLOSE_NOW);
    }
    opcFreeLibrary();
    return 0;
}

Traverse all Relations

The following sample will traverse all relations of the given OPC container:
#include <opc/opc.h>

/* One link in the chain of parts on the current recursion path; parent is
   NULL for the outermost call. */
struct traverseLink {
    opcPart part;
    const struct traverseLink *parent;
};

/* Returns non-zero when part has the same name as a part on the chain that
   starts at link. */
static int traverseOnPath(const struct traverseLink *link, opcPart part) {
    for(;NULL!=link;link=link->parent) {
        if (OPC_PART_INVALID!=link->part && 0==xmlStrcmp(link->part, part)) {
            return 1;
        }
    }
    return 0;
}

/* Prints every internal relation of source and recurses into each target that
   is not already on the current recursion path. */
static void traverseFrom(opcContainer *c, opcPart source, const struct traverseLink *parent) {
    struct traverseLink here;
    /* "%s" must never receive the invalid (root) part, so give it a label. */
    const char *sourceName=(OPC_PART_INVALID!=source)?(const char *)source:"[root]";
    here.part=source;
    here.parent=parent;
    for(opcRelation rel=opcRelationFirst(c, source);OPC_RELATION_INVALID!=rel;rel=opcRelationNext(c, source, rel)) {
        opcPart target=opcRelationGetInternalTarget(c, source, rel);
        if (OPC_PART_INVALID!=target) {
            const xmlChar *prefix=NULL;
            /* All-ones marks "no counter"; it stays that way if the library
               does not overwrite it. */
            opc_uint32_t counter=(opc_uint32_t)-1;
            const xmlChar *type=NULL;
            opcRelationGetInformation(c, source, rel, &prefix, &counter, &type);
            if ((opc_uint32_t)-1!=counter) {
                /* counter is unsigned, so print it with %u. */
                printf("%s %s%u %s %s\n", sourceName, prefix, (unsigned)counter, target, type);
            } else {
                printf("%s %s %s %s\n", sourceName, prefix, target, type);
            }
            /* Relationship graphs may be cyclic (parts can reference each
               other); do not re-enter a part that is already being traversed. */
            if (!traverseOnPath(&here, target)) {
                traverseFrom(c, target, &here);
            }
        }
    }
}

/*
 * Prints one line per internal relation reachable from source, depth first.
 *
 * c:      open container.
 * source: part whose relations are listed; OPC_PART_INVALID selects the
 *         container-level relations and is printed as "[root]".
 *
 * Each line has the form "<source> <prefix>[<counter>] <target> <type>".
 * A target that is already on the current recursion path is printed but not
 * descended into again, so cyclic relationship graphs terminate.
 */
static void traverse(opcContainer *c, opcPart source) {
    traverseFrom(c, source, NULL);
}

/*
 * Usage: <program> <container>
 *
 * Opens the container named by the first command line argument read-only and
 * prints all relations reachable from the container root.
 * Returns 1 when the argument is missing, otherwise 0.
 */
int main( int argc, const char* argv[] )
{
    /* argv[1] is only valid when an argument was actually supplied. */
    if (argc<2) {
        fprintf(stderr, "usage: %s <container>\n", (argc>0 && NULL!=argv[0])?argv[0]:"traverse");
        return 1;
    }
    opcInitLibrary();
    opcContainer *c=opcContainerOpen(_X(argv[1]), OPC_OPEN_READ_ONLY, NULL, NULL);
    if (NULL!=c) {
        traverse(c, OPC_PART_INVALID);
        opcContainerClose(c, OPC_CLOSE_NOW);
    }
    opcFreeLibrary();
    return 0;
}

Extract all pictures from an OPC container

The following sample program will export all pictures (jpeg and png) from an OPC container:
#include <opc/opc.h>

/*
 * Copies the content of part p into a file in the current directory. The file
 * is named after the portion of the part name that follows its last '/'.
 * Nothing is written when the output file cannot be created; an empty file is
 * left behind when the part's input stream cannot be opened.
 */
static void extract(opcContainer *c, opcPart p) {
    /* Walk back from the terminator to the last '/' (or to index 0). */
    opc_uint32_t nameStart=xmlStrlen(p);
    for(;nameStart>0 && '/'!=p[nameStart];nameStart--) {
    }
    if ('/'==p[nameStart]) {
        nameStart++;
    }
    FILE *file=fopen((char *)(p+nameStart), "wb");
    if (NULL==file) {
        return;
    }
    opcContainerInputStream *in=opcContainerOpenInputStream(c, p);
    if (NULL!=in) {
        opc_uint8_t chunk[100];
        int got=0;
        while((got=opcContainerReadInputStream(in, chunk, sizeof(chunk)))>0) {
            fwrite(chunk, sizeof(char), got, file);
        }
        opcContainerCloseInputStream(in);
    }
    fclose(file);
}

/*
 * Opens "OOXMLI4.docx" read-only and walks over all of its parts: parts of
 * type "image/jpeg" or "image/png" are written to disk via extract(), every
 * other part is reported as skipped. Always returns 0.
 */
int main( int argc, const char* argv[] )
{
    opcInitLibrary();
    opcContainer *container=opcContainerOpen(_X("OOXMLI4.docx"), OPC_OPEN_READ_ONLY, NULL, NULL);
    if (NULL!=container) {
        opcPart current=opcPartGetFirst(container);
        while(OPC_PART_INVALID!=current) {
            const xmlChar *type=opcPartGetType(container, current);
            const int isPicture=(0==xmlStrcmp(type, _X("image/jpeg")))
                             || (0==xmlStrcmp(type, _X("image/png")));
            if (isPicture) {
                extract(container, current);
            } else {
                printf("skipped %s of type %s\n", current, type);
            }
            current=opcPartGetNext(container, current);
        }
        opcContainerClose(container, OPC_CLOSE_NOW);
    }
    opcFreeLibrary();
    return 0;
}

Concurrent Write

#include <opc/opc.h>

int main( int argc, const char* argv[] )
{
    opcInitLibrary();
    opcContainer *c=opcContainerOpen(_X("sample.opc"), OPC_OPEN_READ_WRITE, NULL, NULL); 
    opcPart part1=opcPartCreate(c, _X("part1.xml"), _X("text/plain"), 0); 
    opcPart part2=opcPartCreate(c, _X("part2.xml"), _X("text/plain"), 0); 

    opcContainerOutputStream *stream1=opcContainerCreateOutputStream(c, part1); 
    opcContainerOutputStream *stream2=opcContainerCreateOutputStream(c, part2); 

    // WRITE to stream1 and stream2 concurrently using
    opcContainerWriteOutputStream(stream1, "HELLO", 5);
    opcContainerWriteOutputStream(stream2, "HELLO", 5);
    opcContainerWriteOutputStream(stream2, " WORLD", 6);
    opcContainerWriteOutputStream(stream1, " WORLD", 6);

    opcContainerCloseOutputStream(stream1); 
    opcContainerCloseOutputStream(stream2); 
    opcContainerClose(c, OPC_CLOSE_NOW);
    opcFreeLibrary();
    return 0;
}

Dump all paragraphs and change information from a Word Document

#include <opc/opc.h>
#include <stdio.h>
#include <time.h>

/*
 * Callback invoked by flush() with the accumulated change descriptions
 * (modeTxt) and paragraph text (parTxt); either string may be NULL.
 * flush() frees both strings right after the call returns, so an
 * implementation must not keep the pointers.
 */
typedef void (paragraph_callback_t)(void *callback_ctx, int level, xmlChar *modeTxt, xmlChar *parTxt);

/* Describes one tracked change that applies to the content being read. */
typedef struct CHANGE_MODE {
    /* Set to 1 for w:del / w:moveFrom content and to 0 for w:ins / w:moveTo content. */
    opc_bool_t deleted;
    /* Heap string such as "Insertion by <author>"; released by cleanupMode(). */
    xmlChar *mode;
} changemode_t;

/* Puts *mode into its initial state by zeroing the whole structure. */
static void initMode(changemode_t *mode) {
    opc_bzero_mem(mode, sizeof(changemode_t));
}

/* Frees the description string owned by *mode, if any, and zeroes the
   structure again. */
static void cleanupMode(changemode_t *mode) {
    xmlChar *owned=mode->mode;
    if (NULL!=owned) {
        xmlFree(owned);
    }
    opc_bzero_mem(mode, sizeof(changemode_t));
}

/* State accumulated while parsing, handed to the callback by flush(). */
typedef struct PARSER_CONTEXT {
    /* Change descriptions collected by text() and par(); freed by flush() and cleanup(). */
    xmlChar *modeTxt;
    /* True while the most recently appended piece was a deletion placeholder ("[]"). */
    opc_bool_t deleted;
    /* Paragraph text collected by text() and par(); freed by flush() and cleanup(). */
    xmlChar *parTxt;
    /* Opaque pointer passed through to callback_fct as its first argument. */
    void *callback_ctx;
    /* Called by flush(); may be NULL, in which case no callback is made. */
    paragraph_callback_t *callback_fct;
} context_t;

/*
 * Hands the accumulated change descriptions and paragraph text to the
 * callback (when one is set), then frees both strings and resets them to
 * NULL.
 */
static void flush(context_t *ctx, int level) {
    if (NULL!=ctx->callback_fct) {
        ctx->callback_fct(ctx->callback_ctx, level, ctx->modeTxt, ctx->parTxt);
    }
    if (NULL!=ctx->modeTxt) {
        xmlFree(ctx->modeTxt);
        ctx->modeTxt=NULL;
    }
    if (NULL!=ctx->parTxt) {
        xmlFree(ctx->parTxt);
        ctx->parTxt=NULL;
    }
}

/* Frees the accumulated strings without invoking the callback and resets
   them to NULL. */
static void cleanup(context_t *ctx) {
    if (NULL!=ctx->modeTxt) {
        xmlFree(ctx->modeTxt);
        ctx->modeTxt=NULL;
    }
    if (NULL!=ctx->parTxt) {
        xmlFree(ctx->parTxt);
        ctx->parTxt=NULL;
    }
}

/*
 * Records one run of text.
 *
 * With a change mode, a line "<mode>: "<text>"" is appended to ctx->modeTxt.
 * Deleted text is represented in ctx->parTxt by a single "[]" placeholder per
 * consecutive run of deletions; all other text is appended verbatim.
 *
 * textMode may be NULL for text that is not part of a tracked change.
 */
static void text(context_t *ctx, const xmlChar *text, changemode_t *textMode) {
    const int tracked=(NULL!=textMode);
    if (tracked) {
        ctx->modeTxt=xmlStrcat(ctx->modeTxt, textMode->mode);
        ctx->modeTxt=xmlStrcat(ctx->modeTxt, _X(": \""));
        ctx->modeTxt=xmlStrcat(ctx->modeTxt, text);
        ctx->modeTxt=xmlStrcat(ctx->modeTxt, _X("\"\n"));
    }
    if (tracked && textMode->deleted) {
        /* Emit the placeholder only at the start of a run of deletions. */
        if (!ctx->deleted) {
            ctx->parTxt=xmlStrcat(ctx->parTxt, _X("[]"));
        }
        ctx->deleted=OPC_TRUE;
        return;
    }
    ctx->parTxt=xmlStrcat(ctx->parTxt, text);
    ctx->deleted=OPC_FALSE;
}

/* Appends "<mode><suffix>" to the change descriptions when mark carries a
   description. */
static void appendMark(context_t *ctx, const changemode_t *mark, const char *suffix) {
    if (NULL!=mark && NULL!=mark->mode) {
        ctx->modeTxt=xmlStrcat(ctx->modeTxt, mark->mode);
        ctx->modeTxt=xmlStrcat(ctx->modeTxt, _X(suffix));
    }
}

/*
 * Ends a paragraph.
 *
 * Row and paragraph mark changes are added to the change descriptions. When
 * the paragraph mark itself is deleted, a "[]" placeholder is recorded (once
 * per run of deletions) and nothing is flushed, so the text continues in the
 * same output paragraph. Otherwise a newline is appended and the paragraph is
 * flushed to the callback.
 *
 * cellMode is accepted but not used.
 */
static void par(context_t *ctx, int level, changemode_t *parMode, changemode_t *cellMode, changemode_t *rowMode) {
    appendMark(ctx, rowMode, ": row mark\n");
    appendMark(ctx, parMode, ": paragraph mark\n");
    if (NULL==parMode || !parMode->deleted) {
        ctx->parTxt=xmlStrcat(ctx->parTxt, _X("\n"));
        ctx->deleted=OPC_FALSE;
        flush(ctx, level);
        return;
    }
    if (!ctx->deleted) {
        ctx->parTxt=xmlStrcat(ctx->parTxt, _X("[]"));
    }
    ctx->deleted=OPC_TRUE;
}


static char ns_w[]="http://schemas.openxmlformats.org/wordprocessingml/2006/main";

/* Forward declaration: dumpChildren() and dumpText() call each other. */
static void dumpText(context_t *ctx, mceTextReader_t *reader, int level, changemode_t *textMode, changemode_t *parMode, changemode_t *cellMode, changemode_t *rowMode, changemode_t *prop_mode);
/*
 * Calls dumpText() for the element and text children of the reader's current
 * element, passing all modes through unchanged.
 */
static void dumpChildren(context_t *ctx, mceTextReader_t *reader, int level, changemode_t *textMode, changemode_t *parMode, changemode_t *cellMode, changemode_t *rowMode, changemode_t *prop_mode) {
    mce_start_children(reader) {
        /* NULL namespace and NULL local name: presumably matches any element - confirm against the mce macro docs. */
        mce_match_element(reader, NULL, NULL) {
            dumpText(ctx, reader, level, textMode, parMode, cellMode, rowMode, prop_mode);
        }
        mce_match_text(reader) {
            dumpText(ctx, reader, level, textMode, parMode, cellMode, rowMode, prop_mode);
        }
    } mce_end_children(reader);
}


static void dumpText(context_t *ctx, mceTextReader_t *reader, int level, changemode_t *textMode, changemode_t *parMode, changemode_t *cellMode, changemode_t *rowMode, changemode_t *prop_mode) {
    mce_start_choice(reader) {
        mce_start_element(reader, _X(ns_w), _X("t")) {
            mce_skip_attributes(reader);
            mce_start_children(reader) {
                mce_start_text(reader) {
                    text(ctx, xmlTextReaderConstValue(reader->reader), textMode);
                } mce_end_text(reader);
            } mce_end_children(reader);
        } mce_end_element(reader);
        mce_start_element(reader, _X(ns_w), _X("delText")) {
            mce_skip_attributes(reader);
            mce_start_children(reader) {
                mce_start_text(reader) {
                    OPC_ASSERT(NULL!=textMode && textMode->deleted);
                    text(ctx, xmlTextReaderConstValue(reader->reader), textMode);
                } mce_end_text(reader);
            } mce_end_children(reader);
        } mce_end_element(reader);
        mce_start_element(reader, _X(ns_w), _X("ins")) {
            changemode_t ins_props;
            initMode(&ins_props);
            ins_props.deleted=0;
            ins_props.mode=xmlStrdup(_X("Insertion by "));
            mce_start_attributes(reader) {
                mce_start_attribute(reader, _X(ns_w), _X("author")) {
                    ins_props.mode=xmlStrcat(ins_props.mode, xmlTextReaderConstValue(reader->reader));
                } mce_end_attribute(reader);
            } mce_end_attributes(reader);
            if (NULL!=prop_mode) {
                prop_mode->deleted=ins_props.deleted;
                prop_mode->mode=xmlStrdup(ins_props.mode);
            }
            dumpChildren(ctx, reader, level, &ins_props, parMode, cellMode, rowMode, prop_mode);
            cleanupMode(&ins_props);
        } mce_end_element(reader);
        mce_start_element(reader, _X(ns_w), _X("moveTo")) {
            changemode_t ins_props;
            initMode(&ins_props);
            ins_props.deleted=0;
            ins_props.mode=xmlStrdup(_X("Insertion by "));
            mce_start_attributes(reader) {
                mce_start_attribute(reader, _X(ns_w), _X("author")) {
                    ins_props.mode=xmlStrcat(ins_props.mode, xmlTextReaderConstValue(reader->reader));
                } mce_end_attribute(reader);
            } mce_end_attributes(reader);
            if (NULL!=prop_mode) {
                prop_mode->deleted=ins_props.deleted;
                prop_mode->mode=xmlStrdup(ins_props.mode);
            }
            dumpChildren(ctx, reader, level, &ins_props, parMode, cellMode, rowMode, prop_mode);
            cleanupMode(&ins_props);
        } mce_end_element(reader);
        mce_start_element(reader, _X(ns_w), _X("del")) {
            changemode_t del_props;
            initMode(&del_props);
            del_props.deleted=1;
            del_props.mode=xmlStrdup(_X("Deletion by "));
            mce_start_attributes(reader) {
                mce_start_attribute(reader, _X(ns_w), _X("author")) {
                    del_props.mode=xmlStrcat(del_props.mode, xmlTextReaderConstValue(reader->reader));
                } mce_end_attribute(reader);
            } mce_end_attributes(reader);
            if (NULL!=prop_mode) {
                prop_mode->deleted=del_props.deleted;
                prop_mode->mode=xmlStrdup(del_props.mode);
            }
            dumpChildren(ctx, reader, level, &del_props, parMode, cellMode, rowMode, prop_mode);
            cleanupMode(&del_props);
        } mce_end_element(reader);
        mce_start_element(reader, _X(ns_w), _X("moveFrom")) {
            changemode_t del_props;
            initMode(&del_props);
            del_props.deleted=1;
            del_props.mode=xmlStrdup(_X("Deletion by "));
            mce_start_attributes(reader) {
                mce_start_attribute(reader, _X(ns_w), _X("author")) {
                    del_props.mode=xmlStrcat(del_props.mode, xmlTextReaderConstValue(reader->reader));
                } mce_end_attribute(reader);
            } mce_end_attributes(reader);
            if (NULL!=prop_mode) {
                prop_mode->deleted=del_props.deleted;
                prop_mode->mode=xmlStrdup(del_props.mode);
            }
            dumpChildren(ctx, reader, level, &del_props, parMode, cellMode, rowMode, prop_mode);
            cleanupMode(&del_props);
        } mce_end_element(reader);
        mce_start_element(reader, _X(ns_w), _X("p")) {
            changemode_t p_props;
            initMode(&p_props);
            mce_skip_attributes(reader);
            mce_start_children(reader) {
                mce_match_element(reader, _X(ns_w), _X("pPr")) {
                    dumpText(ctx, reader, level, textMode, parMode, cellMode, rowMode, &p_props);
                };
                mce_match_element(reader, NULL, NULL) {
                    dumpText(ctx, reader, level, textMode, &p_props, cellMode, rowMode, NULL);
                };
            } mce_end_children(reader);
            par(ctx, level, &p_props, cellMode, rowMode);
            cleanupMode(&p_props);
        } mce_end_element(reader);
        mce_start_element(reader, _X(ns_w), _X("tr")) {
            changemode_t tr_props;
            initMode(&tr_props);
            mce_skip_attributes(reader);
            mce_start_children(reader) {
                mce_match_element(reader, _X(ns_w), _X("trPr")) {
                    dumpText(ctx, reader, level+1, textMode, parMode, cellMode, rowMode, &tr_props);
                };
                mce_match_element(reader, NULL, NULL) {
                    dumpText(ctx, reader, level+1, textMode, parMode, cellMode, &tr_props, NULL);
                };
            } mce_end_children(reader);
            cleanupMode(&tr_props);
        } mce_end_element(reader);
        mce_start_element(reader, NULL, NULL) {
            mce_skip_attributes(reader);
            dumpChildren(ctx, reader, level, textMode, parMode, cellMode, rowMode, prop_mode);
        } mce_end_element(reader);
        mce_start_text(reader) {
        } mce_end_text(reader);
    } mce_end_choice(reader);
}

/*
 * Parses the main document part of a Word container and reports its
 * paragraphs, together with their change-tracking information, through
 * callback_fct.
 *
 * filename:     container to open read-only.
 * callback_fct: called for every flushed paragraph and once more after the
 *               document has been read; may be NULL.
 * callback_ctx: passed through to callback_fct unchanged.
 *
 * Nothing is reported when the container cannot be opened, has no
 * "officeDocument" relationship, or its main part is not of the
 * WordprocessingML main-document content type.
 */
void parseText(xmlChar *filename, paragraph_callback_t *callback_fct, void *callback_ctx) {
    opcContainer *c=opcContainerOpen(filename, OPC_OPEN_READ_ONLY, NULL, NULL);
    if (NULL!=c) {
        opcRelation rel=opcRelationFind(c, OPC_PART_INVALID, NULL, _X("http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument"));
        if (OPC_RELATION_INVALID!=rel) {
            opcPart mainPart=opcRelationGetInternalTarget(c, OPC_PART_INVALID, rel);
            if (OPC_PART_INVALID!=mainPart) {
                const xmlChar *type=opcPartGetType(c, mainPart);
                if (0==xmlStrcmp(type, _X("application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml"))) {
                    mceTextReader_t reader;
                    if (OPC_ERROR_NONE==opcXmlReaderOpen(c, &reader, mainPart, NULL, 0, 0)) {
                        context_t ctx;
                        opc_bzero_mem(&ctx, sizeof(ctx));
                        ctx.callback_fct=callback_fct;
                        ctx.callback_ctx=callback_ctx;
                        mce_start_document(&reader) {
                            mce_match_element(&reader, NULL, NULL) {
                                dumpText(&ctx, &reader, 0, NULL, NULL, NULL, NULL, NULL);
                            };
                        } mce_end_document(&reader);
                        /* Report whatever is still pending after the last paragraph. */
                        flush(&ctx, 0);
                        cleanup(&ctx);
                        /* Clean up only a reader that was opened successfully;
                           after a failed open the structure may still be
                           uninitialized. */
                        mceTextReaderCleanup(&reader);
                    }
                }
            }
        }
        opcContainerClose(c, OPC_CLOSE_NOW);
    }
}

/*
 * Paragraph callback that writes the change descriptions followed by the
 * paragraph text to the FILE* passed as callback_ctx. NULL strings are
 * skipped; level is not used.
 */
static void paragraph_callback(void *callback_ctx, int level, xmlChar *modeTxt, xmlChar *parTxt) {
    FILE *out=(FILE*)callback_ctx;
    const xmlChar *pieces[2];
    int idx;
    pieces[0]=modeTxt;
    pieces[1]=parTxt;
    for(idx=0;idx<2;idx++) {
        if (NULL!=pieces[idx]) {
            fputs((const char *)pieces[idx], out);
        }
    }
}

/*
 * Usage: <program> <word-document>
 *
 * Dumps all paragraphs and change information of the given Word document to
 * stdout. Returns 1 when the argument is missing, otherwise 0.
 */
int main( int argc, const char* argv[] )
{
    /* argv[1] is only valid when an argument was actually supplied. */
    if (argc<2) {
        fprintf(stderr, "usage: %s <word-document>\n", (argc>0 && NULL!=argv[0])?argv[0]:"dump");
        return 1;
    }
    opcInitLibrary();
    parseText(_X(argv[1]), paragraph_callback, stdout);
    opcFreeLibrary();
    return 0;
}


Last edited Jun 28, 2011 at 8:36 AM by flr, version 28

Comments

michCl Oct 17, 2014 at 12:47 PM 
Unrelated to comment above: https://libopc.codeplex.com/discussions/402351

masoodshaik Feb 28, 2012 at 5:05 AM 
Hi All

could any one please suggest me solution for below error when i used below statement in cpp file

statement:
opcContainerReadInputStream(stream, buf, sizeof(buf))

error:
Invalid arguments ' Candidates are: ? opcContainerReadInputStream(OPC_CONTAINER_INPUTSTREAM_STRUCT *, ? *, ?) '