1 files changed, 85 insertions, 82 deletions
diff --git a/extract/src/docx.c b/extract/src/docx.c
index 761de176..ca6c5d78 100644
--- a/extract/src/docx.c
+++ b/extract/src/docx.c
@@ -95,7 +95,7 @@ static int s_docx_paragraph_empty(extract_alloc_t* alloc, extract_astring_t* con
     content_state.font.size = 10;
     content_state.font.bold = 0;
     content_state.font.italic = 0;
-    
+
     if (s_docx_run_start(alloc, content, &content_state)) goto end;
     //docx_char_append_string(content, "&#160;");   /* &#160; is non-break space. */
     if (s_docx_run_finish(alloc, NULL /*state*/, content)) goto end;
@@ -168,9 +168,9 @@ font. */
         if (s_docx_run_finish(alloc, content_state, content)) goto end;
     }
     if (s_docx_paragraph_finish(alloc, content)) goto end;
-    
+
     e = 0;
-    
+
     end:
     return e;
 }
@@ -245,7 +245,7 @@ static int s_docx_append_image(
 
 static int s_docx_output_rotated_paragraphs(
         extract_alloc_t*    alloc,
-        extract_page_t*     page,
+        subpage_t*          subpage,
         int                 paragraph_begin,
         int                 paragraph_end,
         int                 rot,
@@ -330,7 +330,7 @@ static int s_docx_output_rotated_paragraphs(
 
     /* Output paragraphs p0..p2-1. */
     for (p=paragraph_begin; p<paragraph_end; ++p) {
-        paragraph_t* paragraph = page->paragraphs[p];
+        paragraph_t* paragraph = subpage->paragraphs[p];
         if (s_document_to_docx_content_paragraph(alloc, state, paragraph, content)) goto end;
     }
 
@@ -364,7 +364,7 @@ static int s_docx_output_rotated_paragraphs(
     extract_astring_cat(alloc, content, "              <w:txbxContent>");
 
     for (p=paragraph_begin; p<paragraph_end; ++p) {
-        paragraph_t* paragraph = page->paragraphs[p];
+        paragraph_t* paragraph = subpage->paragraphs[p];
         if (s_document_to_docx_content_paragraph(alloc, state, paragraph, content)) goto end;
     }
 
@@ -392,7 +392,7 @@ to the application. */
 {
     int e = -1;
     int y;
-    
+
     if (extract_astring_cat(alloc, content,
             "\n"
             "    <w:tbl>\n"
@@ -406,14 +406,14 @@ to the application. */
                 "        <w:tr>\n"
                 "            <w:trPr/>\n"
                 )) goto end;
-        
+
         for (x=0; x<table->cells_num_x; ++x)
         {
             cell_t* cell = table->cells[y*table->cells_num_x + x];
             if (!cell->left) continue;
-            
+
             if (extract_astring_cat(alloc, content, "            <w:tc>\n")) goto end;
-            
+
             /* Write cell properties. */
             {
                 if (extract_astring_cat(alloc, content,
@@ -442,7 +442,7 @@ to the application. */
                 }
                 if (extract_astring_cat(alloc, content, "                </w:tcPr>\n")) goto end;
             }
-            
+
             /* Write contents of this cell. */
             {
                 size_t chars_num_old = content->chars_num;
@@ -476,20 +476,20 @@ to the application. */
     }
     if (extract_astring_cat(alloc, content, "    </w:tbl>\n")) goto end;
     e = 0;
-    
+
     end:
     return e;
 }
 
 static int s_docx_append_rotated_paragraphs(
         extract_alloc_t*    alloc,
-        extract_page_t*     page,
+        subpage_t*          subpage,
         content_state_t*    state,
         int*                p,
         int*                text_box_id,
         const matrix_t*     ctm,
         double              rotate,
-        extract_astring_t*  content
+        extract_astring_t*  output
         )
-/* Appends paragraphs with same rotation, starting with page->paragraphs[*p]
+/* Appends paragraphs with same rotation, starting with subpage->paragraphs[*p]
 and updates *p. */
@@ -501,8 +501,8 @@ and updates *p. */
     point_t extent = {0, 0};
     int p0 = *p;
     int p1;
-    paragraph_t* paragraph = page->paragraphs[*p];
-    
+    paragraph_t* paragraph = subpage->paragraphs[*p];
+
     outf("rotate=%.2frad=%.1fdeg ctm: ef=(%f %f) abcd=(%f %f %f %f)",
             rotate, rotate * 180 / pi,
             ctm->e,
@@ -535,8 +535,8 @@ and updates *p. */
                     ctm->a, ctm->b, ctm->c, ctm->d);
         }
 
-        for (*p=p0; *p<page->paragraphs_num; ++(*p)) {
-            paragraph = page->paragraphs[*p];
+        for (*p=p0; *p<subpage->paragraphs_num; ++(*p)) {
+            paragraph = subpage->paragraphs[*p];
             ctm = &paragraph->lines[0]->spans[0]->ctm;
             rotate = atan2(ctm->b, ctm->a);
             if (rotate != rotate0) {
@@ -625,13 +625,13 @@ and updates *p. */
         x -= dx;
         y -= -dy;
 
-        if (s_docx_output_rotated_paragraphs(alloc, page, p0, p1, rot, x, y, w, h, *text_box_id, content, state)) goto end;
+        if (s_docx_output_rotated_paragraphs(alloc, subpage, p0, p1, rot, x, y, w, h, *text_box_id, output, state)) goto end;
     }
     *p = p1 - 1;
     e = 0;
-    
+
     end:
-    
+
     return e;
 }
 
@@ -647,38 +647,40 @@ int extract_document_to_docx_content(
     int ret = -1;
     int text_box_id = 0;
     int p;
-    
+
     /* Write paragraphs into <content>. */
     for (p=0; p<document->pages_num; ++p) {
         extract_page_t* page = document->pages[p];
-        
-        int p = 0;
-        int t = 0;
-        
-        content_state_t content_state;
-        content_state.font.name = NULL;
-        content_state.font.size = 0;
-        content_state.font.bold = 0;
-        content_state.font.italic = 0;
-        content_state.ctm_prev = NULL;
-        
-        /* Output paragraphs and tables in order of y coordinate. */
-        for(;;)
-        {
-            paragraph_t* paragraph = (p == page->paragraphs_num) ? NULL : page->paragraphs[p];
-            table_t* table = (t == page->tables_num) ? NULL : page->tables[t];
-            double y_paragraph;
-            double y_table;
-            if (!paragraph && !table)   break;
-            y_paragraph = (paragraph) ? paragraph->lines[0]->spans[0]->chars[0].y : DBL_MAX;
-            y_table = (table) ? table->pos.y : DBL_MAX;
-            
-            if (paragraph && y_paragraph < y_table)
-            {
-                const matrix_t* ctm = &paragraph->lines[0]->spans[0]->ctm;
-                double rotate = atan2(ctm->b, ctm->a);
-
-                if (spacing
+        int c;  /* Index into page->subpages. */
+
+        for (c=0; c<page->subpages_num; ++c) {
+            subpage_t* subpage = page->subpages[c];
+
+            int p = 0;
+            int t = 0;
+
+            content_state_t content_state;
+            content_state.font.name = NULL;
+            content_state.font.size = 0;
+            content_state.font.bold = 0;
+            content_state.font.italic = 0;
+            content_state.ctm_prev = NULL;
+
+            /* Output paragraphs and tables in order of y coordinate. */
+            for(;;) {
+                paragraph_t* paragraph = (p == subpage->paragraphs_num) ? NULL : subpage->paragraphs[p];
+                table_t* table = (t == subpage->tables_num) ? NULL : subpage->tables[t];
+                double y_paragraph;
+                double y_table;
+                if (!paragraph && !table)   break;
+                y_paragraph = (paragraph) ? paragraph->lines[0]->spans[0]->chars[0].y : DBL_MAX;
+                y_table = (table) ? table->pos.y : DBL_MAX;
+
+                if (paragraph && y_paragraph < y_table) {
+                    const matrix_t* ctm = &paragraph->lines[0]->spans[0]->ctm;
+                    double rotate = atan2(ctm->b, ctm->a);
+
+                    if (spacing
                         && content_state.ctm_prev
                         && paragraph->lines_num
                         && paragraph->lines[0]->spans_num
@@ -687,37 +689,38 @@ int extract_document_to_docx_content(
                                 &paragraph->lines[0]->spans[0]->ctm
                                 )
                         ) {
-                    /* Extra vertical space between paragraphs that were at
-                    different angles in the original document. */
-                    if (s_docx_paragraph_empty(alloc, content)) goto end;
-                }
+                        /* Extra vertical space between paragraphs that were at
+                        different angles in the original document. */
+                        if (s_docx_paragraph_empty(alloc, content)) goto end;
+                    }
 
-                if (spacing) {
-                    /* Extra vertical space between paragraphs. */
-                    if (s_docx_paragraph_empty(alloc, content)) goto end;
-                }
+                    if (spacing) {
+                        /* Extra vertical space between paragraphs. */
+                        if (s_docx_paragraph_empty(alloc, content)) goto end;
+                    }
 
-                if (rotation && rotate != 0)
-                {
-                    if (s_docx_append_rotated_paragraphs(alloc, page, &content_state, &p, &text_box_id, ctm, rotate, content)) goto end;
+                    if (rotation && rotate != 0)
+                    {
+                        if (s_docx_append_rotated_paragraphs(alloc, subpage, &content_state, &p, &text_box_id, ctm, rotate, content)) goto end;
+                    }
+                    else
+                    {
+                        if (s_document_to_docx_content_paragraph(alloc, &content_state, paragraph, content)) goto end;
+                    }
+                    p += 1;
                 }
-                else
+                else if (table)
                 {
-                    if (s_document_to_docx_content_paragraph(alloc, &content_state, paragraph, content)) goto end;
+                    if (s_docx_append_table(alloc, table, content)) goto end;
+                    t += 1;
                 }
-                p += 1;
-            }
-            else if (table)
-            {
-                if (s_docx_append_table(alloc, table, content)) goto end;
-                t += 1;
             }
-        }
-        
-        if (images) {
-            int i;
-            for (i=0; i<page->images_num; ++i) {
-                s_docx_append_image(alloc, content, &page->images[i]);
+
+            if (images) {   /* Image references follow this subpage's paragraphs/tables. */
+                int i;
+                for (i=0; i<subpage->images_num; ++i) {
+                    if (s_docx_append_image(alloc, content, &subpage->images[i])) goto end;
+                }
+            }
         }
     }
@@ -759,7 +762,7 @@ int extract_docx_content_item(
     extract_astring_t   temp;
     extract_astring_init(&temp);
     *text2 = NULL;
-    
+
     if (0)
     {}
     else if (!strcmp(name, "[Content_Types].xml")) {
@@ -841,7 +844,7 @@ int extract_docx_content_item(
     return e;
 }
 
-        
+
 
 int extract_docx_write_template(
         extract_alloc_t*    alloc,
@@ -862,7 +865,7 @@ int extract_docx_write_template(
 
     assert(path_out);
     assert(path_template);
-    
+
     if (extract_check_path_shell_safe(path_out)) {
         outf("path_out is unsafe: %s", path_out);
         goto end;
@@ -889,7 +892,7 @@ int extract_docx_write_template(
     /* Might be nice to iterate through all items in path_tempdir, but for now
     we look at just the items that we know extract_docx_content_item() will
     modify. */
-    
+
     {
         const char* names[] = {
                 "word/document.xml",
@@ -904,7 +907,7 @@ int extract_docx_write_template(
             extract_free(alloc, &text2);
             if (extract_asprintf(alloc, &path, "%s/%s", path_tempdir, name) < 0) goto end;
             if (extract_read_all_path(alloc, path, &text)) goto end;
-            
+
             if (extract_docx_content_item(
                     alloc,
                     contentss,
@@ -926,14 +929,14 @@ int extract_docx_write_template(
     extract_free(alloc, &path);
     if (extract_asprintf(alloc, &path, "%s/word/media", path_tempdir) < 0) goto end;
     if (extract_mkdir(path, 0777)) goto end;
-    
+
     for (i=0; i<images->images_num; ++i) {
         image_t* image = &images->images[i];
         extract_free(alloc, &path);
         if (extract_asprintf(alloc, &path, "%s/word/media/%s", path_tempdir, image->name) < 0) goto end;
         if (extract_write_all(image->data, image->data_size, path)) goto end;
     }
-    
+
     outf("Zipping tempdir to create %s", path_out);
     {
         const char* path_out_leaf = strrchr(path_out, '/');