Files
2021-08-22 16:47:09 +10:00

86 lines
1.6 KiB
Go

package formater
import (
"archive/zip"
"bytes"
"encoding/xml"
"fmt"
. "github.com/mickael-kerjean/filestash/server/common"
"io"
"math/rand"
"os"
"regexp"
"strings"
)
func OfficeFormater(r io.ReadCloser) (io.ReadCloser, error) {
tmpName := fmt.Sprintf("/tmp/docx_%d.docx", rand.Intn(1000000))
defer os.Remove(tmpName)
f, err := os.OpenFile(tmpName, os.O_CREATE|os.O_WRONLY, os.ModePerm)
if err != nil {
return nil, err
}
_, err = io.Copy(f, r)
if err != nil {
return nil, err
}
z, err := zip.OpenReader(tmpName)
if err != nil {
return nil, err
}
defer z.Close()
hasData := false
content := bytes.NewBuffer([]byte{})
for _, f := range z.File {
shouldExtract := false
if f.Name == "word/document.xml" {
shouldExtract = true
}
if strings.HasPrefix(f.Name, "ppt/slides/slide") {
shouldExtract = true
}
if shouldExtract == false {
continue
}
hasData = true
o, err := f.Open()
if err != nil {
return nil, err
}
dec := xml.NewDecoder(o)
for {
t, err := dec.Token()
if err != nil {
break
}
if t == nil {
break
}
switch el := t.(type) {
case xml.StartElement:
if el.Name.Local == "t" {
w := WordDoc{}
dec.DecodeElement(&w, &el)
if len(w.Text) > 0 {
w.Text = regexp.MustCompile("\\s+\\.\\s+").ReplaceAll(w.Text, []byte(". "))
w.Text = regexp.MustCompile("\\s{2,}").ReplaceAll(w.Text, []byte(" "))
content.Write(w.Text)
content.Write([]byte(" "))
}
}
}
}
}
if hasData == false {
return nil, ErrNotFound
}
return NewReadCloserFromReader(content), nil
}
type WordDoc struct {
Text []byte `xml:",innerxml"`
}