Documentation
¶
Overview ¶
Package fitz provides a wrapper for the [MuPDF](http://mupdf.com/) fitz library. It can extract pages from PDF, EPUB, MOBI, DOCX, XLSX and PPTX documents as images, text, HTML or SVG.
Index ¶
- Variables
- type Document
- func (f *Document) Bound(pageNumber int) (image.Rectangle, error)
- func (f *Document) Close() error
- func (f *Document) HTML(pageNumber int, header bool) (string, error)
- func (f *Document) Image(pageNumber int) (*image.RGBA, error)
- func (f *Document) ImageDPI(pageNumber int, dpi float64) (*image.RGBA, error)
- func (f *Document) ImagePNG(pageNumber int, dpi float64) ([]byte, error)
- func (f *Document) Links(pageNumber int) ([]Link, error)
- func (f *Document) Metadata() map[string]string
- func (f *Document) NumPage() int
- func (f *Document) SVG(pageNumber int) (string, error)
- func (f *Document) Text(pageNumber int) (string, error)
- func (f *Document) ToC() ([]Outline, error)
- type Link
- type Outline
Examples ¶
Constants ¶
This section is empty.
Variables ¶
// Sentinel errors defined by the package. Every message carries the "fitz:"
// prefix; compare a returned error against these values with errors.Is.
var (
	ErrNoSuchFile      = errors.New("fitz: no such file")
	ErrCreateContext   = errors.New("fitz: cannot create context")
	ErrOpenDocument    = errors.New("fitz: cannot open document")
	ErrEmptyBytes      = errors.New("fitz: cannot send empty bytes")
	ErrOpenMemory      = errors.New("fitz: cannot open memory")
	ErrLoadPage        = errors.New("fitz: cannot load page")
	ErrRunPageContents = errors.New("fitz: cannot run page contents")
	ErrPageMissing     = errors.New("fitz: page missing")
	ErrCreatePixmap    = errors.New("fitz: cannot create pixmap")
	ErrPixmapSamples   = errors.New("fitz: cannot get pixmap samples")
	ErrNeedsPassword   = errors.New("fitz: document needs password")
	ErrLoadOutline     = errors.New("fitz: cannot load outline")
)
Errors.
// FzVersion is the libmupdf version string expected by the experimental
// purego implementation.
var FzVersion = "1.24.9"
FzVersion is used by the experimental purego implementation; it must exactly match the version of the libmupdf shared library. The version can also be set with the `FZ_VERSION` environment variable.
// MaxStore is the size limit of the resource store in bytes; 256 << 20 is
// 256 MiB.
var MaxStore = 256 << 20
MaxStore is the maximum size in bytes of the resource store before it starts evicting cached resources such as fonts and images.
Functions ¶
This section is empty.
Types ¶
type Document ¶
// Document represents a fitz document. None of its fields are part of the
// public API.
type Document struct {
// contains filtered or unexported fields
}
Document represents a fitz document.
func New ¶
New returns a new fitz document.
Example ¶
// Open the source document.
doc, err := fitz.New("test.pdf")
if err != nil {
	panic(err)
}
defer doc.Close()

// Every extracted page is written into a new temporary directory.
tmpDir, err := os.MkdirTemp(os.TempDir(), "fitz")
if err != nil {
	panic(err)
}

// Extract pages as images
for n := 0; n < doc.NumPage(); n++ {
	img, err := doc.Image(n)
	if err != nil {
		panic(err)
	}

	f, err := os.Create(filepath.Join(tmpDir, fmt.Sprintf("test%03d.jpg", n)))
	if err != nil {
		panic(err)
	}

	err = jpeg.Encode(f, img, &jpeg.Options{Quality: jpeg.DefaultQuality})
	if err != nil {
		panic(err)
	}

	// Close can report a write error that surfaced late, so it is checked
	// rather than discarded.
	if err := f.Close(); err != nil {
		panic(err)
	}
}

// Extract pages as text
for n := 0; n < doc.NumPage(); n++ {
	text, err := doc.Text(n)
	if err != nil {
		panic(err)
	}

	f, err := os.Create(filepath.Join(tmpDir, fmt.Sprintf("test%03d.txt", n)))
	if err != nil {
		panic(err)
	}

	_, err = f.WriteString(text)
	if err != nil {
		panic(err)
	}

	if err := f.Close(); err != nil {
		panic(err)
	}
}

// Extract pages as html
for n := 0; n < doc.NumPage(); n++ {
	// The second argument is the header flag of Document.HTML.
	html, err := doc.HTML(n, true)
	if err != nil {
		panic(err)
	}

	f, err := os.Create(filepath.Join(tmpDir, fmt.Sprintf("test%03d.html", n)))
	if err != nil {
		panic(err)
	}

	_, err = f.WriteString(html)
	if err != nil {
		panic(err)
	}

	if err := f.Close(); err != nil {
		panic(err)
	}
}

// Extract pages as svg
for n := 0; n < doc.NumPage(); n++ {
	svg, err := doc.SVG(n)
	if err != nil {
		panic(err)
	}

	f, err := os.Create(filepath.Join(tmpDir, fmt.Sprintf("test%03d.svg", n)))
	if err != nil {
		panic(err)
	}

	_, err = f.WriteString(svg)
	if err != nil {
		panic(err)
	}

	if err := f.Close(); err != nil {
		panic(err)
	}
}
func NewFromMemory ¶
NewFromMemory returns a new fitz document from a byte slice.
func NewFromReader ¶
NewFromReader returns a new fitz document from an io.Reader.
type Outline ¶
// Outline describes a single entry of a document outline (table of contents).
type Outline struct {
// Hierarchy level of the entry (starting from 1).
Level int
// Title of the outline item.
Title string
// Destination in the document to be displayed when this outline item is activated.
URI string
// The page number of an internal link.
Page int
// Top of the destination.
// NOTE(review): presumably the vertical position on the target page; confirm against the MuPDF outline documentation.
Top float64
}
Outline describes a single entry of the document outline (table of contents), as returned by ToC.