DEV Community

E-iceblue Product Family
E-iceblue Product Family

Posted on

Extract Text and Images from a PowerPoint Document in Java

When you come across a PowerPoint document with beautiful illustrations or background pictures, you may want to save those pictures to your own computer. Likewise, if the text in the document is exactly what you want to reference in other documents, you can save it to a text file first. In this article, we’ll introduce how to extract text and images from a PowerPoint document by using Free Spire.Presentation for Java.

Here is a screenshot of the sample PowerPoint file:

Alt Text

Extract Text from an Entire Document

import com.spire.presentation.IAutoShape;
import com.spire.presentation.ISlide;
import com.spire.presentation.ParagraphEx;
import com.spire.presentation.Presentation;

import java.io.FileWriter;

/**
 * Example program that collects the text of every auto shape on every slide
 * of a PowerPoint document and writes it to a plain-text file.
 *
 * <p>Both the input path and the output path are hard-coded; adjust them
 * before running.
 */
public class ExtractText {

    /**
     * Loads the presentation, gathers paragraph text from all
     * {@code IAutoShape} shapes, and writes the result to
     * {@code ExtractedFiles/Text.txt}.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if loading the presentation or writing the output fails
     */
    public static void main(String[] args) throws Exception{

        //create a Presentation instance
        Presentation ppt = new Presentation();

        //load the PowerPoint document
        ppt.loadFromFile("C:\\Users\\Administrator\\Desktop\\ Triceratops Herds.pptx");

        //create a StringBuilder object
        StringBuilder buffer = new StringBuilder();

        //loop through the slides and extract text
        for (Object slide : ppt.getSlides()) {
            for (Object shape : ((ISlide) slide).getShapes()) {
                //only auto shapes are read here; other shape kinds are skipped
                if (shape instanceof IAutoShape) {
                    //a line break separates the text of consecutive shapes
                    buffer.append("\r\n");
                    for (Object tp : ((IAutoShape) shape).getTextFrame().getParagraphs()) {
                        buffer.append(((ParagraphEx) tp).getText());
                        buffer.append("\r\n");
                    }
                }
            }
        }

        //write to text file; try-with-resources closes (and thereby flushes)
        //the writer even when write() throws, so the file handle never leaks
        //NOTE(review): the "ExtractedFiles" directory is not created here --
        //presumably it must already exist; confirm before running
        try (FileWriter writer = new FileWriter("ExtractedFiles/Text.txt")) {
            writer.write(buffer.toString());
        }
    }
}
Enter fullscreen mode Exit fullscreen mode

Output:

Alt Text

Extract Images from an Entire Document

import com.spire.presentation.Presentation;
import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.File;

/**
 * Example program that saves every image held in a PowerPoint document's
 * image collection as a PNG file.
 *
 * <p>Both the input path and the output folder are hard-coded; adjust them
 * before running.
 */
public class ExtractImages {

    /**
     * Loads the presentation and writes each image in its image collection to
     * {@code ExtractedFiles/Image-<index>.png}, where the index starts at 0.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if loading the presentation or writing an image fails
     */
    public static void main(String[] args) throws Exception {

        // Open the source presentation.
        Presentation deck = new Presentation();
        deck.loadFromFile("C:\\Users\\Administrator\\Desktop\\ Triceratops Herds.pptx");

        // Walk the document's image collection by position.
        int index = 0;
        while (index < deck.getImages().getCount()) {

            // Fetch the image, then derive its output file from the position.
            BufferedImage picture = deck.getImages().get(index).getImage();
            File target = new File(String.format("ExtractedFiles/Image-%1$s.png", index));

            // Encode the image as PNG into the target file.
            ImageIO.write(picture, "PNG", target);
            index++;
        }
    }
}
Enter fullscreen mode Exit fullscreen mode

Output:

Alt Text

Top comments (0)