DEV Community

Subramanian 😎
Subramanian 😎

Posted on

Profiling in Go with real-time example

Problem: Packing and unpacking gzip files.

Go has the gzip package that makes working with gzip files easier.

Writer from the gzip package can be used to compress data and, as you'd expect, Reader is used to read back the original data from a compressed gzip file.

Initial Approach

The natural instinct would be to read the input file's content, compress/decompress the data with Writer/Reader respectively, and then write the resulting content to the output file.

package main

import (
    "bytes"
    "compress/gzip"
    "fmt"
    "io"
    "io/ioutil"
    "os"
    "strings"
)

// packGzipFile compresses the file at srcFilePath with gzip and writes the
// result to dstFilePath (created with mode 0660, truncated if it exists).
//
// The whole input and the whole compressed output are held in memory, so
// memory use grows with the size of the source file.
//
// Every failure is reported on stdout and also returned to the caller.
func packGzipFile(srcFilePath, dstFilePath string) error {
    inputBytes, err := ioutil.ReadFile(srcFilePath)
    if err != nil {
        fmt.Println("Error while opening input file :", srcFilePath, "Error :", err)
        return err
    }

    var compressedByteBuffer bytes.Buffer
    gzWriter := gzip.NewWriter(&compressedByteBuffer)

    if _, err = gzWriter.Write(inputBytes); err != nil {
        fmt.Println("Unable to perform write operation :", err)
        return err
    }

    // Close flushes any pending compressed data and writes the gzip footer,
    // so its error must be checked before the buffer is considered complete.
    if err = gzWriter.Close(); err != nil {
        fmt.Println("Unable to perform write operation :", err)
        return err
    }

    if err = ioutil.WriteFile(dstFilePath, compressedByteBuffer.Bytes(), 0660); err != nil {
        fmt.Println("Error while writing to file :", dstFilePath, "Error :", err)
        return err
    }

    return nil
}

// unpackGzipFile decompresses the gzip file at srcFilePath and writes the
// original data to dstFilePath (created with mode 0660, truncated if it
// exists).
//
// The whole compressed input and the whole decompressed output are held in
// memory, so memory use grows with the size of the data.
//
// Every failure is reported on stdout and also returned to the caller.
func unpackGzipFile(srcFilePath, dstFilePath string) error {
    data, err := ioutil.ReadFile(srcFilePath)
    if err != nil {
        // Report the real cause (e.g. file not found) instead of letting the
        // gzip reader fail later with a misleading EOF on empty input.
        fmt.Println("Error while opening input file :", srcFilePath, "Error :", err)
        return err
    }

    gzReader, err := gzip.NewReader(bytes.NewBuffer(data))
    if err != nil {
        fmt.Println("Error while creating a reader :", err)
        return err
    }
    defer gzReader.Close()

    var resB bytes.Buffer
    if _, err = io.Copy(&resB, gzReader); err != nil {
        fmt.Println("Error while reading :", err)
        return err
    }

    if err = ioutil.WriteFile(dstFilePath, resB.Bytes(), 0660); err != nil {
        fmt.Println("Error while writing :", err)
        return err
    }

    return nil
}

// main parses the command line (<c | d> <input_filename> <output_filename>)
// and packs or unpacks the input file accordingly.
//
// Exit status: 0 on success, 1 when packing/unpacking fails, 2 when the
// command line is invalid.
func main() {

    if len(os.Args) < 4 {
        fmt.Println("USAGE: ./gztool <c | d> <input_filename> <output_filename>")
        os.Exit(2)
    }

    mode := strings.ToLower(os.Args[1])
    inputFilename := os.Args[2]
    outputFilename := os.Args[3]

    var err error
    switch mode {
    case "c":
        err = packGzipFile(inputFilename, outputFilename)
    case "d":
        err = unpackGzipFile(inputFilename, outputFilename)
    default:
        fmt.Println("Invalid mode. Use \"c\" or \"d\"")
        os.Exit(2)
    }

    if err != nil {
        // The failing function has already printed the details; only the
        // exit status still needs to reflect the failure.
        os.Exit(1)
    }
}

This approach would start to falter (in terms of memory usage) as the size of the file that we're working on increases, because both the entire input and the entire output are held in memory at the same time.

Let's add profiling to get some information on CPU and Memory Usage.

Profiling features are built into Go.
To make things simpler, I've used the profile package for profiling.

package main

import (
    "bytes"
    "compress/gzip"
    "fmt"
    "io"
    "io/ioutil"
    "os"
    "strings"

    "github.com/pkg/profile"
)

// packGzipFile compresses the file at srcFilePath with gzip and writes the
// result to dstFilePath (created with mode 0660, truncated if it exists).
//
// The whole input and the whole compressed output are held in memory, so
// memory use grows with the size of the source file.
//
// Every failure is reported on stdout and also returned to the caller.
func packGzipFile(srcFilePath, dstFilePath string) error {
    inputBytes, err := ioutil.ReadFile(srcFilePath)
    if err != nil {
        fmt.Println("Error while opening input file :", srcFilePath, "Error :", err)
        return err
    }

    var compressedByteBuffer bytes.Buffer
    gzWriter := gzip.NewWriter(&compressedByteBuffer)

    if _, err = gzWriter.Write(inputBytes); err != nil {
        fmt.Println("Unable to perform write operation :", err)
        return err
    }

    // Close flushes any pending compressed data and writes the gzip footer,
    // so its error must be checked before the buffer is considered complete.
    if err = gzWriter.Close(); err != nil {
        fmt.Println("Unable to perform write operation :", err)
        return err
    }

    if err = ioutil.WriteFile(dstFilePath, compressedByteBuffer.Bytes(), 0660); err != nil {
        fmt.Println("Error while writing to file :", dstFilePath, "Error :", err)
        return err
    }

    return nil
}

// unpackGzipFile decompresses the gzip file at srcFilePath and writes the
// original data to dstFilePath (created with mode 0660, truncated if it
// exists).
//
// The whole compressed input and the whole decompressed output are held in
// memory, so memory use grows with the size of the data.
//
// Every failure is reported on stdout and also returned to the caller.
func unpackGzipFile(srcFilePath, dstFilePath string) error {
    data, err := ioutil.ReadFile(srcFilePath)
    if err != nil {
        // Report the real cause (e.g. file not found) instead of letting the
        // gzip reader fail later with a misleading EOF on empty input.
        fmt.Println("Error while opening input file :", srcFilePath, "Error :", err)
        return err
    }

    gzReader, err := gzip.NewReader(bytes.NewBuffer(data))
    if err != nil {
        fmt.Println("Error while creating a reader :", err)
        return err
    }
    defer gzReader.Close()

    var resB bytes.Buffer
    if _, err = io.Copy(&resB, gzReader); err != nil {
        fmt.Println("Error while reading :", err)
        return err
    }

    if err = ioutil.WriteFile(dstFilePath, resB.Bytes(), 0660); err != nil {
        fmt.Println("Error while writing :", err)
        return err
    }

    return nil
}

// main validates the argument count and delegates to run, turning its result
// into the process exit status: 0 on success, 1 when packing/unpacking
// fails, 2 when the command line is invalid.
func main() {

    if len(os.Args) < 4 {
        fmt.Println("USAGE: ./gztool <c | d> <input_filename> <output_filename> [cpu | mem]")
        os.Exit(2)
    }

    // os.Exit does not run deferred calls, so all work that defers the
    // profiler's Stop lives in run and has finished by the time we exit.
    os.Exit(run(os.Args))
}

// run executes the requested mode, optionally under CPU or memory profiling,
// and returns the exit status for the process. args must hold at least the
// program name, mode, input filename and output filename.
func run(args []string) int {
    if len(args) == 5 {
        switch args[4] {
        case "cpu":
            defer profile.Start().Stop()
        case "mem":
            defer profile.Start(profile.MemProfile).Stop()
        }
    }

    mode := strings.ToLower(args[1])
    inputFilename := args[2]
    outputFilename := args[3]

    var err error
    switch mode {
    case "c":
        err = packGzipFile(inputFilename, outputFilename)
    case "d":
        err = unpackGzipFile(inputFilename, outputFilename)
    default:
        fmt.Println("Invalid mode. Use \"c\" or \"d\"")
        return 2
    }

    if err != nil {
        // The failing function has already printed the details.
        return 1
    }
    return 0
}

Stats for a 200 MB file:

Action CPU Time Memory Usage
Packing 6.01 s 287.87 MB
Unpacking 705.24 ms 286.84 MB

Not only is memory usage too high, but the process would also crash with an out-of-memory fatal error if the file is too big to fit in memory.

Efficient Approach

An efficient approach would be to pipe the contents of the file to the Writer/Reader. This way, only a small chunk of the actual file needs to be in the memory.

Thanks to goroutines, we can write data concurrently to one end of the pipe and read the data at the other.

package main

import (
    "compress/gzip"
    "fmt"
    "io"
    "os"
    "strings"

    "github.com/pkg/profile"
)

// packGzipFile streams the file at srcFilePath through a gzip compressor into
// dstFilePath (created with mode 0660, truncated if it already exists).
//
// A goroutine copies the source file into one end of an in-memory pipe while
// the calling goroutine compresses what arrives at the other end, so only a
// small chunk of the file is in memory at any time.
//
// Progress and failures are reported on stdout; failures are also returned.
// The function does not return until the reading goroutine has finished.
func packGzipFile(srcFilePath, dstFilePath string) error {
    srcFile, err := os.Open(srcFilePath)
    if err != nil {
        fmt.Println("Error while opening source file :", err)
        return err
    }

    // O_TRUNC matters when overwriting: without it, bytes of a longer
    // existing file would remain after the new gzip stream and corrupt it.
    dstFile, err := os.OpenFile(dstFilePath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0660)
    if err != nil {
        srcFile.Close()
        fmt.Println("Error while opening destination file :", err)
        return err
    }

    ioReader, ioWriter := io.Pipe()

    // readErr is written by the goroutine before done is closed and read by
    // this goroutine only after <-done, so no further locking is needed.
    var readErr error
    done := make(chan struct{})

    go func() {
        defer close(done)

        read, err := io.Copy(ioWriter, srcFile)
        srcFile.Close()

        switch {
        case err == nil:
            fmt.Printf("Read %v bytes.\n", read)
        case err != io.ErrClosedPipe:
            // A closed pipe only means the compressing side gave up first
            // and reports its own error; anything else is a read failure.
            readErr = err
            fmt.Println("Error while reading :", err)
        }

        // Close the pipe last so the message above is printed before the
        // other side sees end-of-stream, and hand over any read failure so
        // a truncated input is never mistaken for a complete one.
        ioWriter.CloseWithError(readErr)
    }()

    gzipWriter := gzip.NewWriter(dstFile)

    written, err := io.Copy(gzipWriter, ioReader)
    // Closing the read end unblocks the goroutine if it is still writing.
    ioReader.Close()
    <-done

    if readErr != nil {
        // Already reported by the reading goroutine.
        dstFile.Close()
        return readErr
    }
    if err != nil {
        dstFile.Close()
        fmt.Println("Error while writing :", err)
        return err
    }
    fmt.Printf("Wrote %v bytes.\n", written)

    // Close flushes the remaining compressed data and the gzip footer.
    if err = gzipWriter.Close(); err != nil {
        dstFile.Close()
        fmt.Println("Error while writing :", err)
        return err
    }
    if err = dstFile.Close(); err != nil {
        fmt.Println("Error while writing :", err)
        return err
    }

    return nil
}

// unpackGzipFile streams the gzip file at srcFilePath through a decompressor
// into dstFilePath (created with mode 0660, truncated if it already exists).
//
// A goroutine decompresses the source into one end of an in-memory pipe while
// the calling goroutine writes what arrives at the other end to the
// destination, so only a small chunk of the data is in memory at any time.
//
// The gzip header is validated before the destination is opened, so an input
// that is not gzip data leaves an existing destination file untouched.
//
// Progress and failures are reported on stdout; failures are also returned.
// The function does not return until the decompressing goroutine has finished.
func unpackGzipFile(srcFilePath, dstFilePath string) error {
    gzFile, err := os.Open(srcFilePath)
    if err != nil {
        fmt.Println("Error while opening source file :", err)
        return err
    }

    // NewReader returns a nil reader on a bad header; using it unchecked
    // would crash with a nil-pointer dereference.
    gzReader, err := gzip.NewReader(gzFile)
    if err != nil {
        gzFile.Close()
        fmt.Println("Error while creating a reader :", err)
        return err
    }

    // O_TRUNC matters when overwriting: without it, bytes of a longer
    // existing file would remain after the newly written data.
    dstFile, err := os.OpenFile(dstFilePath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0660)
    if err != nil {
        gzReader.Close()
        gzFile.Close()
        fmt.Println("Error while opening destination file :", err)
        return err
    }

    ioReader, ioWriter := io.Pipe()

    // readErr is written by the goroutine before done is closed and read by
    // this goroutine only after <-done, so no further locking is needed.
    var readErr error
    done := make(chan struct{})

    go func() {
        defer close(done)

        read, err := io.Copy(ioWriter, gzReader)
        gzReader.Close()
        gzFile.Close()

        switch {
        case err == nil:
            fmt.Printf("Read %v bytes.\n", read)
        case err != io.ErrClosedPipe:
            // A closed pipe only means the writing side gave up first and
            // reports its own error; anything else is a decompression or
            // read failure.
            readErr = err
            fmt.Println("Error while reading :", err)
        }

        // Close the pipe last so the message above is printed before the
        // other side sees end-of-stream, and hand over any failure so a
        // truncated result is never mistaken for a complete one.
        ioWriter.CloseWithError(readErr)
    }()

    written, err := io.Copy(dstFile, ioReader)
    // Closing the read end unblocks the goroutine if it is still writing.
    ioReader.Close()
    <-done

    if readErr != nil {
        // Already reported by the decompressing goroutine.
        dstFile.Close()
        return readErr
    }
    if err != nil {
        dstFile.Close()
        fmt.Println("Error while writing :", err)
        return err
    }
    fmt.Printf("Wrote %v bytes.\n", written)

    if err = dstFile.Close(); err != nil {
        fmt.Println("Error while writing :", err)
        return err
    }

    return nil
}

// main validates the argument count and delegates to run, turning its result
// into the process exit status: 0 on success, 1 when packing/unpacking
// fails, 2 when the command line is invalid.
func main() {
    if len(os.Args) < 4 {
        fmt.Println("USAGE: ./gztool <c | d> <input_filename> <output_filename> [cpu | mem]")
        os.Exit(2)
    }

    // os.Exit does not run deferred calls, so all work that defers the
    // profiler's Stop lives in run and has finished by the time we exit.
    os.Exit(run(os.Args))
}

// run executes the requested mode, optionally under CPU or memory profiling,
// and returns the exit status for the process. args must hold at least the
// program name, mode, input filename and output filename.
func run(args []string) int {
    if len(args) == 5 {
        switch args[4] {
        case "cpu":
            defer profile.Start().Stop()
        case "mem":
            defer profile.Start(profile.MemProfile).Stop()
        }
    }

    mode := strings.ToLower(args[1])
    inputFilename := args[2]
    outputFilename := args[3]

    var err error
    switch mode {
    case "c":
        err = packGzipFile(inputFilename, outputFilename)
    case "d":
        err = unpackGzipFile(inputFilename, outputFilename)
    default:
        fmt.Println("Invalid mode. Use \"c\" or \"d\"")
        return 2
    }

    if err != nil {
        // The failing function has already printed the details.
        return 1
    }
    return 0
}

Stats:

Action CPU Time Memory Usage
Packing 6.21 s 793.25 kB
Unpacking 300.91 ms nil^

^ - The process completed before go profile could capture any data.

Conclusion

The takeaway is to consider all edge cases - not just in terms of time complexity, but also in terms of space complexity.

Also, Go ships with tools such as pprof (profiling) and vet (static analysis) that help in diagnosing common performance and correctness problems.

Top comments (0)