Skip to content

runtime: defer/recover support in race detector #26813

Open
@adilbaig

Description

@adilbaig

Hi Go Devs,

I've noticed a strange memory leak when running my Go program with the Race Detector. The program is an ad-hit tracker, it receives data from RabbitMQ and writes them down to the database. When there are errors in the DB, the program leaks memory (the RES mem usage grows unchecked until the machine runs out of RAM).

Run the program (below) with and without the -race detector to see the difference.

What version of Go are you using (go version)?

go version go1.10.3 linux/amd64

Does this issue reproduce with the latest release?

Yes

What operating system and processor architecture are you using (go env)?

GOARCH="amd64"
GOBIN=""
GOCACHE="/home/adil/.cache/go-build"
GOEXE=""
GOHOSTARCH="amd64"
GOHOSTOS="linux"
GOOS="linux"
GOPATH="/home/adil/go"
GORACE=""
GOROOT="/snap/go/2130"
GOTMPDIR=""
GOTOOLDIR="/snap/go/2130/pkg/tool/linux_amd64"
GCCGO="gccgo"
CC="gcc"
CXX="g++"
CGO_ENABLED="1"
CGO_CFLAGS="-g -O2"
CGO_CPPFLAGS=""
CGO_CXXFLAGS="-g -O2"
CGO_FFLAGS="-g -O2"
CGO_LDFLAGS="-g -O2"
PKG_CONFIG="pkg-config"
GOGCCFLAGS="-fPIC -m64 -pthread -fmessage-length=0 -fdebug-prefix-map=/tmp/go-build049513405=/tmp/go-build -gno-record-gcc-switches"

What did you do?

Run this program without the race detector:

> go build -o race experimental/rabbitmqmemoryleak.go && ./race

Observe the memory usage in top. It should flatline. Then, run it with the -race detector

> go build -race -o race experimental/rabbitmqmemoryleak.go && ./race

The memory usage grows unchecked.

package main

import (
	"database/sql"
	"log"

	"github.com/go-errors/errors"
	_ "github.com/lib/pq"
	"github.com/streadway/amqp"
)

// This program demonstrates a memory leak in Go's built-in race detector.
// You will need RabbitMQ and Postgres installed on your machine.

// First, run the program like this:
// > 	go build -o race experimental/rabbitmqmemoryleak.go && ./race
// Observe the 'RES' memory in `top`. It should level after a bit
//
// Then, run the program like this:
// > 	go build -race -o race experimental/rabbitmqmemoryleak.go && ./race
// Watch as RAM usage counts unchecked

// Strangely running `go run -race experimental/rabbitmqmemoryleak.go` does not leak memory.

func main() {

	var err error

	conn, err := amqp.Dial("amqp://guest:guest@localhost:5672")
	if err != nil {
		log.Fatal(err)
	}
	defer conn.Close()

	channel, err := conn.Channel()
	if err != nil {
		log.Fatal(err)
	}
	defer channel.Close()

	queue := "testing-memory-leak"

	// 1. Create the queue
	var args amqp.Table
	_, err = channel.QueueDeclare(queue, false, true, false, false, args)
	defer channel.QueueDelete(queue, false, false, true)

	// 2. Fill in messages
	cnt := 100000
	log.Printf("Inserting %d messages", cnt)
	for index := 0; index < cnt; index++ {
		msg := `{"date":"2018-07-17 08:44:30.202358+02","campaignID":18275,"affiliateSiteID":70002,"materialID":969206,"referenceID":0,"referenceHash":null,"referenceName":null,"isUnique":true,"merchantCommission":"0","affiliateCommission":"0","IP":"37.74.164.6","postImpressionHash":"0ebdd4720ef7eba5f09a84b8c68367c6","universalFingerprintHash":"c47e486da2ff1a4695b1499052b09807","pushToDynamoDB":true,"timestamp":"1531809870","handlerType":"impression","host":"snipped","pid":7962,"userAgentID":null,"userAgentHash":"08ac6aeb466ccd8f05ad37a144afe0f8","userAgentName":"Mozilla\/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit\/537.36 (KHTML, like Gecko) Chrome\/58.0.3029.110 Safari\/537.36 Edge\/16.16299","requestHash":"29c2363878fee77174d0b1e2491ea93b"}`
		err := channel.Publish("", queue, true, false, amqp.Publishing{ContentType: "application/json", Body: []byte(msg), DeliveryMode: 2})
		if err != nil {
			log.Panic(err)
		}
	}
	log.Printf("Done")

	pgb, err := sql.Open("postgres", "host=localhost user=user password=pass dbname=db sslmode=disable")
	if err != nil {
		log.Fatal(err)
	}
	defer pgb.Close()

	// 2. Listen on the messages
	forever := make(chan bool)
	for i := 0; i < 10; i++ {
		go handler(queue, pgb)
	}
	<-forever
}

func handler(queue string, pgb *sql.DB) {
	conn, err := amqp.Dial("amqp://guest:guest@localhost:5672")
	if err != nil {
		log.Fatal(err)
	}

	channel, err := conn.Channel()
	if err != nil {
		log.Fatal(err)
	}

	for {
		message, ok, err := channel.Get(queue, false)
		if err != nil {
			log.Fatal(err)
		}
		if !ok {
			continue
		}

		log.Printf("%+v", message)

		err = someErroneousSQLTransaction(pgb)
		if err != nil {
			log.Printf("ERROR: %s", err)
			message.Nack(false, true)
		} else {
			message.Ack(false)
		}
	}
}

func someErroneousSQLTransaction(pgb *sql.DB) error {
	refTx, err := pgb.Begin()
	if err != nil {
		return errors.Wrap(err, 0)
	}
	defer func() {
		if err != nil {
			refTx.Rollback()
		} else {
			refTx.Commit()
		}
	}()
	_, err = refTx.Exec(`set transaction isolation level repeatable read`)
	if err != nil {
		return err
	}

	var referenceID int64
	switch err = pgb.QueryRow(`SELECT "ID" FROM public.blah LIMIT 1`).Scan(&referenceID); err {
	case sql.ErrNoRows:
		break
	case nil:
		break
	default:
		return err
	}

	return nil
}

What did you expect to see?

Flatline memory usage, as measured with top when running with -race. I expect it to take more memory to maintain internal data structures, but it should flatline at some point.

What did you see instead?

Constantly growing RES mem usage in top.

Strangely running go run -race experimental/rabbitmqmemoryleak.go does not leak memory.

Metadata

Metadata

Assignees

No one assigned

    Labels

    NeedsInvestigationSomeone must examine and confirm this is a valid issue and not a duplicate of an existing one.RaceDetector

    Type

    No type

    Projects

    No projects

    Milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions