// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package runtime_test

import (
	"flag"
	"fmt"
	"internal/cpu"
	"internal/runtime/atomic"
	"io"
	"math/bits"
	. "runtime"
	"runtime/debug"
	"slices"
	"strings"
	"sync"
	"testing"
	"time"
	"unsafe"
)

// flagQuick is set by the -quick option to skip some relatively slow tests.
// This is used by the cmd/dist test runtime:cpu124.
// The cmd/dist test passes both -test.short and -quick;
// there are tests that only check testing.Short, and those tests will
// not be skipped if only -quick is used.
var flagQuick = flag.Bool("quick", false, "skip slow tests, for cmd/dist test runtime:cpu124")

func init() {
	// We're testing the runtime, so make tracebacks show things
	// in the runtime. This only raises the level, so it won't
	// override GOTRACEBACK=crash from the user.
	SetTracebackEnv("system")
}

var errf error
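// errf stays nil, so errfn returns a nil error while errfn1 returns the
// non-nil io.EOF; the indirection presumably keeps the compiler from folding
// away the interface comparisons in the benchmarks below.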

func errfn() error {
	return errf
}

func errfn1() error {
	return io.EOF
}

func BenchmarkIfaceCmp100(b *testing.B) {
	for i := 0; i < b.N; i++ {
		for j := 0; j < 100; j++ {
			if errfn() == io.EOF {
				b.Fatal("bad comparison")
			}
		}
	}
}

func BenchmarkIfaceCmpNil100(b *testing.B) {
	for i := 0; i < b.N; i++ {
		for j := 0; j < 100; j++ {
			if errfn1() == nil {
				b.Fatal("bad comparison")
			}
		}
	}
}

var efaceCmp1 any
var efaceCmp2 any

func BenchmarkEfaceCmpDiff(b *testing.B) {
	x := 5
	efaceCmp1 = &x
	y := 6
	efaceCmp2 = &y
	for i := 0; i < b.N; i++ {
		for j := 0; j < 100; j++ {
			if efaceCmp1 == efaceCmp2 {
				b.Fatal("bad comparison")
			}
		}
	}
}

func BenchmarkEfaceCmpDiffIndirect(b *testing.B) {
	efaceCmp1 = [2]int{1, 2}
	efaceCmp2 = [2]int{1, 2}
	for i := 0; i < b.N; i++ {
		for j := 0; j < 100; j++ {
			if efaceCmp1 != efaceCmp2 {
				b.Fatal("bad comparison")
			}
		}
	}
}

func BenchmarkDefer(b *testing.B) {
	for i := 0; i < b.N; i++ {
		defer1()
	}
}

func defer1() {
	defer func(x, y, z int) {
		if recover() != nil || x != 1 || y != 2 || z != 3 {
			panic("bad recover")
		}
	}(1, 2, 3)
}

func BenchmarkDefer10(b *testing.B) {
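	// defer2 runs ten deferred calls per invocation, so loop b.N/10 times to
	// keep the total number of executed defers on the order of b.N.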
	for i := 0; i < b.N/10; i++ {
		defer2()
	}
}

func defer2() {
	for i := 0; i < 10; i++ {
		defer func(x, y, z int) {
			if recover() != nil || x != 1 || y != 2 || z != 3 {
				panic("bad recover")
			}
		}(1, 2, 3)
	}
}

func BenchmarkDeferMany(b *testing.B) {
	for i := 0; i < b.N; i++ {
		defer func(x, y, z int) {
			if recover() != nil || x != 1 || y != 2 || z != 3 {
				panic("bad recover")
			}
		}(1, 2, 3)
	}
}

func BenchmarkPanicRecover(b *testing.B) {
	for i := 0; i < b.N; i++ {
		defer3()
	}
}

func defer3() {
	defer func(x, y, z int) {
		if recover() == nil {
			panic("failed recover")
		}
	}(1, 2, 3)
	panic("hi")
}

// golang.org/issue/7063
func TestStopCPUProfilingWithProfilerOff(t *testing.T) {
	SetCPUProfileRate(0)
}

// Addresses to test for faulting behavior.
// This is less a test of SetPanicOnFault and more a check that
// the operating system and the runtime can process these faults
// correctly. That is, we're indirectly testing that without SetPanicOnFault
// these would manage to turn into ordinary crashes.
// Note that these are truncated on 32-bit systems, so the bottom 32 bits
// of the larger addresses must themselves be invalid addresses.
// We might get unlucky and the OS might have mapped one of these
// addresses, but probably not: they're all in the first page, very high
// addresses that normally an OS would reserve for itself, or malformed
// addresses. Even so, we might have to remove one or two on different
// systems. We will see.
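//
// As an example of the truncation concern: on a 32-bit system the address
// 0xffffffffffff0001 becomes 0xffff0001, which typically still lands in the
// high range an OS reserves for the kernel.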

var faultAddrs = []uint64{
	// low addresses
	0,
	1,
	0xfff,
	// high (kernel) addresses
	// or else malformed.
	0xffffffffffffffff,
	0xfffffffffffff001,
	0xffffffffffff0001,
	0xfffffffffff00001,
	0xffffffffff000001,
	0xfffffffff0000001,
	0xffffffff00000001,
	0xfffffff000000001,
	0xffffff0000000001,
	0xfffff00000000001,
	0xffff000000000001,
	0xfff0000000000001,
	0xff00000000000001,
	0xf000000000000001,
	0x8000000000000001,
}

func TestSetPanicOnFault(t *testing.T) {
	old := debug.SetPanicOnFault(true)
	defer debug.SetPanicOnFault(old)

	nfault := 0
	for _, addr := range faultAddrs {
		testSetPanicOnFault(t, uintptr(addr), &nfault)
	}
	if nfault == 0 {
		t.Fatalf("none of the addresses faulted")
	}
}

// testSetPanicOnFault tests one potentially faulting address.
// It deliberately constructs and uses an invalid pointer,
// so mark it as nocheckptr.
//
//go:nocheckptr
func testSetPanicOnFault(t *testing.T, addr uintptr, nfault *int) {
	if GOOS == "js" || GOOS == "wasip1" {
		t.Skip(GOOS + " does not support catching faults")
	}

	defer func() {
		if err := recover(); err != nil {
			*nfault++
		}
	}()

	// The read should fault, except that sometimes we hit
	// addresses where C or kernel pages have been mapped in,
	// readable by user code. So just log the content.
	// If no addresses fault, we'll fail the test.
	v := *(*byte)(unsafe.Pointer(addr))
	t.Logf("addr %#x: %#x\n", addr, v)
}

func eqstring_generic(s1, s2 string) bool {
	if len(s1) != len(s2) {
		return false
	}
	// optimization in assembly versions:
	// if s1.str == s2.str { return true }
	for i := 0; i < len(s1); i++ {
		if s1[i] != s2[i] {
			return false
		}
	}
	return true
}

func TestEqString(t *testing.T) {
	// This isn't really an exhaustive test of == on strings, it's
	// just a convenient way of documenting (via eqstring_generic)
	// what == does.
	s := []string{
		"",
		"a",
		"c",
		"aaa",
		"ccc",
		"cccc"[:3], // same contents, different string
		"1234567890",
	}
	for _, s1 := range s {
		for _, s2 := range s {
			x := s1 == s2
			y := eqstring_generic(s1, s2)
			if x != y {
				t.Errorf(`("%s" == "%s") = %t, want %t`, s1, s2, x, y)
			}
		}
	}
}

func TestTrailingZero(t *testing.T) {
	// make sure we add padding for structs with trailing zero-sized fields
	type T1 struct {
		n int32
		z [0]byte
	}
	if unsafe.Sizeof(T1{}) != 8 {
		t.Errorf("sizeof(%#v)==%d, want 8", T1{}, unsafe.Sizeof(T1{}))
	}
	type T2 struct {
		n int64
		z struct{}
	}
	if unsafe.Sizeof(T2{}) != 8+unsafe.Sizeof(uintptr(0)) {
		t.Errorf("sizeof(%#v)==%d, want %d", T2{}, unsafe.Sizeof(T2{}), 8+unsafe.Sizeof(uintptr(0)))
	}
	type T3 struct {
		n byte
		z [4]struct{}
	}
	if unsafe.Sizeof(T3{}) != 2 {
		t.Errorf("sizeof(%#v)==%d, want 2", T3{}, unsafe.Sizeof(T3{}))
	}
	// make sure the same padding can do double duty for the trailing zero-sized field and alignment
	type T4 struct {
		a int32
		b int16
		c int8
		z struct{}
	}
	if unsafe.Sizeof(T4{}) != 8 {
		t.Errorf("sizeof(%#v)==%d, want 8", T4{}, unsafe.Sizeof(T4{}))
	}
	// make sure we don't pad a zero-sized thing
	type T5 struct {
	}
	if unsafe.Sizeof(T5{}) != 0 {
		t.Errorf("sizeof(%#v)==%d, want 0", T5{}, unsafe.Sizeof(T5{}))
	}
}

func TestAppendGrowth(t *testing.T) {
	var x []int64
	check := func(want int) {
		if cap(x) != want {
			t.Errorf("len=%d, cap=%d, want cap=%d", len(x), cap(x), want)
		}
	}

	check(0)
	want := 1
	for i := 1; i <= 100; i++ {
		x = append(x, 1)
		check(want)
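		// i&(i-1) == 0 exactly when i is a power of two; at that point the
		// slice is full (cap == len == i), so the test expects the next
		// append to double the capacity to 2*i.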
		if i&(i-1) == 0 {
			want = 2 * i
		}
	}
}

var One = []int64{1}

func TestAppendSliceGrowth(t *testing.T) {
	var x []int64
	check := func(want int) {
		if cap(x) != want {
			t.Errorf("len=%d, cap=%d, want cap=%d", len(x), cap(x), want)
		}
	}

	check(0)
	want := 1
	for i := 1; i <= 100; i++ {
		x = append(x, One...)
		check(want)
		if i&(i-1) == 0 {
			want = 2 * i
		}
	}
}

func TestGoroutineProfileTrivial(t *testing.T) {
	// Calling GoroutineProfile twice in a row should find the same number of goroutines,
	// but it's possible there are goroutines just about to exit, so we might end up
	// with fewer in the second call. Try a few times; it should converge once those
	// zombies are gone.
	for i := 0; ; i++ {
		n1, ok := GoroutineProfile(nil) // should fail, there's at least 1 goroutine
		if n1 < 1 || ok {
			t.Fatalf("GoroutineProfile(nil) = %d, %v, want >0, false", n1, ok)
		}
		n2, ok := GoroutineProfile(make([]StackRecord, n1))
		if n2 == n1 && ok {
			break
		}
		t.Logf("GoroutineProfile(%d) = %d, %v, want %d, true", n1, n2, ok, n1)
		if i >= 10 {
			t.Fatalf("GoroutineProfile not converging")
		}
	}
}

func BenchmarkGoroutineProfile(b *testing.B) {
	run := func(fn func() bool) func(b *testing.B) {
		runOne := func(b *testing.B) {
			latencies := make([]time.Duration, 0, b.N)

			b.ResetTimer()
			for i := 0; i < b.N; i++ {
				start := time.Now()
				ok := fn()
				if !ok {
					b.Fatal("goroutine profile failed")
				}
				latencies = append(latencies, time.Since(start))
			}
			b.StopTimer()

			// Sort latencies then report percentiles.
			slices.Sort(latencies)
			b.ReportMetric(float64(latencies[len(latencies)*50/100]), "p50-ns")
			b.ReportMetric(float64(latencies[len(latencies)*90/100]), "p90-ns")
			b.ReportMetric(float64(latencies[len(latencies)*99/100]), "p99-ns")
		}
		return func(b *testing.B) {
			b.Run("idle", runOne)

			b.Run("loaded", func(b *testing.B) {
				stop := applyGCLoad(b)
				runOne(b)
				// Make sure to stop the timer before we wait! The load created above
				// is very heavy-weight and not easy to stop, so we could end up
				// confusing the benchmarking framework for small b.N.
				b.StopTimer()
				stop()
			})
		}
	}

	// Measure the cost of counting goroutines
	b.Run("small-nil", run(func() bool {
		GoroutineProfile(nil)
		return true
	}))

	// Measure the cost with a small set of goroutines
	n := NumGoroutine()
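	// Size the buffer with slack beyond the current count so the profile
	// still fits if goroutines start between the count and the call.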
	p := make([]StackRecord, 2*n+2*GOMAXPROCS(0))
	b.Run("small", run(func() bool {
		_, ok := GoroutineProfile(p)
		return ok
	}))

	// Measure the cost with a large set of goroutines
	ch := make(chan int)
	var ready, done sync.WaitGroup
	for i := 0; i < 5000; i++ {
		ready.Add(1)
		done.Add(1)
		go func() { ready.Done(); <-ch; done.Done() }()
	}
	ready.Wait()

	// Count goroutines with a large allgs list
	b.Run("large-nil", run(func() bool {
		GoroutineProfile(nil)
		return true
	}))

	n = NumGoroutine()
	p = make([]StackRecord, 2*n+2*GOMAXPROCS(0))
	b.Run("large", run(func() bool {
		_, ok := GoroutineProfile(p)
		return ok
	}))

	close(ch)
	done.Wait()

	// Count goroutines with a large (but unused) allgs list
	b.Run("sparse-nil", run(func() bool {
		GoroutineProfile(nil)
		return true
	}))

	// Measure the cost of a large (but unused) allgs list
	n = NumGoroutine()
	p = make([]StackRecord, 2*n+2*GOMAXPROCS(0))
	b.Run("sparse", run(func() bool {
		_, ok := GoroutineProfile(p)
		return ok
	}))
}

func TestVersion(t *testing.T) {
	// Test that version does not contain \r or \n.
	vers := Version()
	if strings.Contains(vers, "\r") || strings.Contains(vers, "\n") {
		t.Fatalf("cr/nl in version: %q", vers)
	}
}

func TestTimediv(t *testing.T) {
	for _, tc := range []struct {
		num int64
		div int32
		ret int32
		rem int32
	}{
		{
			num: 8,
			div: 2,
			ret: 4,
			rem: 0,
		},
		{
			num: 9,
			div: 2,
			ret: 4,
			rem: 1,
		},
		{
			// Used by runtime.check.
			num: 12345*1000000000 + 54321,
			div: 1000000000,
			ret: 12345,
			rem: 54321,
		},
		{
			num: 1<<32 - 1,
			div: 2,
			ret: 1<<31 - 1, // no overflow.
			rem: 1,
		},
		{
			num: 1 << 32,
			div: 2,
			ret: 1<<31 - 1, // overflow.
			rem: 0,
		},
		{
			num: 1 << 40,
			div: 2,
			ret: 1<<31 - 1, // overflow.
			rem: 0,
		},
		{
			num: 1<<40 + 1,
			div: 1 << 10,
			ret: 1 << 30,
			rem: 1,
		},
	} {
		name := fmt.Sprintf("%d div %d", tc.num, tc.div)
		t.Run(name, func(t *testing.T) {
			// Double check that the inputs make sense using
			// standard 64-bit division.
			ret64 := tc.num / int64(tc.div)
			rem64 := tc.num % int64(tc.div)
			if ret64 != int64(int32(ret64)) {
				// Simulate timediv overflow value.
				ret64 = 1<<31 - 1
				rem64 = 0
			}
			if ret64 != int64(tc.ret) {
				t.Errorf("%d / %d got ret %d rem %d want ret %d rem %d", tc.num, tc.div, ret64, rem64, tc.ret, tc.rem)
			}

			var rem int32
			ret := Timediv(tc.num, tc.div, &rem)
			if ret != tc.ret || rem != tc.rem {
				t.Errorf("timediv %d / %d got ret %d rem %d want ret %d rem %d", tc.num, tc.div, ret, rem, tc.ret, tc.rem)
			}
		})
	}
}

func BenchmarkProcYield(b *testing.B) {
	benchN := func(n uint32) func(*testing.B) {
		return func(b *testing.B) {
			for i := 0; i < b.N; i++ {
				ProcYield(n)
			}
		}
	}

	b.Run("1", benchN(1))
	b.Run("10", benchN(10))
	b.Run("30", benchN(30)) // active_spin_cnt in lock_sema.go and lock_futex.go
	b.Run("100", benchN(100))
	b.Run("1000", benchN(1000))
}

func BenchmarkOSYield(b *testing.B) {
	for i := 0; i < b.N; i++ {
		OSYield()
	}
}

func BenchmarkMutexContention(b *testing.B) {
	// Measure throughput of a single mutex with all threads contending.
	//
	// Share a single counter across all threads. Progress from any thread is
	// progress for the benchmark as a whole. We don't measure or give points
	// for fairness here; arbitrary delay to any given thread's progress is
	// invisible and allowed.
	//
	// The cache line that holds the count value will need to move between
	// processors, but not as often as the cache line that holds the mutex. The
	// mutex protects access to the count value, which limits contention on that
	// cache line. This is a simple design, but it helps to make the behavior of
	// the benchmark clear. Most real uses of mutex will protect some number of
	// cache lines anyway.
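	//
	// The cpu.CacheLinePad fields in the struct below keep the mutex and the
	// counter on separate cache lines, and keep both away from whatever is
	// allocated next to the struct.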

	var state struct {
		_     cpu.CacheLinePad
		lock  Mutex
		_     cpu.CacheLinePad
		count atomic.Int64
		_     cpu.CacheLinePad
	}

	procs := GOMAXPROCS(0)
	var wg sync.WaitGroup
	for range procs {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for {
				Lock(&state.lock)
				ours := state.count.Add(1)
				Unlock(&state.lock)
				if ours >= int64(b.N) {
					return
				}
			}
		}()
	}
	wg.Wait()
}

func BenchmarkMutexCapture(b *testing.B) {
	// Measure mutex fairness.
	//
	// Have several threads contend for a single mutex value. Measure how
	// effectively a single thread is able to capture the lock and report the
	// duration of those "streak" events. Measure how long other individual
	// threads need to wait between their turns with the lock. Report the
	// duration of those "starve" events.
	//
	// Report in terms of wall clock time (assuming a constant time per
	// lock/unlock pair) rather than the number of lock/unlock pairs. This keeps
	// timekeeping overhead out of the critical path, and avoids giving an
	// advantage to lock/unlock implementations that take less time per
	// operation.

	var state struct {
		_     cpu.CacheLinePad
		lock  Mutex
		_     cpu.CacheLinePad
		count atomic.Int64
		_     cpu.CacheLinePad
	}

	procs := GOMAXPROCS(0)
	var wg sync.WaitGroup
	histograms := make(chan [2][65]int)
	for range procs {
		wg.Add(1)
		go func() {
			var (
				prev      int64
				streak    int64
				histogram [2][65]int
			)
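			// Bucket streak and starvation lengths by bits.LeadingZeros64:
			// bucket i holds values in [1<<(63-i), 1<<(64-i)), and bucket 64
			// holds zero, which is why each histogram has 65 entries.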
			for {
				Lock(&state.lock)
				ours := state.count.Add(1)
				Unlock(&state.lock)
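				// delta is how many times other threads acquired the lock
				// since our previous acquisition; zero means our streak of
				// back-to-back acquisitions continues.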
				delta := ours - prev - 1
				prev = ours
				if delta == 0 {
					streak++
				} else {
					histogram[0][bits.LeadingZeros64(uint64(streak))]++
					histogram[1][bits.LeadingZeros64(uint64(delta))]++
					streak = 1
				}
				if ours >= int64(b.N) {
					wg.Done()
					if delta == 0 {
						histogram[0][bits.LeadingZeros64(uint64(streak))]++
						histogram[1][bits.LeadingZeros64(uint64(delta))]++
					}
					histograms <- histogram
					return
				}
			}
		}()
	}

	wg.Wait()
	b.StopTimer()

	var histogram [2][65]int
	for range procs {
		h := <-histograms
		for i := range h {
			for j := range h[i] {
				histogram[i][j] += h[i][j]
			}
		}
	}

	percentile := func(h [65]int, p float64) int {
		sum := 0
		for i, v := range h {
			bound := uint64(1<<63) >> i
			sum += int(bound) * v
		}

		// Imagine that the longest streak / starvation events were instead half
		// as long but twice in number. (Note that we've pre-multiplied by the
		// [lower] "bound" value.) Continue those splits until we meet the
		// percentile target.
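		// Concretely: walk the buckets from the longest events to the
		// shortest, trimming their weighted contribution off the top; once
		// more than a (1-p) share of the total has been trimmed, report the
		// lower bound of the bucket we stopped in.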
		part := 0
		for i, v := range h {
			bound := uint64(1<<63) >> i
			part += int(bound) * v
			// Have we trimmed off enough at the head to dip below the percentile goal?
			if float64(sum-part) < float64(sum)*p {
				return int(bound)
			}
		}

		return 0
	}

	perOp := float64(b.Elapsed().Nanoseconds()) / float64(b.N)
	b.ReportMetric(perOp*float64(percentile(histogram[0], 1.0)), "ns/streak-p100")
	b.ReportMetric(perOp*float64(percentile(histogram[0], 0.9)), "ns/streak-p90")
	b.ReportMetric(perOp*float64(percentile(histogram[1], 1.0)), "ns/starve-p100")
	b.ReportMetric(perOp*float64(percentile(histogram[1], 0.9)), "ns/starve-p90")
}

func BenchmarkMutexHandoff(b *testing.B) {
	testcase := func(delay func(l *Mutex)) func(b *testing.B) {
		return func(b *testing.B) {
			if workers := 2; GOMAXPROCS(0) < workers {
				b.Skipf("requires GOMAXPROCS >= %d", workers)
			}

			// Measure latency of mutex handoff between threads.
			//
			// Hand off a runtime.mutex between two threads, one running a
			// "coordinator" goroutine and the other running a "worker"
			// goroutine. We don't override the runtime's typical
			// goroutine/thread mapping behavior.
			//
			// Measure the latency, starting when the coordinator enters a call
			// to runtime.unlock and ending when the worker's call to
			// runtime.lock returns. The benchmark can specify a "delay"
			// function to simulate the length of the mutex-holder's critical
			// section, including to arrange for the worker's thread to be in
			// either the "spinning" or "sleeping" portions of the runtime.lock2
			// implementation. Measurement starts after any such "delay".
			//
			// The two threads' goroutines communicate their current position to
			// each other in a non-blocking way via the "turn" state.
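			//
			// The turn counter encodes whose move it is: 0 mod 4 means the
			// coordinator may proceed, 2 mod 4 tells the worker to take the
			// lock, and an odd value tells the worker to record its time and
			// exit.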

			var state struct {
				_    cpu.CacheLinePad
				lock Mutex
				_    cpu.CacheLinePad
				turn atomic.Int64
				_    cpu.CacheLinePad
			}

			var delta atomic.Int64
			var wg sync.WaitGroup

			// coordinator:
			//  - acquire the mutex
			//  - set the turn to 2 mod 4, instructing the worker to begin its Lock call
			//  - wait until the mutex is contended
			//  - wait a bit more so the worker can commit to its sleep
			//  - release the mutex and wait for it to be our turn (0 mod 4) again
			wg.Add(1)
			go func() {
				defer wg.Done()
				var t int64
				for range b.N {
					Lock(&state.lock)
					state.turn.Add(2)
					delay(&state.lock)
					t -= Nanotime() // start the timer
					Unlock(&state.lock)
					for state.turn.Load()&0x2 != 0 {
					}
				}
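				// All iterations are done: make the turn counter odd so the
				// worker's switch sees case 1 or 3 and exits.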
				state.turn.Add(1)
				delta.Add(t)
			}()

			// worker:
			//  - wait until it's our turn (2 mod 4)
			//  - acquire and release the mutex
			//  - switch the turn counter back to the coordinator (0 mod 4)
			wg.Add(1)
			go func() {
				defer wg.Done()
				var t int64
				for {
					switch state.turn.Load() & 0x3 {
					case 0:
					case 1, 3:
						delta.Add(t)
						return
					case 2:
						Lock(&state.lock)
						t += Nanotime() // stop the timer
						Unlock(&state.lock)
						state.turn.Add(2)
					}
				}
			}()

			wg.Wait()
			b.ReportMetric(float64(delta.Load())/float64(b.N), "ns/op")
		}
	}

	b.Run("Solo", func(b *testing.B) {
		var lock Mutex
		for range b.N {
			Lock(&lock)
			Unlock(&lock)
		}
	})

	b.Run("FastPingPong", testcase(func(l *Mutex) {}))
	b.Run("SlowPingPong", testcase(func(l *Mutex) {
		// Wait for the worker to stop spinning and prepare to sleep
		for !MutexContended(l) {
		}
		// Wait a bit longer so the OS can finish committing the worker to its
		// sleep. Balance consistency against getting enough iterations.
		const extraNs = 10e3
		for t0 := Nanotime(); Nanotime()-t0 < extraNs; {
		}
	}))
}
