mirror of
https://github.com/valyala/fasthttp.git
synced 2026-06-13 15:46:49 +03:00
perf: improve copyZeroAlloc for File and TCPConn (#1893)
Improve performance of `copyZeroAlloc` function
```
goos: linux
goarch: amd64
pkg: github.com/valyala/fasthttp
cpu: QEMU Virtual CPU version 2.5+
│ old6.txt │ new7.txt │
│ sec/op │ sec/op vs base │
CopyZeroAllocOSFileToBytesBuffer-8 1.802µ ± 3% 1.303µ ± 2% -27.69% (p=0.000 n=25)
CopyZeroAllocBytesBufferToOSFile-8 1.066µ ± 17% 1.048µ ± 1% -1.69% (p=0.043 n=25)
CopyZeroAllocOSFileToStringsBuilder-8 9.477µ ± 0% 1.345µ ± 2% -85.81% (p=0.000 n=25)
CopyZeroAllocIOLimitedReaderToOSFile-8 1.031µ ± 1% 1.092µ ± 4% +5.92% (p=0.000 n=25)
CopyZeroAllocOSFileToOSFile-8 12.132µ ± 1% 2.386µ ± 2% -80.33% (p=0.000 n=25)
CopyZeroAllocOSFileToNetConn-8 2.009µ ± 2% 1.995µ ± 2% ~ (p=0.733 n=25)
CopyZeroAllocNetConnToOSFile-8 21.86µ ± 2% 20.21µ ± 1% -7.56% (p=0.000 n=25)
geomean 3.728µ 2.121µ -43.11%
│ old6.txt │ new7.txt │
│ B/op │ B/op vs base │
CopyZeroAllocOSFileToBytesBuffer-8 40.00 ± 0% 0.00 ± 0% -100.00% (p=0.000 n=25)
CopyZeroAllocBytesBufferToOSFile-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=25) ¹
CopyZeroAllocOSFileToStringsBuilder-8 32.04Ki ± 0% 0.00Ki ± 0% -100.00% (p=0.000 n=25)
CopyZeroAllocIOLimitedReaderToOSFile-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=25) ¹
CopyZeroAllocOSFileToOSFile-8 32.06Ki ± 0% 0.00Ki ± 0% -100.00% (p=0.000 n=25)
CopyZeroAllocOSFileToNetConn-8 96.00 ± 0% 96.00 ± 0% ~ (p=1.000 n=25) ¹
CopyZeroAllocNetConnToOSFile-8 16.000 ± 6% 8.000 ± 12% -50.00% (p=0.000 n=25)
geomean ² ? ² ³
¹ all samples are equal
² summaries must be >0 to compute geomean
³ ratios must be >0 to compute geomean
│ old6.txt │ new7.txt │
│ allocs/op │ allocs/op vs base │
CopyZeroAllocOSFileToBytesBuffer-8 4.000 ± 0% 0.000 ± 0% -100.00% (p=0.000 n=25)
CopyZeroAllocBytesBufferToOSFile-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=25) ¹
CopyZeroAllocOSFileToStringsBuilder-8 5.000 ± 0% 0.000 ± 0% -100.00% (p=0.000 n=25)
CopyZeroAllocIOLimitedReaderToOSFile-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=25) ¹
CopyZeroAllocOSFileToOSFile-8 8.000 ± 0% 0.000 ± 0% -100.00% (p=0.000 n=25)
CopyZeroAllocOSFileToNetConn-8 6.000 ± 0% 6.000 ± 0% ~ (p=1.000 n=25) ¹
CopyZeroAllocNetConnToOSFile-8 2.000 ± 0% 1.000 ± 0% -50.00% (p=0.000 n=25)
geomean ² ? ² ³
¹ all samples are equal
² summaries must be >0 to compute geomean
³ ratios must be >0 to compute geomean
```
```
goos: windows
goarch: amd64
pkg: github.com/valyala/fasthttp
cpu: Intel(R) Core(TM) i5-8250U CPU @ 1.60GHz
│ old_win.txt │ new_win.txt │
│ sec/op │ sec/op vs base │
CopyZeroAllocOSFileToBytesBuffer-8 4.347µ ± 7% 4.220µ ± 11% ~ (p=0.211 n=25)
CopyZeroAllocBytesBufferToOSFile-8 1.408µ ± 12% 1.460µ ± 7% ~ (p=0.427 n=25)
CopyZeroAllocOSFileToStringsBuilder-8 17.448µ ± 5% 3.613µ ± 9% -79.29% (p=0.000 n=25)
CopyZeroAllocIOLimitedReaderToOSFile-8 1.324µ ± 8% 1.257µ ± 6% -5.06% (p=0.024 n=25)
CopyZeroAllocOSFileToOSFile-8 19.953µ ± 8% 4.846µ ± 7% -75.71% (p=0.000 n=25)
CopyZeroAllocOSFileToNetConn-8 18.18µ ± 8% 18.22µ ± 7% ~ (p=0.405 n=25)
CopyZeroAllocNetConnToOSFile-8 74.75µ ± 2% 68.10µ ± 3% -8.90% (p=0.000 n=25)
geomean 8.720µ 5.579µ -36.02%
│ old_win.txt │ new_win.txt │
│ B/op │ B/op vs base │
CopyZeroAllocOSFileToBytesBuffer-8 8.000 ± 0% 0.000 ± 0% -100.00% (p=0.000 n=25)
CopyZeroAllocBytesBufferToOSFile-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=25) ¹
CopyZeroAllocOSFileToStringsBuilder-8 32.01Ki ± 0% 0.00Ki ± 0% -100.00% (p=0.000 n=25)
CopyZeroAllocIOLimitedReaderToOSFile-8 9.000 ± 0% 0.000 ± 0% -100.00% (p=0.000 n=25)
CopyZeroAllocOSFileToOSFile-8 32.02Ki ± 0% 0.00Ki ± 0% -100.00% (p=0.000 n=25)
CopyZeroAllocOSFileToNetConn-8 32.02Ki ± 0% 32.02Ki ± 0% ~ (p=1.000 n=25) ¹
CopyZeroAllocNetConnToOSFile-8 32.02Ki ± 0% 32.02Ki ± 0% -0.00% (p=0.012 n=25)
geomean ² ? ² ³
¹ all samples are equal
² summaries must be >0 to compute geomean
³ ratios must be >0 to compute geomean
│ old_win.txt │ new_win.txt │
│ allocs/op │ allocs/op vs base │
CopyZeroAllocOSFileToBytesBuffer-8 1.000 ± 0% 0.000 ± 0% -100.00% (p=0.000 n=25)
CopyZeroAllocBytesBufferToOSFile-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=25) ¹
CopyZeroAllocOSFileToStringsBuilder-8 2.000 ± 0% 0.000 ± 0% -100.00% (p=0.000 n=25)
CopyZeroAllocIOLimitedReaderToOSFile-8 2.000 ± 0% 0.000 ± 0% -100.00% (p=0.000 n=25)
CopyZeroAllocOSFileToOSFile-8 3.000 ± 0% 0.000 ± 0% -100.00% (p=0.000 n=25)
CopyZeroAllocOSFileToNetConn-8 3.000 ± 0% 3.000 ± 0% ~ (p=1.000 n=25) ¹
CopyZeroAllocNetConnToOSFile-8 3.000 ± 0% 3.000 ± 0% ~ (p=1.000 n=25) ¹
geomean ² ? ² ³
¹ all samples are equal
² summaries must be >0 to compute geomean
³ ratios must be >0 to compute geomean
```
This commit is contained in:
@@ -2219,20 +2219,109 @@ func writeBodyFixedSize(w *bufio.Writer, r io.Reader, size int64) error {
|
||||
return err
|
||||
}
|
||||
|
||||
// copyZeroAlloc optimizes io.Copy by calling ReadFrom or WriteTo only when
|
||||
// copying between os.File and net.TCPConn. If the reader has a WriteTo
|
||||
// method, it uses WriteTo for copying; if the writer has a ReadFrom method,
|
||||
// it uses ReadFrom for copying. If neither method is available, it gets a
|
||||
// buffer from sync.Pool to perform the copy.
|
||||
//
|
||||
// io.CopyBuffer always uses the WriterTo or ReadFrom interface if it's
|
||||
// available. however, os.File and net.TCPConn unfortunately have a
|
||||
// fallback in their WriterTo that calls io.Copy if sendfile isn't possible.
|
||||
//
|
||||
// See issue: https://github.com/valyala/fasthttp/issues/1889
|
||||
//
|
||||
// sendfile can only be triggered when copying between os.File and net.TCPConn.
|
||||
// Since the function confirming zero-copy is a private function, we use
|
||||
// ReadFrom only in this specific scenario. For all other cases, we prioritize
|
||||
// using our own copyBuffer method.
|
||||
//
|
||||
// o: our copyBuffer
|
||||
// r: readFrom
|
||||
// w: writeTo
|
||||
//
|
||||
// write\read *File *TCPConn writeTo other
|
||||
// *File o r w o
|
||||
// *TCPConn w,r o w o
|
||||
// readFrom r r w r
|
||||
// other o o w o
|
||||
//
|
||||
//nolint:dupword
|
||||
func copyZeroAlloc(w io.Writer, r io.Reader) (int64, error) {
|
||||
if wt, ok := r.(io.WriterTo); ok {
|
||||
return wt.WriteTo(w)
|
||||
var readerIsFile, readerIsConn bool
|
||||
|
||||
switch r := r.(type) {
|
||||
case *os.File:
|
||||
readerIsFile = true
|
||||
case *net.TCPConn:
|
||||
readerIsConn = true
|
||||
case io.WriterTo:
|
||||
return r.WriteTo(w)
|
||||
}
|
||||
if rt, ok := w.(io.ReaderFrom); ok {
|
||||
return rt.ReadFrom(r)
|
||||
|
||||
switch w := w.(type) {
|
||||
case *os.File:
|
||||
if readerIsConn {
|
||||
return w.ReadFrom(r)
|
||||
}
|
||||
case *net.TCPConn:
|
||||
if readerIsFile {
|
||||
// net.WriteTo requires go1.22 or later
|
||||
// Benchmark tests show that on Windows, WriteTo performs
|
||||
// significantly better than ReadFrom. On Linux, however,
|
||||
// ReadFrom slightly outperforms WriteTo. When possible,
|
||||
// copyZeroAlloc aims to perform better than or as well
|
||||
// as io.Copy, so we use WriteTo whenever possible for
|
||||
// optimal performance.
|
||||
if rt, ok := r.(io.WriterTo); ok {
|
||||
return rt.WriteTo(w)
|
||||
}
|
||||
return w.ReadFrom(r)
|
||||
}
|
||||
case io.ReaderFrom:
|
||||
return w.ReadFrom(r)
|
||||
}
|
||||
|
||||
vbuf := copyBufPool.Get()
|
||||
buf := vbuf.([]byte)
|
||||
n, err := io.CopyBuffer(w, r, buf)
|
||||
n, err := copyBuffer(w, r, buf)
|
||||
copyBufPool.Put(vbuf)
|
||||
return n, err
|
||||
}
|
||||
|
||||
// copyBuffer is rewritten from io.copyBuffer. It streams src into dst through
// buf and returns the number of bytes written together with the first error
// encountered. io.EOF from src ends the copy and is not reported as an error.
//
// Unlike io.CopyBuffer, it does not check whether src has a WriteTo method or
// dst has a ReadFrom method. A nil or zero-length buf is replaced with a
// freshly allocated one, because reading into an empty buffer may return
// (0, nil) forever and the loop would never terminate.
func copyBuffer(dst io.Writer, src io.Reader, buf []byte) (written int64, err error) {
	if len(buf) == 0 {
		// Same size as the buffers handed out by copyBufPool.
		buf = make([]byte, 4096)
	}
	for {
		nr, er := src.Read(buf)
		if nr > 0 {
			nw, ew := dst.Write(buf[0:nr])
			if nw < 0 || nr < nw {
				// The writer reported an impossible count; do not trust it.
				nw = 0
				if ew == nil {
					ew = errors.New("invalid write result")
				}
			}
			written += int64(nw)
			if ew != nil {
				err = ew
				break
			}
			if nr != nw {
				err = io.ErrShortWrite
				break
			}
		}
		if er != nil {
			if er != io.EOF {
				err = er
			}
			break
		}
	}
	return written, err
}
|
||||
|
||||
var copyBufPool = sync.Pool{
|
||||
New: func() any {
|
||||
return make([]byte, 4096)
|
||||
|
||||
@@ -0,0 +1,283 @@
|
||||
package fasthttp
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"io"
|
||||
"net"
|
||||
"os"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func BenchmarkCopyZeroAllocOSFileToBytesBuffer(b *testing.B) {
|
||||
r, err := os.Open("./README.md")
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
defer r.Close()
|
||||
|
||||
buf := &bytes.Buffer{}
|
||||
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
buf.Reset()
|
||||
_, err = copyZeroAlloc(buf, r)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkCopyZeroAllocBytesBufferToOSFile(b *testing.B) {
|
||||
f, err := os.Open("./README.md")
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
buf := &bytes.Buffer{}
|
||||
_, err = io.Copy(buf, f)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
|
||||
tmp, err := os.CreateTemp(os.TempDir(), "test_*")
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
defer os.Remove(tmp.Name())
|
||||
|
||||
w, err := os.OpenFile(tmp.Name(), os.O_WRONLY, 0o444)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
defer w.Close()
|
||||
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
_, err := w.Seek(0, 0)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
_, err = copyZeroAlloc(w, buf)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkCopyZeroAllocOSFileToStringsBuilder(b *testing.B) {
|
||||
r, err := os.Open("./README.md")
|
||||
if err != nil {
|
||||
b.Fatalf("Failed to open testing file: %v", err)
|
||||
}
|
||||
defer r.Close()
|
||||
|
||||
w := &strings.Builder{}
|
||||
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
w.Reset()
|
||||
_, err = copyZeroAlloc(w, r)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkCopyZeroAllocIOLimitedReaderToOSFile(b *testing.B) {
|
||||
f, err := os.Open("./README.md")
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
r := io.LimitReader(f, 1024)
|
||||
|
||||
tmp, err := os.CreateTemp(os.TempDir(), "test_*")
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
defer os.Remove(tmp.Name())
|
||||
|
||||
w, err := os.OpenFile(tmp.Name(), os.O_WRONLY, 0o444)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
defer w.Close()
|
||||
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
_, err := w.Seek(0, 0)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
_, err = copyZeroAlloc(w, r)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkCopyZeroAllocOSFileToOSFile(b *testing.B) {
|
||||
r, err := os.Open("./README.md")
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
defer r.Close()
|
||||
|
||||
f, err := os.CreateTemp(os.TempDir(), "test_*")
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
defer os.Remove(f.Name())
|
||||
|
||||
w, err := os.OpenFile(f.Name(), os.O_WRONLY, 0o444)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
defer w.Close()
|
||||
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
_, err := w.Seek(0, 0)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
_, err = copyZeroAlloc(w, r)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkCopyZeroAllocOSFileToNetConn(b *testing.B) {
|
||||
ln, err := net.Listen("tcp", "127.0.0.1:0")
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
|
||||
addr := ln.Addr().String()
|
||||
defer ln.Close()
|
||||
|
||||
done := make(chan struct{})
|
||||
defer close(done)
|
||||
|
||||
go func() {
|
||||
conn, err := ln.Accept()
|
||||
if err != nil {
|
||||
b.Error(err)
|
||||
return
|
||||
}
|
||||
defer conn.Close()
|
||||
for {
|
||||
select {
|
||||
case <-done:
|
||||
return
|
||||
default:
|
||||
_, err := io.Copy(io.Discard, conn)
|
||||
if err != nil {
|
||||
b.Error(err)
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
conn, err := net.Dial("tcp", addr)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
defer conn.Close()
|
||||
|
||||
file, err := os.Open("./README.md")
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
if _, err := copyZeroAlloc(conn, file); err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkCopyZeroAllocNetConnToOSFile(b *testing.B) {
|
||||
data, err := os.ReadFile("./README.md")
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
|
||||
ln, err := net.Listen("tcp", "127.0.0.1:0")
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
|
||||
addr := ln.Addr().String()
|
||||
defer ln.Close()
|
||||
|
||||
done := make(chan struct{})
|
||||
defer close(done)
|
||||
|
||||
writeDone := make(chan struct{})
|
||||
go func() {
|
||||
for {
|
||||
select {
|
||||
case <-done:
|
||||
return
|
||||
default:
|
||||
conn, err := ln.Accept()
|
||||
if err != nil {
|
||||
b.Error(err)
|
||||
return
|
||||
}
|
||||
_, err = conn.Write(data)
|
||||
if err != nil {
|
||||
b.Error(err)
|
||||
}
|
||||
conn.Close()
|
||||
writeDone <- struct{}{}
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
tmp, err := os.CreateTemp(os.TempDir(), "test_*")
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
defer os.Remove(tmp.Name())
|
||||
|
||||
file, err := os.OpenFile(tmp.Name(), os.O_WRONLY, 0o444)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
conn, err := net.Dial("tcp", addr)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
defer conn.Close()
|
||||
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
b.StopTimer()
|
||||
<-writeDone
|
||||
_, err = file.Seek(0, 0)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
b.StartTimer()
|
||||
_, err = copyZeroAlloc(file, conn)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
b.StopTimer()
|
||||
conn, err = net.Dial("tcp", addr)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user