diff options
author | Ori Bernstein <ori@eigenstate.org> | 2015-10-02 23:27:11 -0700 |
---|---|---|
committer | Ori Bernstein <ori@eigenstate.org> | 2015-10-02 23:27:11 -0700 |
commit | 08442dd9c3426e2dbe598ef1f6fb10f2f67638aa (patch) | |
tree | f8a9d3092248258e5a33bb17fe3a985868ae2ef8 | |
parent | 76086513de9efd7a000d0bc229c3e990f0af5a2f (diff) | |
download | mc-08442dd9c3426e2dbe598ef1f6fb10f2f67638aa.tar.gz |
Implement asm-optimized memory copy and fill routines (std.memblit, std.memfill) for x64, plus a memcpy benchmark.
TODO: memcmp
-rw-r--r-- | bench/bld.sub | 9 | ||||
-rw-r--r-- | bench/copious-allocs.myr | 20 | ||||
-rw-r--r-- | bench/many-memcpy.myr | 27 | ||||
-rw-r--r-- | lib/std/alloc.myr | 25 | ||||
-rw-r--r-- | lib/std/bld.sub | 5 | ||||
-rw-r--r-- | lib/std/memops-impl+posixy-x64.s | 50 | ||||
-rw-r--r-- | lib/std/memops-impl.myr | 39 | ||||
-rw-r--r-- | lib/std/memops.myr | 7 | ||||
-rw-r--r-- | lib/std/slcp.myr | 19 | ||||
-rw-r--r-- | lib/std/test/slcp.myr | 4 |
10 files changed, 163 insertions, 42 deletions
diff --git a/bench/bld.sub b/bench/bld.sub index a6117ba..484226d 100644 --- a/bench/bld.sub +++ b/bench/bld.sub @@ -31,6 +31,14 @@ bin regex-match = lib @/lib/sys:sys lib @/lib/regex:regex ;; + +bin many-memcpy = + memcpy.myr + lib @/lib/std:std + lib @/lib/sys:sys +;; + +# benchmark runner bin runbench = runbench.myr lib @/lib/std:std @@ -45,4 +53,5 @@ cmd benchit = bigfactorial mandelbrot regex-match + many-memcpy ;; diff --git a/bench/copious-allocs.myr b/bench/copious-allocs.myr index 1237b7f..7b02594 100644 --- a/bench/copious-allocs.myr +++ b/bench/copious-allocs.myr @@ -5,32 +5,31 @@ type blob = struct ;; const main = { - var i, j var a : blob#[10000] - for j = 0; j < 100; j++ + for var j = 0; j < 100; j++ /* alloc forwards, dealloc forwards */ - for i = 0; i < a.len; i++ + for var i = 0; i < a.len; i++ a[i] = std.alloc() ;; - for i = 0; i < a.len; i++ + for var i = 0; i < a.len; i++ std.free(a[i]) ;; /* alloc forwards, dealloc backwards */ - for i = 0; i < a.len; i++ + for var i = 0; i < a.len; i++ a[i] = std.alloc() ;; - for i = a.len; i > 0; i-- + for var i = a.len; i > 0; i-- std.free(a[i - 1]) ;; /* alloc forwards, dealloc randomly */ - for i = 0; i < a.len; i++ + for var i = 0; i < a.len; i++ a[i] = std.alloc() ;; shuffle(a[:]) - for i = a.len; i > 0; i-- + for var i = a.len; i > 0; i-- std.free(a[i - 1]) ;; ;; @@ -39,10 +38,11 @@ const main = { const shuffle = {a var t var rng - var i, j + var j + /* we want determinism for benchmarking */ rng = std.mksrng(123) - for i = 0; i < a.len - 1; i++ + for var i = 0; i < a.len - 1; i++ j = std.rngrand(rng, i, a.len) t = a[j] a[j] = a[i] diff --git a/bench/many-memcpy.myr b/bench/many-memcpy.myr new file mode 100644 index 0000000..94a5486 --- /dev/null +++ b/bench/many-memcpy.myr @@ -0,0 +1,27 @@ +use std + +const main = { + var a : uint64[100000] + + for var j = 0; j < 100; j++ + /* independent copies forward */ + for var i = 0; i < 10; i++ + std.slcp(a[:a.len/2-1], a[a.len/2+1:]) + ;; + /* independent 
copies backward */ + for var i = 0; i < 10; i++ + std.slcp(a[:a.len/2-1], a[a.len/2+1:]) + ;; + + /* dependent copies forward */ + for var i = 0; i < 10; i++ + std.slcp(a[:a.len/2+1000], a[a.len/2-1000:]) + ;; + /* dependent copies backward */ + for var i = 0; i < 10; i++ + std.slcp(a[a.len/2-1000:], a[:a.len/2+1000]) + ;; + ;; +} + + diff --git a/lib/std/alloc.myr b/lib/std/alloc.myr index 342354b..5f4c4f6 100644 --- a/lib/std/alloc.myr +++ b/lib/std/alloc.myr @@ -3,6 +3,7 @@ use "extremum.use" use "types.use" use "units.use" use "syswrap.use" +use "memops.use" /* The allocator implementation here is based on Bonwick's slab allocator. @@ -181,11 +182,15 @@ generic slgrow = {sl : @a[:], len /* Grows a slice, filling new entries with zero bytes */ generic slzgrow = {sl : @a[:], len - var oldsz + var oldlen + var base - oldsz = sl.len*sizeof(@a) + oldlen = sl.len sl = slgrow(sl, len) - zfill((sl castto(byte#))[oldsz:len*sizeof(@a)]) + base = sl castto(byte#) castto(intptr) + if oldlen < len + memfill(sl[oldlen:] castto(byte#), 0, (len - oldlen)*sizeof(@a)) + ;; -> sl } @@ -200,16 +205,10 @@ const zbytealloc = {sz var p p = bytealloc(sz) - zfill(p[0:sz]) + memfill(p, 0, sz) -> p } -const zfill = {sl - for var i = 0; i < sl.len; i++ - sl[i] = 0 - ;; -} - /* Allocates a blob that is 'sz' bytes long. Dies if the allocation fails */ const bytealloc = {sz var bkt, p @@ -229,12 +228,8 @@ const bytealloc = {sz /* frees a blob that is 'sz' bytes long. 
*/ const bytefree = {p, sz var bkt - var b, i - b = (p castto(uint64#))[:sz/8] - for i = 0; i < sz>>3; i++ - b[i] = 0xa8a8a8a8a8a8a8a8 - ;; + memfill(p, 0xa8, sz) if (sz < Bktmax) bkt = &buckets[bktnum(sz)] bktfree(bkt, p) diff --git a/lib/std/bld.sub b/lib/std/bld.sub index 9a97f5f..1fc4578 100644 --- a/lib/std/bld.sub +++ b/lib/std/bld.sub @@ -62,6 +62,11 @@ lib std {inc=.} = utf.myr varargs.myr + # asm optimizations + memops.myr + memops-impl.myr + memops-impl+posixy-x64.s + # platform specific files env+plan9.myr env+posixy.myr diff --git a/lib/std/memops-impl+posixy-x64.s b/lib/std/memops-impl+posixy-x64.s new file mode 100644 index 0000000..d43d0f5 --- /dev/null +++ b/lib/std/memops-impl+posixy-x64.s @@ -0,0 +1,50 @@ +/* +std.memblit : (dst : byte#, src : byte#, len : std.size -> void) +std.memfill : (dst : byte#, val : byte, len : std.size -> void) +*/ +.globl _std$memblit +.globl std$memblit +_std$memblit: +std$memblit: + cmpq %rdi,%rsi + jz .done + jg .fwdcpy + movq %rsi,%rax + subq %rdi,%rax + cmpq %rax,%rcx + jg .revcpy +.fwdcpy: + movq %rdx,%rcx + shrq $3,%rcx + rep movsq + movq %rdx,%rcx + andq $7,%rcx + rep movsb + jmp .done +.revcpy: + std + movq %rdx,%rcx + leaq -1(%rdx,%rsi),%rsi + leaq -1(%rdx,%rdi),%rdi + rep movsb + cld +.done: + ret + +.globl _std$memfill +.globl std$memfill +_std$memfill: +std$memfill: + /* generate 8 bytes of fill */ + movzbq %sil,%rbx + mov $0x101010101010101,%rax + imul %rbx,%rax + + /* and fill */ + movq %rdx,%rcx + shrq $3,%rcx + rep stosq + movq %rdx,%rcx + andq $7,%rcx + rep stosb + ret diff --git a/lib/std/memops-impl.myr b/lib/std/memops-impl.myr new file mode 100644 index 0000000..8c43453 --- /dev/null +++ b/lib/std/memops-impl.myr @@ -0,0 +1,39 @@ +use "types.use" + +pkg std = + pkglocal const memblit : (dst : byte#, src : byte#, len : std.size -> void) + pkglocal const memfill : (dst : byte#, val : byte, len : std.size -> void) +;; + + +const memblit = {dst, src, len + var sa, da + var s, d + + da = dst 
castto(intptr) + sa = src castto(intptr) + d = dst[:len] + s = src[:len] + + if da == sa + -> + elif da < sa + for var i = 0; i < d.len; i++ + d[i] = s[i] + ;; + else + for var i = d.len; i > 0; i-- + d[i - 1] = s[i - 1] + ;; + ;; +} + +const memfill = {dst, val, len + var d + + d = dst[:len] + for var i = 0; i < d.len; i++ + d[i] = val + ;; +} + diff --git a/lib/std/memops.myr b/lib/std/memops.myr new file mode 100644 index 0000000..58add1e --- /dev/null +++ b/lib/std/memops.myr @@ -0,0 +1,7 @@ +use "types.use" + +pkg std = + pkglocal extern const memblit : (src : byte#, dst : byte#, len : std.size -> void) + pkglocal extern const memfill : (src : byte#, val : byte, len : std.size -> void) +;; + diff --git a/lib/std/slcp.myr b/lib/std/slcp.myr index b2c9760..a099bf3 100644 --- a/lib/std/slcp.myr +++ b/lib/std/slcp.myr @@ -1,25 +1,12 @@ use "die.use" use "types.use" +use "memops.use" pkg std = generic slcp : (a : @a[:], b : @a[:] -> void) ;; generic slcp = {a : @a[:], b : @a[:] - var addr_a, addr_b - - assert(a.len == b.len, "arguments to slcp() must be of equal length") - - addr_a = a castto(@a#) castto(intptr) - addr_b = b castto(@a#) castto(intptr) - if addr_a <= addr_b - for var i = 0; i < a.len; i++ - a[i] = b[i] - ;; - else - for var i = a.len; i > 0; i-- - a[i - 1] = b[i - 1] - ;; - ;; - + assert(a.len == b.len, "arguments to slcp() must be of equal length\n") + memblit(a castto(byte#), b castto(byte#), a.len * sizeof(@a)) } diff --git a/lib/std/test/slcp.myr b/lib/std/test/slcp.myr index 67c8298..8eea596 100644 --- a/lib/std/test/slcp.myr +++ b/lib/std/test/slcp.myr @@ -9,6 +9,8 @@ const main = { std.slcp(a[:a.len-2], a[2:]) std.slcp(b[2:], b[:b.len-2]) + std.put("a: {}, a_cped: {}\n", a[:], a_cped[:]) + std.put("b: {}, b_cped: {}\n", b[:], b_cped[:]) std.assert(std.sleq(a[:], a_cped[:]), "slcp of a failed") - std.assert(std.sleq(b[:], b_cped[:]), "slcp of a failed") + std.assert(std.sleq(b[:], b_cped[:]), "slcp of b failed") } |