summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorOri Bernstein <ori@eigenstate.org>2017-07-31 22:36:12 -0700
committerOri Bernstein <ori@eigenstate.org>2017-07-31 22:46:36 -0700
commit1c61d05cba15d7e93d3207d4f3ab3c6043d63b77 (patch)
treef602ad598abbd25985cf302a4dda0085eeb120ab
parentd798a21d16cbd1d866752fbde81871012a789ea8 (diff)
downloadmc-1c61d05cba15d7e93d3207d4f3ab3c6043d63b77.tar.gz
Optimize sleq.
Now the comparison is done in assembly, comparing a quadword at a time in the main loop and a byte at a time for the remaining tail.
-rw-r--r--lib/std/memops-impl+plan9-x64.s33
-rw-r--r--lib/std/memops-impl+posixy-x64.s43
-rw-r--r--lib/std/memops-impl.myr14
-rw-r--r--lib/std/memops.myr1
-rw-r--r--lib/std/sleq.myr16
5 files changed, 93 insertions, 14 deletions
diff --git a/lib/std/memops-impl+plan9-x64.s b/lib/std/memops-impl+plan9-x64.s
index 088ee22..2d216ed 100644
--- a/lib/std/memops-impl+plan9-x64.s
+++ b/lib/std/memops-impl+plan9-x64.s
@@ -42,3 +42,36 @@ TEXT std$memfill+0(SB),$0
ANDQ $7,CX
REP; STOSB
RET
+
+/*
+ * std.memeq : (a : byte#, b : byte#, len : std.size -> bool)
+ *
+ * DI and SI hold the two buffer pointers and DX the byte count.
+ * Leaves 1 in AX when all len bytes match and 0 at the first
+ * difference. Overwrites SI, DI, DX, R8, R9, R10 and the flags.
+ */
+TEXT std$memeq+0(SB),$0
+ MOVQ DX,R8
+ ANDQ $~0x7,R8  /* R8 = len rounded down to a multiple of 8 */
+ JZ .dotail  /* fewer than 8 bytes: byte loop only */
+.nextquad:
+ MOVQ (DI),R9
+ MOVQ (SI),R10
+ XORQ R10,R9  /* nonzero iff the two quads differ */
+ JNZ .unequal
+ ADDQ $8,SI
+ ADDQ $8,DI
+ SUBQ $8,R8
+ JNZ .nextquad
+.dotail:
+ ANDQ $0x7,DX  /* DX = leftover byte count; AND already sets ZF, no TEST needed */
+ JZ .equal
+.nextbyte:
+ MOVBLZX (DI),R9
+ MOVBLZX (SI),R10
+ XORL R10,R9
+ JNZ .unequal
+ ADDQ $1,SI
+ ADDQ $1,DI
+ SUBQ $1,DX
+ JNZ .nextbyte
+.equal:
+ MOVL $1,AX  /* 32-bit write zero-extends into the full register */
+ RET
+.unequal:
+ XORL AX,AX
+ RET
diff --git a/lib/std/memops-impl+posixy-x64.s b/lib/std/memops-impl+posixy-x64.s
index c2a3d14..899aaca 100644
--- a/lib/std/memops-impl+posixy-x64.s
+++ b/lib/std/memops-impl+posixy-x64.s
@@ -1,7 +1,4 @@
-/*
-std.memblit : (dst : byte#, src : byte#, len : std.size -> void)
-std.memfill : (dst : byte#, val : byte, len : std.size -> void)
-*/
+/* std.memblit : (dst : byte#, src : byte#, len : std.size -> void) */
.globl _std$memblit
.globl std$memblit
_std$memblit:
@@ -30,6 +27,7 @@ std$memblit:
.done:
ret
+/* std.memfill : (dst : byte#, val : byte, len : std.size -> void) */
.globl _std$memfill
.globl std$memfill
_std$memfill:
@@ -47,3 +45,40 @@ std$memfill:
andq $7,%rcx
rep stosb
ret
+
+/*
+ * std.memeq : (a : byte#, b : byte#, len : std.size -> bool)
+ *
+ * %rdi and %rsi hold the two buffer pointers and %rdx the byte count.
+ * Leaves 1 in %rax when all len bytes match and 0 at the first
+ * difference. Overwrites %rsi, %rdi, %rdx, %r8, %r9, %r10 and the flags.
+ */
+.globl _std$memeq
+.globl std$memeq
+_std$memeq:
+std$memeq:
+ movq %rdx,%r8
+ andq $~0x7,%r8  /* r8 = len rounded down to a multiple of 8 */
+ jz .dotail  /* fewer than 8 bytes: byte loop only */
+.nextquad:
+ movq (%rdi),%r9
+ movq (%rsi),%r10
+ xorq %r10,%r9  /* nonzero iff the two quads differ */
+ jnz .unequal
+ addq $8,%rsi
+ addq $8,%rdi
+ subq $8,%r8
+ jnz .nextquad
+.dotail:
+ andq $0x7,%rdx  /* rdx = leftover byte count; and already sets ZF, no test needed */
+ jz .equal
+.nextbyte:
+ movzbl (%rdi),%r9d
+ movzbl (%rsi),%r10d
+ xorl %r10d,%r9d
+ jnz .unequal
+ addq $1,%rsi
+ addq $1,%rdi
+ subq $1,%rdx
+ jnz .nextbyte
+.equal:
+ movl $1,%eax  /* 32-bit write zero-extends into %rax */
+ ret
+.unequal:
+ xorl %eax,%eax
+ ret
diff --git a/lib/std/memops-impl.myr b/lib/std/memops-impl.myr
index 7a84945..67c67d3 100644
--- a/lib/std/memops-impl.myr
+++ b/lib/std/memops-impl.myr
@@ -3,6 +3,7 @@ use "types"
pkg std =
pkglocal const memblit : (dst : byte#, src : byte#, len : std.size -> void)
pkglocal const memfill : (dst : byte#, val : byte, len : std.size -> void)
+ pkglocal const memeq : (a : byte#, b : byte#, len : std.size -> bool) /* true iff the first len bytes at a and b are equal */
;;
@@ -37,3 +38,16 @@ const memfill = {dst, val, len
;;
}
+/*
+ * Fallback implementation of std.memeq: returns true when the first
+ * `len` bytes at `a` and `b` are identical, false at the first byte
+ * that differs. A `len` of zero yields true.
+ */
+const memeq = {a, b, len
+ var sa, sb
+
+ sa = a[:len]
+ sb = b[:len]
+ /* the index is declared in the loop header; nothing else in this function declares it */
+ for var i = 0; i < len; i++
+ if sa[i] != sb[i]
+ -> false
+ ;;
+ ;;
+ -> true
+}
+
diff --git a/lib/std/memops.myr b/lib/std/memops.myr
index a5b5b4a..7eb7e8f 100644
--- a/lib/std/memops.myr
+++ b/lib/std/memops.myr
@@ -3,5 +3,6 @@ use "types"
pkg std =
pkglocal extern const memblit : (dst : byte#, src : byte#, len : std.size -> void)
pkglocal extern const memfill : (dst : byte#, val : byte, len : std.size -> void)
+ pkglocal extern const memeq : (a : byte#, b : byte#, len : std.size -> bool) /* true iff the first len bytes at a and b are equal; defined in memops-impl */
;;
diff --git a/lib/std/sleq.myr b/lib/std/sleq.myr
index cfc60b4..e913cd6 100644
--- a/lib/std/sleq.myr
+++ b/lib/std/sleq.myr
@@ -1,18 +1,14 @@
+use "types"
+use "memops"
+
pkg std =
generic sleq : (a : @a[:], b : @a[:] -> bool)
;;
+/*
+ * Returns true when `a` and `b` have the same length and their backing
+ * memory is byte-for-byte identical; false otherwise. The contents are
+ * compared as raw bytes via memeq, not with the element type's `!=`.
+ */
generic sleq = {a, b
- var i
-
- if a.len != b.len
+ if a.len == b.len
+ /* memeq takes a byte count, so scale the element count by the element size */
+ -> memeq((a : byte#), (b : byte#), a.len * sizeof(@a))
+ else
-> false
;;
-
- for i = 0; i < a.len; i++
- if a[i] != b[i]
- -> false
- ;;
- ;;
- -> true
}