summaryrefslogtreecommitdiff
path: root/mbld
diff options
context:
space:
mode:
author S. Gilles <sgilles@math.umd.edu> 2018-03-21 23:21:23 -0400
committer S. Gilles <sgilles@math.umd.edu> 2018-03-22 10:59:33 -0400
commit d27469804f73bf0874250eda520532875ecf5645 (patch)
tree 3b46f6f7a4a91cd5d831d832798cbb08cc682ec6 /mbld
parent 31356a321f79afc2dd2cc5a6b7c9c72f77cf3394 (diff)
download mc-d27469804f73bf0874250eda520532875ecf5645.tar.gz
Implement assembly version of fused multiply-add
The feature flag for fma includes OSXSAVE and AVX detection, as these are prerequisites for using the xmm/ymm registers. We do not, however, check the result of XGETBV (as recommended in the Intel software developer manual, vol 1, 14.5.3) because this seems to be a check that should be performed at runtime (see the note in section 14.3). This is slightly uncomfortable -- perhaps libmath should come with an __init__ that bails if the user is trying to use AVX stuff when the OS isn't using XSAVE.
Diffstat (limited to 'mbld')
-rw-r--r-- mbld/opts.myr 7
-rw-r--r-- mbld/syssel.myr 3
2 files changed, 10 insertions, 0 deletions
diff --git a/mbld/opts.myr b/mbld/opts.myr
index b82921f..8956338 100644
--- a/mbld/opts.myr
+++ b/mbld/opts.myr
@@ -35,6 +35,13 @@ pkg bld =
/* not exactly portable, but good enough for now */
const CpuidSSE4 : uint64= 0x180000
+
+ /*
+ Intel manuals (vol 1, 14.5.3) say AVX, OSXSAVE also
+ needed. For full portability, XGETBV also needs to be
+ checked, though it isn't right now.
+ */
+ const CpuidFMA : uint64= 0x18001000
extern const cpufeatures : (-> uint64)
;;
diff --git a/mbld/syssel.myr b/mbld/syssel.myr
index 756cf4d..f0223ca 100644
--- a/mbld/syssel.myr
+++ b/mbld/syssel.myr
@@ -165,6 +165,9 @@ const addsysattrs = {b, tags
if opt_cpufeatures & CpuidSSE4 == CpuidSSE4
tag(b, "sse4")
;;
+ if opt_cpufeatures & CpuidFMA == CpuidFMA
+ tag(b, "fma")
+ ;;
| unknown:
std.fatal("unknown architecture {}\n", unknown)
;;