diff --git a/blis-Reimplemented-bli_cpuid_query-for-ARM.patch b/blis-Reimplemented-bli_cpuid_query-for-ARM.patch deleted file mode 100644 index 569c3ab..0000000 --- a/blis-Reimplemented-bli_cpuid_query-for-ARM.patch +++ /dev/null @@ -1,306 +0,0 @@ -From 1cfe8e2562e5e50769468382626ce36b734741c1 Mon Sep 17 00:00:00 2001 -From: "Field G. Van Zee" -Date: Thu, 5 Sep 2019 16:08:30 -0500 -Subject: [PATCH] Reimplemented bli_cpuid_query() for ARM. - -Details: -- Rewrote bli_cpuid_query() for ARM architectures to use stdio-based - functions such as fopen() and fgets() instead of popen(). The new code - does more or less the same thing as before--searches /proc/cpuinfo for - various strings, which are then parsed in order to determine the - model, part number, and features. Thanks to Dave Love for suggesting - this change in issue #335. ---- - frame/base/bli_cpuid.c | 174 ++++++++++++++++++++++++++----------------------- - frame/base/bli_cpuid.h | 34 +++++----- - 2 files changed, 109 insertions(+), 99 deletions(-) - -diff --git a/frame/base/bli_cpuid.c b/frame/base/bli_cpuid.c -index f5c53fc..c8891f0 100644 ---- a/frame/base/bli_cpuid.c -+++ b/frame/base/bli_cpuid.c -@@ -380,10 +380,12 @@ arch_t bli_cpuid_query_id( void ) - // vendor. - vendor = bli_cpuid_query( &model, &part, &features ); - -- //printf( "vendor = %u\n", vendor ); -- //printf( "model = %u\n", model ); -- //printf( "part = 0x%x\n", part ); -- //printf( "features = %u\n", features ); -+#if 0 -+ printf( "vendor = %u\n", vendor ); -+ printf( "model = %u\n", model ); -+ printf( "part = 0x%x\n", part ); -+ printf( "features = %u\n", features ); -+#endif - - if ( vendor == VENDOR_ARM ) - { -@@ -909,6 +911,8 @@ int vpu_count( void ) - - #elif defined(__aarch64__) || defined(__arm__) || defined(_M_ARM) - -+#define TEMP_BUFFER_SIZE 200 -+ - uint32_t bli_cpuid_query - ( - uint32_t* model, -@@ -919,96 +923,40 @@ uint32_t bli_cpuid_query - *model = MODEL_UNKNOWN; - *part = 0; - *features = 0; -- --#if 1 -- const char* grep_str1 = "grep -m 1 Processor /proc/cpuinfo"; -- const char* grep_str2 = "grep -m 1 'CPU part' /proc/cpuinfo"; -- const char* grep_str3 = "grep -m 1 Features /proc/cpuinfo"; --#else -- const char* grep_str1 = "grep -m 1 Processor ./proc_cpuinfo"; -- const char* grep_str2 = "grep -m 1 'CPU part' ./proc_cpuinfo"; -- const char* grep_str3 = "grep -m 1 Features ./proc_cpuinfo"; --#endif - -- FILE *fd1 = popen( grep_str1, "r"); -- if ( !fd1 ) -- { -- //printf("popen 1 failed\n"); -- return VENDOR_ARM; -- } -- FILE *fd2 = popen( grep_str2, "r"); -- if (!fd2) -- { -- //printf("popen 2 failed\n"); -- pclose(fd1); -- return VENDOR_ARM; -- } -- FILE *fd3 = popen( grep_str3, "r"); -- if (!fd3) -- { -- //printf("popen 3 failed\n"); -- pclose(fd1); -- pclose(fd2); -- return VENDOR_ARM; -- } -- -- uint32_t n1, n2, n3; -- int c; -- -- // First, discover how many chars are in each stream. -- for ( n1 = 0; (c = fgetc(fd1)) != EOF; ++n1 ) continue; -- for ( n2 = 0; (c = fgetc(fd2)) != EOF; ++n2 ) continue; -- for ( n3 = 0; (c = fgetc(fd3)) != EOF; ++n3 ) continue; -- -- //printf( "n1, n2, n3 = %u %u %u\n", n1, n2, n3 ); -- -- // Close the streams. -- pclose( fd1 ); -- pclose( fd2 ); -- pclose( fd3 ); -- -- // Allocate the correct amount of memory for each stream. -- char* proc_str = malloc( ( size_t )( n1 + 1 ) ); -- char* ptno_str = malloc( ( size_t )( n2 + 1 ) ); -- char* feat_str = malloc( ( size_t )( n3 + 1 ) ); -- *proc_str = 0; -- *ptno_str = 0; -- *feat_str = 0; -- -- // Re-open the streams. Note that there is no need to check for errors -- // this time since we're assumign that the contents of /proc/cpuinfo -- // will be the same as before. -- fd1 = popen( grep_str1, "r"); -- fd2 = popen( grep_str2, "r"); -- fd3 = popen( grep_str3, "r"); -+ char* pci_str = "/proc/cpuinfo"; - -+ char proc_str[ TEMP_BUFFER_SIZE ]; -+ char ptno_str[ TEMP_BUFFER_SIZE ]; -+ char feat_str[ TEMP_BUFFER_SIZE ]; - char* r_val; - -- // Now read each stream in its entirety. Nothing should go wrong, but -- // if it does, bail out. -- r_val = fgets( proc_str, n1, fd1 ); -- if ( n1 && r_val == NULL ) bli_abort(); -+ //printf( "bli_cpuid_query(): beginning search\n" ); - -- r_val = fgets( ptno_str, n2, fd2 ); -- if ( n2 && r_val == NULL ) bli_abort(); -+ // Search /proc/cpuinfo for the 'Processor' entry. -+ r_val = find_string_in( "Processor", proc_str, TEMP_BUFFER_SIZE, pci_str ); -+ if ( r_val == NULL ) return VENDOR_ARM; - -- r_val = fgets( feat_str, n3, fd3 ); -- if ( n3 && r_val == NULL ) bli_abort(); -+ // Search /proc/cpuinfo for the 'CPU part' entry. -+ r_val = find_string_in( "CPU part", ptno_str, TEMP_BUFFER_SIZE, pci_str ); -+ if ( r_val == NULL ) return VENDOR_ARM; - -- //printf( "proc_str: %s\n", proc_str ); -- //printf( "ptno_str: %s\n", ptno_str ); -- //printf( "feat_str: %s\n", feat_str ); -+ // Search /proc/cpuinfo for the 'Features' entry. -+ r_val = find_string_in( "Features", feat_str, TEMP_BUFFER_SIZE, pci_str ); -+ if ( r_val == NULL ) return VENDOR_ARM; - -- // Close the streams. -- pclose( fd1 ); -- pclose( fd2 ); -- pclose( fd3 ); -+#if 0 -+ printf( "bli_cpuid_query(): full processor string: %s\n", proc_str ); -+ printf( "bli_cpuid_query(): full part num string: %s\n", ptno_str ); -+ printf( "bli_cpuid_query(): full features string: %s\n", feat_str ); -+#endif - - // Parse the feature string to check for SIMD features. - if ( strstr( feat_str, "neon" ) != NULL || - strstr( feat_str, "asimd" ) != NULL ) - *features |= FEATURE_NEON; -- //printf( "features var: %u\n", *features ); -+ -+ //printf( "bli_cpuid_query(): features var: %u\n", *features ); - - // Parse the processor string to uncover the model. - if ( strstr( proc_str, "ARMv7" ) != NULL ) -@@ -1016,7 +964,8 @@ uint32_t bli_cpuid_query - else if ( strstr( proc_str, "AArch64" ) != NULL || - strstr( proc_str, "ARMv8" ) ) - *model = MODEL_ARMV8; -- //printf( "model: %u\n", *model ); -+ -+ //printf( "bli_cpuid_query(): model: %u\n", *model ); - - // Parse the part number string. - r_val = strstr( ptno_str, "0x" ); -@@ -1024,9 +973,68 @@ uint32_t bli_cpuid_query - { - *part = strtol( r_val, NULL, 16 ); - } -- //printf( "part#: %x\n", *part ); -+ //printf( "bli_cpuid_query(): part#: %x\n", *part ); - - return VENDOR_ARM; - } - -+char* find_string_in( char* target, char* buffer, size_t buf_len, char* filepath ) -+{ -+ // This function searches for the first line of the file located at -+ // 'filepath' that contains the string 'target' and then copies that -+ // line (actually, the substring of the line starting with 'target') -+ // to 'buffer', which is 'buf_len' bytes long. -+ -+ char* r_val = NULL; -+ -+ // Allocate a temporary local buffer equal to the size of buffer. -+ char* buf_local = malloc( buf_len * sizeof( char ) ); -+ -+ // Open the file stream. -+ FILE* stream = fopen( filepath, "r" ); -+ -+ // Repeatedly read in a line from the stream, storing the contents of -+ // the stream into buf_local. -+ while ( !feof( stream ) ) -+ { -+ // Read in the current line, up to buf_len-1 bytes. -+ r_val = fgets( buf_local, buf_len-1, stream ); -+ -+ //printf( "read line: %s", buf_local ); -+ -+ // fgets() returns the pointer specified by the first argument (in -+ // this case, buf_local) on success and NULL on error. -+ if ( r_val == NULL ) break; -+ -+ // Since fgets() was successful, we can search for the target string -+ // within the current line, as captured in buf_local. -+ r_val = strstr( buf_local, target ); -+ -+ // If the target string was found in buf_local, we save it to buffer. -+ if ( r_val != NULL ) -+ { -+ //printf( " found match to '%s'\n", target ); -+ -+ // Copy the string read by fgets() to the caller's buffer. -+ strncpy( buffer, buf_local, buf_len ); -+ -+ // Make sure that we have a terminating null character by the -+ // end of the buffer. -+ if ( buf_len > 0 ) buffer[ buf_len - 1 ] = '\0'; -+ -+ // Leave the loop since we found the target string. -+ break; -+ } -+ } -+ -+ // Close the file stream. -+ fclose( stream ); -+ -+ // Free the temporary local buffer. -+ free( buf_local ); -+ -+ // Return r_val so the caller knows if we failed. -+ return r_val; -+} -+ - #endif -diff --git a/frame/base/bli_cpuid.h b/frame/base/bli_cpuid.h -index e609dcb..b6ecd3d 100644 ---- a/frame/base/bli_cpuid.h -+++ b/frame/base/bli_cpuid.h -@@ -50,28 +50,28 @@ - #ifndef BLIS_CPUID_H - #define BLIS_CPUID_H - --arch_t bli_cpuid_query_id( void ); -+arch_t bli_cpuid_query_id( void ); - - // Intel --bool_t bli_cpuid_is_skx( uint32_t family, uint32_t model, uint32_t features ); --bool_t bli_cpuid_is_knl( uint32_t family, uint32_t model, uint32_t features ); --bool_t bli_cpuid_is_haswell( uint32_t family, uint32_t model, uint32_t features ); --bool_t bli_cpuid_is_sandybridge( uint32_t family, uint32_t model, uint32_t features ); --bool_t bli_cpuid_is_penryn( uint32_t family, uint32_t model, uint32_t features ); -+bool_t bli_cpuid_is_skx( uint32_t family, uint32_t model, uint32_t features ); -+bool_t bli_cpuid_is_knl( uint32_t family, uint32_t model, uint32_t features ); -+bool_t bli_cpuid_is_haswell( uint32_t family, uint32_t model, uint32_t features ); -+bool_t bli_cpuid_is_sandybridge( uint32_t family, uint32_t model, uint32_t features ); -+bool_t bli_cpuid_is_penryn( uint32_t family, uint32_t model, uint32_t features ); - - // AMD --bool_t bli_cpuid_is_zen( uint32_t family, uint32_t model, uint32_t features ); --bool_t bli_cpuid_is_excavator( uint32_t family, uint32_t model, uint32_t features ); --bool_t bli_cpuid_is_steamroller( uint32_t family, uint32_t model, uint32_t features ); --bool_t bli_cpuid_is_piledriver( uint32_t family, uint32_t model, uint32_t features ); --bool_t bli_cpuid_is_bulldozer( uint32_t family, uint32_t model, uint32_t features ); -+bool_t bli_cpuid_is_zen( uint32_t family, uint32_t model, uint32_t features ); -+bool_t bli_cpuid_is_excavator( uint32_t family, uint32_t model, uint32_t features ); -+bool_t bli_cpuid_is_steamroller( uint32_t family, uint32_t model, uint32_t features ); -+bool_t bli_cpuid_is_piledriver( uint32_t family, uint32_t model, uint32_t features ); -+bool_t bli_cpuid_is_bulldozer( uint32_t family, uint32_t model, uint32_t features ); - - // ARM --bool_t bli_cpuid_is_thunderx2( uint32_t model, uint32_t part, uint32_t features ); --bool_t bli_cpuid_is_cortexa57( uint32_t model, uint32_t part, uint32_t features ); --bool_t bli_cpuid_is_cortexa53( uint32_t model, uint32_t part, uint32_t features ); --bool_t bli_cpuid_is_cortexa15( uint32_t model, uint32_t part, uint32_t features ); --bool_t bli_cpuid_is_cortexa9( uint32_t model, uint32_t part, uint32_t features ); -+bool_t bli_cpuid_is_thunderx2( uint32_t model, uint32_t part, uint32_t features ); -+bool_t bli_cpuid_is_cortexa57( uint32_t model, uint32_t part, uint32_t features ); -+bool_t bli_cpuid_is_cortexa53( uint32_t model, uint32_t part, uint32_t features ); -+bool_t bli_cpuid_is_cortexa15( uint32_t model, uint32_t part, uint32_t features ); -+bool_t bli_cpuid_is_cortexa9( uint32_t model, uint32_t part, uint32_t features ); - - uint32_t bli_cpuid_query( uint32_t* family, uint32_t* model, uint32_t* features ); - -@@ -156,6 +156,8 @@ enum - - #elif defined(__aarch64__) || defined(__arm__) || defined(_M_ARM) - -+char* find_string_in( char* target, char* buffer, size_t buf_len, char* filepath ); -+ - enum - { - VENDOR_ARM = 0, --- -1.8.3.1 - diff --git a/blis-Use-funsafe-math-optimizations-and-ffp-contract-fast.patch b/blis-Use-funsafe-math-optimizations-and-ffp-contract-fast.patch deleted file mode 100644 index 73a1e90..0000000 --- a/blis-Use-funsafe-math-optimizations-and-ffp-contract-fast.patch +++ /dev/null @@ -1,537 +0,0 @@ -From 138d403b6bb15e687a3fe26d3d967b8ccd1ed97b Mon Sep 17 00:00:00 2001 -From: Devin Matthews -Date: Mon, 26 Aug 2019 18:11:27 -0500 -Subject: [PATCH] Use -funsafe-math-optimizations and -ffp-contract=fast for - all reference kernels when using gcc or clang. (#331) - ---- - config/amd64/make_defs.mk | 6 +++++- - config/arm32/make_defs.mk | 8 ++++++++ - config/arm64/make_defs.mk | 8 ++++++++ - config/bgq/make_defs.mk | 8 ++++++++ - config/bulldozer/make_defs.mk | 6 +++++- - config/cortexa15/make_defs.mk | 8 ++++++++ - config/cortexa53/make_defs.mk | 8 ++++++++ - config/cortexa57/make_defs.mk | 8 ++++++++ - config/cortexa9/make_defs.mk | 8 ++++++++ - config/excavator/make_defs.mk | 6 +++++- - config/generic/make_defs.mk | 6 +++++- - config/haswell/make_defs.mk | 6 +++++- - config/intel64/make_defs.mk | 6 +++++- - config/knc/make_defs.mk | 6 +++++- - config/knl/make_defs.mk | 4 ++-- - config/penryn/make_defs.mk | 6 +++++- - config/piledriver/make_defs.mk | 6 +++++- - config/power7/make_defs.mk | 8 ++++++++ - config/power9/make_defs.mk | 8 ++++++++ - config/sandybridge/make_defs.mk | 6 +++++- - config/skx/make_defs.mk | 4 ++-- - config/steamroller/make_defs.mk | 6 +++++- - config/thunderx2/make_defs.mk | 8 ++++++++ - config/x86_64/make_defs.mk | 6 +++++- - config/zen/make_defs.mk | 6 +++++- - 25 files changed, 149 insertions(+), 17 deletions(-) - -diff --git a/config/amd64/make_defs.mk b/config/amd64/make_defs.mk -index 70c0b69..df7cd20 100644 ---- a/config/amd64/make_defs.mk -+++ b/config/amd64/make_defs.mk -@@ -75,10 +75,14 @@ endif - # Flags specific to reference kernels. - CROPTFLAGS := $(CKOPTFLAGS) - ifeq ($(CC_VENDOR),gcc) --CRVECFLAGS := $(CKVECFLAGS) -+CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast -+else -+ifeq ($(CC_VENDOR),clang) -+CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast - else - CRVECFLAGS := $(CKVECFLAGS) - endif -+endif - - # Store all of the variables here to new variables containing the - # configuration name. -diff --git a/config/arm32/make_defs.mk b/config/arm32/make_defs.mk -index b592851..0b517a1 100644 ---- a/config/arm32/make_defs.mk -+++ b/config/arm32/make_defs.mk -@@ -70,7 +70,15 @@ endif - - # Flags specific to reference kernels. - CROPTFLAGS := $(CKOPTFLAGS) -+ifeq ($(CC_VENDOR),gcc) -+CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast -+else -+ifeq ($(CC_VENDOR),clang) -+CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast -+else - CRVECFLAGS := $(CKVECFLAGS) -+endif -+endif - - # Store all of the variables here to new variables containing the - # configuration name. -diff --git a/config/arm64/make_defs.mk b/config/arm64/make_defs.mk -index ac1cd69..5ffb081 100644 ---- a/config/arm64/make_defs.mk -+++ b/config/arm64/make_defs.mk -@@ -70,7 +70,15 @@ endif - - # Flags specific to reference kernels. - CROPTFLAGS := $(CKOPTFLAGS) -+ifeq ($(CC_VENDOR),gcc) -+CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast -+else -+ifeq ($(CC_VENDOR),clang) -+CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast -+else - CRVECFLAGS := $(CKVECFLAGS) -+endif -+endif - - # Store all of the variables here to new variables containing the - # configuration name. -diff --git a/config/bgq/make_defs.mk b/config/bgq/make_defs.mk -index a577a9a..97ea5a5 100644 ---- a/config/bgq/make_defs.mk -+++ b/config/bgq/make_defs.mk -@@ -79,7 +79,15 @@ endif - - # Flags specific to reference kernels. - CROPTFLAGS := $(CKOPTFLAGS) -+ifeq ($(CC_VENDOR),gcc) -+CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast -+else -+ifeq ($(CC_VENDOR),clang) -+CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast -+else - CRVECFLAGS := $(CKVECFLAGS) -+endif -+endif - - # Override the default value for LDFLAGS. - ifeq ($(CC_VENDOR),ibm) -diff --git a/config/bulldozer/make_defs.mk b/config/bulldozer/make_defs.mk -index dec89a4..8f71da3 100644 ---- a/config/bulldozer/make_defs.mk -+++ b/config/bulldozer/make_defs.mk -@@ -75,10 +75,14 @@ endif - # Flags specific to reference kernels. - CROPTFLAGS := $(CKOPTFLAGS) - ifeq ($(CC_VENDOR),gcc) --CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -+CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast -+else -+ifeq ($(CC_VENDOR),clang) -+CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast - else - CRVECFLAGS := $(CKVECFLAGS) - endif -+endif - - # Store all of the variables here to new variables containing the - # configuration name. -diff --git a/config/cortexa15/make_defs.mk b/config/cortexa15/make_defs.mk -index ee4d301..0cbf304 100644 ---- a/config/cortexa15/make_defs.mk -+++ b/config/cortexa15/make_defs.mk -@@ -70,7 +70,15 @@ endif - - # Flags specific to reference kernels. - CROPTFLAGS := $(CKOPTFLAGS) -+ifeq ($(CC_VENDOR),gcc) -+CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast -+else -+ifeq ($(CC_VENDOR),clang) -+CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast -+else - CRVECFLAGS := $(CKVECFLAGS) -+endif -+endif - - # Store all of the variables here to new variables containing the - # configuration name. -diff --git a/config/cortexa53/make_defs.mk b/config/cortexa53/make_defs.mk -index 9f723bc..3e116cd 100644 ---- a/config/cortexa53/make_defs.mk -+++ b/config/cortexa53/make_defs.mk -@@ -70,7 +70,15 @@ endif - - # Flags specific to reference kernels. - CROPTFLAGS := $(CKOPTFLAGS) -+ifeq ($(CC_VENDOR),gcc) -+CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast -+else -+ifeq ($(CC_VENDOR),clang) -+CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast -+else - CRVECFLAGS := $(CKVECFLAGS) -+endif -+endif - - # Store all of the variables here to new variables containing the - # configuration name. -diff --git a/config/cortexa57/make_defs.mk b/config/cortexa57/make_defs.mk -index 23bcf51..864872b 100644 ---- a/config/cortexa57/make_defs.mk -+++ b/config/cortexa57/make_defs.mk -@@ -70,7 +70,15 @@ endif - - # Flags specific to reference kernels. - CROPTFLAGS := $(CKOPTFLAGS) -+ifeq ($(CC_VENDOR),gcc) -+CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast -+else -+ifeq ($(CC_VENDOR),clang) -+CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast -+else - CRVECFLAGS := $(CKVECFLAGS) -+endif -+endif - - # Store all of the variables here to new variables containing the - # configuration name. -diff --git a/config/cortexa9/make_defs.mk b/config/cortexa9/make_defs.mk -index 2adc40e..310b75b 100644 ---- a/config/cortexa9/make_defs.mk -+++ b/config/cortexa9/make_defs.mk -@@ -70,7 +70,15 @@ endif - - # Flags specific to reference kernels. - CROPTFLAGS := $(CKOPTFLAGS) -+ifeq ($(CC_VENDOR),gcc) -+CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast -+else -+ifeq ($(CC_VENDOR),clang) -+CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast -+else - CRVECFLAGS := $(CKVECFLAGS) -+endif -+endif - - # Store all of the variables here to new variables containing the - # configuration name. -diff --git a/config/excavator/make_defs.mk b/config/excavator/make_defs.mk -index deb85c7..ed73d5d 100644 ---- a/config/excavator/make_defs.mk -+++ b/config/excavator/make_defs.mk -@@ -75,10 +75,14 @@ endif - # Flags specific to reference kernels. - CROPTFLAGS := $(CKOPTFLAGS) - ifeq ($(CC_VENDOR),gcc) --CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -+CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast -+else -+ifeq ($(CC_VENDOR),clang) -+CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast - else - CRVECFLAGS := $(CKVECFLAGS) - endif -+endif - - # Store all of the variables here to new variables containing the - # configuration name. -diff --git a/config/generic/make_defs.mk b/config/generic/make_defs.mk -index 3388291..7f934de 100644 ---- a/config/generic/make_defs.mk -+++ b/config/generic/make_defs.mk -@@ -79,10 +79,14 @@ endif - # Flags specific to reference kernels. - CROPTFLAGS := $(CKOPTFLAGS) - ifeq ($(CC_VENDOR),gcc) --CRVECFLAGS := $(CKVECFLAGS) -+CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast -+else -+ifeq ($(CC_VENDOR),clang) -+CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast - else - CRVECFLAGS := $(CKVECFLAGS) - endif -+endif - - # Store all of the variables here to new variables containing the - # configuration name. -diff --git a/config/haswell/make_defs.mk b/config/haswell/make_defs.mk -index 3f49f19..6752dde 100644 ---- a/config/haswell/make_defs.mk -+++ b/config/haswell/make_defs.mk -@@ -83,10 +83,14 @@ endif - # Flags specific to reference kernels. - CROPTFLAGS := $(CKOPTFLAGS) - ifeq ($(CC_VENDOR),gcc) --CRVECFLAGS := $(CKVECFLAGS) #-funsafe-math-optimizations -+CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast -+else -+ifeq ($(CC_VENDOR),clang) -+CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast - else - CRVECFLAGS := $(CKVECFLAGS) - endif -+endif - - # Store all of the variables here to new variables containing the - # configuration name. -diff --git a/config/intel64/make_defs.mk b/config/intel64/make_defs.mk -index af462fd..f74fb4d 100644 ---- a/config/intel64/make_defs.mk -+++ b/config/intel64/make_defs.mk -@@ -79,10 +79,14 @@ endif - # Flags specific to reference kernels. - CROPTFLAGS := $(CKOPTFLAGS) - ifeq ($(CC_VENDOR),gcc) --CRVECFLAGS := $(CKVECFLAGS) -+CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast -+else -+ifeq ($(CC_VENDOR),clang) -+CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast - else - CRVECFLAGS := $(CKVECFLAGS) - endif -+endif - - # Store all of the variables here to new variables containing the - # configuration name. -diff --git a/config/knc/make_defs.mk b/config/knc/make_defs.mk -index be3c901..d585219 100644 ---- a/config/knc/make_defs.mk -+++ b/config/knc/make_defs.mk -@@ -71,10 +71,14 @@ endif - # Flags specific to reference kernels. - CROPTFLAGS := $(CKOPTFLAGS) - ifeq ($(CC_VENDOR),gcc) --CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -+CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast -+else -+ifeq ($(CC_VENDOR),clang) -+CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast - else - CRVECFLAGS := $(CKVECFLAGS) - endif -+endif - - # Override the default value for LDFLAGS. - LDFLAGS := -mmic -diff --git a/config/knl/make_defs.mk b/config/knl/make_defs.mk -index b08cf1e..aa74df3 100644 ---- a/config/knl/make_defs.mk -+++ b/config/knl/make_defs.mk -@@ -99,13 +99,13 @@ endif - # Note: We use AVX2 for reference kernels instead of AVX-512. - CROPTFLAGS := $(CKOPTFLAGS) - ifeq ($(CC_VENDOR),gcc) --CRVECFLAGS := -march=knl -mno-avx512f -mno-avx512pf -mno-avx512er -mno-avx512cd -funsafe-math-optimizations -+CRVECFLAGS := -march=knl -mno-avx512f -mno-avx512pf -mno-avx512er -mno-avx512cd -funsafe-math-optimizations -ffp-contract=fast - else - ifeq ($(CC_VENDOR),icc) - CRVECFLAGS := -xMIC-AVX512 - else - ifeq ($(CC_VENDOR),clang) --CRVECFLAGS := -march=knl -mno-avx512f -mno-avx512pf -mno-avx512er -mno-avx512cd -+CRVECFLAGS := -march=knl -mno-avx512f -mno-avx512pf -mno-avx512er -mno-avx512cd -funsafe-math-optimizations -ffp-contract=fast - else - $(error gcc, icc, or clang is required for this configuration.) - endif -diff --git a/config/penryn/make_defs.mk b/config/penryn/make_defs.mk -index 41d2d93..573382e 100644 ---- a/config/penryn/make_defs.mk -+++ b/config/penryn/make_defs.mk -@@ -79,10 +79,14 @@ endif - # Flags specific to reference kernels. - CROPTFLAGS := $(CKOPTFLAGS) - ifeq ($(CC_VENDOR),gcc) --CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -+CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast -+else -+ifeq ($(CC_VENDOR),clang) -+CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast - else - CRVECFLAGS := $(CKVECFLAGS) - endif -+endif - - # Store all of the variables here to new variables containing the - # configuration name. -diff --git a/config/piledriver/make_defs.mk b/config/piledriver/make_defs.mk -index bb23fbe..8cf3ac5 100644 ---- a/config/piledriver/make_defs.mk -+++ b/config/piledriver/make_defs.mk -@@ -75,10 +75,14 @@ endif - # Flags specific to reference kernels. - CROPTFLAGS := $(CKOPTFLAGS) - ifeq ($(CC_VENDOR),gcc) --CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -+CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast -+else -+ifeq ($(CC_VENDOR),clang) -+CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast - else - CRVECFLAGS := $(CKVECFLAGS) - endif -+endif - - # Store all of the variables here to new variables containing the - # configuration name. -diff --git a/config/power7/make_defs.mk b/config/power7/make_defs.mk -index 18f111b..9633b4f 100644 ---- a/config/power7/make_defs.mk -+++ b/config/power7/make_defs.mk -@@ -70,7 +70,15 @@ endif - - # Flags specific to reference kernels. - CROPTFLAGS := $(CKOPTFLAGS) -+ifeq ($(CC_VENDOR),gcc) -+CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast -+else -+ifeq ($(CC_VENDOR),clang) -+CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast -+else - CRVECFLAGS := $(CKVECFLAGS) -+endif -+endif - - # Store all of the variables here to new variables containing the - # configuration name. -diff --git a/config/power9/make_defs.mk b/config/power9/make_defs.mk -index 3d66f60..b2c78b1 100644 ---- a/config/power9/make_defs.mk -+++ b/config/power9/make_defs.mk -@@ -70,7 +70,15 @@ endif - - # Flags specific to reference kernels. - CROPTFLAGS := $(CKOPTFLAGS) -+ifeq ($(CC_VENDOR),gcc) -+CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast -+else -+ifeq ($(CC_VENDOR),clang) -+CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast -+else - CRVECFLAGS := $(CKVECFLAGS) -+endif -+endif - - # Store all of the variables here to new variables containing the - # configuration name. -diff --git a/config/sandybridge/make_defs.mk b/config/sandybridge/make_defs.mk -index 285c4d5..896cb89 100644 ---- a/config/sandybridge/make_defs.mk -+++ b/config/sandybridge/make_defs.mk -@@ -83,10 +83,14 @@ endif - # Flags specific to reference kernels. - CROPTFLAGS := $(CKOPTFLAGS) - ifeq ($(CC_VENDOR),gcc) --CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -+CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast -+else -+ifeq ($(CC_VENDOR),clang) -+CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast - else - CRVECFLAGS := $(CKVECFLAGS) - endif -+endif - - # Store all of the variables here to new variables containing the - # configuration name. -diff --git a/config/skx/make_defs.mk b/config/skx/make_defs.mk -index 27bea5e..920b42d 100644 ---- a/config/skx/make_defs.mk -+++ b/config/skx/make_defs.mk -@@ -89,13 +89,13 @@ endif - # to overcome the AVX-512 frequency drop". (Issue #187) - CROPTFLAGS := $(CKOPTFLAGS) - ifeq ($(CC_VENDOR),gcc) --CRVECFLAGS := -march=skylake-avx512 -mno-avx512f -mno-avx512vl -mno-avx512bw -mno-avx512dq -mno-avx512cd -funsafe-math-optimizations -+CRVECFLAGS := -march=skylake-avx512 -mno-avx512f -mno-avx512vl -mno-avx512bw -mno-avx512dq -mno-avx512cd -funsafe-math-optimizations -ffp-contract=fast - else - ifeq ($(CC_VENDOR),icc) - CRVECFLAGS := -xCORE-AVX2 - else - ifeq ($(CC_VENDOR),clang) --CRVECFLAGS := -march=skylake-avx512 -mno-avx512f -mno-avx512vl -mno-avx512bw -mno-avx512dq -mno-avx512cd -+CRVECFLAGS := -march=skylake-avx512 -mno-avx512f -mno-avx512vl -mno-avx512bw -mno-avx512dq -mno-avx512cd -funsafe-math-optimizations -ffp-contract=fast - else - $(error gcc, icc, or clang is required for this configuration.) - endif -diff --git a/config/steamroller/make_defs.mk b/config/steamroller/make_defs.mk -index a5b6707..89c7689 100644 ---- a/config/steamroller/make_defs.mk -+++ b/config/steamroller/make_defs.mk -@@ -75,10 +75,14 @@ endif - # Flags specific to reference kernels. - CROPTFLAGS := $(CKOPTFLAGS) - ifeq ($(CC_VENDOR),gcc) --CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -+CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast -+else -+ifeq ($(CC_VENDOR),clang) -+CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast - else - CRVECFLAGS := $(CKVECFLAGS) - endif -+endif - - # Store all of the variables here to new variables containing the - # configuration name. -diff --git a/config/thunderx2/make_defs.mk b/config/thunderx2/make_defs.mk -index 3227fe2..820919d 100644 ---- a/config/thunderx2/make_defs.mk -+++ b/config/thunderx2/make_defs.mk -@@ -70,7 +70,15 @@ endif - - # Flags specific to reference kernels. - CROPTFLAGS := $(CKOPTFLAGS) -+ifeq ($(CC_VENDOR),gcc) -+CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast -+else -+ifeq ($(CC_VENDOR),clang) -+CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast -+else - CRVECFLAGS := $(CKVECFLAGS) -+endif -+endif - - # Store all of the variables here to new variables containing the - # configuration name. -diff --git a/config/x86_64/make_defs.mk b/config/x86_64/make_defs.mk -index 4d038ff..520cd42 100644 ---- a/config/x86_64/make_defs.mk -+++ b/config/x86_64/make_defs.mk -@@ -79,10 +79,14 @@ endif - # Flags specific to reference kernels. - CROPTFLAGS := $(CKOPTFLAGS) - ifeq ($(CC_VENDOR),gcc) --CRVECFLAGS := $(CKVECFLAGS) -+CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast -+else -+ifeq ($(CC_VENDOR),clang) -+CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast - else - CRVECFLAGS := $(CKVECFLAGS) - endif -+endif - - # Store all of the variables here to new variables containing the - # configuration name. -diff --git a/config/zen/make_defs.mk b/config/zen/make_defs.mk -index f0752e9..1b9db53 100644 ---- a/config/zen/make_defs.mk -+++ b/config/zen/make_defs.mk -@@ -83,10 +83,14 @@ endif - # Flags specific to reference kernels. - CROPTFLAGS := $(CKOPTFLAGS) - ifeq ($(CC_VENDOR),gcc) --CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -+CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast -+else -+ifeq ($(CC_VENDOR),clang) -+CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast - else - CRVECFLAGS := $(CKVECFLAGS) - endif -+endif - - # Store all of the variables here to new variables containing the - # configuration name. --- -1.8.3.1 - diff --git a/blis-s390x.patch b/blis-s390x.patch index 73c5136..a9f599c 100644 --- a/blis-s390x.patch +++ b/blis-s390x.patch @@ -7,9 +7,9 @@ -#if defined(_M_X64) || defined(__x86_64) || defined(__aarch64__) || \ - defined(_ARCH_PPC64) +// Current 64-bit Unix systems probably define _LP64; there are -+// potentially better macros for GCC. ++// potentially better macros for GCC. glibc has __WORDSIZE. +#if _LP64 || defined(_M_X64) || defined(__x86_64) || defined(__aarch64__) || \ -+ defined(_ARCH_PPC64) || defined(__s390x__) ++ defined(_ARCH_PPC64) || defined(__s390x__) || __WORDSIZE == 64 #define BLIS_ARCH_64 #else #define BLIS_ARCH_32