git: 73b83478c14b - main - biology/hyphy: Fix build with clang 13.0.0

From: Joseph Mingrone <jrm_at_FreeBSD.org>
Date: Mon, 21 Feb 2022 20:13:50 UTC
The branch main has been updated by jrm:

URL: https://cgit.FreeBSD.org/ports/commit/?id=73b83478c14b99126db25691ed42c50a36f9c005

commit 73b83478c14b99126db25691ed42c50a36f9c005
Author:     Joseph Mingrone <jrm@FreeBSD.org>
AuthorDate: 2022-02-21 19:57:10 +0000
Commit:     Joseph Mingrone <jrm@FreeBSD.org>
CommitDate: 2022-02-21 20:13:16 +0000

    biology/hyphy: Fix build with clang 13.0.0
    
    clang 13 complains about duplicate loop unrolling pragmas for clang and
    GCC.  According to [1], clang supports #pragma GCC unroll with identical
    semantics to #pragma clang loop unroll, so remove instances of #pragma
    clang loop unroll and #pragma unroll when they appear with #pragma GCC
    unroll for the same loop.
    
    [1]
    https://clang.llvm.org/docs/AttributeReference.html#pragma-unroll-pragma-nounroll
    
    Sponsored by:   The FreeBSD Foundation
---
 .../patch-src_core_include_function__templates.h   |  10 ++
 biology/hyphy/files/patch-src_core_matrix.cpp      | 106 ++++++++++++++
 .../hyphy/files/patch-src_core_tree__evaluator.cpp | 154 +++++++++++++++++++++
 3 files changed, 270 insertions(+)

diff --git a/biology/hyphy/files/patch-src_core_include_function__templates.h b/biology/hyphy/files/patch-src_core_include_function__templates.h
new file mode 100644
index 000000000000..7ca06b1cc56a
--- /dev/null
+++ b/biology/hyphy/files/patch-src_core_include_function__templates.h
@@ -0,0 +1,10 @@
+--- src/core/include/function_templates.h.orig	2022-02-21 19:44:45 UTC
++++ src/core/include/function_templates.h
+@@ -156,7 +156,6 @@ void ArrayForEach(ARG_TYPE *array, unsigned long dimen
+ template <typename ARG_TYPE>
+ void InitializeArray(ARG_TYPE *array, unsigned long dimension,
+                      ARG_TYPE &&value) {
+-  #pragma clang loop unroll_count(8)
+   #pragma GCC unroll 4
+   for (unsigned long i = 0UL; i < dimension; i++) {
+     array[i] = value;
diff --git a/biology/hyphy/files/patch-src_core_matrix.cpp b/biology/hyphy/files/patch-src_core_matrix.cpp
new file mode 100644
index 000000000000..0d5720b2751b
--- /dev/null
+++ b/biology/hyphy/files/patch-src_core_matrix.cpp
@@ -0,0 +1,106 @@
+--- src/core/matrix.cpp.orig	2022-02-21 19:46:34 UTC
++++ src/core/matrix.cpp
+@@ -3391,7 +3391,6 @@ void    _Matrix::AddMatrix  (_Matrix& storage, _Matrix
+         #pragma GCC unroll 4
+         #pragma clang loop vectorize(enable)
+         #pragma clang loop interleave(enable)
+-        #pragma clang loop unroll(enable)
+         #pragma GCC ivdep
+         #pragma ivdep
+                for (long idx = 0; idx < upto; idx+=16) {
+@@ -3410,7 +3409,6 @@ void    _Matrix::AddMatrix  (_Matrix& storage, _Matrix
+         #pragma GCC unroll 4
+         #pragma clang loop vectorize(enable)
+         #pragma clang loop interleave(enable)
+-        #pragma clang loop unroll(enable)
+         #pragma GCC ivdep
+         #pragma ivdep
+                for (long idx = 0; idx < upto; idx+=8) {
+@@ -3440,7 +3438,6 @@ void    _Matrix::AddMatrix  (_Matrix& storage, _Matrix
+             #pragma GCC unroll 4
+             #pragma clang loop vectorize(enable)
+             #pragma clang loop interleave(enable)
+-            #pragma clang loop unroll(enable)
+                  for (long idx = 0; idx < upto; idx+=16) {
+                      CELL_OP (idx);
+                      CELL_OP (idx+4);
+@@ -3454,7 +3451,6 @@ void    _Matrix::AddMatrix  (_Matrix& storage, _Matrix
+         #pragma GCC unroll 4
+         #pragma clang loop vectorize(enable)
+         #pragma clang loop interleave(enable)
+-        #pragma clang loop unroll(enable)
+         #pragma GCC ivdep
+         #pragma ivdep
+                for (long idx = 0; idx < upto; idx+=8) {
+@@ -3895,7 +3891,6 @@ void    _Matrix::Multiply  (_Matrix& storage, _Matrix 
+                                  #pragma GCC unroll 4
+                                  #pragma clang loop vectorize(enable)
+                                  #pragma clang loop interleave(enable)
+-                                 #pragma clang loop unroll(enable)
+                                  for (long k = 0; k < vDim; k+=4) {
+                                       __m256d D4, B4;
+                                       DO_GROUP_OP (D4, B4, k);
+@@ -3946,7 +3941,6 @@ void    _Matrix::Multiply  (_Matrix& storage, _Matrix 
+                                   #pragma GCC unroll 4
+                                   #pragma clang loop vectorize(enable)
+                                   #pragma clang loop interleave(enable)
+-                                  #pragma clang loop unroll(enable)
+                                   for (long k = 0; k < vDim; k+=2) {
+                                       __m128d D4, B4;
+                                       DO_GROUP_OP1 (D4, B4, k);
+@@ -3998,7 +3992,6 @@ void    _Matrix::Multiply  (_Matrix& storage, _Matrix 
+                             #pragma GCC unroll 4
+                             #pragma clang loop vectorize(enable)
+                             #pragma clang loop interleave(enable)
+-                            #pragma clang loop unroll(enable)
+                             for (long k = 0; k < vDim; k+=2) {
+                                 float64x2_t D4, B4;
+                                 DO_GROUP_OP1 (D4, B4, k);
+@@ -4163,7 +4156,6 @@ void    _Matrix::Multiply  (_Matrix& storage, _Matrix 
+                             #pragma GCC unroll 4
+                             #pragma clang loop vectorize(enable)
+                             #pragma clang loop interleave(enable)
+-                            #pragma clang loop unroll(enable)
+                               for (long k = 0; k < dimm4; k+=4) {
+                                   __m256d D4, B4;
+                                   DO_GROUP_OP (D4, B4, k);
+@@ -4271,7 +4263,6 @@ void    _Matrix::Multiply  (_Matrix& storage, _Matrix 
+                                         #pragma GCC unroll 4
+                                         #pragma clang loop vectorize(enable)
+                                         #pragma clang loop interleave(enable)
+-                                        #pragma clang loop unroll(enable)
+                                         for (long k = 0; k < dimm4; k+=2) {
+                                             __m128d D4, B4;
+                                             DO_GROUP_OP1 (D4, B4, k);
+@@ -4380,7 +4371,6 @@ void    _Matrix::Multiply  (_Matrix& storage, _Matrix 
+                                   #pragma GCC unroll 4
+                                   #pragma clang loop vectorize(enable)
+                                   #pragma clang loop interleave(enable)
+-                                  #pragma clang loop unroll(enable)
+                                   for (long k = 0; k < dimm4; k+=2) {
+                                       float64x2_t D4, B4;
+                                       DO_GROUP_OP1 (D4, B4, k);
+@@ -4441,7 +4431,6 @@ void    _Matrix::Multiply  (_Matrix& storage, _Matrix 
+                         #pragma GCC unroll 8
+                         #pragma clang loop vectorize(enable)
+                         #pragma clang loop interleave(enable)
+-                        #pragma clang loop unroll(enable)
+                         for (long k = 0, column = j*hDim; k < vDim; k++, column ++) {
+                             resCell += row[k] * secondArg.theData [column];
+                         }
+@@ -5822,7 +5811,6 @@ _Matrix*    _Matrix::Exponentiate (hyFloat scale_to, b
+                     #pragma GCC unroll 4
+                     #pragma clang loop vectorize(enable)
+                     #pragma clang loop interleave(enable)
+-                    #pragma clang loop unroll(enable)
+                     for (long c = from; c < compressedIndex[r]; c++, i++) {
+                         theIndex[i] = compressedIndex[c+hDim] * vDim + r;
+                     }
+@@ -6988,7 +6976,6 @@ hyFloat        _Matrix::Sqr (hyFloat* _hprestrict_ sta
+                         #pragma GCC unroll 4
+                         #pragma clang loop vectorize(enable)
+                         #pragma clang loop interleave(enable)
+-                        #pragma clang loop unroll(enable)
+                         #pragma GCC ivdep
+                         #pragma ivdep
+                         for (long k = 0; k < loopBound; k+=4) {
diff --git a/biology/hyphy/files/patch-src_core_tree__evaluator.cpp b/biology/hyphy/files/patch-src_core_tree__evaluator.cpp
new file mode 100644
index 000000000000..3adcafaf5866
--- /dev/null
+++ b/biology/hyphy/files/patch-src_core_tree__evaluator.cpp
@@ -0,0 +1,154 @@
+--- src/core/tree_evaluator.cpp.orig	2022-02-21 19:48:41 UTC
++++ src/core/tree_evaluator.cpp
+@@ -127,7 +127,6 @@ inline double _sse_sum_2 (__m128d const & x) {
+ template<long D> inline void __ll_handle_matrix_transpose (hyFloat const * __restrict transitionMatrix, hyFloat * __restrict tMatrixT) {
+     long i = 0L;
+     for (long r = 0L; r < D; r++) {
+-        #pragma unroll(4)
+         #pragma GCC unroll 4
+         for (long c = 0L; c < D; c++, i++) {
+             tMatrixT[c*D+r] = transitionMatrix[i];
+@@ -154,7 +153,6 @@ template<long D> inline bool __ll_handle_conditional_a
+                 }
+             }*/
+             
+-            #pragma unroll(4)
+             #pragma GCC unroll 4
+             for (long k = 0L; k < D; k++) {
+                 parentConditionals[k] *= tMatrix[siteState+D*k];
+@@ -172,7 +170,6 @@ template<long D> inline bool __ll_handle_conditional_a
+     } else {
+         if (tcc) {
+             if (__builtin_expect((tcc->list_data[currentTCCIndex] & bitMaskArray.masks[currentTCCBit]) > 0 && siteID > siteFrom,0)) {
+-                #pragma unroll(4)
+                 #pragma GCC unroll 4
+                 for (long k = 0L; k < D; k++) {
+                     childVector[k] = lastUpdatedSite[k];
+@@ -200,7 +197,6 @@ inline bool __ll_handle_conditional_array_initializati
+         }
+         if (__builtin_expect(siteState >= 0L,1)) {
+             // a single character state; sweep down the appropriate column
+-            #pragma unroll(4)
+             #pragma GCC unroll 4
+             for (long k = 0L; k < D; k++) {
+                 parentConditionals[k] *= tMatrix[siteState+D*k];
+@@ -212,7 +208,6 @@ inline bool __ll_handle_conditional_array_initializati
+     } else {
+         if (tcc) {
+             if (__builtin_expect((tcc->list_data[currentTCCIndex] & bitMaskArray.masks[currentTCCBit]) > 0 && siteID > siteFrom,0)) {
+-                #pragma unroll(4)
+                 #pragma GCC unroll 4
+                 for (long k = 0L; k < D; k++) {
+                     childVector[k] = lastUpdatedSite[k];
+@@ -542,7 +537,6 @@ template<long D> inline void __ll_product_sum_loop (hy
+         #pragma GCC unroll 8
+         #pragma clang loop vectorize(enable)
+         #pragma clang loop interleave(enable)
+-        #pragma clang loop unroll(enable)
+         for (long c = 0; c < D; c++)
+             accumulator +=  tMatrix[c]   * childVector[c];
+         
+@@ -558,7 +552,6 @@ inline void __ll_product_sum_loop_generic (hyFloat con
+         #pragma GCC unroll 8
+         #pragma clang loop vectorize(enable)
+         #pragma clang loop interleave(enable)
+-        #pragma clang loop unroll(enable)
+         for (long c = 0; c < D; c++)
+             accumulator +=  tMatrix[c]   * childVector[c];
+         
+@@ -595,7 +588,6 @@ template<long D, bool ADJUST> inline void __ll_loop_ha
+             fprintf (stderr, "UP %ld (%ld) %lg\n", didScale, parentCode, scaler);
+         }*/
+         if (didScale) {
+-            #pragma unroll(4)
+             #pragma GCC unroll 4
+             for (long c = 0; c < D; c++) {
+                 parentConditionals [c] *= scaler;
+@@ -626,7 +618,6 @@ template<long D, bool ADJUST> inline void __ll_loop_ha
+                 }*/
+                 
+                 if (didScale) {
+-                    #pragma unroll(4)
+                     #pragma GCC unroll 4
+                     for (long c = 0; c < D; c++) {
+                         parentConditionals [c] *= scaler;
+@@ -657,7 +648,6 @@ template<bool ADJUST> inline void __ll_loop_handle_sca
+         hyFloat scaler = _computeBoostScaler(scalingAdjustments [parentCode*siteCount + siteID] * _lfScalerUpwards, sum, didScale);
+         
+         if (didScale) {
+-            #pragma unroll(8)
+             #pragma GCC unroll 8
+             for (long c = 0; c < D; c++) {
+                 parentConditionals [c] *= scaler;
+@@ -679,7 +669,6 @@ template<bool ADJUST> inline void __ll_loop_handle_sca
+                 hyFloat scaler = _computeReductionScaler (scalingAdjustments [parentCode*siteCount + siteID] * _lfScalingFactorThreshold, sum, didScale);
+                 
+                 if (didScale) {
+-                    #pragma unroll(8)
+                     #pragma GCC unroll 8
+                     for (long c = 0; c < D; c++) {
+                          parentConditionals [c] *= scaler;
+@@ -707,7 +696,6 @@ template<long D> inline void __ll_loop_handle_leaf_cas
+     } else {
+         for (long k = siteFrom; k < siteTo; k++, pp += D) {
+             hyFloat lsf = localScalingFactor[k];
+-#pragma unroll(4)
+ #pragma GCC unroll 4
+             for (long s = 0; s < D; s++) {
+                 pp[s] = lsf;
+@@ -1585,7 +1573,6 @@ hyFloat      _TheTree::ComputeTreeBlockByBranch  (    
+             accumulator         = rootConditionals[rootIndex + rootState] * theProbs[rootState];
+             rootIndex           += alphabetDimension;
+         } else {
+-            #pragma unroll(4)
+             #pragma GCC unroll 4
+             for (long p = 0; p < alphabetDimension; p++,rootIndex++) {
+                 accumulator += rootConditionals[rootIndex] * theProbs[p];
+@@ -1644,7 +1631,6 @@ template<long D> inline bool __lcache_loop_preface (bo
+         long siteState = lNodeFlags[nodeCode*siteCount + siteOrdering.list_data[siteID]] ;
+         if (siteState >= 0L) {
+             unsigned long target_index = siteState;
+-            #pragma unroll(4)
+             #pragma GCC unroll 4
+             for (long k = 0L; k < D; k++, target_index+=D) {
+                 parentConditionals[k]   *= tMatrix[target_index];
+@@ -1659,7 +1645,6 @@ template<long D> inline bool __lcache_loop_preface (bo
+             if ((tcc->list_data[currentTCCIndex] & bitMaskArray.masks[currentTCCBit]) > 0 && siteID > siteFrom)
+                 // the value of this conditional vector needs to be copied from a previously stored site
+                 // subtree duplication
+-                #pragma unroll(4)
+                 #pragma GCC unroll 4
+                 for (long k = 0UL; k < D; k++) {
+                     childVector[k] = lastUpdatedSite[k];
+@@ -1688,7 +1673,6 @@ inline bool __lcache_loop_preface_generic (bool isLeaf
+         long siteState = lNodeFlags[nodeCode*siteCount + siteOrdering.list_data[siteID]] ;
+         if (siteState >= 0L) {
+             unsigned long target_index = siteState;
+-            #pragma unroll(4)
+             #pragma GCC unroll 4
+             for (long k = 0L; k < D; k++, target_index+=D) {
+                 parentConditionals[k]   *= tMatrix[target_index];
+@@ -1704,7 +1688,6 @@ inline bool __lcache_loop_preface_generic (bool isLeaf
+             if ((tcc->list_data[currentTCCIndex] & bitMaskArray.masks[currentTCCBit]) > 0 && siteID > siteFrom)
+                 // the value of this conditional vector needs to be copied from a previously stored site
+                 // subtree duplication
+-                #pragma unroll(4)
+                 #pragma GCC unroll 4
+                 for (long k = 0UL; k < D; k++) {
+                     childVector[k] = lastUpdatedSite[k];
+@@ -1966,7 +1949,6 @@ void            _TheTree::ComputeBranchCache    (
+                 unsigned long k3     = 0UL;
+                 for (unsigned long k = siteFrom; k < siteTo; k++) {
+                     hyFloat scaler = localScalingFactor[k];
+-                    #pragma unroll(4)
+                     #pragma GCC unroll 4
+                     for (unsigned long k2 = 0UL; k2 < alphabetDimension; k2++, k3++) {
+                         parentConditionals [k3] = scaler;
+@@ -2474,7 +2456,6 @@ void            _TheTree::ComputeBranchCache    (
+                     #pragma GCC unroll 8
+                     #pragma clang loop vectorize(enable)
+                     #pragma clang loop interleave(enable)
+-                    #pragma clang loop unroll(enable)
+                     for (long k = 0; k < alphabetDimension; k++) {
+                         sum += parentConditionals[k];
+                     }