8 files changed, 217 insertions, 203 deletions
diff --git a/clang/lib/Headers/__wmmintrin_pclmul.h b/clang/lib/Headers/__wmmintrin_pclmul.h
index e94cc2a4df9..e9c6a9f6d41 100644
--- a/clang/lib/Headers/__wmmintrin_pclmul.h
+++ b/clang/lib/Headers/__wmmintrin_pclmul.h
@@ -42,12 +42,11 @@
 ///    A 128-bit vector of [2 x i64] containing one of the source operands.
 /// \param __I
 ///    An immediate value specifying which 64-bit values to select from the
-///    operands.
-///    Bit 0 is used to select a value from operand \a __X, and bit 4 is used
-///    to select a value from operand \a __Y:
-///    Bit[0]=0 indicates that bits[63:0] of operand \a __X are used.
-///    Bit[0]=1 indicates that bits[127:64] of operand \a __X are used.
-///    Bit[4]=0 indicates that bits[63:0] of operand \a __Y are used.
+///    operands. Bit 0 is used to select a value from operand \a __X, and bit
+///    4 is used to select a value from operand \a __Y: \n
+///    Bit[0]=0 indicates that bits[63:0] of operand \a __X are used. \n
+///    Bit[0]=1 indicates that bits[127:64] of operand \a __X are used. \n
+///    Bit[4]=0 indicates that bits[63:0] of operand \a __Y are used. \n
 ///    Bit[4]=1 indicates that bits[127:64] of operand \a __Y are used.
 /// \returns The 128-bit integer vector containing the result of the carry-less
 ///    multiplication of the selected 64-bit values.
diff --git a/clang/lib/Headers/bmiintrin.h b/clang/lib/Headers/bmiintrin.h
index 650ee04e016..488eb2dbd3d 100644
--- a/clang/lib/Headers/bmiintrin.h
+++ b/clang/lib/Headers/bmiintrin.h
@@ -295,8 +295,8 @@ __tzcnt_u32(unsigned int __X)
 ///
 /// \param __X
 ///    An unsigned 32-bit integer whose trailing zeros are to be counted.
-/// \returns An 32-bit integer containing the number of trailing zero
-///    bits in the operand.
+/// \returns An 32-bit integer containing the number of trailing zero bits in
+///    the operand.
 static __inline__ int __RELAXED_FN_ATTRS
 _mm_tzcnt_32(unsigned int __X)
 {
@@ -532,8 +532,8 @@ __tzcnt_u64(unsigned long long __X)
 ///
 /// \param __X
 ///    An unsigned 64-bit integer whose trailing zeros are to be counted.
-/// \returns An 64-bit integer containing the number of trailing zero
-///    bits in the operand.
+/// \returns An 64-bit integer containing the number of trailing zero bits in
+///    the operand.
 static __inline__ long long __RELAXED_FN_ATTRS
 _mm_tzcnt_64(unsigned long long __X)
 {
diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h
index 1830254109d..1512f9f0b47 100644
--- a/clang/lib/Headers/emmintrin.h
+++ b/clang/lib/Headers/emmintrin.h
@@ -551,7 +551,8 @@ _mm_cmpord_pd(__m128d __a, __m128d __b)
 ///
 /// \headerfile <x86intrin.h>
 ///
-/// This intrinsic corresponds to the <c> VCMPUNORDPD / CMPUNORDPD </c> instruction.
+/// This intrinsic corresponds to the <c> VCMPUNORDPD / CMPUNORDPD </c>
+///   instruction.
 ///
 /// \param __a
 ///    A 128-bit vector of [2 x double].
@@ -734,10 +735,10 @@ _mm_cmple_sd(__m128d __a, __m128d __b)
   return (__m128d)__builtin_ia32_cmplesd((__v2df)__a, (__v2df)__b);
 }
 
-/// \brief Compares the lower double-precision floating-point values in each
-///    of the two 128-bit floating-point vectors of [2 x double] to determine
-///    if the value in the first parameter is greater than the corresponding
-///    value in the second parameter. The comparison yields 0h for false,
+/// \brief Compares the lower double-precision floating-point values in each of
+///    the two 128-bit floating-point vectors of [2 x double] to determine if
+///    the value in the first parameter is greater than the corresponding value
+///    in the second parameter. The comparison yields 0h for false,
 ///    FFFFFFFFFFFFFFFFh for true.
 ///
 /// \headerfile <x86intrin.h>
@@ -784,9 +785,9 @@ _mm_cmpge_sd(__m128d __a, __m128d __b)
   return (__m128d) { __c[0], __a[1] };
 }
 
-/// \brief Compares the lower double-precision floating-point values in each
-///    of the two 128-bit floating-point vectors of [2 x double] to determine
-///    if the value in the first parameter is "ordered" with respect to the
+/// \brief Compares the lower double-precision floating-point values in each of
+///    the two 128-bit floating-point vectors of [2 x double] to determine if
+///    the value in the first parameter is "ordered" with respect to the
 ///    corresponding value in the second parameter. The comparison yields 0h for
 ///    false, FFFFFFFFFFFFFFFFh for true. A pair of double-precision values are
 ///    "ordered" with respect to each other if neither value is a NaN.
@@ -809,16 +810,17 @@ _mm_cmpord_sd(__m128d __a, __m128d __b)
   return (__m128d)__builtin_ia32_cmpordsd((__v2df)__a, (__v2df)__b);
 }
 
-/// \brief Compares the lower double-precision floating-point values in each
-///    of the two 128-bit floating-point vectors of [2 x double] to determine
-///    if the value in the first parameter is "unordered" with respect to the
+/// \brief Compares the lower double-precision floating-point values in each of
+///    the two 128-bit floating-point vectors of [2 x double] to determine if
+///    the value in the first parameter is "unordered" with respect to the
 ///    corresponding value in the second parameter. The comparison yields 0h
 ///    for false, FFFFFFFFFFFFFFFFh for true. A pair of double-precision values
 ///    are "unordered" with respect to each other if one or both values are NaN.
 ///
 /// \headerfile <x86intrin.h>
 ///
-/// This intrinsic corresponds to the <c> VCMPUNORDSD / CMPUNORDSD </c> instruction.
+/// This intrinsic corresponds to the <c> VCMPUNORDSD / CMPUNORDSD </c>
+///   instruction.
 ///
 /// \param __a
 ///    A 128-bit vector of [2 x double]. The lower double-precision value is
@@ -1115,8 +1117,8 @@ _mm_ucomieq_sd(__m128d __a, __m128d __b)
 /// \brief Compares the lower double-precision floating-point values in each of
 ///    the two 128-bit floating-point vectors of [2 x double] to determine if
 ///    the value in the first parameter is less than the corresponding value in
-///    the second parameter. The comparison yields 0 for false, 1 for true.
-///    If either of the two lower double-precision values is NaN, 1 is returned.
+///    the second parameter. The comparison yields 0 for false, 1 for true. If
+///    either of the two lower double-precision values is NaN, 1 is returned.
 ///
 /// \headerfile <x86intrin.h>
 ///
@@ -1414,7 +1416,8 @@ _mm_cvtss_sd(__m128d __a, __m128 __b)
 ///
 /// \headerfile <x86intrin.h>
 ///
-/// This intrinsic corresponds to the <c> VCVTTPD2DQ / CVTTPD2DQ </c> instruction.
+/// This intrinsic corresponds to the <c> VCVTTPD2DQ / CVTTPD2DQ </c>
+///   instruction.
 ///
 /// \param __a
 ///    A 128-bit vector of [2 x double].
@@ -1431,7 +1434,8 @@ _mm_cvttpd_epi32(__m128d __a)
 ///
 /// \headerfile <x86intrin.h>
 ///
-/// This intrinsic corresponds to the <c> VCVTTSD2SI / CVTTSD2SI </c> instruction.
+/// This intrinsic corresponds to the <c> VCVTTSD2SI / CVTTSD2SI </c>
+///   instruction.
 ///
 /// \param __a
 ///    A 128-bit vector of [2 x double]. The lower 64 bits are used in the
@@ -1559,9 +1563,9 @@ _mm_load1_pd(double const *__dp)
 ///
 /// \headerfile <x86intrin.h>
 ///
-/// This intrinsic corresponds to the <c> VMOVAPD / MOVAPD </c> instruction + needed
-/// shuffling instructions. In AVX mode, the shuffling may be combined with the
-/// \c VMOVAPD, resulting in only a \c VPERMILPD instruction.
+/// This intrinsic corresponds to the <c> VMOVAPD / MOVAPD </c> instruction +
+/// needed shuffling instructions. In AVX mode, the shuffling may be combined
+/// with the \c VMOVAPD, resulting in only a \c VPERMILPD instruction.
 ///
 /// \param __dp
 ///    A 16-byte aligned pointer to an array of double-precision values to be
@@ -1624,7 +1628,7 @@ _mm_load_sd(double const *__dp)
 /// This intrinsic corresponds to the <c> VMOVHPD / MOVHPD </c> instruction.
 ///
 /// \param __a
-///    A 128-bit vector of [2 x double].
+///    A 128-bit vector of [2 x double]. \n
 ///    Bits [63:0] are written to bits [63:0] of the result.
 /// \param __dp
 ///    A pointer to a 64-bit memory location containing a double-precision
@@ -1651,7 +1655,7 @@ _mm_loadh_pd(__m128d __a, double const *__dp)
 /// This intrinsic corresponds to the <c> VMOVLPD / MOVLPD </c> instruction.
 ///
 /// \param __a
-///    A 128-bit vector of [2 x double].
+///    A 128-bit vector of [2 x double]. \n
 ///    Bits [127:64] are written to bits [127:64] of the result.
 /// \param __dp
 ///    A pointer to a 64-bit memory location containing a double-precision
@@ -2645,8 +2649,8 @@ _mm_xor_si128(__m128i __a, __m128i __b)
 /// \param a
 ///    A 128-bit integer vector containing the source operand.
 /// \param imm
-///    An immediate value specifying the number of bytes to left-shift
-///    operand \a a.
+///    An immediate value specifying the number of bytes to left-shift operand
+///    \a a.
 /// \returns A 128-bit integer vector containing the left-shifted value.
 #define _mm_slli_si128(a, imm) __extension__ ({                              \
   (__m128i)__builtin_shufflevector(                                          \
@@ -3247,7 +3251,8 @@ _mm_cvtsd_si64(__m128d __a)
 ///
 /// \headerfile <x86intrin.h>
 ///
-/// This intrinsic corresponds to the <c> VCVTTSD2SI / CVTTSD2SI </c> instruction.
+/// This intrinsic corresponds to the <c> VCVTTSD2SI / CVTTSD2SI </c>
+///   instruction.
 ///
 /// \param __a
 ///    A 128-bit vector of [2 x double]. The lower 64 bits are used in the
@@ -3296,7 +3301,8 @@ _mm_cvtps_epi32(__m128 __a)
 ///
 /// \headerfile <x86intrin.h>
 ///
-/// This intrinsic corresponds to the <c> VCVTTPS2DQ / CVTTPS2DQ </c> instruction.
+/// This intrinsic corresponds to the <c> VCVTTPS2DQ / CVTTPS2DQ </c>
+///   instruction.
 ///
 /// \param __a
 ///    A 128-bit vector of [4 x float].
@@ -3709,7 +3715,8 @@ _mm_set1_epi8(char __b)
 ///
 /// \headerfile <x86intrin.h>
 ///
-/// This intrinsic corresponds to the <c> VPUNPCKLQDQ / PUNPCKLQDQ </c> instruction.
+/// This intrinsic corresponds to the <c> VPUNPCKLQDQ / PUNPCKLQDQ </c>
+///   instruction.
 ///
 /// \param __q0
 ///    A 64-bit integral value used to initialize the lower 64 bits of the
@@ -3885,7 +3892,8 @@ _mm_storeu_si128(__m128i *__p, __m128i __b)
 ///
 /// \headerfile <x86intrin.h>
 ///
-/// This intrinsic corresponds to the <c> VMASKMOVDQU / MASKMOVDQU </c> instruction.
+/// This intrinsic corresponds to the <c> VMASKMOVDQU / MASKMOVDQU </c>
+///   instruction.
 ///
 /// \param __d
 ///    A 128-bit integer vector containing the values to be moved.
@@ -4134,14 +4142,14 @@ _mm_packus_epi16(__m128i __a, __m128i __b)
 ///    A 128-bit integer vector.
 /// \param __imm
 ///    An immediate value. Bits [3:0] selects values from \a __a to be assigned
-///    to bits[15:0] of the result.
-///    000: assign values from bits [15:0] of \a __a.
-///    001: assign values from bits [31:16] of \a __a.
-///    010: assign values from bits [47:32] of \a __a.
-///    011: assign values from bits [63:48] of \a __a.
-///    100: assign values from bits [79:64] of \a __a.
-///    101: assign values from bits [95:80] of \a __a.
-///    110: assign values from bits [111:96] of \a __a.
+///    to bits[15:0] of the result. \n
+///    000: assign values from bits [15:0] of \a __a. \n
+///    001: assign values from bits [31:16] of \a __a. \n
+///    010: assign values from bits [47:32] of \a __a. \n
+///    011: assign values from bits [63:48] of \a __a. \n
+///    100: assign values from bits [79:64] of \a __a. \n
+///    101: assign values from bits [95:80] of \a __a. \n
+///    110: assign values from bits [111:96] of \a __a. \n
 ///    111: assign values from bits [127:112] of \a __a.
 /// \returns An integer, whose lower 16 bits are selected from the 128-bit
 ///    integer vector parameter and the remaining bits are assigned zeros.
@@ -4215,16 +4223,16 @@ _mm_movemask_epi8(__m128i __a)
 /// \param imm
 ///    An immediate value containing an 8-bit value specifying which elements to
 ///    copy from a. The destinations within the 128-bit destination are assigned
-///    values as follows:
-///    Bits [1:0] are used to assign values to bits [31:0] of the result.
-///    Bits [3:2] are used to assign values to bits [63:32] of the result.
-///    Bits [5:4] are used to assign values to bits [95:64] of the result.
-///    Bits [7:6] are used to assign values to bits [127:96] of the result.
-///    Bit value assignments:
-///    00: assign values from bits [31:0] of a.
-///    01: assign values from bits [63:32] of a.
-///    10: assign values from bits [95:64] of a.
-///    11: assign values from bits [127:96] of a.
+///    values as follows: \n
+///    Bits [1:0] are used to assign values to bits [31:0] of the result. \n
+///    Bits [3:2] are used to assign values to bits [63:32] of the result. \n
+///    Bits [5:4] are used to assign values to bits [95:64] of the result. \n
+///    Bits [7:6] are used to assign values to bits [127:96] of the result. \n
+///    Bit value assignments: \n
+///    00: assign values from bits [31:0] of \a a. \n
+///    01: assign values from bits [63:32] of \a a. \n
+///    10: assign values from bits [95:64] of \a a. \n
+///    11: assign values from bits [127:96] of \a a.
 /// \returns A 128-bit integer vector containing the shuffled values.
 #define _mm_shuffle_epi32(a, imm) __extension__ ({ \
   (__m128i)__builtin_shufflevector((__v4si)(__m128i)(a), \
@@ -4248,16 +4256,16 @@ _mm_movemask_epi8(__m128i __a)
 ///    A 128-bit integer vector of [8 x i16]. Bits [127:64] are copied to bits
 ///    [127:64] of the result.
 /// \param imm
-///    An 8-bit immediate value specifying which elements to copy from a.
-///    Bits[1:0] are used to assign values to bits [15:0] of the result.
-///    Bits[3:2] are used to assign values to bits [31:16] of the result.
-///    Bits[5:4] are used to assign values to bits [47:32] of the result.
-///    Bits[7:6] are used to assign values to bits [63:48] of the result.
-///    Bit value assignments:
-///    00: assign values from bits [15:0] of a.
-///    01: assign values from bits [31:16] of a.
-///    10: assign values from bits [47:32] of a.
-///    11: assign values from bits [63:48] of a.
+///    An 8-bit immediate value specifying which elements to copy from \a a. \n
+///    Bits[1:0] are used to assign values to bits [15:0] of the result. \n
+///    Bits[3:2] are used to assign values to bits [31:16] of the result. \n
+///    Bits[5:4] are used to assign values to bits [47:32] of the result. \n
+///    Bits[7:6] are used to assign values to bits [63:48] of the result. \n
+///    Bit value assignments: \n
+///    00: assign values from bits [15:0] of \a a. \n
+///    01: assign values from bits [31:16] of \a a. \n
+///    10: assign values from bits [47:32] of \a a. \n
+///    11: assign values from bits [63:48] of \a a. \n
 /// \returns A 128-bit integer vector containing the shuffled values.
 #define _mm_shufflelo_epi16(a, imm) __extension__ ({ \
   (__m128i)__builtin_shufflevector((__v8hi)(__m128i)(a), \
@@ -4282,16 +4290,16 @@ _mm_movemask_epi8(__m128i __a)
 ///    A 128-bit integer vector of [8 x i16]. Bits [63:0] are copied to bits
 ///    [63:0] of the result.
 /// \param imm
-///    An 8-bit immediate value specifying which elements to copy from a.
-///    Bits[1:0] are used to assign values to bits [79:64] of the result.
-///    Bits[3:2] are used to assign values to bits [95:80] of the result.
-///    Bits[5:4] are used to assign values to bits [111:96] of the result.
-///    Bits[7:6] are used to assign values to bits [127:112] of the result.
-///    Bit value assignments:
-///    00: assign values from bits [79:64] of a.
-///    01: assign values from bits [95:80] of a.
-///    10: assign values from bits [111:96] of a.
-///    11: assign values from bits [127:112] of a.
+///    An 8-bit immediate value specifying which elements to copy from \a a. \n
+///    Bits[1:0] are used to assign values to bits [79:64] of the result. \n
+///    Bits[3:2] are used to assign values to bits [95:80] of the result. \n
+///    Bits[5:4] are used to assign values to bits [111:96] of the result. \n
+///    Bits[7:6] are used to assign values to bits [127:112] of the result. \n
+///    Bit value assignments: \n
+///    00: assign values from bits [79:64] of \a a. \n
+///    01: assign values from bits [95:80] of \a a. \n
+///    10: assign values from bits [111:96] of \a a. \n
+///    11: assign values from bits [127:112] of \a a. \n
 /// \returns A 128-bit integer vector containing the shuffled values.
 #define _mm_shufflehi_epi16(a, imm) __extension__ ({ \
   (__m128i)__builtin_shufflevector((__v8hi)(__m128i)(a), \
@@ -4307,28 +4315,29 @@ _mm_movemask_epi8(__m128i __a)
 ///
 /// \headerfile <x86intrin.h>
 ///
-/// This intrinsic corresponds to the <c> VPUNPCKHBW / PUNPCKHBW </c> instruction.
+/// This intrinsic corresponds to the <c> VPUNPCKHBW / PUNPCKHBW </c>
+///   instruction.
 ///
 /// \param __a
 ///    A 128-bit vector of [16 x i8].
-///    Bits [71:64] are written to bits [7:0] of the result
-///    Bits [79:72] are written to bits [23:16] of the result.
-///    Bits [87:80] are written to bits [39:32] of the result.
-///    Bits [95:88] are written to bits [55:48] of the result.
-///    Bits [103:96] are written to bits [71:64] of the result.
-///    Bits [111:104] are written to bits [87:80] of the result.
-///    Bits [119:112] are written to bits [103:96] of the result.
+///    Bits [71:64] are written to bits [7:0] of the result. \n
+///    Bits [79:72] are written to bits [23:16] of the result. \n
+///    Bits [87:80] are written to bits [39:32] of the result. \n
+///    Bits [95:88] are written to bits [55:48] of the result. \n
+///    Bits [103:96] are written to bits [71:64] of the result. \n
+///    Bits [111:104] are written to bits [87:80] of the result. \n
+///    Bits [119:112] are written to bits [103:96] of the result. \n
 ///    Bits [127:120] are written to bits [119:112] of the result.
 /// \param __b
-///    A 128-bit vector of [16 x i8].
-///    Bits [71:64] are written to bits [15:8] of the result.
-///    Bits [79:72] are written to bits [31:24] of the result.
-///    Bits [87:80] are written to bits [47:40] of the result.
-///    Bits [95:88] are written to bits [63:56] of the result.
-///    Bits [103:96] are written to bits [79:72] of the result.
-///    Bits [111:104] are written to bits [95:88] of the result.
-///    Bits [119:112] are written to bits [111:104] of the result.
-///    Bits [127:120] are written to bits [127:120] of the destination.
+///    A 128-bit vector of [16 x i8]. \n
+///    Bits [71:64] are written to bits [15:8] of the result. \n
+///    Bits [79:72] are written to bits [31:24] of the result. \n
+///    Bits [87:80] are written to bits [47:40] of the result. \n
+///    Bits [95:88] are written to bits [63:56] of the result. \n
+///    Bits [103:96] are written to bits [79:72] of the result. \n
+///    Bits [111:104] are written to bits [95:88] of the result. \n
+///    Bits [119:112] are written to bits [111:104] of the result. \n
+///    Bits [127:120] are written to bits [127:120] of the result.
 /// \returns A 128-bit vector of [16 x i8] containing the interleaved values.
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_unpackhi_epi8(__m128i __a, __m128i __b)
@@ -4341,19 +4350,20 @@ _mm_unpackhi_epi8(__m128i __a, __m128i __b)
 ///
 /// \headerfile <x86intrin.h>
 ///
-/// This intrinsic corresponds to the <c> VPUNPCKHWD / PUNPCKHWD </c> instruction.
+/// This intrinsic corresponds to the <c> VPUNPCKHWD / PUNPCKHWD </c>
+///   instruction.
 ///
 /// \param __a
 ///    A 128-bit vector of [8 x i16].
-///    Bits [79:64] are written to bits [15:0] of the result.
-///    Bits [95:80] are written to bits [47:32] of the result.
-///    Bits [111:96] are written to bits [79:64] of the result.
+///    Bits [79:64] are written to bits [15:0] of the result. \n
+///    Bits [95:80] are written to bits [47:32] of the result. \n
+///    Bits [111:96] are written to bits [79:64] of the result. \n
 ///    Bits [127:112] are written to bits [111:96] of the result.
 /// \param __b
 ///    A 128-bit vector of [8 x i16].
-///    Bits [79:64] are written to bits [31:16] of the result.
-///    Bits [95:80] are written to bits [63:48] of the result.
-///    Bits [111:96] are written to bits [95:80] of the result.
+///    Bits [79:64] are written to bits [31:16] of the result. \n
+///    Bits [95:80] are written to bits [63:48] of the result. \n
+///    Bits [111:96] are written to bits [95:80] of the result. \n
 ///    Bits [127:112] are written to bits [127:112] of the result.
 /// \returns A 128-bit vector of [8 x i16] containing the interleaved values.
 static __inline__ __m128i __DEFAULT_FN_ATTRS
@@ -4367,15 +4377,16 @@ _mm_unpackhi_epi16(__m128i __a, __m128i __b)
 ///
 /// \headerfile <x86intrin.h>
 ///
-/// This intrinsic corresponds to the <c> VPUNPCKHDQ / PUNPCKHDQ </c> instruction.
+/// This intrinsic corresponds to the <c> VPUNPCKHDQ / PUNPCKHDQ </c>
+///   instruction.
 ///
 /// \param __a
-///    A 128-bit vector of [4 x i32].
-///    Bits [95:64] are written to bits [31:0] of the destination.
+///    A 128-bit vector of [4 x i32]. \n
+///    Bits [95:64] are written to bits [31:0] of the destination. \n
 ///    Bits [127:96] are written to bits [95:64] of the destination.
 /// \param __b
-///    A 128-bit vector of [4 x i32].
-///    Bits [95:64] are written to bits [64:32] of the destination.
+///    A 128-bit vector of [4 x i32]. \n
+///    Bits [95:64] are written to bits [64:32] of the destination. \n
 ///    Bits [127:96] are written to bits [127:96] of the destination.
 /// \returns A 128-bit vector of [4 x i32] containing the interleaved values.
 static __inline__ __m128i __DEFAULT_FN_ATTRS
@@ -4389,13 +4400,14 @@ _mm_unpackhi_epi32(__m128i __a, __m128i __b)
 ///
 /// \headerfile <x86intrin.h>
 ///
-/// This intrinsic corresponds to the <c> VPUNPCKHQDQ / PUNPCKHQDQ </c> instruction.
+/// This intrinsic corresponds to the <c> VPUNPCKHQDQ / PUNPCKHQDQ </c>
+///   instruction.
 ///
 /// \param __a
-///    A 128-bit vector of [2 x i64].
+///    A 128-bit vector of [2 x i64]. \n
 ///    Bits [127:64] are written to bits [63:0] of the destination.
 /// \param __b
-///    A 128-bit vector of [2 x i64].
+///    A 128-bit vector of [2 x i64]. \n
 ///    Bits [127:64] are written to bits [127:64] of the destination.
 /// \returns A 128-bit vector of [2 x i64] containing the interleaved values.
 static __inline__ __m128i __DEFAULT_FN_ATTRS
@@ -4409,27 +4421,28 @@ _mm_unpackhi_epi64(__m128i __a, __m128i __b)
 ///
 /// \headerfile <x86intrin.h>
 ///
-/// This intrinsic corresponds to the <c> VPUNPCKLBW / PUNPCKLBW </c> instruction.
+/// This intrinsic corresponds to the <c> VPUNPCKLBW / PUNPCKLBW </c>
+///   instruction.
 ///
 /// \param __a
-///    A 128-bit vector of [16 x i8].
-///    Bits [7:0] are written to bits [7:0] of the result.
-///    Bits [15:8] are written to bits [23:16] of the result.
-///    Bits [23:16] are written to bits [39:32] of the result.
-///    Bits [31:24] are written to bits [55:48] of the result.
-///    Bits [39:32] are written to bits [71:64] of the result.
-///    Bits [47:40] are written to bits [87:80] of the result.
-///    Bits [55:48] are written to bits [103:96] of the result.
-///    Bits [63:56] are written to bits [119:112] of the destination.
+///    A 128-bit vector of [16 x i8]. \n
+///    Bits [7:0] are written to bits [7:0] of the result. \n
+///    Bits [15:8] are written to bits [23:16] of the result. \n
+///    Bits [23:16] are written to bits [39:32] of the result. \n
+///    Bits [31:24] are written to bits [55:48] of the result. \n
+///    Bits [39:32] are written to bits [71:64] of the result. \n
+///    Bits [47:40] are written to bits [87:80] of the result. \n
+///    Bits [55:48] are written to bits [103:96] of the result. \n
+///    Bits [63:56] are written to bits [119:112] of the result.
 /// \param __b
 ///    A 128-bit vector of [16 x i8].
-///    Bits [7:0] are written to bits [15:8] of the result.
-///    Bits [15:8] are written to bits [31:24] of the result.
-///    Bits [23:16] are written to bits [47:40] of the result.
-///    Bits [31:24] are written to bits [63:56] of the result.
-///    Bits [39:32] are written to bits [79:72] of the result.
-///    Bits [47:40] are written to bits [95:88] of the result.
-///    Bits [55:48] are written to bits [111:104] of the result.
+///    Bits [7:0] are written to bits [15:8] of the result. \n
+///    Bits [15:8] are written to bits [31:24] of the result. \n
+///    Bits [23:16] are written to bits [47:40] of the result. \n
+///    Bits [31:24] are written to bits [63:56] of the result. \n
+///    Bits [39:32] are written to bits [79:72] of the result. \n
+///    Bits [47:40] are written to bits [95:88] of the result. \n
+///    Bits [55:48] are written to bits [111:104] of the result. \n
 ///    Bits [63:56] are written to bits [127:120] of the result.
 /// \returns A 128-bit vector of [16 x i8] containing the interleaved values.
 static __inline__ __m128i __DEFAULT_FN_ATTRS
@@ -4444,19 +4457,20 @@ _mm_unpacklo_epi8(__m128i __a, __m128i __b)
 ///
 /// \headerfile <x86intrin.h>
 ///
-/// This intrinsic corresponds to the <c> VPUNPCKLWD / PUNPCKLWD </c> instruction.
+/// This intrinsic corresponds to the <c> VPUNPCKLWD / PUNPCKLWD </c>
+///   instruction.
 ///
 /// \param __a
 ///    A 128-bit vector of [8 x i16].
-///    Bits [15:0] are written to bits [15:0] of the result.
-///    Bits [31:16] are written to bits [47:32] of the result.
-///    Bits [47:32] are written to bits [79:64] of the result.
+///    Bits [15:0] are written to bits [15:0] of the result. \n
+///    Bits [31:16] are written to bits [47:32] of the result. \n
+///    Bits [47:32] are written to bits [79:64] of the result. \n
 ///    Bits [63:48] are written to bits [111:96] of the result.
 /// \param __b
 ///    A 128-bit vector of [8 x i16].
-///    Bits [15:0] are written to bits [31:16] of the result.
-///    Bits [31:16] are written to bits [63:48] of the result.
-///    Bits [47:32] are written to bits [95:80] of the result.
+///    Bits [15:0] are written to bits [31:16] of the result. \n
+///    Bits [31:16] are written to bits [63:48] of the result. \n
+///    Bits [47:32] are written to bits [95:80] of the result. \n
 ///    Bits [63:48] are written to bits [127:112] of the result.
 /// \returns A 128-bit vector of [8 x i16] containing the interleaved values.
 static __inline__ __m128i __DEFAULT_FN_ATTRS
@@ -4470,15 +4484,16 @@ _mm_unpacklo_epi16(__m128i __a, __m128i __b)
 ///
 /// \headerfile <x86intrin.h>
 ///
-/// This intrinsic corresponds to the <c> VPUNPCKLDQ / PUNPCKLDQ </c> instruction.
+/// This intrinsic corresponds to the <c> VPUNPCKLDQ / PUNPCKLDQ </c>
+///   instruction.
 ///
 /// \param __a
-///    A 128-bit vector of [4 x i32].
-///    Bits [31:0] are written to bits [31:0] of the destination.
+///    A 128-bit vector of [4 x i32]. \n
+///    Bits [31:0] are written to bits [31:0] of the destination. \n
 ///    Bits [63:32] are written to bits [95:64] of the destination.
 /// \param __b
-///    A 128-bit vector of [4 x i32].
-///    Bits [31:0] are written to bits [64:32] of the destination.
+///    A 128-bit vector of [4 x i32]. \n
+///    Bits [31:0] are written to bits [64:32] of the destination. \n
 ///    Bits [63:32] are written to bits [127:96] of the destination.
 /// \returns A 128-bit vector of [4 x i32] containing the interleaved values.
 static __inline__ __m128i __DEFAULT_FN_ATTRS
@@ -4492,14 +4507,15 @@ _mm_unpacklo_epi32(__m128i __a, __m128i __b)
 ///
 /// \headerfile <x86intrin.h>
 ///
-/// This intrinsic corresponds to the <c> VPUNPCKLQDQ / PUNPCKLQDQ </c> instruction.
+/// This intrinsic corresponds to the <c> VPUNPCKLQDQ / PUNPCKLQDQ </c>
+///   instruction.
 ///
 /// \param __a
-///    A 128-bit vector of [2 x i64].
-///    Bits [63:0] are written to bits [63:0] of the destination.
+///    A 128-bit vector of [2 x i64]. \n
+///    Bits [63:0] are written to bits [63:0] of the destination. \n
 /// \param __b
-///    A 128-bit vector of [2 x i64].
-///    Bits [63:0] are written to bits [127:64] of the destination.
+///    A 128-bit vector of [2 x i64]. \n
+///    Bits [63:0] are written to bits [127:64] of the destination. \n
 /// \returns A 128-bit vector of [2 x i64] containing the interleaved values.
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_unpacklo_epi64(__m128i __a, __m128i __b)
@@ -4568,10 +4584,10 @@ _mm_move_epi64(__m128i __a)
 /// This intrinsic corresponds to the <c> VUNPCKHPD / UNPCKHPD </c> instruction.
 ///
 /// \param __a
-///    A 128-bit vector of [2 x double].
+///    A 128-bit vector of [2 x double]. \n
 ///    Bits [127:64] are written to bits [63:0] of the destination.
 /// \param __b
-///    A 128-bit vector of [2 x double].
+///    A 128-bit vector of [2 x double]. \n
 ///    Bits [127:64] are written to bits [127:64] of the destination.
 /// \returns A 128-bit vector of [2 x double] containing the interleaved values.
 static __inline__ __m128d __DEFAULT_FN_ATTRS
@@ -4589,10 +4605,10 @@ _mm_unpackhi_pd(__m128d __a, __m128d __b)
 /// This intrinsic corresponds to the <c> VUNPCKLPD / UNPCKLPD </c> instruction.
 ///
 /// \param __a
-///    A 128-bit vector of [2 x double].
+///    A 128-bit vector of [2 x double]. \n
 ///    Bits [63:0] are written to bits [63:0] of the destination.
 /// \param __b
-///    A 128-bit vector of [2 x double].
+///    A 128-bit vector of [2 x double]. \n
 ///    Bits [63:0] are written to bits [127:64] of the destination.
 /// \returns A 128-bit vector of [2 x double] containing the interleaved values.
 static __inline__ __m128d __DEFAULT_FN_ATTRS
@@ -4639,11 +4655,11 @@ _mm_movemask_pd(__m128d __a)
 ///    A 128-bit vector of [2 x double].
 /// \param i
 ///    An 8-bit immediate value. The least significant two bits specify which
-///    elements to copy from a and b:
-///    Bit[0] = 0: lower element of a copied to lower element of result.
-///    Bit[0] = 1: upper element of a copied to lower element of result.
-///    Bit[1] = 0: lower element of \a b copied to upper element of result.
-///    Bit[1] = 1: upper element of \a b copied to upper element of result.
+///    elements to copy from a and b: \n
+///    Bit[0] = 0: lower element of a copied to lower element of result. \n
+///    Bit[0] = 1: upper element of a copied to lower element of result. \n
+///    Bit[1] = 0: lower element of \a b copied to upper element of result. \n
+///    Bit[1] = 1: upper element of \a b copied to upper element of result. \n
 /// \returns A 128-bit vector of [2 x double] containing the shuffled values.
 #define _mm_shuffle_pd(a, b, i) __extension__ ({ \
   (__m128d)__builtin_shufflevector((__v2df)(__m128d)(a), (__v2df)(__m128d)(b), \
diff --git a/clang/lib/Headers/f16cintrin.h b/clang/lib/Headers/f16cintrin.h
index fbfc1860f9f..180712ffc68 100644
--- a/clang/lib/Headers/f16cintrin.h
+++ b/clang/lib/Headers/f16cintrin.h
@@ -65,11 +65,11 @@ _cvtsh_ss(unsigned short __a)
 ///    A 32-bit single-precision float value to be converted to a 16-bit
 ///    half-precision float value.
 /// \param imm
-///    An immediate value controlling rounding using bits [2:0]:
-///    000: Nearest
-///    001: Down
-///    010: Up
-///    011: Truncate
+///    An immediate value controlling rounding using bits [2:0]: \n
+///    000: Nearest \n
+///    001: Down \n
+///    010: Up \n
+///    011: Truncate \n
 ///    1XX: Use MXCSR.RC for rounding
 /// \returns The converted 16-bit half-precision float value.
 #define _cvtss_sh(a, imm)  \
@@ -90,11 +90,11 @@ _cvtsh_ss(unsigned short __a)
 /// \param a
 ///    A 128-bit vector containing 32-bit float values.
 /// \param imm
-///    An immediate value controlling rounding using bits [2:0]:
-///    000: Nearest
-///    001: Down
-///    010: Up
-///    011: Truncate
+///    An immediate value controlling rounding using bits [2:0]: \n
+///    000: Nearest \n
+///    001: Down \n
+///    010: Up \n
+///    011: Truncate \n
 ///    1XX: Use MXCSR.RC for rounding
 /// \returns A 128-bit vector containing converted 16-bit half-precision float
 ///    values. The lower 64 bits are used to store the converted 16-bit
diff --git a/clang/lib/Headers/immintrin.h b/clang/lib/Headers/immintrin.h
index 0bdc921c788..7f91d49fbce 100644
--- a/clang/lib/Headers/immintrin.h
+++ b/clang/lib/Headers/immintrin.h
@@ -84,11 +84,11 @@
 ///    A 256-bit vector containing 32-bit single-precision float values to be
 ///    converted to 16-bit half-precision float values.
 /// \param imm
-///    An immediate value controlling rounding using bits [2:0]:
-///    000: Nearest
-///    001: Down
-///    010: Up
-///    011: Truncate
+///    An immediate value controlling rounding using bits [2:0]: \n
+///    000: Nearest \n
+///    001: Down \n
+///    010: Up \n
+///    011: Truncate \n
 ///    1XX: Use MXCSR.RC for rounding
 /// \returns A 128-bit vector containing the converted 16-bit half-precision
 ///    float values.
diff --git a/clang/lib/Headers/mmintrin.h b/clang/lib/Headers/mmintrin.h
index 55db470de39..e0c277a65a3 100644
--- a/clang/lib/Headers/mmintrin.h
+++ b/clang/lib/Headers/mmintrin.h
@@ -211,16 +211,16 @@ _mm_packs_pu16(__m64 __m1, __m64 __m2)
 /// This intrinsic corresponds to the <c> PUNPCKHBW </c> instruction.
 ///
 /// \param __m1
-///    A 64-bit integer vector of [8 x i8].
-///    Bits [39:32] are written to bits [7:0] of the result.
-///    Bits [47:40] are written to bits [23:16] of the result.
-///    Bits [55:48] are written to bits [39:32] of the result.
+///    A 64-bit integer vector of [8 x i8]. \n 
+///    Bits [39:32] are written to bits [7:0] of the result. \n
+///    Bits [47:40] are written to bits [23:16] of the result. \n
+///    Bits [55:48] are written to bits [39:32] of the result. \n
 ///    Bits [63:56] are written to bits [55:48] of the result.
 /// \param __m2
 ///    A 64-bit integer vector of [8 x i8].
-///    Bits [39:32] are written to bits [15:8] of the result.
-///    Bits [47:40] are written to bits [31:24] of the result.
-///    Bits [55:48] are written to bits [47:40] of the result.
+///    Bits [39:32] are written to bits [15:8] of the result. \n
+///    Bits [47:40] are written to bits [31:24] of the result. \n
+///    Bits [55:48] are written to bits [47:40] of the result. \n
 ///    Bits [63:56] are written to bits [63:56] of the result.
 /// \returns A 64-bit integer vector of [8 x i8] containing the interleaved
 ///    values.
@@ -239,11 +239,11 @@ _mm_unpackhi_pi8(__m64 __m1, __m64 __m2)
 ///
 /// \param __m1
 ///    A 64-bit integer vector of [4 x i16].
-///    Bits [47:32] are written to bits [15:0] of the result.
+///    Bits [47:32] are written to bits [15:0] of the result. \n
 ///    Bits [63:48] are written to bits [47:32] of the result.
 /// \param __m2
 ///    A 64-bit integer vector of [4 x i16].
-///    Bits [47:32] are written to bits [31:16] of the result.
+///    Bits [47:32] are written to bits [31:16] of the result. \n
 ///    Bits [63:48] are written to bits [63:48] of the result.
 /// \returns A 64-bit integer vector of [4 x i16] containing the interleaved
 ///    values.
@@ -283,15 +283,15 @@ _mm_unpackhi_pi32(__m64 __m1, __m64 __m2)
 ///
 /// \param __m1
 ///    A 64-bit integer vector of [8 x i8].
-///    Bits [7:0] are written to bits [7:0] of the result.
-///    Bits [15:8] are written to bits [23:16] of the result.
-///    Bits [23:16] are written to bits [39:32] of the result.
+///    Bits [7:0] are written to bits [7:0] of the result. \n
+///    Bits [15:8] are written to bits [23:16] of the result. \n
+///    Bits [23:16] are written to bits [39:32] of the result. \n
 ///    Bits [31:24] are written to bits [55:48] of the result.
 /// \param __m2
 ///    A 64-bit integer vector of [8 x i8].
-///    Bits [7:0] are written to bits [15:8] of the result.
-///    Bits [15:8] are written to bits [31:24] of the result.
-///    Bits [23:16] are written to bits [47:40] of the result.
+///    Bits [7:0] are written to bits [15:8] of the result. \n
+///    Bits [15:8] are written to bits [31:24] of the result. \n
+///    Bits [23:16] are written to bits [47:40] of the result. \n
 ///    Bits [31:24] are written to bits [63:56] of the result.
 /// \returns A 64-bit integer vector of [8 x i8] containing the interleaved
 ///    values.
@@ -310,11 +310,11 @@ _mm_unpacklo_pi8(__m64 __m1, __m64 __m2)
 ///
 /// \param __m1
 ///    A 64-bit integer vector of [4 x i16].
-///    Bits [15:0] are written to bits [15:0] of the result.
+///    Bits [15:0] are written to bits [15:0] of the result. \n
 ///    Bits [31:16] are written to bits [47:32] of the result.
 /// \param __m2
 ///    A 64-bit integer vector of [4 x i16].
-///    Bits [15:0] are written to bits [31:16] of the result.
+///    Bits [15:0] are written to bits [31:16] of the result. \n
 ///    Bits [31:16] are written to bits [63:48] of the result.
 /// \returns A 64-bit integer vector of [4 x i16] containing the interleaved
 ///    values.
diff --git a/clang/lib/Headers/pmmintrin.h b/clang/lib/Headers/pmmintrin.h
index 5f201fb53bd..d4f6487af17 100644
--- a/clang/lib/Headers/pmmintrin.h
+++ b/clang/lib/Headers/pmmintrin.h
@@ -115,18 +115,18 @@ _mm_hsub_ps(__m128 __a, __m128 __b)
 
 /// \brief Moves and duplicates high-order (odd-indexed) values from a 128-bit
 ///    vector of [4 x float] to float values stored in a 128-bit vector of
-///    [4 x float].
-///    Bits [127:96] of the source are written to bits [127:96] and [95:64] of
-///    the destination.
-///    Bits [63:32] of the source are written to bits [63:32] and [31:0] of the
-///    destination.
+///    [4 x float]. 
 ///
 /// \headerfile <x86intrin.h>
 ///
 /// This intrinsic corresponds to the <c> VMOVSHDUP </c> instruction.
 ///
 /// \param __a
-///    A 128-bit vector of [4 x float].
+///    A 128-bit vector of [4 x float]. \n
+///    Bits [127:96] of the source are written to bits [127:96] and [95:64] of
+///    the destination. \n
+///    Bits [63:32] of the source are written to bits [63:32] and [31:0] of the
+///    destination.
 /// \returns A 128-bit vector of [4 x float] containing the moved and duplicated
 ///    values.
 static __inline__ __m128 __DEFAULT_FN_ATTRS
@@ -135,20 +135,19 @@ _mm_movehdup_ps(__m128 __a)
   return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 1, 1, 3, 3);
 }
 
-/// \brief Duplicates low-order (even-indexed) values from a 128-bit
-///    vector of [4 x float] to float values stored in a 128-bit vector of
-///    [4 x float].
-///    Bits [95:64] of the source are written to bits [127:96] and [95:64] of
-///    the destination.
-///    Bits [31:0] of the source are written to bits [63:32] and [31:0] of the
-///    destination.
+/// \brief Duplicates low-order (even-indexed) values from a 128-bit vector of
+///    [4 x float] to float values stored in a 128-bit vector of [4 x float]. 
 ///
 /// \headerfile <x86intrin.h>
 ///
 /// This intrinsic corresponds to the <c> VMOVSLDUP </c> instruction.
 ///
 /// \param __a
-///    A 128-bit vector of [4 x float].
+///    A 128-bit vector of [4 x float] \n
+///    Bits [95:64] of the source are written to bits [127:96] and [95:64] of
+///    the destination. \n
+///    Bits [31:0] of the source are written to bits [63:32] and [31:0] of the
+///    destination.
 /// \returns A 128-bit vector of [4 x float] containing the moved and duplicated
 ///    values.
 static __inline__ __m128 __DEFAULT_FN_ATTRS
diff --git a/clang/lib/Headers/tmmintrin.h b/clang/lib/Headers/tmmintrin.h
index 6a8e5c1bb4e..80664043a06 100644
--- a/clang/lib/Headers/tmmintrin.h
+++ b/clang/lib/Headers/tmmintrin.h
@@ -580,11 +580,11 @@ _mm_mulhrs_pi16(__m64 __a, __m64 __b)
 /// \param __b
 ///    A 128-bit integer vector containing control bytes corresponding to
 ///    positions in the destination:
-///    Bit 7:
-///    1: Clear the corresponding byte in the destination.
+///    Bit 7: \n
+///    1: Clear the corresponding byte in the destination. \n
 ///    0: Copy the selected source byte to the corresponding byte in the
-///    destination.
-///    Bits [6:4] Reserved.
+///    destination. \n
+///    Bits [6:4] Reserved.  \n
 ///    Bits [3:0] select the source byte to be copied.
 /// \returns A 128-bit integer vector containing the copied or cleared values.
 static __inline__ __m128i __DEFAULT_FN_ATTRS
@@ -606,10 +606,10 @@ _mm_shuffle_epi8(__m128i __a, __m128i __b)
 /// \param __b
 ///    A 64-bit integer vector containing control bytes corresponding to
 ///    positions in the destination:
-///    Bit 7:
-///    1: Clear the corresponding byte in the destination.
+///    Bit 7: \n
+///    1: Clear the corresponding byte in the destination. \n
 ///    0: Copy the selected source byte to the corresponding byte in the
-///    destination.
+///    destination. \n
 ///    Bits [3:0] select the source byte to be copied.
 /// \returns A 64-bit integer vector containing the copied or cleared values.
 static __inline__ __m64 __DEFAULT_FN_ATTRS