diff --git a/include/flang/Error/errmsg-accel.n b/include/flang/Error/errmsg-accel.n index e80467d8c0e..45df545b420 100644 --- a/include/flang/Error/errmsg-accel.n +++ b/include/flang/Error/errmsg-accel.n @@ -14,11 +14,42 @@ .\" * limitations under the License. .\" * .\" */ +.MS W 972 "The directive #pragma acc mirror is deprecated; use #pragma acc declare create instead" +.MS W 973 "The directive #pragma acc reflected is deprecated; use #pragma acc declare present" +.MS W 974 "The directive #pragma acc region is deprecated; use #pragma acc kernels instead" +.MS W 975 "The directive #pragma acc data region is deprecated; use #pragma acc data instead" +.MS W 976 "The directive #pragma acc for is deprecated; use #pragma acc loop instead" +.MS W 977 "The directive #pragma acc for kernel is deprecated; use #pragma acc loop instead" +.MS W 978 "The clause parallel is deprecated; use clause gang instead" +.MS W 979 "The directive #pragma acc region for is deprecated; use #pragma acc kernels loop instead" +.MS W 980 "The directive #pragma acc region loop is deprecated; use #pragma acc kernels loop instead" +.MS W 981 "The directive #pragma acc kernels for is deprecated; use #pragma acc kernels loop instead" +.MS W 982 "The directive #pragma acc deviceptr is deprecated; use #pragma acc declare deviceptr instead" +.MS W 983 "The directive #pragma acc parallel for is deprecated; use #pragma acc parallel loop instead" +.MS W 984 "The directive #pragma acc scalar region is deprecated; use #pragma acc serial instead" +.MS W 985 "The clause local is deprecated; use clause create instead" +.MS W 986 "The clause cache is deprecated; use directive #pragma acc cache instead" +.MS W 987 "The clause update host is deprecated; use separate update host directive after the region instead" +.MS W 988 "The clause update device is deprecated; use separate update device directive before the region instead" +.MS W 989 "The clause update in is deprecated; use separate update device directeve before the region instead" +.MS W 990 "The clause update out is deprecated; use update self instead" +.MS W 991 "The clause pnot is deprecated; use no_create instead" +.MS W 992 "The clause updatein is deprecated; use update device instead" +.MS W 993 "The clause updateout is deprecated; use update self instead" +.MS W 994 "The directive #pragma acc copy is deprecated; use #pragma acc declare copy instead" +.MS W 995 "The directive #pragma acc copyin is deprecated; use #pragma acc declare copyin instead" +.MS W 996 "The directive #pragma acc copyout is deprecated; use #pragma acc declare copyout instead" +.MS W 997 "The directive #pragma acc device_resident is deprecated; use #pragma acc declare device_resident instead" +.MS W 998 "The directive #pragma acc for host is deprecated; no OpenACC equivalent" +.MS W 999 "The directive #pragma acc loop kernel is deprecated; no OpenACC equivalent" .MS S 1000 "Call in OpenACC region to procedure '$' which has no acc routine information" .MS S 1001 "All selected compute capabilities were disabled (see -Minfo)" .MS S 1002 "Reduction type not supported for this variable datatype - $" .MS W 1003 "Lambda capture by reference not supported in Accellerated region" .MS W 1004 "Lambda capture 'this' by reference not supported in Accellerated region" +.MS W 1005 "The clause unroll is deprecated; no OpenACC equivalent" +.MS W 1006 "The clause mirror is deprecated; no OpenACC equivalent" +.MS W 1007 "The clause host is deprecated; no OpenACC equivalent" .\" Skipping ahead to 1011 to keep the same numbers as the fortran front-end. .MS S 1011 "Device variable cannot be THREADPRIVATE - $" .MS S 1012 "Threadprivate variables are not supported in acc routine - $" @@ -42,8 +73,14 @@ .MS F 1028 "The variable $ doesn't have predefined policy $ available" .MS F 1029 "The variable $ using policy $ is not a structure-based type" .MS F 1030 "Policy motion $ is not allowed in $ directive" +.MS W 1031 "The directive #pragma acc create is deprecated; use #pragma acc declare create instead" +.MS W 1032 "The directive #pragma acc present is deprecated; use #pragma acc declare present instead" +.MS W 1033 "The directive #pragma acc link is deprecated; use #pragma acc declare link instead" .MS F 1034 "Only signed/unsigned 32 bits and 64 bits integer variables are allowed in bound expression. $ is is not such variable" .MS F 1035 "Only integer sibling members and global variables are allowed in bound expression. $ is is neither of them." .MS F 1036 "No global variable named $ has been defined" .MS F 1037 "Default clause can only contain include and exclude keyword." .MS W 1038 "COPY clause on a dummy variable with INTENT(IN) attribute demoted to COPYIN - $" +.MS S 1039 "OpenACC data clause expected after $" +.MS S 1040 "OpenACC $ data clause may not follow a device_type clause." +.MS S 1041 "OpenACC $ clause may not follow a device_type clause." diff --git a/include/flang/Error/errmsg-in.n b/include/flang/Error/errmsg-in.n index 4ba298964d4..64dabdb774c 100644 --- a/include/flang/Error/errmsg-in.n +++ b/include/flang/Error/errmsg-in.n @@ -1523,3 +1523,6 @@ A DO CONCURRENT variable with LOCAL_INIT locality must have a host variable of t .MS S 1210 "Parent module $ must declare a separate module procedure." .MS S 1211 "Submodule's ancestor module $ must be a nonintrinsic module." .MS S 1212 "$ was previously declared to be a module procedure." +.MS S 1213 "OpenACC $ data clause may not follow a device_type clause." +.MS S 1214 "PGI Accelerator $ data clause may not follow a device_type clause." +.MS S 1215 "OpenACC data clause expected after $." diff --git a/tools/flang1/flang1exe/dinit.c b/tools/flang1/flang1exe/dinit.c index 95f1039c2aa..d3e4f62b42e 100644 --- a/tools/flang1/flang1exe/dinit.c +++ b/tools/flang1/flang1exe/dinit.c @@ -239,7 +239,7 @@ dinit_data(VAR *ivl, ACL *ict, int dtype) assert(dtype, "dinit_data: no object to initialize", 0, 2); member = DTY(dtype + 1); /* for type extension */ - if (PARENTG(DTY(dtype + 3))) { + if (PARENTG(DTY(dtype + 3)) && get_seen_contains()) { member = SYMLKG(member); } } diff --git a/tools/flang1/flang1exe/rest.c b/tools/flang1/flang1exe/rest.c index ec702298b64..7e78eff534b 100644 --- a/tools/flang1/flang1exe/rest.c +++ b/tools/flang1/flang1exe/rest.c @@ -2530,8 +2530,11 @@ handle_seq_section(int entry, int arr, int loc, int std, int *retval, } else { /* right now, no members can be distributed anyway */ arrayalign = ALIGNG(arraysptr); - if (POINTERG(arraysptr) && !arrayalign) - is_seq_pointer = TRUE; + if (POINTERG(arraysptr)) { + is_pointer = TRUE; + if (!arrayalign) + is_seq_pointer = TRUE; + } } break; default: diff --git a/tools/flang1/flang1exe/semsmp.c b/tools/flang1/flang1exe/semsmp.c index f2652575130..7f76f81dd3c 100644 --- a/tools/flang1/flang1exe/semsmp.c +++ b/tools/flang1/flang1exe/semsmp.c @@ -42,7 +42,7 @@ /* contents of this file: */ static void add_clause(int, LOGICAL); -static void clause_errchk(BIGINT64, char *); +static bool clause_errchk(BIGINT64, char *); static void accel_sched_errchk(); static void accel_nosched_errchk(); static void accel_pragmagen(int, int, int); @@ -5807,14 +5807,19 @@ add_clause(int clause, LOGICAL one_only) CL_PRESENT(clause) = 1; } -static void +static bool clause_errchk(BIGINT64 bt, char *dirname) { int i; + bool any = false; for (i = 0; i < CL_MAXV; i++) - if (CL_PRESENT(i) && !(CL_STMT(i) & bt)) - error(533, 3, gbl.lineno, CL_NAME(i), dirname); + if (CL_PRESENT(i)) { + any = true; + if (!(CL_STMT(i) & bt)) + error(533, 3, gbl.lineno, CL_NAME(i), dirname); + } + return any; } static void diff --git a/tools/flang2/docs/xflag.n b/tools/flang2/docs/xflag.n index b87a54eacae..773eb541f1c 100644 --- a/tools/flang2/docs/xflag.n +++ b/tools/flang2/docs/xflag.n @@ -1317,12 +1317,12 @@ definition on only one side of the conditional. LLVM - disable extended conditional vectorization in all loops where the predicate size is different than the computational size. .XB 0x20: -LLVM - don't allow vectorization if multiple lhs data type sizes exist +LLVM - allow vectorization if multiple lhs data type sizes exist within the inner loop .XB 0x40: LLVM - don't allow vectorization of DBLE ili .XB 0x80: -LLVM - don't allow vectorization of DFLOAT ili +LLVM - don't allow vectorization of DFLOAT/IKMV ili .XF "37": .XB 0x01: @@ -5274,6 +5274,11 @@ Don't demote address KMUL operations .XB 0x200000: in LLVM output, don't output the instruction info (lilix index, opcode) .XB 0x400000: +.XB 0x800000: +If the number of ACIV induction variables is too large, kill off all +but the innermost loop ones. +.XB 0x1000000: +Only find ACIV induction variables for innermost loops. reserved .XF "202:" diff --git a/tools/flang2/flang2exe/exp_rte.cpp b/tools/flang2/flang2exe/exp_rte.cpp index f9989c623f4..988b514b360 100644 --- a/tools/flang2/flang2exe/exp_rte.cpp +++ b/tools/flang2/flang2exe/exp_rte.cpp @@ -2285,7 +2285,7 @@ exp_end(ILM *ilmp, int curilm, bool is_func) } /* emit any mp initialization for the function & its entries */ - exp_mp_func_prologue(); + exp_mp_func_prologue(true); if (!XBIT(121, 0x01) || /* -Mnoframe isn't specified */ (flg.debug && !XBIT(123, 0x400)) || /* -debug is set */ diff --git a/tools/flang2/flang2exe/expatomics.cpp b/tools/flang2/flang2exe/expatomics.cpp index b62a4c8a0ab..97c06ee3046 100644 --- a/tools/flang2/flang2exe/expatomics.cpp +++ b/tools/flang2/flang2exe/expatomics.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2018, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2015-2019, NVIDIA CORPORATION. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -3099,6 +3099,9 @@ exp_mp_atomic_capture(ILM *ilmp) cpt.rhs[cnt] = ILI_OF(ILM_OPND(ilmp, 2)); cpt.nme[cnt] = nme[LHS_IDX] = NME_OF(ILM_OPND(ilmp, 1)); cpt.dtype[cnt] = dt_nme(nme[LHS_IDX]); + if (!cpt.dtype[cnt]) { + cpt.dtype[cnt] = get_dtype_from_ilm(ilmp); + } cpt.mem_order[cnt] = ILM_OPND(ilmp, 3); /* Don't use CSE for LHS */ diff --git a/tools/flang2/flang2exe/expsmp.cpp b/tools/flang2/flang2exe/expsmp.cpp index f2aeabeb067..e857dc59cb7 100644 --- a/tools/flang2/flang2exe/expsmp.cpp +++ b/tools/flang2/flang2exe/expsmp.cpp @@ -2966,7 +2966,7 @@ clear_tplnk(void) /** \brief Generate any mp-specific prologue for a function. */ void -exp_mp_func_prologue(void) +exp_mp_func_prologue(bool process_tp) { SPTR sym; int ili, tmpthread; @@ -2980,7 +2980,7 @@ exp_mp_func_prologue(void) if (CUDAG(GBL_CURRFUNC) == CUDA_GLOBAL || CUDAG(GBL_CURRFUNC) == CUDA_DEVICE) return; #endif - if (1) { + if (process_tp) { for (sym = gbl.threadprivate; sym > NOSYM; sym = TPLNKG(sym)) { /* For each threadprivate common, must 'declare' the threads' * copies by calling: diff --git a/tools/flang2/flang2exe/expsmp.h b/tools/flang2/flang2exe/expsmp.h index e0827d69565..7d1e50db610 100644 --- a/tools/flang2/flang2exe/expsmp.h +++ b/tools/flang2/flang2exe/expsmp.h @@ -99,7 +99,7 @@ void clear_tplnk(void); /** \brief ... */ -void exp_mp_func_prologue(void); +void exp_mp_func_prologue(bool); /** \brief ... diff --git a/tools/flang2/flang2exe/upper.cpp b/tools/flang2/flang2exe/upper.cpp index 7daf22e6402..c53f5d2c15a 100644 --- a/tools/flang2/flang2exe/upper.cpp +++ b/tools/flang2/flang2exe/upper.cpp @@ -2724,7 +2724,7 @@ read_symbol(void) NEW(agototab, int, agotosz); agotomax = 0; } - NEED(agoto, agototab, int, agotosz, agotosz + 32); + NEED(agoto, agototab, int, agotosz, agoto + 32); agototab[agoto - 1] = newsptr; if (agoto > agotomax) agotomax = agoto; diff --git a/tools/flang2/utils/ilitp/x86_64/ilitp.n b/tools/flang2/utils/ilitp/x86_64/ilitp.n index 5b9e6ebd90c..5f5515057ff 100644 --- a/tools/flang2/utils/ilitp/x86_64/ilitp.n +++ b/tools/flang2/utils/ilitp/x86_64/ilitp.n @@ -1126,15 +1126,27 @@ respectively. .IL KIMV krlnk -Move an Integer64 register to a integer register with truncation. -Used also for K to UI, UK to I, and UK to UI conversions. -Bug fix - don't treat this as a move for purposes of register allocation. +Signed or unsigned integer*8 to signed or unsigned integer*4 type +conversion with truncation. Note, we mustn't treat this as a move for +the purposes of register allocation. .AT move null ir cse .CG "mov" 'l' .SI direct lat(1) .SI ld direct lat(3) .SI st direct lat(3) +.IL PKIMVX xmm xmm +Truncate 'N' packed signed or unsigned integer*8 values from 'xmm1' +(an xmm, ymm or zmm register) into 'N' packed signed or unsigned +integer*4 values in 'xmm2' (an xmm or ymm register), where 'N' is 2, 4 +or 8 if 'xmm1' is an xmm, ymm or zmm register respectively. This +instruction requires support for AVX-512F and AVX-512VL, and the +source operand must be a register (unusually for AVX instructions the +destination can be a memory operand!). +.AT other null trm +.CG terminal "vpmovqd" avx_only avx_special + + .IL IAMV irlnk Move an integer reg into an address reg. Sign extension required? diff --git a/tools/shared/pgifeat.h b/tools/shared/pgifeat.h index 3c7038f599e..be9794acd99 100644 --- a/tools/shared/pgifeat.h +++ b/tools/shared/pgifeat.h @@ -87,6 +87,7 @@ #define ACCVERYSTRICT XBIT(186,0x200000) /* - * PGI Compiler Assisted Software Testing (PCAST) + * Compiler Assisted Software Testing (PCAST) */ + #define XBIT_PCAST XBIT(215, 0x8) diff --git a/tools/shared/x86.c b/tools/shared/x86.c index 89772598d46..692917fa53c 100644 --- a/tools/shared/x86.c +++ b/tools/shared/x86.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006-2018, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2006-2019, NVIDIA CORPORATION. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -27,6 +27,12 @@ X86TYPE mach, mach_count; +#ifdef TARGET_WIN +#define DONT_GENERATE_AVX512 true /* a temporary restriction */ +#else +#define DONT_GENERATE_AVX512 false +#endif + void set_mach(X86TYPE *mach, int machtype) { @@ -181,15 +187,19 @@ set_mach(X86TYPE *mach, int machtype) break; case TP_SKYLAKE: - mach->type[MACH_INTEL_SKYLAKE] = 1; - mach->feature[FEATURE_AVX512VL] = 1; + if (! DONT_GENERATE_AVX512) { + mach->type[MACH_INTEL_SKYLAKE] = 1; + mach->feature[FEATURE_AVX512VL] = 1; + } /* ...and fall through... */ case TP_KNIGHTS_LANDING: - if (machtype == TP_KNIGHTS_LANDING) { - mach->type[MACH_INTEL_KNIGHTS_LANDING] = 1; + if (! DONT_GENERATE_AVX512) { + if (machtype == TP_KNIGHTS_LANDING) { + mach->type[MACH_INTEL_KNIGHTS_LANDING] = 1; + } + mach->feature[FEATURE_AVX512F] = 1; } - mach->feature[FEATURE_AVX512F] = 1; /* ...and fall through... */ case TP_HASWELL: