@@ -342,42 +342,50 @@ define void @freeze_buildvector_single_repeated_maybe_poison_operand(ptr %origin
342
342
define void @freeze_two_frozen_buildvectors (ptr %origin0 , ptr %origin1 , ptr %dst0 , ptr %dst1 ) nounwind {
343
343
; X86-LABEL: freeze_two_frozen_buildvectors:
344
344
; X86: # %bb.0:
345
+ ; X86-NEXT: pushl %esi
345
346
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
346
347
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
347
348
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
349
+ ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
350
+ ; X86-NEXT: movl (%esi), %esi
351
+ ; X86-NEXT: andl $15, %esi
348
352
; X86-NEXT: movl (%edx), %edx
349
353
; X86-NEXT: andl $15, %edx
350
- ; X86-NEXT: vpinsrd $1, %edx, %xmm0, %xmm0
351
- ; X86-NEXT: vbroadcastss {{.*#+}} xmm1 = [7,7,7,7]
352
- ; X86-NEXT: vpand %xmm1, %xmm0, %xmm0
354
+ ; X86-NEXT: vmovd %esi, %xmm0
355
+ ; X86-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
356
+ ; X86-NEXT: vpxor %xmm1, %xmm1, %xmm1
357
+ ; X86-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7]
358
+ ; X86-NEXT: vbroadcastss {{.*#+}} xmm2 = [7,7,7,7]
359
+ ; X86-NEXT: vpand %xmm2, %xmm0, %xmm0
353
360
; X86-NEXT: vmovdqa %xmm0, (%ecx)
354
361
; X86-NEXT: vmovd %edx, %xmm0
355
362
; X86-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
356
- ; X86-NEXT: vpxor %xmm2, %xmm2, %xmm2
357
- ; X86-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm0[4,5],xmm2[6,7]
358
- ; X86-NEXT: vpand %xmm1, %xmm0, %xmm0
363
+ ; X86-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5],xmm1[6,7]
364
+ ; X86-NEXT: vpand %xmm2, %xmm0, %xmm0
359
365
; X86-NEXT: vmovdqa %xmm0, (%eax)
366
+ ; X86-NEXT: popl %esi
360
367
; X86-NEXT: retl
361
368
;
362
369
; X64-LABEL: freeze_two_frozen_buildvectors:
363
370
; X64: # %bb.0:
364
- ; X64-NEXT: movl (%rdi), %eax
365
- ; X64-NEXT: andl $15, %eax
366
- ; X64-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
367
- ; X64-NEXT: vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7]
368
- ; X64-NEXT: vpand %xmm1, %xmm0, %xmm0
371
+ ; X64-NEXT: movl (%rsi), %eax
372
+ ; X64-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
373
+ ; X64-NEXT: vpbroadcastd %xmm0, %xmm0
374
+ ; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
375
+ ; X64-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
376
+ ; X64-NEXT: vpbroadcastd {{.*#+}} xmm2 = [7,7,7,7]
377
+ ; X64-NEXT: vpand %xmm2, %xmm0, %xmm0
369
378
; X64-NEXT: vmovdqa %xmm0, (%rdx)
370
379
; X64-NEXT: vmovd %eax, %xmm0
371
380
; X64-NEXT: vpbroadcastd %xmm0, %xmm0
372
- ; X64-NEXT: vpxor %xmm2, %xmm2, %xmm2
373
- ; X64-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0,1],xmm0[2],xmm2[3]
374
- ; X64-NEXT: vpand %xmm1, %xmm0, %xmm0
381
+ ; X64-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2],xmm1[3]
382
+ ; X64-NEXT: vpand %xmm2, %xmm0, %xmm0
375
383
; X64-NEXT: vmovdqa %xmm0, (%rcx)
376
384
; X64-NEXT: retq
377
385
%i0.src = load i32 , ptr %origin0
378
386
%i0 = and i32 %i0.src , 15
379
387
%i1.src = load i32 , ptr %origin1
380
- %i1 = and i32 %i0 .src , 15
388
+ %i1 = and i32 %i1 .src , 15
381
389
%i2 = insertelement <4 x i32 > poison, i32 %i0 , i64 1
382
390
%i3 = and <4 x i32 > %i2 , <i32 7 , i32 7 , i32 7 , i32 7 >
383
391
%i4 = freeze <4 x i32 > %i3
@@ -392,41 +400,43 @@ define void @freeze_two_frozen_buildvectors(ptr %origin0, ptr %origin1, ptr %dst
392
400
define void @freeze_two_buildvectors_only_one_frozen (ptr %origin0 , ptr %origin1 , ptr %dst0 , ptr %dst1 ) nounwind {
393
401
; X86-LABEL: freeze_two_buildvectors_only_one_frozen:
394
402
; X86: # %bb.0:
403
+ ; X86-NEXT: pushl %esi
395
404
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
396
405
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
397
406
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
398
- ; X86-NEXT: movl (%edx), %edx
399
- ; X86-NEXT: andl $15, %edx
400
- ; X86-NEXT: vpxor %xmm0, %xmm0, %xmm0
401
- ; X86-NEXT: vmovd %edx, %xmm1
402
- ; X86-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[0,0,1,1]
403
- ; X86-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5,6,7]
404
- ; X86-NEXT: vbroadcastss {{.*#+}} xmm2 = [7,7,7,7]
405
- ; X86-NEXT: vpand %xmm2, %xmm0, %xmm0
407
+ ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
408
+ ; X86-NEXT: movl (%esi), %esi
409
+ ; X86-NEXT: andl $15, %esi
410
+ ; X86-NEXT: vmovd %esi, %xmm0
411
+ ; X86-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
412
+ ; X86-NEXT: vpxor %xmm1, %xmm1, %xmm1
413
+ ; X86-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7]
414
+ ; X86-NEXT: vbroadcastss {{.*#+}} xmm1 = [7,7,7,7]
415
+ ; X86-NEXT: vpand %xmm1, %xmm0, %xmm0
416
+ ; X86-NEXT: vbroadcastss (%edx), %xmm2
406
417
; X86-NEXT: vmovdqa %xmm0, (%ecx)
407
- ; X86-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[0,1,0,1]
408
- ; X86-NEXT: vpand %xmm2, %xmm0, %xmm0
418
+ ; X86-NEXT: vpand %xmm1, %xmm2, %xmm0
409
419
; X86-NEXT: vmovdqa %xmm0, (%eax)
420
+ ; X86-NEXT: popl %esi
410
421
; X86-NEXT: retl
411
422
;
412
423
; X64-LABEL: freeze_two_buildvectors_only_one_frozen:
413
424
; X64: # %bb.0:
414
- ; X64-NEXT: movl (%rdi), %eax
415
- ; X64-NEXT: andl $15, %eax
416
- ; X64-NEXT: vpxor %xmm0, %xmm0, %xmm0
417
- ; X64-NEXT: vmovd %eax, %xmm1
418
- ; X64-NEXT: vpbroadcastd %xmm1, %xmm1
419
- ; X64-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
420
- ; X64-NEXT: vpbroadcastd {{.*#+}} xmm2 = [7,7,7,7]
421
- ; X64-NEXT: vpand %xmm2, %xmm0, %xmm0
422
- ; X64-NEXT: vmovdqa %xmm0, (%rdx)
423
- ; X64-NEXT: vpand %xmm2, %xmm1, %xmm0
424
- ; X64-NEXT: vmovdqa %xmm0, (%rcx)
425
+ ; X64-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
426
+ ; X64-NEXT: vbroadcastss %xmm0, %xmm0
427
+ ; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1
428
+ ; X64-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
429
+ ; X64-NEXT: vbroadcastss {{.*#+}} xmm1 = [7,7,7,7]
430
+ ; X64-NEXT: vandps %xmm1, %xmm0, %xmm0
431
+ ; X64-NEXT: vbroadcastss (%rsi), %xmm2
432
+ ; X64-NEXT: vmovaps %xmm0, (%rdx)
433
+ ; X64-NEXT: vandps %xmm1, %xmm2, %xmm0
434
+ ; X64-NEXT: vmovaps %xmm0, (%rcx)
425
435
; X64-NEXT: retq
426
436
%i0.src = load i32 , ptr %origin0
427
437
%i0 = and i32 %i0.src , 15
428
438
%i1.src = load i32 , ptr %origin1
429
- %i1 = and i32 %i0 .src , 15
439
+ %i1 = and i32 %i1 .src , 15
430
440
%i2 = insertelement <4 x i32 > poison, i32 %i0 , i64 1
431
441
%i3 = and <4 x i32 > %i2 , <i32 7 , i32 7 , i32 7 , i32 7 >
432
442
%i4 = freeze <4 x i32 > %i3
@@ -440,34 +450,40 @@ define void @freeze_two_buildvectors_only_one_frozen(ptr %origin0, ptr %origin1,
440
450
define void @freeze_two_buildvectors_one_undef_elt (ptr %origin0 , ptr %origin1 , ptr %dst0 , ptr %dst1 ) nounwind {
441
451
; X86-LABEL: freeze_two_buildvectors_one_undef_elt:
442
452
; X86: # %bb.0:
453
+ ; X86-NEXT: pushl %esi
443
454
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
444
455
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
445
456
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
446
- ; X86-NEXT: movl (%edx), %edx
447
- ; X86-NEXT: andl $15, %edx
448
- ; X86-NEXT: vmovddup {{.*#+}} xmm0 = [7,0,7,0]
449
- ; X86-NEXT: # xmm0 = mem[0,0]
450
- ; X86-NEXT: vmovd %edx, %xmm1
451
- ; X86-NEXT: vpand %xmm0, %xmm1, %xmm2
452
- ; X86-NEXT: vmovdqa %xmm2, (%ecx)
453
- ; X86-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7]
454
- ; X86-NEXT: vpand %xmm0, %xmm1, %xmm0
457
+ ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
458
+ ; X86-NEXT: movl (%esi), %esi
459
+ ; X86-NEXT: andl $15, %esi
460
+ ; X86-NEXT: vmovd %esi, %xmm0
461
+ ; X86-NEXT: vmovddup {{.*#+}} xmm1 = [7,0,7,0]
462
+ ; X86-NEXT: # xmm1 = mem[0,0]
463
+ ; X86-NEXT: vpand %xmm1, %xmm0, %xmm0
464
+ ; X86-NEXT: vmovddup {{.*#+}} xmm2 = mem[0,0]
465
+ ; X86-NEXT: vmovdqa %xmm0, (%ecx)
466
+ ; X86-NEXT: vpand %xmm1, %xmm2, %xmm0
455
467
; X86-NEXT: vmovdqa %xmm0, (%eax)
468
+ ; X86-NEXT: popl %esi
456
469
; X86-NEXT: retl
457
470
;
458
471
; X64-LABEL: freeze_two_buildvectors_one_undef_elt:
459
472
; X64: # %bb.0:
460
473
; X64-NEXT: movq (%rdi), %rax
474
+ ; X64-NEXT: andl $15, %eax
461
475
; X64-NEXT: vmovd %eax, %xmm0
462
- ; X64-NEXT: vpbroadcastd %xmm0, %xmm0
463
- ; X64-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
476
+ ; X64-NEXT: vpmovsxbq {{.*#+}} xmm1 = [7,7]
477
+ ; X64-NEXT: vpand %xmm1, %xmm0, %xmm0
478
+ ; X64-NEXT: vpbroadcastq (%rsi), %xmm2
464
479
; X64-NEXT: vmovdqa %xmm0, (%rdx)
480
+ ; X64-NEXT: vpand %xmm1, %xmm2, %xmm0
465
481
; X64-NEXT: vmovdqa %xmm0, (%rcx)
466
482
; X64-NEXT: retq
467
483
%i0.src = load i64 , ptr %origin0
468
484
%i0 = and i64 %i0.src , 15
469
485
%i1.src = load i64 , ptr %origin1
470
- %i1 = and i64 %i0 .src , 15
486
+ %i1 = and i64 %i1 .src , 15
471
487
%i2 = insertelement <2 x i64 > poison, i64 %i0 , i64 0
472
488
%i3 = and <2 x i64 > %i2 , <i64 7 , i64 7 >
473
489
%i4 = freeze <2 x i64 > %i3
0 commit comments