Skip to content

Conversation

@anthonycanino
Copy link
Owner

@anthonycanino anthonycanino commented Nov 17, 2025

C# code

// Small set of static methods that convert float inputs to System.Half, pass Half
// values between methods as arguments and return values, and apply the Half
// arithmetic operators (+, -, *, /).
// Every method carries MethodImplOptions.NoInlining, which tells the JIT not to
// inline it, so each method is compiled on its own and calls between them remain calls.
public class Program
{
    /// <summary>
    /// Narrows both floats to <see cref="Half"/> and forwards them to <see cref="ConsumeHalf"/>.
    /// </summary>
    /// <param name="val">First input; explicitly cast to Half.</param>
    /// <param name="val2">Second input; explicitly cast to Half.</param>
    /// <returns>Whatever <see cref="ConsumeHalf"/> returns for the two converted values.</returns>
    [MethodImpl(MethodImplOptions.NoInlining)]
	public static Half ProduceHalf(float val, float val2)
	{
		Half h1 = (Half)val;
		Half h2 = (Half)val2;
		// Both Half values are handed to the callee as by-value Half arguments.
		return ConsumeHalf(h1, h2);
	}

	/// <summary>
	/// Adds two <see cref="Half"/> arguments with the Half <c>+</c> operator.
	/// </summary>
	/// <param name="h1">Left operand.</param>
	/// <param name="h2">Right operand.</param>
	/// <returns><c>h1 + h2</c> as a Half.</returns>
	[MethodImpl(MethodImplOptions.NoInlining)]
	public static Half ConsumeHalf(Half h1, Half h2)
	{
		Half h3 = h1 + h2;
		return h3;
	}

	/// <summary>
	/// Four-argument variant of <see cref="ProduceHalf"/>: builds four Half values from the
	/// two floats and forwards them to <see cref="ConsumeHalf2"/>.
	/// </summary>
	/// <param name="val">First input; used as-is for h1 and offset by 3.0f for h3.</param>
	/// <param name="val2">Second input; used as-is for h2 and offset by 4.0f for h4.</param>
	/// <returns>Whatever <see cref="ConsumeHalf2"/> returns for the four converted values.</returns>
	[MethodImpl(MethodImplOptions.NoInlining)]
	public static Half ProduceHalf2(float val, float val2)
	{
		Half h1 = (Half)val;
		Half h2 = (Half)val2;
		// The additions below are done in float; only the sum is narrowed to Half.
		Half h3 = (Half)(val + 3.0f);
		Half h4 = (Half)(val2 + 4.0f);
		return ConsumeHalf2(h1, h2, h3, h4);
	}

	/// <summary>
	/// Combines four <see cref="Half"/> arguments as <c>((h1 + h2) - h3) + h4</c>.
	/// </summary>
	/// <param name="h1">First addend.</param>
	/// <param name="h2">Second addend.</param>
	/// <param name="h3">Value subtracted from the first sum.</param>
	/// <param name="h4">Value added last.</param>
	/// <returns>The combined result as a Half.</returns>
	[MethodImpl(MethodImplOptions.NoInlining)]
	public static Half ConsumeHalf2(Half h1, Half h2, Half h3, Half h4)
	{
		// Each step is stored in a Half local, so every intermediate result is itself a Half.
		Half ch1 = h1 + h2;
		Half ch2 = ch1 - h3;
		Half ch3 = ch2 + h4;
		return ch3;
	}

    /// <summary>
    /// Applies all four Half arithmetic operators to the same pair of operands and returns
    /// the sum of the four results widened back to float.
    /// </summary>
    /// <param name="f1">Left operand; explicitly cast to Half first.</param>
    /// <param name="f2">Right operand (also the divisor); explicitly cast to Half first.</param>
    /// <returns><c>(h1 + h2) + (h1 - h2) + (h1 * h2) + (h1 / h2)</c>, evaluated left to right on Half values, cast to float.</returns>
    [MethodImpl(MethodImplOptions.NoInlining)]
	public static float TestHalfOperators(float f1, float f2)
	{
		Half h1 = (Half)f1;
		Half h2 = (Half)f2;

		Half h3 = h1 + h2;
		Half h4 = h1 - h2;
		Half h5 = h1 * h2;
		// No guard on h2 here; the division is performed for whatever value f2 converts to.
		Half h6 = h1 / h2;


		// Fold the four operator results into a single value so that each of them
		// contributes to what the method returns.
		Half h7 = h3 + h4 + h5 + h6;
		return (float)h7;	
	}

 
	/// <summary>
	/// Computes <c>h1 + h2</c> twice, once inline and once through a call to
	/// <see cref="ConsumeHalf"/>, and returns the Half sum of the two results.
	/// </summary>
	/// <param name="f1">First input; explicitly cast to Half.</param>
	/// <param name="f2">Second input; explicitly cast to Half.</param>
	/// <returns>The inline sum plus the Half returned by <see cref="ConsumeHalf"/>.</returns>
	[MethodImpl(MethodImplOptions.NoInlining)]
	public static Half TestReturnHalf(float f1, float f2)
    {
		Half h1 = (Half)f1;
		Half h2 = (Half)f2;
		Half h3 = h1 + h2;
		// h1 and h2 are read again after this call (via h3), and the call's Half
		// return value is consumed by the addition below.
		Half h4 = ConsumeHalf(h1, h2);
		return h3 + h4;
    }
}

Generated asm

; Assembly listing for method Program:ProduceHalf(float,float):System.Half (FullOpts)
; Emitting BLENDED_CODE for generic X64 + VEX + EVEX on Windows
; FullOpts code
; optimized code
; rsp based frame
; partially interruptible
; No PGO data
; invoked as altjit
; Final local variable assignments
;
;  V00 arg0         [V00,T00] (  3,  3   )   float  ->  mm0         single-def
;  V01 arg1         [V01,T01] (  3,  3   )   float  ->  mm1         single-def
;* V02 loc0         [V02    ] (  0,  0   )    half  ->  zero-ref   
;  V03 OutArgs      [V03    ] (  1,  1   )  struct (32) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace" <UNNAMED>
;
; Lcl frame size = 40

G_M43922_IG01:  ;; offset=0x0000
       4883EC28             sub      rsp, 40
						;; size=4 bbWeight=1 PerfScore 0.25
G_M43922_IG02:  ;; offset=0x0004
       C5E857D2             vxorps   xmm2, xmm2, xmm2
       62F56C081DC0         vcvtss2sh xmm0, xmm2, xmm0
       62F56C081DC9         vcvtss2sh xmm1, xmm2, xmm1
       FF1556BB7E00         call     [Program:ConsumeHalf(System.Half,System.Half):System.Half]
       90                   nop      
						;; size=23 bbWeight=1 PerfScore 5.58
G_M43922_IG03:  ;; offset=0x001B
       4883C428             add      rsp, 40
       C3                   ret      
						;; size=5 bbWeight=1 PerfScore 1.25

; Total bytes of code 32, prolog size 4, PerfScore 7.08, instruction count 8, allocated bytes for code 32 (MethodHash=f2a4546d) for method Program:ProduceHalf(float,float):System.Half (FullOpts)
; ============================================================

; Assembly listing for method Program:ConsumeHalf(System.Half,System.Half):System.Half (FullOpts)
; Emitting BLENDED_CODE for generic X64 + VEX + EVEX on Windows
; FullOpts code
; optimized code
; rsp based frame
; partially interruptible
; No PGO data
; invoked as altjit
; Final local variable assignments
;
;  V00 arg0         [V00,T00] (  3,  3   )    half  ->  mm0         single-def
;  V01 arg1         [V01,T01] (  3,  3   )    half  ->  mm1         single-def
;# V02 OutArgs      [V02    ] (  1,  1   )  struct ( 0) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace" <Empty>
;
; Lcl frame size = 0

G_M44932_IG01:  ;; offset=0x0000
						;; size=0 bbWeight=1 PerfScore 0.00
G_M44932_IG02:  ;; offset=0x0000
       62F57E0858C1         vaddsh   xmm0, xmm0, xmm1
						;; size=6 bbWeight=1 PerfScore 1.00
G_M44932_IG03:  ;; offset=0x0006
       C3                   ret      
						;; size=1 bbWeight=1 PerfScore 1.00

; Total bytes of code 7, prolog size 0, PerfScore 2.00, instruction count 2, allocated bytes for code 7 (MethodHash=5608507b) for method Program:ConsumeHalf(System.Half,System.Half):System.Half (FullOpts)
; ============================================================

85
; Assembly listing for method Program:ProduceHalf2(float,float):System.Half (FullOpts)
; Emitting BLENDED_CODE for generic X64 + VEX + EVEX on Windows
; FullOpts code
; optimized code
; rsp based frame
; partially interruptible
; No PGO data
; invoked as altjit
; Final local variable assignments
;
;  V00 arg0         [V00,T00] (  4,  4   )   float  ->  mm0         single-def
;  V01 arg1         [V01,T01] (  4,  4   )   float  ->  mm1         single-def
;* V02 loc0         [V02    ] (  0,  0   )    half  ->  zero-ref   
;* V03 loc1         [V03    ] (  0,  0   )    half  ->  zero-ref   
;* V04 loc2         [V04    ] (  0,  0   )    half  ->  zero-ref   
;  V05 OutArgs      [V05    ] (  1,  1   )  struct (32) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace" <UNNAMED>
;
; Lcl frame size = 40

G_M25248_IG01:  ;; offset=0x0000
       4883EC28             sub      rsp, 40
						;; size=4 bbWeight=1 PerfScore 0.25
G_M25248_IG02:  ;; offset=0x0004
       C5FA581534000000     vaddss   xmm2, xmm0, dword ptr [reloc @RWD00]
       C5E057DB             vxorps   xmm3, xmm3, xmm3
       62F564081DD2         vcvtss2sh xmm2, xmm3, xmm2
       C5F2581D26000000     vaddss   xmm3, xmm1, dword ptr [reloc @RWD04]
       C5D857E4             vxorps   xmm4, xmm4, xmm4
       62F55C081DDB         vcvtss2sh xmm3, xmm4, xmm3
       62F55C081DC0         vcvtss2sh xmm0, xmm4, xmm0
       62F55C081DC9         vcvtss2sh xmm1, xmm4, xmm1
       FF156E297F00         call     [Program:ConsumeHalf2(System.Half,System.Half,System.Half,System.Half):System.Half]
       90                   nop      
						;; size=55 bbWeight=1 PerfScore 17.92
G_M25248_IG03:  ;; offset=0x003B
       4883C428             add      rsp, 40
       C3                   ret      
						;; size=5 bbWeight=1 PerfScore 1.25
RWD00  	dd	40400000h		;         3
RWD04  	dd	40800000h		;         4


; Total bytes of code 64, prolog size 4, PerfScore 19.42, instruction count 13, allocated bytes for code 64 (MethodHash=1ab79d5f) for method Program:ProduceHalf2(float,float):System.Half (FullOpts)
; ============================================================

; Assembly listing for method Program:ConsumeHalf2(System.Half,System.Half,System.Half,System.Half):System.Half (FullOpts)
; Emitting BLENDED_CODE for generic X64 + VEX + EVEX on Windows
; FullOpts code
; optimized code
; rsp based frame
; partially interruptible
; No PGO data
; invoked as altjit
; Final local variable assignments
;
;  V00 arg0         [V00,T00] (  3,  3   )    half  ->  mm0         single-def
;  V01 arg1         [V01,T01] (  3,  3   )    half  ->  mm1         single-def
;  V02 arg2         [V02,T02] (  3,  3   )    half  ->  mm2         single-def
;  V03 arg3         [V03,T03] (  3,  3   )    half  ->  mm3         single-def
;# V04 OutArgs      [V04    ] (  1,  1   )  struct ( 0) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace" <Empty>
;
; Lcl frame size = 0

G_M21238_IG01:  ;; offset=0x0000
						;; size=0 bbWeight=1 PerfScore 0.00
G_M21238_IG02:  ;; offset=0x0000
       62F57E0858C1         vaddsh   xmm0, xmm0, xmm1
       62F57E085CC2         vsubsh   xmm0, xmm0, xmm2
       62F57E0858C3         vaddsh   xmm0, xmm0, xmm3
						;; size=18 bbWeight=1 PerfScore 3.00
G_M21238_IG03:  ;; offset=0x0012
       C3                   ret      
						;; size=1 bbWeight=1 PerfScore 1.00

; Total bytes of code 19, prolog size 0, PerfScore 4.00, instruction count 4, allocated bytes for code 19 (MethodHash=8373ad09) for method Program:ConsumeHalf2(System.Half,System.Half,System.Half,System.Half):System.Half (FullOpts)
; ============================================================

87
; Assembly listing for method Program:TestHalfOperators(float,float):float (FullOpts)
; Emitting BLENDED_CODE for generic X64 + VEX + EVEX on Windows
; FullOpts code
; optimized code
; rsp based frame
; partially interruptible
; No PGO data
; invoked as altjit
; Final local variable assignments
;
;  V00 arg0         [V00,T02] (  3,  3   )   float  ->  mm0         single-def
;  V01 arg1         [V01,T03] (  3,  3   )   float  ->  mm1         single-def
;  V02 loc0         [V02,T01] (  5,  5   )    half  ->  mm1         single-def
;* V03 loc1         [V03    ] (  0,  0   )    half  ->  zero-ref   
;* V04 loc2         [V04    ] (  0,  0   )    half  ->  zero-ref   
;* V05 loc3         [V05    ] (  0,  0   )    half  ->  zero-ref   
;* V06 loc4         [V06    ] (  0,  0   )    half  ->  zero-ref   
;# V07 OutArgs      [V07    ] (  1,  1   )  struct ( 0) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace" <Empty>
;  V08 tmp1         [V08,T00] (  5, 10   )    half  ->  mm0         "dup spill"
;
; Lcl frame size = 0

G_M3989_IG01:  ;; offset=0x0000
						;; size=0 bbWeight=1 PerfScore 0.00
G_M3989_IG02:  ;; offset=0x0000
       C5E857D2             vxorps   xmm2, xmm2, xmm2
       62F56C081DC9         vcvtss2sh xmm1, xmm2, xmm1
       62F56C081DC0         vcvtss2sh xmm0, xmm2, xmm0
       62F57E0858D1         vaddsh   xmm2, xmm0, xmm1
       62F57E085CD9         vsubsh   xmm3, xmm0, xmm1
       62F56E0858D3         vaddsh   xmm2, xmm2, xmm3
       62F57E0859D9         vmulsh   xmm3, xmm0, xmm1
       62F56E0858D3         vaddsh   xmm2, xmm2, xmm3
       62F57E085EC1         vdivsh   xmm0, xmm0, xmm1
       62F56E0858C0         vaddsh   xmm0, xmm2, xmm0
       C5F057C9             vxorps   xmm1, xmm1, xmm1
       62F6740813C0         vcvtsh2ss xmm0, xmm1, xmm0
						;; size=68 bbWeight=1 PerfScore 10.67
G_M3989_IG03:  ;; offset=0x0044
       C3                   ret      
						;; size=1 bbWeight=1 PerfScore 1.00

; Total bytes of code 69, prolog size 0, PerfScore 11.67, instruction count 13, allocated bytes for code 69 (MethodHash=2f60f06a) for method Program:TestHalfOperators(float,float):float (FullOpts)
; ============================================================

1891
; Assembly listing for method Program:TestReturnHalf(float,float):System.Half (FullOpts)
; Emitting BLENDED_CODE for generic X64 + VEX + EVEX on Windows
; FullOpts code
; optimized code
; rsp based frame
; partially interruptible
; No PGO data
; invoked as altjit
; Final local variable assignments
;
;  V00 arg0         [V00,T01] (  3,  3   )   float  ->  mm0         single-def
;  V01 arg1         [V01,T02] (  3,  3   )   float  ->  mm1         single-def
;  V02 loc0         [V02,T03] (  3,  3   )    half  ->  [rsp+0x2C]  spill-single-def
;* V03 loc1         [V03    ] (  0,  0   )    half  ->  zero-ref   
;* V04 loc2         [V04    ] (  0,  0   )    half  ->  zero-ref   
;  V05 OutArgs      [V05    ] (  1,  1   )  struct (32) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace" <UNNAMED>
;  V06 tmp1         [V06,T00] (  3,  6   )    half  ->  mm6         "dup spill"
;
; Lcl frame size = 72

G_M18484_IG01:  ;; offset=0x0000
       4883EC48             sub      rsp, 72
       C5F829742430         vmovaps  xmmword ptr [rsp+0x30], xmm6
						;; size=10 bbWeight=1 PerfScore 2.25
G_M18484_IG02:  ;; offset=0x000A
       C5E857D2             vxorps   xmm2, xmm2, xmm2
       62F56C081DC9         vcvtss2sh xmm1, xmm2, xmm1
       62F57E08114C2416     vmovsh   word  ptr [rsp+0x2C], xmm1
       62F56C081DF0         vcvtss2sh xmm6, xmm2, xmm0
       62F57E0810C6         vmovsh   xmm0, xmm6
       FF15B2A47E00         call     [Program:ConsumeHalf(System.Half,System.Half):System.Half]
       62F54E08584C2416     vaddsh   xmm1, xmm6, word  ptr [rsp+0x2C]
       62F5760858C0         vaddsh   xmm0, xmm1, xmm0
						;; size=50 bbWeight=1 PerfScore 8.58
G_M18484_IG03:  ;; offset=0x003C
       C5F828742430         vmovaps  xmm6, xmmword ptr [rsp+0x30]
       4883C448             add      rsp, 72
       C3                   ret      
						;; size=11 bbWeight=1 PerfScore 5.25

; Total bytes of code 71, prolog size 10, PerfScore 16.08, instruction count 13, allocated bytes for code 71 (MethodHash=be2db7cb) for method Program:TestReturnHalf(float,float):System.Half (FullOpts)
; ============================================================

84

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

None yet

Projects

None yet

Development

Successfully merging this pull request may close these issues.

2 participants