Skip to content

Commit

Permalink
Add initial f16 and f128 support to the aarch64 backend
Browse files Browse the repository at this point in the history
  • Loading branch information
beetrees committed Aug 4, 2024
1 parent 25fcf41 commit 6473711
Show file tree
Hide file tree
Showing 27 changed files with 890 additions and 37 deletions.
6 changes: 6 additions & 0 deletions cranelift/codegen/meta/src/isa/arm64.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,12 @@ pub(crate) fn define() -> TargetIsa {
"",
false,
);
settings.add_bool(
"has_fp16",
"Use half-precision floating point (FEAT_FP16) instructions.",
"",
false,
);
settings.add_bool(
"sign_return_address_all",
"If function return address signing is enabled, then apply it to all \
Expand Down
9 changes: 8 additions & 1 deletion cranelift/codegen/src/isa/aarch64/abi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ impl ABIMachineSpec for AArch64MachineDeps {

fn compute_arg_locs(
call_conv: isa::CallConv,
_flags: &settings::Flags,
flags: &settings::Flags,
params: &[ir::AbiParam],
args_or_rets: ArgsOrRets,
add_ret_area_ptr: bool,
Expand Down Expand Up @@ -161,6 +161,13 @@ impl ABIMachineSpec for AArch64MachineDeps {
param.value_type
);

if is_apple_cc && param.value_type == types::F128 && !flags.enable_llvm_abi_extensions()
{
panic!(
"f128 args/return values not supported for apple_aarch64 unless LLVM ABI extensions are enabled"
);
}

let (rcs, reg_types) = Inst::rc_for_type(param.value_type)?;

if matches!(
Expand Down
75 changes: 71 additions & 4 deletions cranelift/codegen/src/isa/aarch64/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -407,6 +407,18 @@
(rn Reg)
(rm Reg))

;; Floating-point load, half-precision (16 bit).
(FpuLoad16
(rd WritableReg)
(mem AMode)
(flags MemFlags))

;; Floating-point store, half-precision (16 bit).
(FpuStore16
(rd Reg)
(mem AMode)
(flags MemFlags))

;; Floating-point load, single-precision (32 bit).
(FpuLoad32
(rd WritableReg)
Expand Down Expand Up @@ -483,6 +495,13 @@
(rd WritableReg)
(rn Reg))

;; FP conditional select, 16 bit.
(FpuCSel16
(rd WritableReg)
(rn Reg)
(rm Reg)
(cond Cond))

;; FP conditional select, 32 bit.
(FpuCSel32
(rd WritableReg)
Expand Down Expand Up @@ -1701,6 +1720,9 @@
(decl use_lse () Inst)
(extern extractor use_lse use_lse)

;; Pure constructor returning a `bool`. Rules test it with
;; `(if-let $true (use_fp16))` / `(if-let $false (use_fp16))` to choose between
;; a native half-precision lowering and a wider fallback. The implementation is
;; external, bound under the same name.
;; NOTE(review): presumably reflects the `has_fp16` ISA setting (FEAT_FP16) --
;; confirm against the extern implementation.
(decl pure use_fp16 () bool)
(extern constructor use_fp16 use_fp16)

;; Extractor helpers for various immediate constants ;;;;;;;;;;;;;;;;;;;;;;;;;;

(decl pure partial move_wide_const_from_u64 (Type u64) MoveWideConst)
Expand Down Expand Up @@ -2221,9 +2243,19 @@
(_ Unit (emit (MInst.VecRRLong op dst src high_half))))
dst))

;; Helper for emitting `MInst.FpuCSel32` / `MInst.FpuCSel64`
;; Helper for emitting `MInst.FpuCSel16` / `MInst.FpuCSel32` / `MInst.FpuCSel64`
;; instructions.
(decl fpu_csel (Type Cond Reg Reg) ConsumesFlags)
;; Default `$F16` case (no explicit priority): delegate to the `$F32` rule of
;; this helper with the same condition and operands.
;; NOTE(review): presumably sound because a 32-bit select also carries the low
;; 16 bits of the chosen register -- confirm.
(rule (fpu_csel $F16 cond if_true if_false)
(fpu_csel $F32 cond if_true if_false))

;; Priority 1, so this wins over the fallback above whenever `(use_fp16)` is
;; `$true`: select with `MInst.FpuCSel16` into a fresh `$F16` temporary.
(rule 1 (fpu_csel $F16 cond if_true if_false)
(if-let $true (use_fp16))
(let ((dst WritableReg (temp_writable_reg $F16)))
(ConsumesFlags.ConsumesFlagsReturnsReg
(MInst.FpuCSel16 dst if_true if_false cond)
dst)))

(rule (fpu_csel $F32 cond if_true if_false)
(let ((dst WritableReg (temp_writable_reg $F32)))
(ConsumesFlags.ConsumesFlagsReturnsReg
Expand Down Expand Up @@ -2268,6 +2300,9 @@
(let ((dst WritableReg (temp_writable_reg $I8X16))
(_ Unit (emit (MInst.MovToFpu dst x size))))
dst))
;; When `(use_fp16)` is `$false`, a 16-bit move is redirected to the 32-bit
;; form of this helper instead of emitting a `Size16` `MInst.MovToFpu`.
(rule 1 (mov_to_fpu x (ScalarSize.Size16))
(if-let $false (use_fp16))
(mov_to_fpu x (ScalarSize.Size32)))

;; Helper for emitting `MInst.FpuMoveFPImm` instructions.
(decl fpu_move_fp_imm (ASIMDFPModImm ScalarSize) Reg)
Expand Down Expand Up @@ -2849,6 +2884,11 @@
(let ((dst WritableReg (temp_writable_reg $I64))
(_ Unit (emit (MInst.ULoad64 dst amode flags))))
dst))
;; Helper for emitting `MInst.FpuLoad16`: loads from `amode` (with `flags`) into
;; a fresh temporary and returns that register. The temporary is allocated with
;; type `$F64`, the same type `aarch64_fpuload32` uses for its destination.
(decl aarch64_fpuload16 (AMode MemFlags) Reg)
(rule (aarch64_fpuload16 amode flags)
(let ((dst WritableReg (temp_writable_reg $F64))
(_ Unit (emit (MInst.FpuLoad16 dst amode flags))))
dst))
(decl aarch64_fpuload32 (AMode MemFlags) Reg)
(rule (aarch64_fpuload32 amode flags)
(let ((dst WritableReg (temp_writable_reg $F64))
Expand Down Expand Up @@ -2885,6 +2925,9 @@
(decl aarch64_store64 (AMode MemFlags Reg) SideEffectNoResult)
(rule (aarch64_store64 amode flags val)
(SideEffectNoResult.Inst (MInst.Store64 val amode flags)))
;; Helper for building a `MInst.FpuStore16` that stores `val` to `amode` with
;; `flags`. The instruction is returned wrapped in `SideEffectNoResult.Inst`
;; rather than emitted by this rule.
(decl aarch64_fpustore16 (AMode MemFlags Reg) SideEffectNoResult)
(rule (aarch64_fpustore16 amode flags val)
(SideEffectNoResult.Inst (MInst.FpuStore16 val amode flags)))
(decl aarch64_fpustore32 (AMode MemFlags Reg) SideEffectNoResult)
(rule (aarch64_fpustore32 amode flags val)
(SideEffectNoResult.Inst (MInst.FpuStore32 val amode flags)))
Expand Down Expand Up @@ -3229,19 +3272,41 @@
(rule 1 (add_imm_to_addr val (imm12_from_u64 imm)) (add_imm $I64 val imm))
(rule 0 (add_imm_to_addr val offset) (add $I64 val (imm $I64 (ImmExtend.Zero) offset)))

;; Lower a constant f16.
;;
;; Note that we must make sure that all bits outside the lowest 16 are set to 0
;; because this function is also used to load wider constants (that have zeros
;; in their most significant bits).
;;
;; The rules below are tried from highest to lowest priority.
(decl constant_f16 (u16) Reg)
;; Priority 3: when `(use_fp16)` is `$false`, hand the bits to `constant_f32`.
(rule 3 (constant_f16 n)
(if-let $false (use_fp16))
(constant_f32 n))
;; Priority 2: the all-zero bit pattern is built with `vec_dup_imm` from the
;; zero modified-immediate, using the same operands as the zero rule of
;; `constant_f32`.
(rule 2 (constant_f16 0)
(vec_dup_imm (asimd_mov_mod_imm_zero (ScalarSize.Size32))
$false
(VectorSize.Size32x2)))
;; Priority 1: if `asimd_fp_mod_imm_from_u64` accepts the bits as a `Size16`
;; immediate, use `fpu_move_fp_imm`.
(rule 1 (constant_f16 n)
(if-let imm (asimd_fp_mod_imm_from_u64 n (ScalarSize.Size16)))
(fpu_move_fp_imm imm (ScalarSize.Size16)))
;; Fallback: materialize the bits as a zero-extended `$I16` integer and move it
;; over with a 16-bit `mov_to_fpu`.
(rule (constant_f16 n)
(mov_to_fpu (imm $I16 (ImmExtend.Zero) n) (ScalarSize.Size16)))

;; Lower a constant f32.
;;
;; Note that we must make sure that all bits outside the lowest 32 are set to 0
;; because this function is also used to load wider constants (that have zeros
;; in their most significant bits).
(decl constant_f32 (u32) Reg)
(rule 2 (constant_f32 0)
(rule 3 (constant_f32 0)
(vec_dup_imm (asimd_mov_mod_imm_zero (ScalarSize.Size32))
$false
(VectorSize.Size32x2)))
(rule 1 (constant_f32 n)
(rule 2 (constant_f32 n)
(if-let imm (asimd_fp_mod_imm_from_u64 n (ScalarSize.Size32)))
(fpu_move_fp_imm imm (ScalarSize.Size32)))
;; Priority 1: when `(use_fp16)` is `$true` and the argument matches
;; `u32_as_u16`, reuse `constant_f16` for the narrowed value.
;; NOTE(review): presumably `u32_as_u16` only matches values that fit in 16
;; bits, so the upper bits are known to be zero -- confirm.
(rule 1 (constant_f32 (u32_as_u16 n))
(if-let $true (use_fp16))
(constant_f16 n))
(rule (constant_f32 n)
(mov_to_fpu (imm $I32 (ImmExtend.Zero) n) (ScalarSize.Size32)))

Expand Down Expand Up @@ -4063,8 +4128,10 @@

;; Helpers for generating select instruction sequences.
(decl lower_select (ProducesFlags Cond Type Value Value) ValueRegs)
(rule 2 (lower_select flags cond (ty_scalar_float ty) rn rm)
(rule 2 (lower_select flags cond (ty_scalar_float (fits_in_64 ty)) rn rm)
(with_flags flags (fpu_csel ty cond rn rm)))
;; `$F128` values are selected with `vec_csel`, the same helper the `ty_vec128`
;; rule of `lower_select` uses. Priority 4 ranks this rule above both the
;; scalar-float rule (2) and the `ty_vec128` rule (3).
(rule 4 (lower_select flags cond $F128 rn rm)
(with_flags flags (vec_csel cond rn rm)))
(rule 3 (lower_select flags cond (ty_vec128 ty) rn rm)
(with_flags flags (vec_csel cond rn rm)))
(rule (lower_select flags cond ty rn rm)
Expand Down
15 changes: 9 additions & 6 deletions cranelift/codegen/src/isa/aarch64/inst/emit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -958,6 +958,7 @@ impl MachInstEmit for Inst {
| &Inst::ULoad64 {
rd, ref mem, flags, ..
}
| &Inst::FpuLoad16 { rd, ref mem, flags }
| &Inst::FpuLoad32 { rd, ref mem, flags }
| &Inst::FpuLoad64 { rd, ref mem, flags }
| &Inst::FpuLoad128 { rd, ref mem, flags } => {
Expand All @@ -983,6 +984,7 @@ impl MachInstEmit for Inst {
Inst::ULoad32 { .. } => 0b1011100001,
Inst::SLoad32 { .. } => 0b1011100010,
Inst::ULoad64 { .. } => 0b1111100001,
Inst::FpuLoad16 { .. } => 0b0111110001,
Inst::FpuLoad32 { .. } => 0b1011110001,
Inst::FpuLoad64 { .. } => 0b1111110001,
Inst::FpuLoad128 { .. } => 0b0011110011,
Expand Down Expand Up @@ -1098,6 +1100,7 @@ impl MachInstEmit for Inst {
| &Inst::Store16 { rd, ref mem, flags }
| &Inst::Store32 { rd, ref mem, flags }
| &Inst::Store64 { rd, ref mem, flags }
| &Inst::FpuStore16 { rd, ref mem, flags }
| &Inst::FpuStore32 { rd, ref mem, flags }
| &Inst::FpuStore64 { rd, ref mem, flags }
| &Inst::FpuStore128 { rd, ref mem, flags } => {
Expand All @@ -1114,6 +1117,7 @@ impl MachInstEmit for Inst {
Inst::Store16 { .. } => 0b0111100000,
Inst::Store32 { .. } => 0b1011100000,
Inst::Store64 { .. } => 0b1111100000,
Inst::FpuStore16 { .. } => 0b0111110000,
Inst::FpuStore32 { .. } => 0b1011110000,
Inst::FpuStore64 { .. } => 0b1111110000,
Inst::FpuStore128 { .. } => 0b0011110010,
Expand Down Expand Up @@ -2218,6 +2222,9 @@ impl MachInstEmit for Inst {
};
sink.put4(enc_inttofpu(top16, rd, rn));
}
&Inst::FpuCSel16 { rd, rn, rm, cond } => {
sink.put4(enc_fcsel(rd, rn, rm, cond, ScalarSize::Size16));
}
&Inst::FpuCSel32 { rd, rn, rm, cond } => {
sink.put4(enc_fcsel(rd, rn, rm, cond, ScalarSize::Size32));
}
Expand All @@ -2239,21 +2246,17 @@ impl MachInstEmit for Inst {
}
&Inst::MovToFpu { rd, rn, size } => {
let template = match size {
ScalarSize::Size16 => 0b000_11110_11_1_00_111_000000_00000_00000,
ScalarSize::Size32 => 0b000_11110_00_1_00_111_000000_00000_00000,
ScalarSize::Size64 => 0b100_11110_01_1_00_111_000000_00000_00000,
_ => unreachable!(),
};
sink.put4(template | (machreg_to_gpr(rn) << 5) | machreg_to_vec(rd.to_reg()));
}
&Inst::FpuMoveFPImm { rd, imm, size } => {
let size_code = match size {
ScalarSize::Size32 => 0b00,
ScalarSize::Size64 => 0b01,
_ => unimplemented!(),
};
sink.put4(
0b000_11110_00_1_00_000_000100_00000_00000
| size_code << 22
| size.ftype() << 22
| ((imm.enc_bits() as u32) << 13)
| machreg_to_vec(rd.to_reg()),
);
Expand Down
37 changes: 37 additions & 0 deletions cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6699,6 +6699,19 @@ fn test_aarch64_binemit() {
"fcmp d23, d24",
));

insns.push((
Inst::FpuLoad16 {
rd: writable_vreg(16),
mem: AMode::RegScaled {
rn: xreg(8),
rm: xreg(9),
},
flags: MemFlags::trusted(),
},
"1079697C",
"ldr h16, [x8, x9, LSL #1]",
));

insns.push((
Inst::FpuLoad32 {
rd: writable_vreg(16),
Expand Down Expand Up @@ -6774,6 +6787,19 @@ fn test_aarch64_binemit() {
"ldr q16, pc+8",
));

insns.push((
Inst::FpuStore16 {
rd: vreg(16),
mem: AMode::RegScaled {
rn: xreg(8),
rm: xreg(9),
},
flags: MemFlags::trusted(),
},
"1079297C",
"str h16, [x8, x9, LSL #1]",
));

insns.push((
Inst::FpuStore32 {
rd: vreg(16),
Expand Down Expand Up @@ -6973,6 +6999,17 @@ fn test_aarch64_binemit() {
"stp q18, q22, [sp], #304",
));

insns.push((
Inst::FpuCSel16 {
rd: writable_vreg(1),
rn: vreg(2),
rm: vreg(3),
cond: Cond::Hi,
},
"418CE31E",
"fcsel h1, h2, h3, hi",
));

insns.push((
Inst::FpuCSel32 {
rd: writable_vreg(1),
Expand Down
Loading

0 comments on commit 6473711

Please sign in to comment.