URL
https://opencores.org/ocsvn/neorv32/neorv32/trunk
Subversion Repositories neorv32
Compare Revisions
- This comparison shows the changes necessary to convert path
/neorv32/trunk/sw/example/bitmanip_test
- from Rev 68 to Rev 71
- ↔ Reverse comparison
Rev 68 → Rev 71
/main.c
3,7 → 3,7
// # ********************************************************************************************* # |
// # BSD 3-Clause License # |
// # # |
// # Copyright (c) 2021, Stephan Nolting. All rights reserved. # |
// # Copyright (c) 2022, Stephan Nolting. All rights reserved. # |
// # # |
// # Redistribution and use in source and binary forms, with or without modification, are # |
// # permitted provided that the following conditions are met: # |
51,6 → 51,14
#define BAUD_RATE (19200) |
//** Number of test cases for each instruction */ |
#define NUM_TEST_CASES (1000000) |
//** Enable Zbb tests when 1 */ |
#define ENABLE_ZBB (1) |
//** Enable Zba tests when 1 */ |
#define ENABLE_ZBA (1) |
//** Enable Zbs tests when 1 */ |
#define ENABLE_ZBS (1) |
//** Enable Zbc tests when 1 */ |
#define ENABLE_ZBC (1) |
/**@}*/ |
|
|
61,7 → 69,7
|
|
/**********************************************************************//** |
* Main function; test all available operations of the NEORV32 'Zbb' extensions |
* Main function; test all available operations of the NEORV32 'B' extension |
* using bit manipulation intrinsics and software-only reference functions (emulation). |
* |
* @note This program requires the bit-manipulation CPU extension. |
77,7 → 85,7
// capture all exceptions and give debug info via UART |
neorv32_rte_setup(); |
|
// init UART at default baud rate, no parity bits, ho hw flow control |
// init UART at default baud rate, no parity bits, no hw flow control |
neorv32_uart0_setup(BAUD_RATE, PARITY_NONE, FLOW_CONTROL_NONE); |
|
// Disable compilation by default |
91,22 → 99,23
#endif |
|
// intro |
neorv32_uart0_printf("NEORV32 Bit-Manipulation Extension Test (Zba, Zbb)\n\n"); |
neorv32_uart0_printf("<<< NEORV32 Bit-Manipulation Extension ('B') Test >>>\n\n"); |
|
// check available hardware extensions and compare with compiler flags |
neorv32_rte_check_isa(0); // silent = 0 -> show message if isa mismatch |
|
// check if Zbb extension is implemented at all |
// check if B extension is implemented at all |
if ((neorv32_cpu_csr_read(CSR_MISA) & (1<<CSR_MISA_B)) == 0) { |
neorv32_uart0_print("Error! <B> extension not synthesized!\n"); |
neorv32_uart0_print("Error! B extension not synthesized!\n"); |
return 1; |
} |
|
neorv32_uart0_printf("Starting bit-manipulation extension tests (%i test cases per instruction)...\n\n", num_tests); |
|
neorv32_uart0_printf("-----------------------------------------\n"); |
#if (ENABLE_ZBB != 0) |
neorv32_uart0_printf("--------------------------------------------\n"); |
neorv32_uart0_printf("Zbb - Basic bit-manipulation instructions\n"); |
neorv32_uart0_printf("-----------------------------------------\n"); |
neorv32_uart0_printf("--------------------------------------------\n"); |
|
// ANDN |
neorv32_uart0_printf("\nANDN:\n"); |
328,13 → 337,14
err_cnt += check_result(i, opa, 0, res_sw, res_hw); |
} |
print_report(err_cnt, num_tests); |
#endif |
|
|
|
#if (ENABLE_ZBA != 0) |
neorv32_uart0_printf("\n\n"); |
neorv32_uart0_printf("-----------------------------------------\n"); |
neorv32_uart0_printf("Zba - Address generation instructions\n"); |
neorv32_uart0_printf("-----------------------------------------\n"); |
neorv32_uart0_printf("--------------------------------------------\n"); |
neorv32_uart0_printf("Zba - Address-generation instructions\n"); |
neorv32_uart0_printf("--------------------------------------------\n"); |
|
// SH1ADD |
neorv32_uart0_printf("\nSH1ADD:\n"); |
370,16 → 380,168
err_cnt += check_result(i, opa, opb, res_sw, res_hw); |
} |
print_report(err_cnt, num_tests); |
#endif |
|
|
neorv32_uart0_printf("\nBit manipulation extension tests done.\n"); |
#if (ENABLE_ZBS != 0) |
neorv32_uart0_printf("\n\n"); |
neorv32_uart0_printf("--------------------------------------------\n"); |
neorv32_uart0_printf("Zbs - Single-bit instructions\n"); |
neorv32_uart0_printf("--------------------------------------------\n"); |
|
// BCLR |
neorv32_uart0_printf("\nBCLR:\n"); |
err_cnt = 0; |
for (i=0;i<num_tests; i++) { |
opa = xorshift32(); |
opb = xorshift32(); |
res_sw = riscv_emulate_bclr(opa, opb); |
res_hw = riscv_intrinsic_bclr(opa, opb); |
err_cnt += check_result(i, opa, opb, res_sw, res_hw); |
} |
print_report(err_cnt, num_tests); |
|
// BCLRI |
neorv32_uart0_printf("\nBCLRI (imm=20):\n"); // FIXME: static immediate |
err_cnt = 0; |
for (i=0;i<num_tests; i++) { |
opa = xorshift32(); |
res_sw = riscv_emulate_bclr(opa, 20); |
res_hw = riscv_intrinsic_bclri20(opa); |
err_cnt += check_result(i, opa, opb, res_sw, res_hw); |
} |
print_report(err_cnt, num_tests); |
|
|
|
// BEXT |
neorv32_uart0_printf("\nBEXT:\n"); |
err_cnt = 0; |
for (i=0;i<num_tests; i++) { |
opa = xorshift32(); |
opb = xorshift32(); |
res_sw = riscv_emulate_bext(opa, opb); |
res_hw = riscv_intrinsic_bext(opa, opb); |
err_cnt += check_result(i, opa, opb, res_sw, res_hw); |
} |
print_report(err_cnt, num_tests); |
|
// BEXTI |
neorv32_uart0_printf("\nBEXTI (imm=20):\n"); // FIXME: static immediate |
err_cnt = 0; |
for (i=0;i<num_tests; i++) { |
opa = xorshift32(); |
res_sw = riscv_emulate_bext(opa, 20); |
res_hw = riscv_intrinsic_bexti20(opa); |
err_cnt += check_result(i, opa, opb, res_sw, res_hw); |
} |
print_report(err_cnt, num_tests); |
|
|
|
// BINV |
neorv32_uart0_printf("\nBINV:\n"); |
err_cnt = 0; |
for (i=0;i<num_tests; i++) { |
opa = xorshift32(); |
opb = xorshift32(); |
res_sw = riscv_emulate_binv(opa, opb); |
res_hw = riscv_intrinsic_binv(opa, opb); |
err_cnt += check_result(i, opa, opb, res_sw, res_hw); |
} |
print_report(err_cnt, num_tests); |
|
// BINVI |
neorv32_uart0_printf("\nBINVI (imm=20):\n"); // FIXME: static immediate |
err_cnt = 0; |
for (i=0;i<num_tests; i++) { |
opa = xorshift32(); |
res_sw = riscv_emulate_binv(opa, 20); |
res_hw = riscv_intrinsic_binvi20(opa); |
err_cnt += check_result(i, opa, opb, res_sw, res_hw); |
} |
print_report(err_cnt, num_tests); |
|
|
|
// BSET |
neorv32_uart0_printf("\nBSET:\n"); |
err_cnt = 0; |
for (i=0;i<num_tests; i++) { |
opa = xorshift32(); |
opb = xorshift32(); |
res_sw = riscv_emulate_bset(opa, opb); |
res_hw = riscv_intrinsic_bset(opa, opb); |
err_cnt += check_result(i, opa, opb, res_sw, res_hw); |
} |
print_report(err_cnt, num_tests); |
|
// BSETI |
neorv32_uart0_printf("\nBSETI (imm=20):\n"); // FIXME: static immediate |
err_cnt = 0; |
for (i=0;i<num_tests; i++) { |
opa = xorshift32(); |
res_sw = riscv_emulate_bset(opa, 20); |
res_hw = riscv_intrinsic_bseti20(opa); |
err_cnt += check_result(i, opa, opb, res_sw, res_hw); |
} |
print_report(err_cnt, num_tests); |
#endif |
|
|
#if (ENABLE_ZBC != 0) |
neorv32_uart0_printf("\n\n"); |
neorv32_uart0_printf("--------------------------------------------\n"); |
neorv32_uart0_printf("Zbc - Carry-less multiplication instructions\n"); |
neorv32_uart0_printf("--------------------------------------------\n"); |
|
neorv32_uart0_printf("\nNOTE: The emulation functions will take quite some time to execute.\n"); |
|
// CLMUL |
neorv32_uart0_printf("\nCLMUL:\n"); |
err_cnt = 0; |
for (i=0;i<num_tests; i++) { |
opa = xorshift32(); |
opb = xorshift32(); |
res_sw = riscv_emulate_clmul(opa, opb); |
res_hw = riscv_intrinsic_clmul(opa, opb); |
err_cnt += check_result(i, opa, opb, res_sw, res_hw); |
} |
print_report(err_cnt, num_tests); |
|
// CLMULH |
neorv32_uart0_printf("\nCLMULH:\n"); |
err_cnt = 0; |
for (i=0;i<num_tests; i++) { |
opa = xorshift32(); |
opb = xorshift32(); |
res_sw = riscv_emulate_clmulh(opa, opb); |
res_hw = riscv_intrinsic_clmulh(opa, opb); |
err_cnt += check_result(i, opa, opb, res_sw, res_hw); |
} |
print_report(err_cnt, num_tests); |
|
// CLMULR |
neorv32_uart0_printf("\nCLMULR:\n"); |
err_cnt = 0; |
for (i=0;i<num_tests; i++) { |
opa = xorshift32(); |
opb = xorshift32(); |
res_sw = riscv_emulate_clmulr(opa, opb); |
res_hw = riscv_intrinsic_clmulr(opa, opb); |
err_cnt += check_result(i, opa, opb, res_sw, res_hw); |
} |
print_report(err_cnt, num_tests); |
#endif |
|
|
neorv32_uart0_printf("\n\nB extension tests completed.\n"); |
return 0; |
} |
|
|
/**********************************************************************//** |
* Pseudo-Random Number Generator (to generate test vectors). |
* Pseudo-Random Number Generator (to generate deterministic test vectors). |
* |
* @return Random data (32-bit). |
**************************************************************************/ |
435,3 → 597,17
neorv32_uart0_printf("%c[1m[FAILED]%c[0m\n", 27, 27); |
} |
} |
|
|
/**********************************************************************//** |
* "after-main" handler that is executed after the application's |
* main function returns (called by crt0.S start-up code). |
* |
* Prints a notice via UART0 when the passed return code is non-zero; |
* nothing is printed for a return code of zero. |
* |
* @param[in] return_code Value handed over by the caller; per the description above this is main's return value. |
* @return Always 0. |
**************************************************************************/ |
int __neorv32_crt0_after_main(int32_t return_code) { |
|
if (return_code) { |
neorv32_uart0_printf("\n<RTE> main function returned with exit code (%i) </RTE>\n", return_code); |
} |
|
return 0; |
} |
/neorv32_b_extension_intrinsics.h
6,7 → 6,7
// # ********************************************************************************************* # |
// # BSD 3-Clause License # |
// # # |
// # Copyright (c) 2021, Stephan Nolting. All rights reserved. # |
// # Copyright (c) 2022, Stephan Nolting. All rights reserved. # |
// # # |
// # Redistribution and use in source and binary forms, with or without modification, are # |
// # permitted provided that the following conditions are met: # |
61,21 → 61,12
/**********************************************************************//** |
* Intrinsic: Bit manipulation CLZ (count leading zeros) [B.Zbb] |
* |
* @param[in] rs1 Source operand 1 (a0). |
* @param[in] rs1 Source operand 1. |
* @return Number of leading zeros in source operand. |
**************************************************************************/ |
inline uint32_t __attribute__ ((always_inline)) riscv_intrinsic_clz(uint32_t rs1) { |
|
register uint32_t result __asm__ ("a0"); |
register uint32_t tmp_a __asm__ ("a0") = rs1; |
|
// dummy instruction to prevent GCC "constprop" optimization |
asm volatile ("" : [output] "=r" (result) : [input_i] "r" (tmp_a)); |
|
// clz a0, a0 |
CUSTOM_INSTR_R1_TYPE(0b0110000, 0b00000, a0, 0b001, a0, 0b0010011); |
|
return result; |
return CUSTOM_INSTR_R1_TYPE(0b0110000, 0b00000, rs1, 0b001, 0b0010011); |
} |
|
|
82,21 → 73,12
/**********************************************************************//** |
* Intrinsic: Bit manipulation CTZ (count trailing zeros) [B.Zbb] |
* |
* @param[in] rs1 Source operand 1 (a0). |
* @param[in] rs1 Source operand 1. |
* @return Number of trailing zeros in source operand. |
**************************************************************************/ |
inline uint32_t __attribute__ ((always_inline)) riscv_intrinsic_ctz(uint32_t rs1) { |
|
register uint32_t result __asm__ ("a0"); |
register uint32_t tmp_a __asm__ ("a0") = rs1; |
|
// dummy instruction to prevent GCC "constprop" optimization |
asm volatile ("" : [output] "=r" (result) : [input_i] "r" (tmp_a)); |
|
// ctz a0, a0 |
CUSTOM_INSTR_R1_TYPE(0b0110000, 0b00001, a0, 0b001, a0, 0b0010011); |
|
return result; |
return CUSTOM_INSTR_R1_TYPE(0b0110000, 0b00001, rs1, 0b001, 0b0010011); |
} |
|
|
103,21 → 85,12
/**********************************************************************//** |
* Intrinsic: Bit manipulation CPOP (count set bits) [B.Zbb] |
* |
* @param[in] rs1 Source operand 1 (a0). |
* @param[in] rs1 Source operand 1. |
* @return Number of set bits in source operand. |
**************************************************************************/ |
inline uint32_t __attribute__ ((always_inline)) riscv_intrinsic_cpop(uint32_t rs1) { |
|
register uint32_t result __asm__ ("a0"); |
register uint32_t tmp_a __asm__ ("a0") = rs1; |
|
// dummy instruction to prevent GCC "constprop" optimization |
asm volatile ("" : [output] "=r" (result) : [input_i] "r" (tmp_a)); |
|
// cpop a0, a0 |
CUSTOM_INSTR_R1_TYPE(0b0110000, 0b00010, a0, 0b001, a0, 0b0010011); |
|
return result; |
return CUSTOM_INSTR_R1_TYPE(0b0110000, 0b00010, rs1, 0b001, 0b0010011); |
} |
|
|
124,21 → 97,12
/**********************************************************************//** |
* Intrinsic: Bit manipulation SEXT.B (sign-extend byte) [B.Zbb] |
* |
* @param[in] rs1 Source operand 1 (a0). |
* @param[in] rs1 Source operand 1. |
* @return Sign extended byte (operand(7:0)). |
**************************************************************************/ |
inline uint32_t __attribute__ ((always_inline)) riscv_intrinsic_sextb(uint32_t rs1) { |
|
register uint32_t result __asm__ ("a0"); |
register uint32_t tmp_a __asm__ ("a0") = rs1; |
|
// dummy instruction to prevent GCC "constprop" optimization |
asm volatile ("" : [output] "=r" (result) : [input_i] "r" (tmp_a)); |
|
// sext.b a0, a0 |
CUSTOM_INSTR_R1_TYPE(0b0110000, 0b00100, a0, 0b001, a0, 0b0010011); |
|
return result; |
return CUSTOM_INSTR_R1_TYPE(0b0110000, 0b00100, rs1, 0b001, 0b0010011); |
} |
|
|
145,21 → 109,12
/**********************************************************************//** |
* Intrinsic: Bit manipulation SEXT.H (sign-extend half-word) [B.Zbb] |
* |
* @param[in] rs1 Source operand 1 (a0). |
* @param[in] rs1 Source operand 1. |
* @return Sign-extended half-word (operand(15:0)). |
**************************************************************************/ |
inline uint32_t __attribute__ ((always_inline)) riscv_intrinsic_sexth(uint32_t rs1) { |
|
register uint32_t result __asm__ ("a0"); |
register uint32_t tmp_a __asm__ ("a0") = rs1; |
|
// dummy instruction to prevent GCC "constprop" optimization |
asm volatile ("" : [output] "=r" (result) : [input_i] "r" (tmp_a)); |
|
// sext.h a0, a0 |
CUSTOM_INSTR_R1_TYPE(0b0110000, 0b00101, a0, 0b001, a0, 0b0010011); |
|
return result; |
return CUSTOM_INSTR_R1_TYPE(0b0110000, 0b00101, rs1, 0b001, 0b0010011); |
} |
|
|
166,21 → 121,12
/**********************************************************************//** |
* Intrinsic: Bit manipulation ZEXT.H (zero-extend half-word) [B.Zbb] |
* |
* @param[in] rs1 Source operand 1 (a0). |
* @param[in] rs1 Source operand 1. |
* @return Zero-extended half-word (operand(15:0)). |
**************************************************************************/ |
inline uint32_t __attribute__ ((always_inline)) riscv_intrinsic_zexth(uint32_t rs1) { |
|
register uint32_t result __asm__ ("a0"); |
register uint32_t tmp_a __asm__ ("a0") = rs1; |
|
// dummy instruction to prevent GCC "constprop" optimization |
asm volatile ("" : [output] "=r" (result) : [input_i] "r" (tmp_a)); |
|
// sext.h a0, a0 |
CUSTOM_INSTR_R1_TYPE(0b0000100, 0b00000, a0, 0b100, a0, 0b0110011); |
|
return result; |
return CUSTOM_INSTR_R1_TYPE(0b0000100, 0b00000, rs1, 0b100, 0b0110011); |
} |
|
|
187,23 → 133,13
/**********************************************************************//** |
* Intrinsic: Bit manipulation MIN (select signed minimum) [B.Zbb] |
* |
* @param[in] rs1 Source operand 1 (a0). |
* @param[in] rs2 Source operand 2 (a0). |
* @param[in] rs1 Source operand 1. |
* @param[in] rs2 Source operand 2. |
* @return Signed minimum. |
**************************************************************************/ |
inline uint32_t __attribute__ ((always_inline)) riscv_intrinsic_min(uint32_t rs1, uint32_t rs2) { |
|
register uint32_t result __asm__ ("a0"); |
register uint32_t tmp_a __asm__ ("a0") = rs1; |
register uint32_t tmp_b __asm__ ("a1") = rs2; |
|
// dummy instruction to prevent GCC "constprop" optimization |
asm volatile ("" : [output] "=r" (result) : [input_i] "r" (tmp_a), [input_j] "r" (tmp_b)); |
|
// min a0, a0, a1 |
CUSTOM_INSTR_R2_TYPE(0b0000101, a1, a0, 0b100, a0, 0b0110011); |
|
return result; |
return CUSTOM_INSTR_R2_TYPE(0b0000101, rs2, rs1, 0b100, 0b0110011); |
} |
|
|
210,23 → 146,13
/**********************************************************************//** |
* Intrinsic: Bit manipulation MINU (select unsigned minimum) [B.Zbb] |
* |
* @param[in] rs1 Source operand 1 (a0). |
* @param[in] rs2 Source operand 2 (a0). |
* @param[in] rs1 Source operand 1. |
* @param[in] rs2 Source operand 2. |
* @return Unsigned minimum. |
**************************************************************************/ |
inline uint32_t __attribute__ ((always_inline)) riscv_intrinsic_minu(uint32_t rs1, uint32_t rs2) { |
|
register uint32_t result __asm__ ("a0"); |
register uint32_t tmp_a __asm__ ("a0") = rs1; |
register uint32_t tmp_b __asm__ ("a1") = rs2; |
|
// dummy instruction to prevent GCC "constprop" optimization |
asm volatile ("" : [output] "=r" (result) : [input_i] "r" (tmp_a), [input_j] "r" (tmp_b)); |
|
// minu a0, a0, a1 |
CUSTOM_INSTR_R2_TYPE(0b0000101, a1, a0, 0b101, a0, 0b0110011); |
|
return result; |
return CUSTOM_INSTR_R2_TYPE(0b0000101, rs2, rs1, 0b101, 0b0110011); |
} |
|
|
233,23 → 159,13
/**********************************************************************//** |
* Intrinsic: Bit manipulation MAX (select signed maximum) [B.Zbb] |
* |
* @param[in] rs1 Source operand 1 (a0). |
* @param[in] rs2 Source operand 2 (a0). |
* @param[in] rs1 Source operand 1. |
* @param[in] rs2 Source operand 2. |
* @return Signed maximum. |
**************************************************************************/ |
inline uint32_t __attribute__ ((always_inline)) riscv_intrinsic_max(uint32_t rs1, uint32_t rs2) { |
|
register uint32_t result __asm__ ("a0"); |
register uint32_t tmp_a __asm__ ("a0") = rs1; |
register uint32_t tmp_b __asm__ ("a1") = rs2; |
|
// dummy instruction to prevent GCC "constprop" optimization |
asm volatile ("" : [output] "=r" (result) : [input_i] "r" (tmp_a), [input_j] "r" (tmp_b)); |
|
// max a0, a0, a1 |
CUSTOM_INSTR_R2_TYPE(0b0000101, a1, a0, 0b110, a0, 0b0110011); |
|
return result; |
return CUSTOM_INSTR_R2_TYPE(0b0000101, rs2, rs1, 0b110, 0b0110011); |
} |
|
|
256,23 → 172,13
/**********************************************************************//** |
* Intrinsic: Bit manipulation MAXU (select unsigned maximum) [B.Zbb] |
* |
* @param[in] rs1 Source operand 1 (a0). |
* @param[in] rs2 Source operand 2 (a0). |
* @param[in] rs1 Source operand 1. |
* @param[in] rs2 Source operand 2. |
* @return Unsigned maximum. |
**************************************************************************/ |
inline uint32_t __attribute__ ((always_inline)) riscv_intrinsic_maxu(uint32_t rs1, uint32_t rs2) { |
|
register uint32_t result __asm__ ("a0"); |
register uint32_t tmp_a __asm__ ("a0") = rs1; |
register uint32_t tmp_b __asm__ ("a1") = rs2; |
|
// dummy instruction to prevent GCC "constprop" optimization |
asm volatile ("" : [output] "=r" (result) : [input_i] "r" (tmp_a), [input_j] "r" (tmp_b)); |
|
// maxu a0, a0, a1 |
CUSTOM_INSTR_R2_TYPE(0b0000101, a1, a0, 0b111, a0, 0b0110011); |
|
return result; |
return CUSTOM_INSTR_R2_TYPE(0b0000101, rs2, rs1, 0b111, 0b0110011); |
} |
|
|
279,23 → 185,13
/**********************************************************************//** |
* Intrinsic: Bit manipulation ANDN (logical and-negate) [B.Zbb] |
* |
* @param[in] rs1 Source operand 1 (a0). |
* @param[in] rs2 Source operand 2 (a0). |
* @param[in] rs1 Source operand 1. |
* @param[in] rs2 Source operand 2. |
* @return Operand 1 AND NOT operand 2. |
**************************************************************************/ |
inline inline uint32_t __attribute__ ((always_inline)) riscv_intrinsic_andn(uint32_t rs1, uint32_t rs2) { |
|
register uint32_t result __asm__ ("a0"); |
register uint32_t tmp_a __asm__ ("a0") = rs1; |
register uint32_t tmp_b __asm__ ("a1") = rs2; |
|
// dummy instruction to prevent GCC "constprop" optimization |
asm volatile ("" : [output] "=r" (result) : [input_i] "r" (tmp_a), [input_j] "r" (tmp_b)); |
|
// andn a0, a0, a1 |
CUSTOM_INSTR_R2_TYPE(0b0100000, a1, a0, 0b111, a0, 0b0110011); |
|
return result; |
return CUSTOM_INSTR_R2_TYPE(0b0100000, rs2, rs1, 0b111, 0b0110011); |
} |
|
|
302,23 → 198,13
/**********************************************************************//** |
* Intrinsic: Bit manipulation ORN (logical or-negate) [B.Zbb] |
* |
* @param[in] rs1 Source operand 1 (a0). |
* @param[in] rs2 Source operand 2 (a0). |
* @param[in] rs1 Source operand 1. |
* @param[in] rs2 Source operand 2. |
* @return Operand 1 OR NOT operand 2. |
**************************************************************************/ |
inline uint32_t __attribute__ ((always_inline)) riscv_intrinsic_orn(uint32_t rs1, uint32_t rs2) { |
|
register uint32_t result __asm__ ("a0"); |
register uint32_t tmp_a __asm__ ("a0") = rs1; |
register uint32_t tmp_b __asm__ ("a1") = rs2; |
|
// dummy instruction to prevent GCC "constprop" optimization |
asm volatile ("" : [output] "=r" (result) : [input_i] "r" (tmp_a), [input_j] "r" (tmp_b)); |
|
// orn a0, a0, a1 |
CUSTOM_INSTR_R2_TYPE(0b0100000, a1, a0, 0b110, a0, 0b0110011); |
|
return result; |
return CUSTOM_INSTR_R2_TYPE(0b0100000, rs2, rs1, 0b110, 0b0110011); |
} |
|
|
325,23 → 211,13
/**********************************************************************//** |
* Intrinsic: Bit manipulation XNOR (logical xor-negate) [B.Zbb] |
* |
* @param[in] rs1 Source operand 1 (a0). |
* @param[in] rs2 Source operand 2 (a0). |
* @param[in] rs1 Source operand 1. |
* @param[in] rs2 Source operand 2. |
* @return Operand 1 XOR NOT operand 2. |
**************************************************************************/ |
inline uint32_t __attribute__ ((always_inline)) riscv_intrinsic_xnor(uint32_t rs1, uint32_t rs2) { |
|
register uint32_t result __asm__ ("a0"); |
register uint32_t tmp_a __asm__ ("a0") = rs1; |
register uint32_t tmp_b __asm__ ("a1") = rs2; |
|
// dummy instruction to prevent GCC "constprop" optimization |
asm volatile ("" : [output] "=r" (result) : [input_i] "r" (tmp_a), [input_j] "r" (tmp_b)); |
|
// xnor a0, a0, a1 |
CUSTOM_INSTR_R2_TYPE(0b0100000, a1, a0, 0b100, a0, 0b0110011); |
|
return result; |
return CUSTOM_INSTR_R2_TYPE(0b0100000, rs2, rs1, 0b100, 0b0110011); |
} |
|
|
348,23 → 224,13
/**********************************************************************//** |
* Intrinsic: Bit manipulation ROL (rotate-left) [B.Zbb] |
* |
* @param[in] rs1 Source operand 1 (a0). |
* @param[in] rs2 Source operand 2 (a0). |
* @param[in] rs1 Source operand 1. |
* @param[in] rs2 Source operand 2. |
* @return Operand 1 rotated left by operand_2(4:0) positions. |
**************************************************************************/ |
inline uint32_t __attribute__ ((always_inline)) riscv_intrinsic_rol(uint32_t rs1, uint32_t rs2) { |
|
register uint32_t result __asm__ ("a0"); |
register uint32_t tmp_a __asm__ ("a0") = rs1; |
register uint32_t tmp_b __asm__ ("a1") = rs2; |
|
// dummy instruction to prevent GCC "constprop" optimization |
asm volatile ("" : [output] "=r" (result) : [input_i] "r" (tmp_a), [input_j] "r" (tmp_b)); |
|
// rol a0, a0, a1 |
CUSTOM_INSTR_R2_TYPE(0b0110000, a1, a0, 0b001, a0, 0b0110011); |
|
return result; |
return CUSTOM_INSTR_R2_TYPE(0b0110000, rs2, rs1, 0b001, 0b0110011); |
} |
|
|
371,23 → 237,13
/**********************************************************************//** |
* Intrinsic: Bit manipulation ROR (rotate-right) [B.Zbb] |
* |
* @param[in] rs1 Source operand 1 (a0). |
* @param[in] rs2 Source operand 2 (a0). |
* @param[in] rs1 Source operand 1. |
* @param[in] rs2 Source operand 2. |
* @return Operand 1 rotated right by operand_2(4:0) positions. |
**************************************************************************/ |
inline uint32_t __attribute__ ((always_inline)) riscv_intrinsic_ror(uint32_t rs1, uint32_t rs2) { |
|
register uint32_t result __asm__ ("a0"); |
register uint32_t tmp_a __asm__ ("a0") = rs1; |
register uint32_t tmp_b __asm__ ("a1") = rs2; |
|
// dummy instruction to prevent GCC "constprop" optimization |
asm volatile ("" : [output] "=r" (result) : [input_i] "r" (tmp_a), [input_j] "r" (tmp_b)); |
|
// ror a0, a0, a1 |
CUSTOM_INSTR_R2_TYPE(0b0110000, a1, a0, 0b101, a0, 0b0110011); |
|
return result; |
return CUSTOM_INSTR_R2_TYPE(0b0110000, rs2, rs1, 0b101, 0b0110011); |
} |
|
|
395,21 → 251,12
* Intrinsic: Bit manipulation RORI (rotate-right) by 20 positions. [B.Zbb] |
* @warning Fixed shift amount (20) for now. |
* |
* @param[in] rs1 Source operand 1 (a0). |
* @param[in] rs1 Source operand 1. |
* @return Operand 1 rotated right by 20 positions. |
**************************************************************************/ |
inline uint32_t __attribute__ ((always_inline)) riscv_intrinsic_rori20(uint32_t rs1) { |
|
register uint32_t result __asm__ ("a0"); |
register uint32_t tmp_a __asm__ ("a0") = rs1; |
|
// dummy instruction to prevent GCC "constprop" optimization |
asm volatile ("" : [output] "=r" (result) : [input_i] "r" (tmp_a)); |
|
// rori a0, a0, 20 |
CUSTOM_INSTR_R1_TYPE(0b0110000, 0b10100, a0, 0b101, a0, 0b0010011); |
|
return result; |
return CUSTOM_INSTR_R1_TYPE(0b0110000, 0b10100, rs1, 0b101, 0b0010011); |
} |
|
|
416,21 → 263,12
/**********************************************************************//** |
* Intrinsic: Bit manipulation ORC.B (or-combine byte) [B.Zbb] |
* |
* @param[in] rs1 Source operand 1 (a0). |
* @param[in] rs1 Source operand 1. |
* @return OR-combined bytes of operand 1. |
**************************************************************************/ |
inline uint32_t __attribute__ ((always_inline)) riscv_intrinsic_orcb(uint32_t rs1) { |
|
register uint32_t result __asm__ ("a0"); |
register uint32_t tmp_a __asm__ ("a0") = rs1; |
|
// dummy instruction to prevent GCC "constprop" optimization |
asm volatile ("" : [output] "=r" (result) : [input_i] "r" (tmp_a)); |
|
// gorci a0, a0, 7 (pseudo-instruction: orc.b a0, a0) |
CUSTOM_INSTR_R1_TYPE(0b0010100, 0b00111, a0, 0b101, a0, 0b0010011); |
|
return result; |
return CUSTOM_INSTR_R1_TYPE(0b0010100, 0b00111, rs1, 0b101, 0b0010011); |
} |
|
|
437,48 → 275,29
/**********************************************************************//** |
* Intrinsic: Bit manipulation REV8 (byte-swap) [B.Zbb] |
* |
* @param[in] rs1 Source operand 1 (a0). |
* @param[in] rs1 Source operand 1. |
* @return Byte swap of operand 1 |
**************************************************************************/ |
inline uint32_t __attribute__ ((always_inline)) riscv_intrinsic_rev8(uint32_t rs1) { |
|
register uint32_t result __asm__ ("a0"); |
register uint32_t tmp_a __asm__ ("a0") = rs1; |
|
// dummy instruction to prevent GCC "constprop" optimization |
asm volatile ("" : [output] "=r" (result) : [input_i] "r" (tmp_a)); |
|
// grevi a0, a0, -8 (pseudo-instruction: rev8 a0, a0) |
CUSTOM_INSTR_R1_TYPE(0b0110100, 0b11000, a0, 0b101, a0, 0b0010011); |
|
return result; |
return CUSTOM_INSTR_R1_TYPE(0b0110100, 0b11000, rs1, 0b101, 0b0010011); |
} |
|
|
// ================================================================================================ |
// Zbb - Base instructions |
// Zba - Address-generation instructions |
// ================================================================================================ |
|
/**********************************************************************//** |
* Intrinsic: Address generation instructions SH1ADD (add with logical-1-shift) [B.Zba] |
* |
* @param[in] rs1 Source operand 1 (a0). |
* @param[in] rs2 Source operand 2 (a0). |
* @param[in] rs1 Source operand 1. |
* @param[in] rs2 Source operand 2. |
* @return Operand 2 + (Operand 1 << 1) |
**************************************************************************/ |
inline uint32_t __attribute__ ((always_inline)) riscv_intrinsic_sh1add(uint32_t rs1, uint32_t rs2) { |
|
register uint32_t result __asm__ ("a0"); |
register uint32_t tmp_a __asm__ ("a0") = rs1; |
register uint32_t tmp_b __asm__ ("a1") = rs2; |
|
// dummy instruction to prevent GCC "constprop" optimization |
asm volatile ("" : [output] "=r" (result) : [input_i] "r" (tmp_a), [input_j] "r" (tmp_b)); |
|
// sh1add a0, a0, a1 |
CUSTOM_INSTR_R2_TYPE(0b0010000, a1, a0, 0b010, a0, 0b0110011); |
|
return result; |
return CUSTOM_INSTR_R2_TYPE(0b0010000, rs2, rs1, 0b010, 0b0110011); |
} |
|
|
485,49 → 304,181
/**********************************************************************//** |
* Intrinsic: Address generation instructions SH2ADD (add with logical-2-shift) [B.Zba] |
* |
* @param[in] rs1 Source operand 1 (a0). |
* @param[in] rs2 Source operand 2 (a0). |
* @param[in] rs1 Source operand 1. |
* @param[in] rs2 Source operand 2. |
* @return Operand 2 + (Operand 1 << 2) |
**************************************************************************/ |
inline uint32_t __attribute__ ((always_inline)) riscv_intrinsic_sh2add(uint32_t rs1, uint32_t rs2) { |
|
register uint32_t result __asm__ ("a0"); |
register uint32_t tmp_a __asm__ ("a0") = rs1; |
register uint32_t tmp_b __asm__ ("a1") = rs2; |
|
// dummy instruction to prevent GCC "constprop" optimization |
asm volatile ("" : [output] "=r" (result) : [input_i] "r" (tmp_a), [input_j] "r" (tmp_b)); |
|
// sh2add a0, a0, a1 |
CUSTOM_INSTR_R2_TYPE(0b0010000, a1, a0, 0b100, a0, 0b0110011); |
|
return result; |
return CUSTOM_INSTR_R2_TYPE(0b0010000, rs2, rs1, 0b100, 0b0110011); |
} |
|
/**********************************************************************//** |
* Intrinsic: Address generation instructions SH3ADD (add with logical-3-shift) [B.Zba]
* |
* @param[in] rs1 Source operand 1 (a0). |
* @param[in] rs2 Source operand 2 (a0). |
* @param[in] rs1 Source operand 1. |
* @param[in] rs2 Source operand 2. |
* @return Operand 2 + (Operand 1 << 3) |
**************************************************************************/ |
inline uint32_t __attribute__ ((always_inline)) riscv_intrinsic_sh3add(uint32_t rs1, uint32_t rs2) { |
|
register uint32_t result __asm__ ("a0"); |
register uint32_t tmp_a __asm__ ("a0") = rs1; |
register uint32_t tmp_b __asm__ ("a1") = rs2; |
return CUSTOM_INSTR_R2_TYPE(0b0010000, rs2, rs1, 0b110, 0b0110011); |
} |
|
// dummy instruction to prevent GCC "constprop" optimization |
asm volatile ("" : [output] "=r" (result) : [input_i] "r" (tmp_a), [input_j] "r" (tmp_b)); |
|
// sh3add a0, a0, a1 |
CUSTOM_INSTR_R2_TYPE(0b0010000, a1, a0, 0b110, a0, 0b0110011); |
// ================================================================================================ |
// Zbs - Single-bit instructions |
// ================================================================================================ |
|
return result; |
|
/**********************************************************************//**
 * Intrinsic: Bit manipulation BCLR (clear single bit, register-indexed) [B.Zbs]
 *
 * @param[in] rs1 Source operand 1 (value to modify).
 * @param[in] rs2 Source operand 2 (bit index).
 * @return Operand 1 with the bit indexed by operand_2(4:0) cleared.
 **************************************************************************/
inline uint32_t __attribute__ ((always_inline)) riscv_intrinsic_bclr(uint32_t rs1, uint32_t rs2) {

  // instruction word is assembled by the custom-instruction helper macro
  const uint32_t res = CUSTOM_INSTR_R2_TYPE(0b0100100, rs2, rs1, 0b001, 0b0110011);

  return res;
}
|
|
/**********************************************************************//**
 * Intrinsic: Bit manipulation BCLRI (clear single bit, immediate index) [B.Zbs]
 * @warning The bit index is hard-wired to 20 (0b10100) for now.
 *
 * @param[in] rs1 Source operand 1 (value to modify).
 * @return Operand 1 with bit 20 cleared.
 **************************************************************************/
inline uint32_t __attribute__ ((always_inline)) riscv_intrinsic_bclri20(uint32_t rs1) {

  // 0b10100 = 20: the fixed immediate bit index
  const uint32_t res = CUSTOM_INSTR_R1_TYPE(0b0100100, 0b10100, rs1, 0b001, 0b0010011);

  return res;
}
|
|
/**********************************************************************//**
 * Intrinsic: Bit manipulation BEXT (extract single bit, register-indexed) [B.Zbs]
 *
 * @param[in] rs1 Source operand 1 (value to read from).
 * @param[in] rs2 Source operand 2 (bit index).
 * @return Bit of operand 1 indexed by operand_2(4:0).
 **************************************************************************/
inline uint32_t __attribute__ ((always_inline)) riscv_intrinsic_bext(uint32_t rs1, uint32_t rs2) {

  // instruction word is assembled by the custom-instruction helper macro
  const uint32_t res = CUSTOM_INSTR_R2_TYPE(0b0100100, rs2, rs1, 0b101, 0b0110011);

  return res;
}
|
|
/**********************************************************************//**
 * Intrinsic: Bit manipulation BEXTI (extract single bit, immediate index) [B.Zbs]
 * @warning The bit index is hard-wired to 20 (0b10100) for now.
 *
 * @param[in] rs1 Source operand 1 (value to read from).
 * @return Bit 20 of operand 1.
 **************************************************************************/
inline uint32_t __attribute__ ((always_inline)) riscv_intrinsic_bexti20(uint32_t rs1) {

  // 0b10100 = 20: the fixed immediate bit index
  const uint32_t res = CUSTOM_INSTR_R1_TYPE(0b0100100, 0b10100, rs1, 0b101, 0b0010011);

  return res;
}
|
|
/**********************************************************************//**
 * Intrinsic: Bit manipulation BINV (invert single bit, register-indexed) [B.Zbs]
 *
 * @param[in] rs1 Source operand 1 (value to modify).
 * @param[in] rs2 Source operand 2 (bit index).
 * @return Operand 1 with the bit indexed by operand_2(4:0) inverted.
 **************************************************************************/
inline uint32_t __attribute__ ((always_inline)) riscv_intrinsic_binv(uint32_t rs1, uint32_t rs2) {

  // instruction word is assembled by the custom-instruction helper macro
  const uint32_t res = CUSTOM_INSTR_R2_TYPE(0b0110100, rs2, rs1, 0b001, 0b0110011);

  return res;
}
|
|
/**********************************************************************//**
 * Intrinsic: Bit manipulation BINVI (invert single bit, immediate index) [B.Zbs]
 * @warning The bit index is hard-wired to 20 (0b10100) for now.
 *
 * @param[in] rs1 Source operand 1 (value to modify).
 * @return Operand 1 with bit 20 inverted.
 **************************************************************************/
inline uint32_t __attribute__ ((always_inline)) riscv_intrinsic_binvi20(uint32_t rs1) {

  // 0b10100 = 20: the fixed immediate bit index
  const uint32_t res = CUSTOM_INSTR_R1_TYPE(0b0110100, 0b10100, rs1, 0b001, 0b0010011);

  return res;
}
|
|
/**********************************************************************//**
 * Intrinsic: Bit manipulation BSET (set single bit, register-indexed) [B.Zbs]
 *
 * @param[in] rs1 Source operand 1 (value to modify).
 * @param[in] rs2 Source operand 2 (bit index).
 * @return Operand 1 with the bit indexed by operand_2(4:0) set.
 **************************************************************************/
inline uint32_t __attribute__ ((always_inline)) riscv_intrinsic_bset(uint32_t rs1, uint32_t rs2) {

  // instruction word is assembled by the custom-instruction helper macro
  const uint32_t res = CUSTOM_INSTR_R2_TYPE(0b0010100, rs2, rs1, 0b001, 0b0110011);

  return res;
}
|
|
/**********************************************************************//**
 * Intrinsic: Bit manipulation BSETI (set single bit, immediate index) [B.Zbs]
 * @warning The bit index is hard-wired to 20 (0b10100) for now.
 *
 * @param[in] rs1 Source operand 1 (value to modify).
 * @return Operand 1 with bit 20 set.
 **************************************************************************/
inline uint32_t __attribute__ ((always_inline)) riscv_intrinsic_bseti20(uint32_t rs1) {

  // 0b10100 = 20: the fixed immediate bit index
  const uint32_t res = CUSTOM_INSTR_R1_TYPE(0b0010100, 0b10100, rs1, 0b001, 0b0010011);

  return res;
}
|
|
// ================================================================================================ |
// Zbc - Carry-less multiplication instructions |
// ================================================================================================ |
|
|
/**********************************************************************//**
 * Intrinsic: Bit manipulation CLMUL (carry-less multiply, low word) [B.Zbc]
 *
 * @param[in] rs1 Source operand 1 (first factor).
 * @param[in] rs2 Source operand 2 (second factor).
 * @return Low part of the carry-less product.
 **************************************************************************/
inline uint32_t __attribute__ ((always_inline)) riscv_intrinsic_clmul(uint32_t rs1, uint32_t rs2) {

  // instruction word is assembled by the custom-instruction helper macro
  const uint32_t res = CUSTOM_INSTR_R2_TYPE(0b0000101, rs2, rs1, 0b001, 0b0110011);

  return res;
}
|
|
/**********************************************************************//**
 * Intrinsic: Bit manipulation CLMULH (carry-less multiply, high word) [B.Zbc]
 *
 * @param[in] rs1 Source operand 1 (first factor).
 * @param[in] rs2 Source operand 2 (second factor).
 * @return High part of the carry-less product.
 **************************************************************************/
inline uint32_t __attribute__ ((always_inline)) riscv_intrinsic_clmulh(uint32_t rs1, uint32_t rs2) {

  // instruction word is assembled by the custom-instruction helper macro
  const uint32_t res = CUSTOM_INSTR_R2_TYPE(0b0000101, rs2, rs1, 0b011, 0b0110011);

  return res;
}
|
|
/**********************************************************************//**
 * Intrinsic: Bit manipulation CLMULR (carry-less multiplication, reversed) [B.Zbc]
 *
 * @note Operands are handed to the instruction macro in the same
 * (rs2, rs1) order as every other two-operand intrinsic in this file.
 * Carry-less multiplication is commutative, so the result does not depend
 * on this order; the change only keeps the encoding consistent.
 *
 * @param[in] rs1 Source operand 1.
 * @param[in] rs2 Source operand 2.
 * @return Carry-less product, low part, reversed.
 **************************************************************************/
inline uint32_t __attribute__ ((always_inline)) riscv_intrinsic_clmulr(uint32_t rs1, uint32_t rs2) {

  return CUSTOM_INSTR_R2_TYPE(0b0000101, rs2, rs1, 0b010, 0b0110011);
}
|
|
// ################################################################################################ |
// Emulation functions |
// ################################################################################################ |
541,7 → 492,7
/**********************************************************************//** |
* Intrinsic: Bit manipulation CLZ (count leading zeros) [emulation] |
* |
* @param[in] rs1 Source operand 1 (a0). |
* @param[in] rs1 Source operand 1. |
* @return Number of leading zeros in source operand. |
**************************************************************************/ |
uint32_t riscv_emulate_clz(uint32_t rs1) { |
566,7 → 517,7
/**********************************************************************//** |
* Intrinsic: Bit manipulation CTZ (count trailing zeros) [emulation] |
* |
* @param[in] rs1 Source operand 1 (a0). |
* @param[in] rs1 Source operand 1. |
* @return Number of trailing zeros in source operand. |
**************************************************************************/ |
uint32_t riscv_emulate_ctz(uint32_t rs1) { |
591,7 → 542,7
/**********************************************************************//** |
* Intrinsic: Bit manipulation CPOP (population count) [emulation] |
* |
* @param[in] rs1 Source operand 1 (a0). |
* @param[in] rs1 Source operand 1. |
* @return Number of set bits in source operand. |
**************************************************************************/ |
uint32_t riscv_emulate_cpop(uint32_t rs1) { |
614,7 → 565,7
/**********************************************************************//** |
* Intrinsic: Bit manipulation SEXT.B (sign-extend byte) [emulation] |
* |
* @param[in] rs1 Source operand 1 (a0). |
* @param[in] rs1 Source operand 1. |
* @return Sign-extended byte (operand(7:0)). |
**************************************************************************/ |
uint32_t riscv_emulate_sextb(uint32_t rs1) { |
632,7 → 583,7
/**********************************************************************//** |
* Intrinsic: Bit manipulation SEXT.H (sign-extend half-word) [emulation] |
* |
* @param[in] rs1 Source operand 1 (a0). |
* @param[in] rs1 Source operand 1. |
* @return Sign-extended half-word (operand(15:0)). |
**************************************************************************/ |
uint32_t riscv_emulate_sexth(uint32_t rs1) { |
650,7 → 601,7
/**********************************************************************//** |
* Intrinsic: Bit manipulation ZEXT.H (zero-extend half-word) [emulation] |
* |
* @param[in] rs1 Source operand 1 (a0). |
* @param[in] rs1 Source operand 1. |
* @return Zero-extended half-word (operand(15:0)). |
**************************************************************************/ |
uint32_t riscv_emulate_zexth(uint32_t rs1) { |
662,8 → 613,8
/**********************************************************************//** |
* Intrinsic: Bit manipulation MIN (select signed minimum) [emulation] |
* |
* @param[in] rs1 Source operand 1 (a0). |
* @param[in] rs2 Source operand 1 (a0). |
* @param[in] rs1 Source operand 1. |
* @param[in] rs2 Source operand 2. |
* @return Signed minimum. |
**************************************************************************/ |
uint32_t riscv_emulate_min(uint32_t rs1, uint32_t rs2) { |
683,8 → 634,8
/**********************************************************************//** |
* Intrinsic: Bit manipulation MINU (select unsigned minimum) [emulation] |
* |
* @param[in] rs1 Source operand 1 (a0). |
* @param[in] rs2 Source operand 1 (a0). |
* @param[in] rs1 Source operand 1. |
* @param[in] rs2 Source operand 2. |
* @return Unsigned minimum. |
**************************************************************************/ |
uint32_t riscv_emulate_minu(uint32_t rs1, uint32_t rs2) { |
701,8 → 652,8
/**********************************************************************//** |
* Intrinsic: Bit manipulation MAX (select signed maximum) [emulation] |
* |
* @param[in] rs1 Source operand 1 (a0). |
* @param[in] rs2 Source operand 1 (a0). |
* @param[in] rs1 Source operand 1. |
* @param[in] rs2 Source operand 2. |
* @return Signed maximum. |
**************************************************************************/ |
uint32_t riscv_emulate_max(uint32_t rs1, uint32_t rs2) { |
722,8 → 673,8
/**********************************************************************//** |
* Intrinsic: Bit manipulation MAXU (select unsigned maximum) [emulation] |
* |
* @param[in] rs1 Source operand 1 (a0). |
* @param[in] rs2 Source operand 1 (a0). |
* @param[in] rs1 Source operand 1. |
* @param[in] rs2 Source operand 2. |
* @return Unsigned maximum. |
**************************************************************************/ |
uint32_t riscv_emulate_maxu(uint32_t rs1, uint32_t rs2) { |
740,8 → 691,8
/**********************************************************************//** |
* Intrinsic: Bit manipulation ANDN (logical and-negate) [emulation] |
* |
* @param[in] rs1 Source operand 1 (a0). |
* @param[in] rs2 Source operand 1 (a0). |
* @param[in] rs1 Source operand 1. |
* @param[in] rs2 Source operand 2. |
* @return Operand 1 AND NOT operand 2. |
**************************************************************************/ |
uint32_t riscv_emulate_andn(uint32_t rs1, uint32_t rs2) { |
753,8 → 704,8
/**********************************************************************//** |
* Intrinsic: Bit manipulation ORN (logical or-negate) [emulation] |
* |
* @param[in] rs1 Source operand 1 (a0). |
* @param[in] rs2 Source operand 1 (a0). |
* @param[in] rs1 Source operand 1. |
* @param[in] rs2 Source operand 2. |
* @return Operand 1 OR NOT operand 2. |
**************************************************************************/ |
uint32_t riscv_emulate_orn(uint32_t rs1, uint32_t rs2) { |
766,8 → 717,8
/**********************************************************************//** |
* Intrinsic: Bit manipulation XNOR (logical xor-negate) [emulation] |
* |
* @param[in] rs1 Source operand 1 (a0). |
* @param[in] rs2 Source operand 1 (a0). |
* @param[in] rs1 Source operand 1. |
* @param[in] rs2 Source operand 2. |
* @return Operand 1 XOR NOT operand 2. |
**************************************************************************/ |
uint32_t riscv_emulate_xnor(uint32_t rs1, uint32_t rs2) { |
779,8 → 730,8
/**********************************************************************//** |
* Intrinsic: Bit manipulation ROL (rotate-left) [emulation] |
* |
* @param[in] rs1 Source operand 1 (a0). |
* @param[in] rs2 Source operand 1 (a0). |
* @param[in] rs1 Source operand 1. |
* @param[in] rs2 Source operand 2. |
* @return Operand 1 rotated left by operand_2(4:0) positions. |
**************************************************************************/ |
uint32_t riscv_emulate_rol(uint32_t rs1, uint32_t rs2) { |
797,8 → 748,8
/**********************************************************************//** |
* Intrinsic: Bit manipulation ROR (rotate-right) [emulation] |
* |
* @param[in] rs1 Source operand 1 (a0). |
* @param[in] rs2 Source operand 1 (a0). |
* @param[in] rs1 Source operand 1. |
* @param[in] rs2 Source operand 2. |
* @return Operand 1 rotated right by operand_2(4:0) positions. |
**************************************************************************/ |
uint32_t riscv_emulate_ror(uint32_t rs1, uint32_t rs2) { |
815,7 → 766,7
/**********************************************************************//** |
* Intrinsic: Bit manipulation REV8 (byte swap) [emulation] |
* |
* @param[in] rs1 Source operand 1 (a0). |
* @param[in] rs1 Source operand 1. |
* @return Operand 1 byte swapped. |
**************************************************************************/ |
uint32_t riscv_emulate_rev8(uint32_t rs1) { |
832,7 → 783,7
/**********************************************************************//** |
* Intrinsic: Bit manipulation ORCB (or-combine bytes) [emulation] |
* |
* @param[in] rs1 Source operand 1 (a0). |
* @param[in] rs1 Source operand 1. |
* @return OR-combined bytes of operand 1. |
**************************************************************************/ |
uint32_t riscv_emulate_orcb(uint32_t rs1) { |
864,8 → 815,8
/**********************************************************************//** |
* Intrinsic: Address generation instructions SH1ADD (add with logical-1-shift) [emulation] |
* |
* @param[in] rs1 Source operand 1 (a0). |
* @param[in] rs2 Source operand 1 (a0). |
* @param[in] rs1 Source operand 1. |
* @param[in] rs2 Source operand 2. |
* @return Operand 2 + (Operand 1 << 1) |
**************************************************************************/ |
uint32_t riscv_emulate_sh1add(uint32_t rs1, uint32_t rs2) { |
877,8 → 828,8
/**********************************************************************//** |
* Intrinsic: Address generation instructions SH2ADD (add with logical-2-shift) [emulation] |
* |
* @param[in] rs1 Source operand 1 (a0). |
* @param[in] rs2 Source operand 1 (a0). |
* @param[in] rs1 Source operand 1. |
* @param[in] rs2 Source operand 2. |
* @return Operand 2 + (Operand 1 << 2) |
**************************************************************************/ |
uint32_t riscv_emulate_sh2add(uint32_t rs1, uint32_t rs2) { |
890,8 → 841,8
/**********************************************************************//** |
* Intrinsic: Address generation instructions SH3ADD (add with logical-3-shift) [emulation] |
* |
* @param[in] rs1 Source operand 1 (a0). |
* @param[in] rs2 Source operand 1 (a0). |
* @param[in] rs1 Source operand 1. |
* @param[in] rs2 Source operand 2. |
* @return Operand 2 + (Operand 1 << 3) |
**************************************************************************/ |
uint32_t riscv_emulate_sh3add(uint32_t rs1, uint32_t rs2) { |
900,4 → 851,187
} |
|
|
// ================================================================================================ |
// Zbs - Single-bit instructions |
// ================================================================================================ |
|
|
/**********************************************************************//**
 * Intrinsic: Bit manipulation BCLR (bit-clear) [emulation]
 *
 * @param[in] rs1 Source operand 1 (value to modify).
 * @param[in] rs2 Source operand 2 (bit index; only bits 4:0 are used).
 * @return Operand 1 with cleared bit indexed by operand_2(4:0).
 **************************************************************************/
uint32_t riscv_emulate_bclr(uint32_t rs1, uint32_t rs2) {

  uint32_t shamt = rs2 & 0x1f;
  // shift an unsigned one: "1 << 31" on a signed int is undefined behavior
  uint32_t mask = ((uint32_t)1) << shamt;

  return rs1 & (~mask);
}
|
|
/**********************************************************************//**
 * Intrinsic: Bit manipulation BEXT (bit-extract) [emulation]
 *
 * @param[in] rs1 Source operand 1 (value to read from).
 * @param[in] rs2 Source operand 2 (bit index; only bits 4:0 are used).
 * @return Bit of operand 1 indexed by operand_2(4:0), as 0 or 1.
 **************************************************************************/
uint32_t riscv_emulate_bext(uint32_t rs1, uint32_t rs2) {

  // move the selected bit down to position 0 and isolate it
  return (rs1 >> (rs2 & 0x1f)) & 1;
}
|
|
/**********************************************************************//**
 * Intrinsic: Bit manipulation BINV (bit-invert) [emulation]
 *
 * @param[in] rs1 Source operand 1 (value to modify).
 * @param[in] rs2 Source operand 2 (bit index; only bits 4:0 are used).
 * @return Operand 1 with inverted bit indexed by operand_2(4:0).
 **************************************************************************/
uint32_t riscv_emulate_binv(uint32_t rs1, uint32_t rs2) {

  uint32_t shamt = rs2 & 0x1f;
  // shift an unsigned one: "1 << 31" on a signed int is undefined behavior
  uint32_t mask = ((uint32_t)1) << shamt;

  return rs1 ^ mask;
}
|
|
/**********************************************************************//**
 * Intrinsic: Bit manipulation BSET (bit-set) [emulation]
 *
 * @param[in] rs1 Source operand 1 (value to modify).
 * @param[in] rs2 Source operand 2 (bit index; only bits 4:0 are used).
 * @return Operand 1 with set bit indexed by operand_2(4:0).
 **************************************************************************/
uint32_t riscv_emulate_bset(uint32_t rs1, uint32_t rs2) {

  uint32_t shamt = rs2 & 0x1f;
  // shift an unsigned one: "1 << 31" on a signed int is undefined behavior
  uint32_t mask = ((uint32_t)1) << shamt;

  return rs1 | mask;
}
|
|
// ================================================================================================ |
// Zbc - Carry-less multiplication instructions |
// ================================================================================================ |
|
|
/**********************************************************************//**
 * Intrinsic: Bit manipulation CLMUL (carry-less multiply, low-part) [emulation]
 *
 * The 64-bit carry-less product is built by XOR-ing a shifted copy of
 * operand 1 for every set bit of operand 2. The low word is taken with an
 * explicit cast, so the result does not depend on host byte order.
 *
 * @param[in] rs1 Source operand 1.
 * @param[in] rs2 Source operand 2.
 * @return Carry-less multiplication product, low part (bits 31:0).
 **************************************************************************/
uint32_t riscv_emulate_clmul(uint32_t rs1, uint32_t rs2) {

  uint32_t i;
  uint64_t product = 0;

  for (i=0; i<32; i++) {
    if ((rs2 >> i) & 1) {
      product ^= ((uint64_t)rs1) << i;
    }
  }

  return (uint32_t)product; // truncation keeps bits 31:0
}
|
|
/**********************************************************************//**
 * Intrinsic: Bit manipulation CLMULH (carry-less multiply, high-part) [emulation]
 *
 * The 64-bit carry-less product is built by XOR-ing a shifted copy of
 * operand 1 for every set bit of operand 2. The high word is taken with an
 * explicit shift, so the result does not depend on host byte order.
 *
 * @param[in] rs1 Source operand 1.
 * @param[in] rs2 Source operand 2.
 * @return Carry-less multiplication product, high part (bits 63:32).
 **************************************************************************/
uint32_t riscv_emulate_clmulh(uint32_t rs1, uint32_t rs2) {

  uint32_t i;
  uint64_t product = 0;

  for (i=0; i<32; i++) {
    if ((rs2 >> i) & 1) {
      product ^= ((uint64_t)rs1) << i;
    }
  }

  return (uint32_t)(product >> 32); // bits 63:32
}
|
|
/**********************************************************************//**
 * Intrinsic: Bit manipulation CLMULR (carry-less multiply, reversed) [emulation]
 *
 * Bit-reversing both operands, multiplying carry-less and bit-reversing the
 * low word of that product yields bits 62:31 of the plain carry-less
 * product. Those bits are accumulated directly here: for every set bit i of
 * operand 2, operand 1 shifted right by (31 - i) is XOR-ed into the result.
 * No 64-bit overlay is needed, so the result does not depend on host byte
 * order.
 *
 * @param[in] rs1 Source operand 1.
 * @param[in] rs2 Source operand 2.
 * @return Carry-less multiplication product, low part, reversed
 *         (bits 62:31 of the 64-bit carry-less product).
 **************************************************************************/
uint32_t riscv_emulate_clmulr(uint32_t rs1, uint32_t rs2) {

  uint32_t i;
  uint32_t result = 0;

  for (i=0; i<32; i++) {
    if ((rs2 >> i) & 1) {
      // (rs1 << i) >> 31, restricted to 32 bits; shift amount is 31..0
      result ^= rs1 >> (31 - i);
    }
  }

  return result;
}
|
|
#endif // neorv32_b_extension_intrinsics_h |