Hello everyone. I’m trying to implement Ivan Kostoski’s code for an ESP32-based sound level meter on a XIAO ESP32-C3. According to the wiki, that chip “is a 32-bit RISC-V CPU, which includes an FPU (Floating Point Unit) for 32-bit single-precision arithmetic”. So, based on this, I wrote some RISC-V assembly code for floating-point operations. However, when I try to compile, I get the error “unrecognized opcode” on every floating-point instruction. When I switch to integer instructions, the code does compile.
Here’s my code:
extern "C" {
/**
 * IIR second-order-section filter over `len` samples (RISC-V, F extension).
 *
 * Reads `len` floats from `input`, writes `len` filtered floats to `output`,
 * and carries the two delay-line values in `w` across calls. The routine
 * reads four floats from `coeffs` (used as b1, b2, a1, a2) and two floats
 * from `w` (w0, w1), so both objects must start with those fields in that
 * order. A `len` of zero or less processes nothing.
 *
 * Always returns 0.
 */
int sos_filter_f32(float *input, float *output, int len, const SOS_Coefficients &coeffs, SOS_Delay_State &w);
}
__asm__ (
//
// RISC-V (RV32 + "F" extension) implementation of IIR Second-Order Section filter
// Assumes a0 and b0 coefficients are one (1.0)
//
// Arguments arrive in a0..a4 per the RISC-V calling convention (C++ references
// are passed as pointers); the int result is returned in a0:
// float* a0 = input;
// float* a1 = output;
// int a2 = len;
// float* a3 = coeffs; // b1, b2, a1, a2 at byte offsets 0, 4, 8, 12
// float* a4 = w; // w0, w1 at byte offsets 0, 4
//
// ft0..ft7 (f0..f7) are caller-saved temporaries, so no registers are spilled.
// Numeric local labels (1:, 2:) are used so that no global symbols such as
// `loop` or `exit` are emitted into the object file.
//
// NOTE(review): every f* instruction needs the F extension. If the assembler
// reports "unrecognized opcode", the toolchain's -march does not include F.
// Espressif documents the ESP32-C3 core as RV32IMC (no hardware FPU) - confirm
// against the datasheet; on such a core these opcodes would trap even if they
// were force-assembled, and a plain C implementation should be used instead.
//
".text \n"
".p2align 2 \n"
".global sos_filter_f32 \n"
".type sos_filter_f32,@function\n"
"sos_filter_f32: \n"
" flw ft0, 0(a3) \n" // float ft0 = coeffs.b1;
" flw ft1, 4(a3) \n" // float ft1 = coeffs.b2;
" flw ft2, 8(a3) \n" // float ft2 = coeffs.a1;
" flw ft3, 12(a3) \n" // float ft3 = coeffs.a2;
" flw ft4, 0(a4) \n" // float ft4 = w[0];
" flw ft5, 4(a4) \n" // float ft5 = w[1];
" blez a2, 2f \n" // skip the loop entirely when len <= 0
"1: \n" // do {
" flw ft6, 0(a0) \n" // float ft6 = *input;
" addi a0, a0, 4 \n" // input++;
" fmadd.s ft6, ft2, ft4, ft6 \n" // ft6 = ft2 * ft4 + ft6; // coeffs.a1 * w0
" fmadd.s ft6, ft3, ft5, ft6 \n" // ft6 = ft3 * ft5 + ft6; // coeffs.a2 * w1
" fmadd.s ft7, ft0, ft4, ft6 \n" // ft7 = ft0 * ft4 + ft6; // b0 assumed 1.0, + coeffs.b1 * w0
" fmadd.s ft7, ft1, ft5, ft7 \n" // ft7 = ft1 * ft5 + ft7; // coeffs.b2 * w1 -> result
" fsw ft7, 0(a1) \n" // *output = ft7;
" addi a1, a1, 4 \n" // output++;
" fmv.s ft5, ft4 \n" // ft5 = ft4; // w1 = w0
" fmv.s ft4, ft6 \n" // ft4 = ft6; // w0 = ft6
" addi a2, a2, -1 \n" // len--;
" bnez a2, 1b \n" // } while (len != 0);
"2: \n"
" fsw ft4, 0(a4) \n" // w[0] = ft4;
" fsw ft5, 4(a4) \n" // w[1] = ft5;
" li a0, 0 \n" // return 0;
" ret \n"
".size sos_filter_f32, .-sos_filter_f32 \n"
);
The reference code, written for the Xtensa ISA, is the following:
extern "C" {
// IIR second-order-section filter implemented in the Xtensa assembly below.
// Reads `len` floats from `input`, writes the filtered floats to `output`,
// uses four floats read from `coeffs` (b1, b2, a1, a2) and keeps the two
// delay values in `w`, which is loaded on entry and stored back on exit.
// Always returns 0.
int sos_filter_f32(float *input, float *output, int len, const SOS_Coefficients &coeffs, SOS_Delay_State &w);
}
__asm__ (
//
// ESP32 (Xtensa) implementation of IIR Second-Order Section filter
// Assumes a0 and b0 coefficients are one (1.0)
//
// Register usage on entry (arguments start at a2 in this code):
// float* a2 = input;
// float* a3 = output;
// int a4 = len;
// float* a5 = coeffs; // read at byte offsets 0, 4, 8, 12
// float* a6 = w; // read and written at byte offsets 0, 4
// float a7 = gain; // listed here, but not in the prototype and never read below
//
".text \n"
".align 4 \n"
".global sos_filter_f32 \n"
".type sos_filter_f32,@function\n"
"sos_filter_f32: \n"
" entry a1, 16 \n" // function prologue (pairs with retw.n at the end)
" lsi f0, a5, 0 \n" // float f0 = coeffs.b1;
" lsi f1, a5, 4 \n" // float f1 = coeffs.b2;
" lsi f2, a5, 8 \n" // float f2 = coeffs.a1;
" lsi f3, a5, 12 \n" // float f3 = coeffs.a2;
" lsi f4, a6, 0 \n" // float f4 = w[0];
" lsi f5, a6, 4 \n" // float f5 = w[1];
" loopnez a4, 1f \n" // for (; len>0; len--) { // body runs up to label 1; skipped when len is zero
" lsip f6, a2, 4 \n" // float f6 = *input++; // load, then advance a2 by 4 bytes
" madd.s f6, f2, f4 \n" // f6 += f2 * f4; // coeffs.a1 * w0
" madd.s f6, f3, f5 \n" // f6 += f3 * f5; // coeffs.a2 * w1
" mov.s f7, f6 \n" // f7 = f6; // b0 assumed 1.0
" madd.s f7, f0, f4 \n" // f7 += f0 * f4; // coeffs.b1 * w0
" madd.s f7, f1, f5 \n" // f7 += f1 * f5; // coeffs.b2 * w1 -> result
" ssip f7, a3, 4 \n" // *output++ = f7; // store, then advance a3 by 4 bytes
" mov.s f5, f4 \n" // f5 = f4; // w1 = w0
" mov.s f4, f6 \n" // f4 = f6; // w0 = f6
" 1: \n" // } // end of the loop body
" ssi f4, a6, 0 \n" // w[0] = f4; // persist delay state for the next call
" ssi f5, a6, 4 \n" // w[1] = f5;
" movi.n a2, 0 \n" // return 0;
" retw.n \n" // return to caller
);