diff --git a/csdr.c b/csdr.c index 27dbf6dc..ece7f3ca 100755 --- a/csdr.c +++ b/csdr.c @@ -52,6 +52,9 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include "fastddc.h" #include +#include +#include +#include char usage[]= "csdr - a simple commandline tool for Software Defined Radio receiver DSP.\n\n" @@ -171,6 +174,11 @@ char usage[]= " fastddc_fwd_cc [transition_bw [window]]\n" " fastddc_inv_cc [transition_bw [window]]\n" " _fft2octave \n" +" cicddc_s16_c\n" +" cicddc_cs16_c\n" +" shm_cicddc_s16_c\n" +" shm_cicddc_cs16_c\n" +" shm_cicddc_cu8_c\n" " convert_f_i16 #deprecated, use instead: convert_f_s16\n" " convert_i16_f #deprecated, use instead: convert_s16_f\n" " floatdump_f #deprecated, use instead: dump_f\n" @@ -251,17 +259,18 @@ int clone_(int bufsize_param) int init_fifo(int argc, char *argv[]) { - if(argc>=4) - { - if(!strcmp(argv[2],"--fifo")) + int i; + for(i = 2; i < argc-1; i++) { + char *arg_name = argv[i], *arg_param = argv[i+1]; + if(!strcmp(arg_name,"--fifo")) { errhead(); fprintf(stderr,"fifo control mode on\n"); - int fd = open(argv[3], O_RDONLY); + int fd = open(arg_param, O_RDONLY); int flags = fcntl(fd, F_GETFL, 0); fcntl(fd, F_SETFL, flags | O_NONBLOCK); return fd; } - else if(!strcmp(argv[2],"--fd")) + else if(!strcmp(arg_name,"--fd")) { //to use this: //1. Create a pipe(pipedesc) in your process. @@ -271,7 +280,7 @@ int init_fifo(int argc, char *argv[]) //3. From your parent process, write into pipedesc[1]. //This is implemented in ddcd, check there to see how to do it! int fd; - if(sscanf(argv[3], "%d",&fd)<=0) return 0; + if(sscanf(arg_param, "%d",&fd)<=0) return 0; errhead(); fprintf(stderr,"fd control mode on, fd=%d\n", fd); int flags = fcntl(fd, F_GETFL, 0); @@ -1566,7 +1575,8 @@ int main(int argc, char *argv[]) } } - if(!strcmp(argv[1],"fft_cc")) + int combined_logavg; + if((!strcmp(argv[1],"fft_cc")) || (combined_logavg = !strcmp(argv[1],"fft_cc_logavg")) ) { if(argc<=3) return badsyntax("need required parameters (fft_size, out_of_every_n_samples)"); int fft_size; @@ -1577,6 +1587,25 @@ int main(int argc, char *argv[]) int benchmark=0; int octave=0; window_t window = WINDOW_DEFAULT; + + int int_in=0; + int16_t *input_int; + // for averaging: + float add_db=0; + int avgnumber=0, n_averaged=0; + float *output2; + if(combined_logavg) { + if(argc<=5) return badsyntax("need required parameters (fft_size, out_of_every_n_samples, add_db, avgnumber)"); + + sscanf(argv[4],"%g",&add_db); + sscanf(argv[5],"%d",&avgnumber); + + output2 = malloc(sizeof(float) * fft_size); + + add_db -= 10.0*log10(avgnumber); + if(argc >= 7 && !strcmp(argv[6], "--cs16_in")) + int_in = 1; + } else { if(argc>=5) { window=firdes_get_window_from_string(argv[4]); @@ -1591,6 +1620,7 @@ int main(int argc, char *argv[]) benchmark|=!strcmp("--benchmark",argv[6]); octave|=!strcmp("--octave",argv[6]); } + } if(!initialize_buffers()) return -2; sendbufsize(fft_size); @@ -1605,27 +1635,55 @@ int main(int argc, char *argv[]) if(octave) printf("setenv(\"GNUTERM\",\"X11 noraise\");y=zeros(1,%d);semilogy(y,\"ydatasource\",\"y\");\n",fft_size); float *windowt; windowt = precalculate_window(fft_size, window); + size_t sizeof_in = sizeof(complexf); + if(int_in) { + sizeof_in = 2*sizeof(int16_t); + input_int = (int16_t*)malloc(sizeof(int16_t)*fft_size*2); + } for(;;) { FEOF_CHECK; if(every_n_samples>fft_size) { - fread(input, sizeof(complexf), fft_size, stdin); + fread(int_in ?(void*)input_int :(void*)input, sizeof_in, fft_size, stdin); + if(int_in) + convert_i16_f(input_int, (float*)input, 2*fft_size); + //skipping samples before next FFT (but fseek doesn't work for pipes) for(int seek_remain=every_n_samples-fft_size;seek_remain>0;seek_remain-=the_bufsize) { - fread(temp_f, sizeof(complexf), MIN_M(the_bufsize,seek_remain), stdin); + fread(temp_f, sizeof_in, MIN_M(the_bufsize,seek_remain), stdin); } } else { //overlapped FFT for(int i=0;i= avgnumber) { + log_ff(output2, output2, fft_size, add_db); + fwrite (output2, sizeof(float), fft_size, stdout); + TRY_YIELD; + for(i = 0; i < fft_size; i++) { + output2[i] = 0; + } + n_averaged = 0; + } + } else if(octave) { printf("fftdata=["); @@ -3596,6 +3654,158 @@ int main(int argc, char *argv[]) } } + /* + ____ ___ ____ ____ ____ ____ + / ___|_ _/ ___| | _ \| _ \ / ___| + | | | | | | | | | | | | | + | |___ | | |___ | |_| | |_| | |___ + \____|___\____| |____/|____/ \____| + */ + + {int complex_cic, cu8_cic; + if((!strcmp(argv[1],"cicddc_s16_c")) | (complex_cic=!strcmp(argv[1],"cicddc_cs16_c"))) { + float rate=0; + int fd; + int factor=0, insize, outsize; + bigbufs = 1; + + if(argc<=2) return badsyntax("need required parameter(s) (decimation factor, [rate])"); + sscanf(argv[2],"%d",&factor); + if(fd=init_fifo(argc,argv)) + { + while(!read_fifo_ctl(fd,"%g\n",&rate)) usleep(10000); + } + else + { + if(argc<=3) return badsyntax("need required parameters (decimation factor, rate)"); + sscanf(argv[3],"%g",&rate); + } + + the_bufsize = getbufsize(); + outsize = the_bufsize / factor; + insize = outsize * factor; // make it integer multiple of factor + sendbufsize(outsize); + if(complex_cic) insize *= 2; + + int16_t *input_buffer = malloc(sizeof(int16_t) * insize); + complexf *output_buffer = malloc(sizeof(complexf) * outsize); + + void *state = cicddc_init(factor); + for(;;) + { + FEOF_CHECK; + fread(input_buffer, sizeof(int16_t), insize, stdin); + if(complex_cic) + cicddc_cs16_c(state, input_buffer, output_buffer, outsize, rate); + else + cicddc_s16_c(state, input_buffer, output_buffer, outsize, rate); + fwrite(output_buffer, sizeof(complexf), outsize, stdout); + fflush(stdout); + read_fifo_ctl(fd,"%g\n",&rate); + TRY_YIELD; + } + cicddc_free(state); + } + + /* For now, complex uint8 is handled similarly to real int16 because the size is the same. + (Two 8-bit numbers in one "int16_t") */ + if((!strcmp(argv[1],"shm_cicddc_s16_c")) | (complex_cic=!strcmp(argv[1],"shm_cicddc_cs16_c")) + | (cu8_cic=!strcmp(argv[1],"shm_cicddc_cu8_c"))) { + float rate=0; + int fd, shm_fd; + int factor=0, insize, outsize; + //bigbufs = 1; + + if(argc<=3) return badsyntax("need required parameter(s) (shm name, decimation factor, [rate])"); + sscanf(argv[3],"%d",&factor); + if(fd=init_fifo(argc,argv)) + { + while(!read_fifo_ctl(fd,"%g\n",&rate)) usleep(10000); + } + else + { + if(argc<=4) return badsyntax("need required parameters (shm name, decimation factor, rate)"); + sscanf(argv[4],"%g",&rate); + } + + // some code from shmread: + void *shm_buf; + size_t *shm_p; + size_t readpoint_bufs = 0, writepoint_bufs = 0, bufsize_bufs; + size_t shm_size, bufsize_bytes, insize_bytes, prevp; + struct stat shm_stat; + shm_fd = shm_open(argv[2], O_RDONLY, 0644); + if(shm_fd < 0) { perror("shm_open failed"); return 1; } + if(fstat(shm_fd, &shm_stat) < 0) { perror("fstat failed"); return 1; } + shm_size = shm_stat.st_size; + bufsize_bytes = shm_size - sizeof(size_t); + shm_buf = mmap(0, shm_size, PROT_READ, MAP_SHARED, shm_fd, 0); + if(shm_buf == MAP_FAILED) { perror("mmap failed"); return 1; } + shm_p = shm_buf + bufsize_bytes; + prevp = *shm_p; + if(prevp >= bufsize_bytes) return badsyntax("bad pointer value in shm buffer"); + + outsize = 0x400; + insize = outsize * factor; // make it integer multiple of factor + sendbufsize(outsize); + if(complex_cic) insize *= 2; + + insize_bytes = sizeof(int16_t) * insize; + /* We don't need modulo or memory mapping tricks if wrap-around occurs at input block boundary. + This needs the SHM buffer size to be a multiple of insize. */ + if(bufsize_bytes % insize_bytes != 0) return badsyntax("SHM size should be multiple of CIC processing block size"); + + bufsize_bufs = bufsize_bytes / insize_bytes; + writepoint_bufs = readpoint_bufs = prevp / insize_bytes; + + int16_t *input_buffer = malloc(sizeof(int16_t) * insize); + complexf *output_buffer = malloc(sizeof(complexf) * outsize); + + void *state = cicddc_init(factor); + size_t extradelay = 0; + for(;;) + { + //fprintf(stderr, "%d %d\n", writepoint_bufs, readpoint_bufs); + if(writepoint_bufs != readpoint_bufs) { + int r; + ssize_t readpoint_bufs1 = (ssize_t)readpoint_bufs - extradelay; + while(readpoint_bufs1 < 0) readpoint_bufs1 += bufsize_bufs; + + //input_buffer = (int16_t*)(shm_buf + insize_bytes * readpoint_bufs1); + + /* It seems strange but memcpying small blocks and reading from there + is faster than reading directly from the SHM buffer during processing + (at least on the server I tested it on). */ + memcpy(input_buffer, shm_buf + insize_bytes * readpoint_bufs1, insize_bytes); + + if(cu8_cic) + cicddc_cu8_c(state, (uint8_t*)input_buffer, output_buffer, outsize, rate); + else if(complex_cic) + cicddc_cs16_c(state, input_buffer, output_buffer, outsize, rate); + else + cicddc_s16_c(state, input_buffer, output_buffer, outsize, rate); + fwrite(output_buffer, sizeof(complexf), outsize, stdout); + fflush(stdout); + + // advance pointer: + readpoint_bufs++; + if(readpoint_bufs >= bufsize_bufs) readpoint_bufs = 0; + } else { + ssize_t newdelay = -1; + usleep(50000); + read_fifo_ctl(fd,"%g %zd\n",&rate, &newdelay); + if(newdelay >= 0 && newdelay < bufsize_bytes) { + extradelay = newdelay / insize_bytes; + fprintf(stderr, "delaying: %zd %zd\n", extradelay, newdelay); + } + + writepoint_bufs = (*shm_p) / insize_bytes; + } + /*TRY_YIELD;*/ + } + cicddc_free(state); + }} + if(!strcmp(argv[1],"none")) { return 0; diff --git a/libcsdr.c b/libcsdr.c index 1eb91b38..3ee1774d 100755 --- a/libcsdr.c +++ b/libcsdr.c @@ -848,6 +848,212 @@ void apply_fir_fft_cc(FFT_PLAN_T* plan, FFT_PLAN_T* plan_inverse, complexf* taps } + +/* CIC DDC */ +#define SINESHIFT 12 +#define SINESIZE (1<factor = factor; + s->gain = 1.0f / SHRT_MAX / sineamp / factor / factor / factor; // compensate for gain of 3 integrators + + s->sinetable = malloc(sinesize2 * sizeof(*s->sinetable)); + double f = 2.0*M_PI / (double)SINESIZE; + for(i = 0; i < sinesize2; i++) { + s->sinetable[i] = sineamp * cos(f * i); + } + return s; +} + +void cicddc_free(void *state) { + cicddc_t *s = state; + free(s->sinetable); + free(s); +} + +void cicddc_s16_c(void *state, int16_t *input, complexf *output, int outsize, float rate) { + cicddc_t *s = state; + int k; + int factor = s->factor; + cic_dt ig0a = s->ig0a, ig0b = s->ig0b, ig1a = s->ig1a, ig1b = s->ig1b; + cic_dt comb0a = s->comb0a, comb0b = s->comb0b, comb1a = s->comb1a, comb1b = s->comb1b; + uint64_t phase = s->phase, freq; + int16_t *sinetable = s->sinetable; + float gain = s->gain; + + freq = rate * ((float)(1ULL << 63) * 2); + + int16_t *inp = input; + for(k = 0; k < outsize; k++) { + int i; + cic_dt out0a, out0b, out1a, out1b; + cic_dt ig2a = 0, ig2b = 0; // last integrator and first comb replaced simply by sum + for(i = 0; i < factor; i++) { + cic_dt in_a, in_b; + int sinep = phase >> (64-SINESHIFT); + in_a = (int32_t)inp[i] * (int32_t)sinetable[sinep + (1<<(SINESHIFT-2))]; + in_b = (int32_t)inp[i] * (int32_t)sinetable[sinep]; + phase += freq; + /* integrators: + The calculations are ordered so that each integrator + takes a result from previous loop iteration + to make the code more "pipeline-friendly". */ + ig2a += ig1a; ig2b += ig1b; + ig1a += ig0a; ig1b += ig0b; + ig0a += in_a; ig0b += in_b; + } + inp += factor; + // comb filters: + out0a = ig2a - comb0a; out0b = ig2b - comb0b; + comb0a = ig2a; comb0b = ig2b; + out1a = out0a - comb1a; out1b = out0b - comb1b; + comb1a = out0a; comb1b = out0b; + + output[k].i = (float)out1a * gain; + output[k].q = (float)out1b * gain; + } + + s->ig0a = ig0a; s->ig0b = ig0b; + s->ig1a = ig1a; s->ig1b = ig1b; + s->comb0a = comb0a; s->comb0b = comb0b; + s->comb1a = comb1a; s->comb1b = comb1b; + s->phase = phase; +} + +void cicddc_cs16_c(void *state, int16_t *input, complexf *output, int outsize, float rate) { + cicddc_t *s = state; + int k; + int factor = s->factor; + cic_dt ig0a = s->ig0a, ig0b = s->ig0b, ig1a = s->ig1a, ig1b = s->ig1b; + cic_dt comb0a = s->comb0a, comb0b = s->comb0b, comb1a = s->comb1a, comb1b = s->comb1b; + uint64_t phase = s->phase, freq; + int16_t *sinetable = s->sinetable; + float gain = s->gain; + + freq = rate * ((float)(1ULL << 63) * 2); + + int16_t *inp = input; + for(k = 0; k < outsize; k++) { + int i; + cic_dt out0a, out0b, out1a, out1b; + cic_dt ig2a = 0, ig2b = 0; // last integrator and first comb replaced simply by sum + for(i = 0; i < factor; i++) { + cic_dt in_a, in_b; + int32_t m_a, m_b, m_c, m_d; + int sinep = phase >> (64-SINESHIFT); + m_a = inp[2*i]; + m_b = inp[2*i+1]; + m_c = (int32_t)sinetable[sinep + (1<<(SINESHIFT-2))]; + m_d = (int32_t)sinetable[sinep]; + // complex multiplication: + in_a = m_a*m_c - m_b*m_d; + in_b = m_a*m_d + m_b*m_c; + phase += freq; + /* integrators: + The calculations are ordered so that each integrator + takes a result from previous loop iteration + to make the code more "pipeline-friendly". */ + ig2a += ig1a; ig2b += ig1b; + ig1a += ig0a; ig1b += ig0b; + ig0a += in_a; ig0b += in_b; + } + inp += 2*factor; + // comb filters: + out0a = ig2a - comb0a; out0b = ig2b - comb0b; + comb0a = ig2a; comb0b = ig2b; + out1a = out0a - comb1a; out1b = out0b - comb1b; + comb1a = out0a; comb1b = out0b; + + output[k].i = (float)out1a * gain; + output[k].q = (float)out1b * gain; + } + + s->ig0a = ig0a; s->ig0b = ig0b; + s->ig1a = ig1a; s->ig1b = ig1b; + s->comb0a = comb0a; s->comb0b = comb0b; + s->comb1a = comb1a; s->comb1b = comb1b; + s->phase = phase; +} + + +/* This is almost copy paste from cicddc_cs16_c. + I'm afraid this is going to be annoying to maintain... */ +void cicddc_cu8_c(void *state, uint8_t *input, complexf *output, int outsize, float rate) { + cicddc_t *s = state; + int k; + int factor = s->factor; + cic_dt ig0a = s->ig0a, ig0b = s->ig0b, ig1a = s->ig1a, ig1b = s->ig1b; + cic_dt comb0a = s->comb0a, comb0b = s->comb0b, comb1a = s->comb1a, comb1b = s->comb1b; + uint64_t phase = s->phase, freq; + int16_t *sinetable = s->sinetable; + float gain = s->gain; + + freq = rate * ((float)(1ULL << 63) * 2); + + uint8_t *inp = input; + for(k = 0; k < outsize; k++) { + int i; + cic_dt out0a, out0b, out1a, out1b; + cic_dt ig2a = 0, ig2b = 0; // last integrator and first comb replaced simply by sum + for(i = 0; i < factor; i++) { + cic_dt in_a, in_b; + int32_t m_a, m_b, m_c, m_d; + int sinep = phase >> (64-SINESHIFT); + // subtract 127.4 (good for rtl-sdr) + m_a = (((int32_t)inp[2*i]) << 8) - 32614; + m_b = (((int32_t)inp[2*i+1]) << 8) - 32614; + m_c = (int32_t)sinetable[sinep + (1<<(SINESHIFT-2))]; + m_d = (int32_t)sinetable[sinep]; + // complex multiplication: + in_a = m_a*m_c - m_b*m_d; + in_b = m_a*m_d + m_b*m_c; + phase += freq; + /* integrators: + The calculations are ordered so that each integrator + takes a result from previous loop iteration + to make the code more "pipeline-friendly". */ + ig2a += ig1a; ig2b += ig1b; + ig1a += ig0a; ig1b += ig0b; + ig0a += in_a; ig0b += in_b; + } + inp += 2*factor; + // comb filters: + out0a = ig2a - comb0a; out0b = ig2b - comb0b; + comb0a = ig2a; comb0b = ig2b; + out1a = out0a - comb1a; out1b = out0b - comb1b; + comb1a = out0a; comb1b = out0b; + + output[k].i = (float)out1a * gain; + output[k].q = (float)out1b * gain; + } + + s->ig0a = ig0a; s->ig0b = ig0b; + s->ig1a = ig1a; s->ig1b = ig1b; + s->comb0a = comb0a; s->comb0b = comb0b; + s->comb1a = comb1a; s->comb1b = comb1b; + s->phase = phase; +} + + + + + /* __ __ _ _ _ _ /\ | \/ | | | | | | | | | @@ -1277,10 +1483,10 @@ void apply_precalculated_window_c(complexf* input, complexf* output, int size, f void apply_precalculated_window_f(float* input, float* output, int size, float *windowt) { - for(int i=0;i #define MIN_M(x,y) (((x)>(y))?(y):(x)) #define MAX_M(x,y) (((x)<(y))?(y):(x)) @@ -410,3 +411,10 @@ void pack_bits_1to8_u8_u8(unsigned char* input, unsigned char* output, int input unsigned char pack_bits_8to1_u8_u8(unsigned char* input); void dbpsk_decoder_c_u8(complexf* input, unsigned char* output, int input_size); int bfsk_demod_cf(complexf* input, float* output, int input_size, complexf* mark_filter, complexf* space_filter, int taps_length); + + +void *cicddc_init(int factor); +void cicddc_free(void *state); +void cicddc_s16_c(void *state, int16_t *input, complexf *output, int outsize, float rate); +void cicddc_cs16_c(void *state, int16_t *input, complexf *output, int outsize, float rate); +void cicddc_cu8_c(void *state, uint8_t *input, complexf *output, int outsize, float rate);