SDDC_Driver
Loading...
Searching...
No Matches
fft_mt_r2iq_impl.h
1#define TAG "fft_mt_r2iq_def"
2
3{
4 TracePrintln(TAG, "%p", th);
5 DebugPrintln(TAG, "Initialization...");
6
7 const int deci_ratio = decimation_ratio[decimation];
8
9 const int deci_fft_scrap_size = (BASE_FFT_SCRAP_SIZE / 2) / deci_ratio;
10 const int fft_output_size = this->fft_size_per_decimation[decimation];
11 const int fft_output_half_size = fft_output_size / 2;
12 const int fft_useful_size = fft_output_size - deci_fft_scrap_size;
13
14 DebugPrintln(TAG, "Decimation : %d (index %d)", deci_ratio, decimation);
15 DebugPrintln(TAG, "Scrap size : %d", deci_fft_scrap_size);
16 DebugPrintln(TAG, "FFT output size : %d", fft_output_size);
17 DebugPrintln(TAG, "FFT useful output size : %d", fft_useful_size);
18 DebugPrintln(TAG, "Initialization done");
19
20 const fftwf_complex* filter = filterHw[decimation];
21 const auto filter2 = &filter[BASE_FFT_HALF_SIZE - fft_output_half_size];
22
23 plan_freq2time = &plan_freq2time_per_decimation[decimation];
24 int decimate_count = 0;
25
26 vector<float> iq_output(inputbuffer_block_size);
27 size_t output_buffer_offset = 0;
28
29 // Pointer to the current input block
30 vector<int16_t> input_current_block;
31 // Pointer to the end of the previous input block minus BASE_FFT_SCRAP_SIZE
32 // (input_previous_block + inputbuffer_block_size - BASE_FFT_SCRAP_SIZE)
33 vector<int16_t> last_block_end(BASE_FFT_SCRAP_SIZE);
34
35 while(r2iqOn)
36 {
37 {
38 std::unique_lock<std::mutex> lk(mutexR2iqControl);
39 input_current_block = inputbuffer->pop();
40
41 if (!r2iqOn)
42 return 0;
43 }
44
45 // @todo: move the following int16_t conversion to (32-bit) float
46 // directly inside the following loop (for "k < ffts_per_blocks")
47 // just before the forward fft "fftwf_execute_dft_r2c" is called
48 // idea: this should improve cache/memory locality
49#if PRINT_INPUT_RANGE
50 std::pair<int16_t, int16_t> blockMinMax = std::make_pair<int16_t, int16_t>(0, 0);
51#endif
52 if (!this->getRand()) // plain samples no ADC rand set
53 {
54 convert_float<false>(
55 /*source=*/last_block_end.data(),
56 /*dest=*/th->ADCinTime,
57 /*len=*/BASE_FFT_SCRAP_SIZE
58 );
59#if PRINT_INPUT_RANGE
60 auto minmax = std::minmax_element(input_current_block, input_current_block + inputbuffer_block_size);
61 blockMinMax.first = *minmax.first;
62 blockMinMax.second = *minmax.second;
63#endif
64 convert_float<false>(
65 /*source=*/input_current_block.data(),
66 /*dest=*/th->ADCinTime + BASE_FFT_SCRAP_SIZE,
67 /*len=*/inputbuffer_block_size
68 );
69 }
70 else
71 {
72 convert_float<true>(
73 /*source=*/last_block_end.data(),
74 /*dest=*/th->ADCinTime,
75 /*len=*/BASE_FFT_SCRAP_SIZE
76 );
77 convert_float<true>(
78 /*source=*/input_current_block.data(),
79 /*dest=*/th->ADCinTime + BASE_FFT_SCRAP_SIZE,
80 /*len=*/inputbuffer_block_size
81 );
82 }
83
84 last_block_end = vector<int16_t>(
85 input_current_block.data() + inputbuffer_block_size - BASE_FFT_SCRAP_SIZE,
86 input_current_block.data() + inputbuffer_block_size
87 );
88
89#if PRINT_INPUT_RANGE
90 th->MinValue = std::min(blockMinMax.first, th->MinValue);
91 th->MaxValue = std::max(blockMinMax.second, th->MaxValue);
92 ++th->MinMaxBlockCount;
93 if (th->MinMaxBlockCount * processor_count / 3 >= DEFAULT_TRANSFERS_PER_SEC )
94 {
95 float minBits = (th->MinValue < 0) ? (log10f((float)(-th->MinValue)) / log10f(2.0f)) : -1.0f;
96 float maxBits = (th->MaxValue > 0) ? (log10f((float)(th->MaxValue)) / log10f(2.0f)) : -1.0f;
97 printf("r2iq: min = %d (%.1f bits) %.2f%%, max = %d (%.1f bits) %.2f%%\n",
98 (int)th->MinValue, minBits, th->MinValue *-100.0f / 32768.0f,
99 (int)th->MaxValue, maxBits, th->MaxValue * 100.0f / 32768.0f);
100 th->MinValue = 0;
101 th->MaxValue = 0;
102 th->MinMaxBlockCount = 0;
103 }
104#endif
105
106 // decimate in frequency plus tuning
107
108 const int _center_frequency_bin = this->center_frequency_bin;
109
110 // Calculate the parameters for the first half
111 // Includes all frequencies above _center_frequency_bin
112 const auto upper_frequencies_source = &th->ADCinFreq[_center_frequency_bin];
113 const auto upper_frequencies_len = std::min(
114 BASE_FFT_HALF_SIZE - _center_frequency_bin, // Desired value
115 fft_output_half_size // Overflow protection
116 );
117
118 // Calculate the parameters for the second half
119 // Includes all frequencies below _center_frequency_bin
120 const auto lower_frequencies_source = &th->ADCinFreq[_center_frequency_bin - fft_output_half_size];
121 const auto lower_frequencies_start = std::max(
122 fft_output_half_size - _center_frequency_bin,
123 0
124 );
125
126 // Main processing loop based on overlap-save method
127 // It also includes filtering and decimation
128 for (int k = 0; k < ffts_per_blocks; k++)
129 {
130 // core of fast convolution including filter and decimation
131 // main part is 'overlap-scrap' (IMHO better name for 'overlap-save'), see
132 // https://en.wikipedia.org/wiki/Overlap%E2%80%93save_method
133 {
134 // FFT first stage: time to frequency, real to complex
135 // Input buffer: th->ADCinTime + k * (0.75 * BASE_FFT_SIZE)
136 // Transformation size: BASE_FFT_SIZE
137 // Output buffer: th->ADCinFreq[]
138 // Output size: BASE_FFT_HALF_SIZE + 1
139 fftwf_execute_dft_r2c(plan_time2freq_r2c, th->ADCinTime + k * (BASE_FFT_SIZE - BASE_FFT_SCRAP_SIZE), th->ADCinFreq);
140
141 // circular shift (mixing in full bins) and low/bandpass filtering (complex multiplication)
142 {
143 // circular shift tune fs/2 first half array into th->inFreqTmp[]
144 shift_freq(
145 /*destination=*/th->inFreqTmp,
146 /*source1=*/upper_frequencies_source,
147 /*source2=*/filter,
148 /*start=*/0,
149 /*end=*/upper_frequencies_len
150 );
151
152 // Pad with zeroes if needed
153 if(fft_output_half_size != upper_frequencies_len)
154 memset(th->inFreqTmp[upper_frequencies_len], 0, (fft_output_half_size - upper_frequencies_len) * sizeof(fftwf_complex));
155
156 // circular shift tune fs/2 second half array
157 shift_freq(
158 /*destination=*/&th->inFreqTmp[fft_output_half_size],
159 /*source1=*/lower_frequencies_source,
160 /*source2=*/filter2,
161 /*start=*/lower_frequencies_start,
162 /*end=*/fft_output_half_size
163 );
164
165 if (lower_frequencies_start != 0)
166 memset(th->inFreqTmp[fft_output_half_size], 0, lower_frequencies_start * sizeof(fftwf_complex));
167 }
168 // result now in th->inFreqTmp[]
169 // Size: fft_output_size (depending on the decimation)
170
171 // 'shorter' inverse FFT transform (decimation) -> frequency (back) to COMPLEX time domain
172 // transform size: fft_output_size (depending on the decimation)
173 fftwf_execute_dft(*plan_freq2time, th->inFreqTmp, th->inFreqTmp);
174 // result now in th->inFreqTmp[]
175 }
176
177 // postprocessing
178 // @todo: is it possible to ..
179 // 1)
180 // let inverse FFT produce/save it's result directly
181 // in "this->obuffers[modx] + offset" (pout)
182 // ( obuffers[] would need to have additional space ..;
183 // need to move 'scrap' of 'ovelap-scrap'? )
184 // at least FFTW would allow so,
185 // see http://www.fftw.org/fftw3_doc/New_002darray-Execute-Functions.html
186 // attention: multithreading!
187 // 2)
188 // could mirroring (lower sideband) get calculated together
189 // with fine mixer - modifying the mixer frequency? (fs - fc)/fs
190 // (this would reduce one memory pass)
191
192 size_t len = (k+1) * fft_useful_size + output_buffer_offset > 32768 ? 32768 - (k * fft_useful_size + output_buffer_offset) : fft_useful_size;
193
194 if (this->getSideband()) // lower sideband
195 {
196 // mirror just by negating the imaginary Q of complex I/Q
197 copy<true>((fftwf_complex*)&iq_output.data()[(k * fft_useful_size + output_buffer_offset)*2], &th->inFreqTmp[0], len);
198 }
199 else // upper sideband
200 {
201 copy<false>((fftwf_complex*)&iq_output.data()[(k * fft_useful_size + output_buffer_offset)*2], &th->inFreqTmp[0], len);
202 }
203 }
204
205 decimate_count = (decimate_count + 1) & (deci_ratio - 1);
206 if (decimate_count == 0) {
207 outputbuffer->push(iq_output);
208 output_buffer_offset = 0;
209 }
210 else
211 {
212 output_buffer_offset += fft_output_half_size + fft_useful_size * (ffts_per_blocks-1);
213 }
214 }
215 return 0;
216}