Libav
g722enc.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) CMU 1993 Computer Science, Speech Group
3  * Chengxiang Lu and Alex Hauptmann
4  * Copyright (c) 2005 Steve Underwood <steveu at coppice.org>
5  * Copyright (c) 2009 Kenan Gillet
6  * Copyright (c) 2010 Martin Storsjo
7  *
8  * This file is part of Libav.
9  *
10  * Libav is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * Libav is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with Libav; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24 
30 #include "avcodec.h"
31 #include "internal.h"
32 #include "g722.h"
33 #include "libavutil/common.h"
34 
35 #define FREEZE_INTERVAL 128
36 
37 /* This is an arbitrary value. Allowing insanely large values leads to strange
38  problems, so we limit it to a reasonable value */
39 #define MAX_FRAME_SIZE 32768
40 
41 /* We clip the value of avctx->trellis to prevent data type overflows and
42  undefined behavior. Using larger values is insanely slow anyway. */
43 #define MIN_TRELLIS 0
44 #define MAX_TRELLIS 16
45 
47 {
48  G722Context *c = avctx->priv_data;
49  int i;
50  for (i = 0; i < 2; i++) {
51  av_freep(&c->paths[i]);
52  av_freep(&c->node_buf[i]);
53  av_freep(&c->nodep_buf[i]);
54  }
55  return 0;
56 }
57 
59 {
60  G722Context *c = avctx->priv_data;
61  int ret;
62 
63  if (avctx->channels != 1) {
64  av_log(avctx, AV_LOG_ERROR, "Only mono tracks are allowed.\n");
65  return AVERROR_INVALIDDATA;
66  }
67 
68  c->band[0].scale_factor = 8;
69  c->band[1].scale_factor = 2;
70  c->prev_samples_pos = 22;
71 
72  if (avctx->trellis) {
73  int frontier = 1 << avctx->trellis;
74  int max_paths = frontier * FREEZE_INTERVAL;
75  int i;
76  for (i = 0; i < 2; i++) {
77  c->paths[i] = av_mallocz(max_paths * sizeof(**c->paths));
78  c->node_buf[i] = av_mallocz(2 * frontier * sizeof(**c->node_buf));
79  c->nodep_buf[i] = av_mallocz(2 * frontier * sizeof(**c->nodep_buf));
80  if (!c->paths[i] || !c->node_buf[i] || !c->nodep_buf[i]) {
81  ret = AVERROR(ENOMEM);
82  goto error;
83  }
84  }
85  }
86 
87  if (avctx->frame_size) {
88  /* validate frame size */
89  if (avctx->frame_size & 1 || avctx->frame_size > MAX_FRAME_SIZE) {
90  int new_frame_size;
91 
92  if (avctx->frame_size == 1)
93  new_frame_size = 2;
94  else if (avctx->frame_size > MAX_FRAME_SIZE)
95  new_frame_size = MAX_FRAME_SIZE;
96  else
97  new_frame_size = avctx->frame_size - 1;
98 
99  av_log(avctx, AV_LOG_WARNING, "Requested frame size is not "
100  "allowed. Using %d instead of %d\n", new_frame_size,
101  avctx->frame_size);
102  avctx->frame_size = new_frame_size;
103  }
104  } else {
105  /* This is arbitrary. We use 320 because it's 20ms @ 16kHz, which is
106  a common packet size for VoIP applications */
107  avctx->frame_size = 320;
108  }
109  avctx->delay = 22;
110 
111  if (avctx->trellis) {
112  /* validate trellis */
113  if (avctx->trellis < MIN_TRELLIS || avctx->trellis > MAX_TRELLIS) {
114  int new_trellis = av_clip(avctx->trellis, MIN_TRELLIS, MAX_TRELLIS);
115  av_log(avctx, AV_LOG_WARNING, "Requested trellis value is not "
116  "allowed. Using %d instead of %d\n", new_trellis,
117  avctx->trellis);
118  avctx->trellis = new_trellis;
119  }
120  }
121 
122  return 0;
123 error:
124  g722_encode_close(avctx);
125  return ret;
126 }
127 
/*
 * Decision thresholds scanned by encode_low(); each entry is multiplied by
 * the band's scale factor before being compared.
 * Only the first 29 of the 33 slots are initialised explicitly; the
 * remaining slots are implicitly zero.
 */
static const int16_t low_quant[33] = {
      35,   72,  110,  150,
     190,  233,  276,  323,
     370,  422,  473,  530,
     587,  650,  714,  786,
     858,  940, 1023, 1121,
    1219, 1339, 1458, 1612,
    1765, 1980, 2195, 2557,
    2919
};
134 
135 static inline void filter_samples(G722Context *c, const int16_t *samples,
136  int *xlow, int *xhigh)
137 {
138  int xout1, xout2;
139  c->prev_samples[c->prev_samples_pos++] = samples[0];
140  c->prev_samples[c->prev_samples_pos++] = samples[1];
141  ff_g722_apply_qmf(c->prev_samples + c->prev_samples_pos - 24, &xout1, &xout2);
142  *xlow = xout1 + xout2 >> 14;
143  *xhigh = xout1 - xout2 >> 14;
145  memmove(c->prev_samples,
146  c->prev_samples + c->prev_samples_pos - 22,
147  22 * sizeof(c->prev_samples[0]));
148  c->prev_samples_pos = 22;
149  }
150 }
151 
152 static inline int encode_high(const struct G722Band *state, int xhigh)
153 {
154  int diff = av_clip_int16(xhigh - state->s_predictor);
155  int pred = 141 * state->scale_factor >> 8;
156  /* = diff >= 0 ? (diff < pred) + 2 : diff >= -pred */
157  return ((diff ^ (diff >> (sizeof(diff)*8-1))) < pred) + 2*(diff >= 0);
158 }
159 
160 static inline int encode_low(const struct G722Band* state, int xlow)
161 {
162  int diff = av_clip_int16(xlow - state->s_predictor);
163  /* = diff >= 0 ? diff : -(diff + 1) */
164  int limit = diff ^ (diff >> (sizeof(diff)*8-1));
165  int i = 0;
166  limit = limit + 1 << 10;
167  if (limit > low_quant[8] * state->scale_factor)
168  i = 9;
169  while (i < 29 && limit > low_quant[i] * state->scale_factor)
170  i++;
171  return (diff < 0 ? (i < 2 ? 63 : 33) : 61) - i;
172 }
173 
174 static void g722_encode_trellis(G722Context *c, int trellis,
175  uint8_t *dst, int nb_samples,
176  const int16_t *samples)
177 {
178  int i, j, k;
179  int frontier = 1 << trellis;
180  struct TrellisNode **nodes[2];
181  struct TrellisNode **nodes_next[2];
182  int pathn[2] = {0, 0}, froze = -1;
183  struct TrellisPath *p[2];
184 
185  for (i = 0; i < 2; i++) {
186  nodes[i] = c->nodep_buf[i];
187  nodes_next[i] = c->nodep_buf[i] + frontier;
188  memset(c->nodep_buf[i], 0, 2 * frontier * sizeof(*c->nodep_buf[i]));
189  nodes[i][0] = c->node_buf[i] + frontier;
190  nodes[i][0]->ssd = 0;
191  nodes[i][0]->path = 0;
192  nodes[i][0]->state = c->band[i];
193  }
194 
195  for (i = 0; i < nb_samples >> 1; i++) {
196  int xlow, xhigh;
197  struct TrellisNode *next[2];
198  int heap_pos[2] = {0, 0};
199 
200  for (j = 0; j < 2; j++) {
201  next[j] = c->node_buf[j] + frontier*(i & 1);
202  memset(nodes_next[j], 0, frontier * sizeof(**nodes_next));
203  }
204 
205  filter_samples(c, &samples[2*i], &xlow, &xhigh);
206 
207  for (j = 0; j < frontier && nodes[0][j]; j++) {
208  /* Only k >> 2 affects the future adaptive state, therefore testing
209  * small steps that don't change k >> 2 is useless, the original
210  * value from encode_low is better than them. Since we step k
211  * in steps of 4, make sure range is a multiple of 4, so that
212  * we don't miss the original value from encode_low. */
213  int range = j < frontier/2 ? 4 : 0;
214  struct TrellisNode *cur_node = nodes[0][j];
215 
216  int ilow = encode_low(&cur_node->state, xlow);
217 
218  for (k = ilow - range; k <= ilow + range && k <= 63; k += 4) {
219  int decoded, dec_diff, pos;
220  uint32_t ssd;
221  struct TrellisNode* node;
222 
223  if (k < 0)
224  continue;
225 
226  decoded = av_clip((cur_node->state.scale_factor *
227  ff_g722_low_inv_quant6[k] >> 10)
228  + cur_node->state.s_predictor, -16384, 16383);
229  dec_diff = xlow - decoded;
230 
231 #define STORE_NODE(index, UPDATE, VALUE)\
232  ssd = cur_node->ssd + dec_diff*dec_diff;\
233  /* Check for wraparound. Using 64 bit ssd counters would \
234  * be simpler, but is slower on x86 32 bit. */\
235  if (ssd < cur_node->ssd)\
236  continue;\
237  if (heap_pos[index] < frontier) {\
238  pos = heap_pos[index]++;\
239  assert(pathn[index] < FREEZE_INTERVAL * frontier);\
240  node = nodes_next[index][pos] = next[index]++;\
241  node->path = pathn[index]++;\
242  } else {\
243  /* Try to replace one of the leaf nodes with the new \
244  * one, but not always testing the same leaf position */\
245  pos = (frontier>>1) + (heap_pos[index] & ((frontier>>1) - 1));\
246  if (ssd >= nodes_next[index][pos]->ssd)\
247  continue;\
248  heap_pos[index]++;\
249  node = nodes_next[index][pos];\
250  }\
251  node->ssd = ssd;\
252  node->state = cur_node->state;\
253  UPDATE;\
254  c->paths[index][node->path].value = VALUE;\
255  c->paths[index][node->path].prev = cur_node->path;\
256  /* Sift the newly inserted node up in the heap to restore \
257  * the heap property */\
258  while (pos > 0) {\
259  int parent = (pos - 1) >> 1;\
260  if (nodes_next[index][parent]->ssd <= ssd)\
261  break;\
262  FFSWAP(struct TrellisNode*, nodes_next[index][parent],\
263  nodes_next[index][pos]);\
264  pos = parent;\
265  }
266  STORE_NODE(0, ff_g722_update_low_predictor(&node->state, k >> 2), k);
267  }
268  }
269 
270  for (j = 0; j < frontier && nodes[1][j]; j++) {
271  int ihigh;
272  struct TrellisNode *cur_node = nodes[1][j];
273 
274  /* We don't try to get any initial guess for ihigh via
275  * encode_high - since there's only 4 possible values, test
276  * them all. Testing all of these gives a much, much larger
277  * gain than testing a larger range around ilow. */
278  for (ihigh = 0; ihigh < 4; ihigh++) {
279  int dhigh, decoded, dec_diff, pos;
280  uint32_t ssd;
281  struct TrellisNode* node;
282 
283  dhigh = cur_node->state.scale_factor *
284  ff_g722_high_inv_quant[ihigh] >> 10;
285  decoded = av_clip(dhigh + cur_node->state.s_predictor,
286  -16384, 16383);
287  dec_diff = xhigh - decoded;
288 
289  STORE_NODE(1, ff_g722_update_high_predictor(&node->state, dhigh, ihigh), ihigh);
290  }
291  }
292 
293  for (j = 0; j < 2; j++) {
294  FFSWAP(struct TrellisNode**, nodes[j], nodes_next[j]);
295 
296  if (nodes[j][0]->ssd > (1 << 16)) {
297  for (k = 1; k < frontier && nodes[j][k]; k++)
298  nodes[j][k]->ssd -= nodes[j][0]->ssd;
299  nodes[j][0]->ssd = 0;
300  }
301  }
302 
303  if (i == froze + FREEZE_INTERVAL) {
304  p[0] = &c->paths[0][nodes[0][0]->path];
305  p[1] = &c->paths[1][nodes[1][0]->path];
306  for (j = i; j > froze; j--) {
307  dst[j] = p[1]->value << 6 | p[0]->value;
308  p[0] = &c->paths[0][p[0]->prev];
309  p[1] = &c->paths[1][p[1]->prev];
310  }
311  froze = i;
312  pathn[0] = pathn[1] = 0;
313  memset(nodes[0] + 1, 0, (frontier - 1)*sizeof(**nodes));
314  memset(nodes[1] + 1, 0, (frontier - 1)*sizeof(**nodes));
315  }
316  }
317 
318  p[0] = &c->paths[0][nodes[0][0]->path];
319  p[1] = &c->paths[1][nodes[1][0]->path];
320  for (j = i; j > froze; j--) {
321  dst[j] = p[1]->value << 6 | p[0]->value;
322  p[0] = &c->paths[0][p[0]->prev];
323  p[1] = &c->paths[1][p[1]->prev];
324  }
325  c->band[0] = nodes[0][0]->state;
326  c->band[1] = nodes[1][0]->state;
327 }
328 
330  const int16_t *samples)
331 {
332  int xlow, xhigh, ilow, ihigh;
333  filter_samples(c, samples, &xlow, &xhigh);
334  ihigh = encode_high(&c->band[1], xhigh);
335  ilow = encode_low (&c->band[0], xlow);
337  ff_g722_high_inv_quant[ihigh] >> 10, ihigh);
338  ff_g722_update_low_predictor(&c->band[0], ilow >> 2);
339  *dst = ihigh << 6 | ilow;
340 }
341 
343  uint8_t *dst, int nb_samples,
344  const int16_t *samples)
345 {
346  int i;
347  for (i = 0; i < nb_samples; i += 2)
348  encode_byte(c, dst++, &samples[i]);
349 }
350 
351 static int g722_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
352  const AVFrame *frame, int *got_packet_ptr)
353 {
354  G722Context *c = avctx->priv_data;
355  const int16_t *samples = (const int16_t *)frame->data[0];
356  int nb_samples, out_size, ret;
357 
358  out_size = (frame->nb_samples + 1) / 2;
359  if ((ret = ff_alloc_packet(avpkt, out_size))) {
360  av_log(avctx, AV_LOG_ERROR, "Error getting output packet\n");
361  return ret;
362  }
363 
364  nb_samples = frame->nb_samples - (frame->nb_samples & 1);
365 
366  if (avctx->trellis)
367  g722_encode_trellis(c, avctx->trellis, avpkt->data, nb_samples, samples);
368  else
369  g722_encode_no_trellis(c, avpkt->data, nb_samples, samples);
370 
371  /* handle last frame with odd frame_size */
372  if (nb_samples < frame->nb_samples) {
373  int16_t last_samples[2] = { samples[nb_samples], samples[nb_samples] };
374  encode_byte(c, &avpkt->data[nb_samples >> 1], last_samples);
375  }
376 
377  if (frame->pts != AV_NOPTS_VALUE)
378  avpkt->pts = frame->pts - ff_samples_to_time_base(avctx, avctx->delay);
379  *got_packet_ptr = 1;
380  return 0;
381 }
382 
384  .name = "g722",
385  .long_name = NULL_IF_CONFIG_SMALL("G.722 ADPCM"),
386  .type = AVMEDIA_TYPE_AUDIO,
388  .priv_data_size = sizeof(G722Context),
391  .encode2 = g722_encode_frame,
392  .capabilities = CODEC_CAP_SMALL_LAST_FRAME,
393  .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16,
395 };
#define AVERROR_INVALIDDATA
Invalid data found when processing input.
Definition: error.h:54
struct G722Context::TrellisNode ** nodep_buf[2]
int path
Definition: adpcmenc.c:46
This structure describes decoded (raw) audio or video data.
Definition: frame.h:135
#define AV_LOG_WARNING
Something somehow does not look correct.
Definition: log.h:129
struct G722Context::TrellisPath * paths[2]
static void filter_samples(G722Context *c, const int16_t *samples, int *xlow, int *xhigh)
Definition: g722enc.c:135
#define MIN_TRELLIS
Definition: g722enc.c:43
AVCodec.
Definition: avcodec.h:2796
static void g722_encode_no_trellis(G722Context *c, uint8_t *dst, int nb_samples, const int16_t *samples)
Definition: g722enc.c:342
void av_freep(void *arg)
Free a memory block which has been allocated with av_malloc(z)() or av_realloc() and set the pointer ...
Definition: mem.c:198
static int encode_high(const struct G722Band *state, int xhigh)
Definition: g722enc.c:152
uint8_t
#define av_cold
Definition: attributes.h:66
#define PREV_SAMPLES_BUF_SIZE
Definition: g722.h:31
int64_t pts
Presentation timestamp in time_base units (time when frame should be shown to user).
Definition: frame.h:211
static av_cold int g722_encode_init(AVCodecContext *avctx)
Definition: g722enc.c:58
uint8_t * data
Definition: avcodec.h:973
uint32_t ssd
Definition: adpcmenc.c:45
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:123
struct G722Context::TrellisNode * node_buf[2]
#define AVERROR(e)
Definition: error.h:43
sample_fmts
Definition: avconv_filter.c:68
#define CODEC_CAP_SMALL_LAST_FRAME
Codec can be fed a final frame with a smaller size.
Definition: avcodec.h:718
const int16_t ff_g722_low_inv_quant6[64]
Definition: g722.c:63
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification. ...
Definition: internal.h:150
void ff_g722_apply_qmf(const int16_t *prev_samples, int *xout1, int *xout2)
Definition: g722.c:161
int16_t prev_samples[PREV_SAMPLES_BUF_SIZE]
memory of past decoded samples
Definition: g722.h:36
AVCodec ff_adpcm_g722_encoder
Definition: g722enc.c:383
void av_log(void *avcl, int level, const char *fmt,...)
Definition: log.c:168
const char * name
Name of the codec implementation.
Definition: avcodec.h:2803
#define FREEZE_INTERVAL
Definition: g722enc.c:35
struct G722Context::G722Band band[2]
static void g722_encode_trellis(G722Context *c, int trellis, uint8_t *dst, int nb_samples, const int16_t *samples)
Definition: g722enc.c:174
#define MAX_FRAME_SIZE
Definition: g722enc.c:39
static av_cold int g722_encode_close(AVCodecContext *avctx)
Definition: g722enc.c:46
static int encode_low(const struct G722Band *state, int xlow)
Definition: g722enc.c:160
int ff_alloc_packet(AVPacket *avpkt, int size)
Check AVPacket size and/or allocate data.
Definition: utils.c:1245
void ff_g722_update_low_predictor(struct G722Band *band, const int ilow)
Definition: g722.c:139
static const float pred[4]
Definition: siprdata.h:259
int frame_size
Number of samples per channel in an audio frame.
Definition: avcodec.h:1811
static const int16_t low_quant[33]
Definition: g722enc.c:128
Libavcodec external API header.
AVSampleFormat
Audio Sample Formats.
Definition: samplefmt.h:61
AV_SAMPLE_FMT_NONE
Definition: avconv_filter.c:68
main external API structure.
Definition: avcodec.h:1050
static void close(AVCodecParserContext *s)
Definition: h264_parser.c:490
static uint32_t state
Definition: trasher.c:27
uint8_t * data[AV_NUM_DATA_POINTERS]
pointer to the picture/channel planes.
Definition: frame.h:141
common internal api header.
common internal and external API header
signed 16 bits
Definition: samplefmt.h:64
int prev_samples_pos
the number of values in prev_samples
Definition: g722.h:37
static av_cold int init(AVCodecParserContext *s)
Definition: h264_parser.c:499
int trellis
trellis RD quantization
Definition: avcodec.h:2234
void * priv_data
Definition: avcodec.h:1092
#define STORE_NODE(index, UPDATE, VALUE)
const int16_t ff_g722_high_inv_quant[4]
Definition: g722.c:51
int channels
number of audio channels
Definition: avcodec.h:1792
static av_always_inline void encode_byte(G722Context *c, uint8_t *dst, const int16_t *samples)
Definition: g722enc.c:329
static int g722_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, const AVFrame *frame, int *got_packet_ptr)
Definition: g722enc.c:351
#define av_always_inline
Definition: attributes.h:40
static av_always_inline int64_t ff_samples_to_time_base(AVCodecContext *avctx, int64_t samples)
Rescale from sample rate to AVCodecContext.time_base.
Definition: internal.h:151
#define MAX_TRELLIS
Definition: g722enc.c:44
#define FFSWAP(type, a, b)
Definition: common.h:60
void ff_g722_update_high_predictor(struct G722Band *band, const int dhigh, const int ihigh)
Definition: g722.c:150
This structure stores compressed data.
Definition: avcodec.h:950
int delay
Codec delay.
Definition: avcodec.h:1212
int16_t scale_factor
delayed quantizer scale factor
Definition: g722.h:51
int nb_samples
number of audio samples (per channel) described by this frame
Definition: frame.h:179
void * av_mallocz(size_t size)
Allocate a block of size bytes with alignment suitable for all memory accesses (including vectors if ...
Definition: mem.c:205
int64_t pts
Presentation timestamp in AVStream->time_base units; the time at which the decompressed packet will b...
Definition: avcodec.h:966
#define AV_NOPTS_VALUE
Undefined timestamp value.
Definition: avutil.h:228