Logo Search packages:      
Sourcecode: vat version File versions  Download package

controller.cc

/*
 * Copyright (c) 1991-1995 Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *    This product includes software developed by the Computer Systems
 *    Engineering Group at Lawrence Berkeley Laboratory.
 * 4. Neither the name of the University nor of the Laboratory may be used
 *    to endorse or promote products derived from this software without
 *    specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
static const char rcsid[] =
    "@(#) $Header: controller.cc,v 1.47 96/05/16 05:20:07 van Exp $ (LBL)";

#define AUDIO_SPS 8000  /* audio samples per second (used to convert
                   * playout delay from seconds to bytes of buffer).
                   * This is the sample rate used by the audio
                   * hardware.  If this rate is different from
                   * the sample rate of some network audio format,
                   * sample rate conversion has to be done between
                   * the encoder/decoder objects & here.  Note
                   * that sample rate conversion is a very compute
                   * intensive operation & there isn't currently
                   * any support for it in vat.  Also note that
                   * there are many control variables expressed
                   * in terms of a 160 sample (20ms at 8KHz)
                   * audio frame size and some of these need to
                   * be changed if AUDIO_SPS is changed (e.g. the
                   * "/ 20 * FRAMESIZE" millisecond conversions
                   * used for the echo suppression time).
                   */
#define SS_GRANULARITY 1440   /* sample stream sizes are always rounded
                         * up to some integer multiple of this number.
                         * It should be set to the least common
                         * multiple of the possible *output* audio
                         * frame sizes (see comments below).
                         * In our case, possible frame sizes are
                         * 160 & 180 samples, whose least common
                         * multiple is 1440.
                         */
/*
 * The following is the timeout interval (in ms) used when vat does
 * not have the audio & is running off a system timer instead.  This
 * interval must be the same as the audio read blocksize (FRAMESIZE /
 * AUDIO_SPS * 1000 ms) and must be < 1 sec.
 *
 * FRAMESIZE is not defined in this file; it has to come from one of
 * the headers included below, which is fine because the macro is only
 * expanded at its points of use.
 */
#define FRAME_TIME (FRAMESIZE * 1000 / AUDIO_SPS)

#include "config.h"
#include "sys-time.h"
#include "audio.h"
#include "ss.h"
#include "controller.h"
#include "encoder.h"
#include "mulaw.h"
#include "vu.h"
#include "Tcl.h"
#include "ntp-time.h"

/*
 * Number of DoAudio() calls between VU meter refreshes: the meter
 * countdown is reloaded with this value each time the meters are set.
 */
#define METER_UPDATE_FREQ 3

/*
 * A controller class specifically for handling half-duplex audio
 * devices like those commonly found in PCs.  It derives its timebase
 * from a 20ms system timer rather than using audio read completions
 * like the normal controller class.
 */
class HDController : public Controller {
    public:
      /* Records the current time of day and starts the frame timer. */
      HDController();
      /* Observer callback; intentionally does nothing (timer keeps running). */
      virtual void update(Observable*);
    protected:
      /*
       * Timer callback: advances the sample streams once per elapsed
       * frame time and runs DoAudio() when the device has a frame ready.
       */
      virtual void timeout();
      /* In this class it only prints a trace message. */
      virtual void audio_handle();
};

static class ControllerMatch : public Matcher {
    public:
      ControllerMatch() : Matcher("controller") {}
      /*
       * Turn a duplex-mode name (compared without regard to case)
       * into a newly allocated controller object.  Names other than
       * "full-duplex" and "half-duplex" yield 0.
       */
      TclObject* match(const char* id) {
            if (!strcasecmp(id, "full-duplex"))
                  return (new Controller);
            if (!strcasecmp(id, "half-duplex"))
                  return (new HDController);
            return (0);
      }
} controller_matcher;


/*
 * Build a controller with no audio device, encoder or meters attached
 * yet (those arrive later through command()).  Tunables are read from
 * Tcl attributes and the two sample streams are created:
 *   as_ - the stream fed from the audio device (uses mikeAGCLevel)
 *   ns_ - the stream fed from the network    (uses speakerAGCLevel)
 */
Controller::Controller() :
      audio_(0),
      lastnetout_(0),
      lastaudout_(0),
#ifdef WIN32
      /* windows is very, very slow & can't hack 20ms events */
      timer_interval_(FRAME_TIME * 4),
#else
      timer_interval_(FRAME_TIME),
#endif
      tsec_(0),
      tusec_(0),
      ostate_(TALK_TAIL + TALK_LEAD),
      pmeter_(0),
      rmeter_(0),
      talk_thresh_(0),
      echo_thresh_(0),
      echo_suppress_time_(0),
      meter_update_(METER_UPDATE_FREQ),
      active_(0),
      encoder_(0),
      outmax_(FRAMESIZE),
      outlen_(0),
      out_ts_(0),
      out_(0)
{
      Tcl& tcl = Tcl::instance();
      echo_thresh_ = atoi(tcl.attr("echoThresh"));
      /*
       * NOTE(review): "/ 20 * FRAMESIZE" looks like a conversion from
       * milliseconds to samples in whole 20ms frames, and "* 50" like
       * seconds to 20ms frames -- confirm against the Tcl defaults.
       */
      echo_suppress_time_ = atoi(tcl.attr("echoSuppressTime")) / 20 * FRAMESIZE;
      idle_drop_time_ = atoi(tcl.attr("idleDropTime")) * 50;

      /* requested playout delay in seconds, then converted to samples */
      const int playout_secs = atoi(tcl.attr("maxPlayout"));
      int maxplayout = playout_secs * AUDIO_SPS;
      if (maxplayout < (TALK_LEAD + TALK_TAIL + 2) * FRAMESIZE) {
            /*
             * Report the requested value in seconds so that both
             * numbers in the message use the same unit (the sample
             * count used to be printed here).
             */
            printf(" max playout delay %d too short - using %d sec.\n",
                  playout_secs,
                  ((TALK_LEAD + TALK_TAIL + 2) * FRAMESIZE +
                  AUDIO_SPS - 1) / AUDIO_SPS);
            maxplayout = (TALK_LEAD + TALK_TAIL + 2) * FRAMESIZE;
      }
      /*
       * Round up to a whole number of SS_GRANULARITY units, then
       * express the result as a count of FRAMESIZE-sample frames.
       */
      maxplayout = (maxplayout + (SS_GRANULARITY - 1)) / SS_GRANULARITY;
      maxplayout *= SS_GRANULARITY / FRAMESIZE;

      /*XXX tk script */
      int magclevel = atoi(tcl.attr("mikeAGCLevel"));
      int sagclevel = atoi(tcl.attr("speakerAGCLevel"));

      as_ = new SampleStream(FRAMESIZE, maxplayout,
                         (TALK_LEAD+1)*FRAMESIZE, magclevel);
      ns_ = new SampleStream(FRAMESIZE, maxplayout,
                         (TALK_LEAD+1)*FRAMESIZE, sagclevel);

      lastaudout_ = as_->Clock();
      lastnetout_ = ns_->Clock();

      /* both streams start with the same silence threshold */
      int thresh = atoi(tcl.attr("silenceThresh"));
      as_->ssthresh(thresh);
      ns_->ssthresh(thresh);
}

/*
 * Observer callback, also invoked directly when an audio object is
 * attached.  When the audio device is not held, resynchronize the
 * software clock to the time of day, zero the meters and start the
 * frame timer; when the device is held, stop the timer.
 */
void Controller::update(Observable*)
{
      if (!audio_->HaveAudio()) {
            timeval tv;
            ::gettimeofday(&tv, 0);
            tsec_ = tv.tv_sec;
            tusec_ = tv.tv_usec;
            /*
             * Reset the meters and force a redraw.  The two meters
             * are attached by separate commands, so each one is
             * checked on its own rather than assuming that rmeter_
             * exists whenever pmeter_ does.
             */
            if (pmeter_ != 0) {
                  pmeter_->set(0.); pmeter_->redraw();
            }
            if (rmeter_ != 0) {
                  rmeter_->set(0.); rmeter_->redraw();
            }
            msched(timer_interval_);
      } else {
            cancel();
      }
}

/*
 * Process one audio frame: compute the level of the network stream,
 * exchange a block with the audio device via mixaudio(), decide
 * whether the newly read input should be sent to the encoder, and
 * periodically refresh the meters.
 */
void Controller::DoAudio()
{
      double rlevel, plevel;
      /* sampled before ns_->Compute(); see the echo comment below */
      int nsmean = ns_->LTMean();

      if (ns_->Max())
            lastnetout_ = as_->Clock();
      ns_->Compute();
      plevel = ns_->Mean();
      mixaudio(*ns_);
      ns_->UpdateAGC();

      if (audio_->RMuted()) {
            rlevel = 0.;
      } else {
            as_->Compute();
            rlevel = as_->Mean();
            /*
             * Next check is to cut residual echo in speakerphone &
             * echo-cancel modes: If we haven't been talking recently, have
             * just sent data to the speaker and the signal we got from the
             * mike was about the same as the signal we played, assume the
             * mike signal is echo.  NOTE that nsmean was grabbed before
             * ns_->Compute() so we are comparing the previous audio output
             * to the current audio input - this should make it more likely
             * that we're looking at the echo signal (though under Sun OS
             * there's still a lot of buffer in the STREAM I/O system).
             */
            if (audio_->Mode() != Audio::mode_none && !sending() &&
                (as_->LTMean() - nsmean < echo_thresh_ ||
                 as_->Silent(talk_thresh_)))
                  ;
            else if (audio_->Mode() != Audio::mode_netmutesmike ||
                   lastnetout_ == 0 ||
                   u_int(as_->Clock() - lastnetout_) > echo_suppress_time_) {
                  if (Output()) {
                        lastnetout_ = 0;
                        active_ = 1;
                        as_->UpdateAGC();
                  }
            }
      }
      /*
       * Update the meters.  We control the rate with METER_UPDATE_FREQ
       * to cut down on CPU load from the X window updates.  Each meter
       * is attached by its own command, so either pointer may still be
       * null here and is checked separately.
       */
      if ((pmeter_ != 0 || rmeter_ != 0) && --meter_update_ <= 0) {
            if (pmeter_ != 0)
                  pmeter_->set(plevel);
            if (rmeter_ != 0)
                  rmeter_->set(rlevel);
            meter_update_ = METER_UPDATE_FREQ;
      }
}

/*
 * Move both sample streams (audio-side and network-side) forward by
 * one step.  Called once per frame, either from audio_handle() or
 * from the timer callbacks.
 */
void Controller::DoTimer()
{
      /* Advance to next audio frame */
      as_->Advance();
      ns_->Advance();
}

/*
 * Decoder entry point: mix the block 'frame' (length 'len') into the
 * network sample stream 'del' samples into the future.  A negative
 * 'del' is ignored without any mixing.
 */
void Controller::mix_from_net(int del, const u_int8_t* frame, int len)
{
      if (del < 0)
            return;
      ns_->Mix(del, frame, len);
}

/*
 * Per-frame audio callback: process the frame first, then step the
 * sample streams forward.
 */
void Controller::audio_handle()
{
      DoAudio();
      DoTimer();
}

/*
 * Called when we don't have the audio device.  Normally, our time
 * base comes from the audio device's sample clock, so when we don't
 * have the device open, we revert to timers.
 */
void Controller::timeout()
{
      /*
       * Use get time of day and keep track of the current hard time
       * in the tsec_/tusec_ variables.  Tk timers are an unreliable
       * time base.  We use them to dispatch an event here then call
       * gettimeofday to see how many times we really should have
       * been called in the intervening period.
       */
      timeval tv;
      ::gettimeofday(&tv, 0);
      u_int u = (u_int)tv.tv_usec;
      u_int s = (u_int)tv.tv_sec;
      if (s > tsec_ + 3) {
            /*
             * We're way behind.  Most likely we were suspended and
             * then resumed.  Instead of trying to catch up, just resync.
             */
            tusec_ = u;
            tsec_ = s;
      }
      /* one DoTimer() per frame time that has elapsed on the wall clock */
      while ((int(tusec_ - u) <= 0 && s == tsec_) || int(tsec_ - s) < 0) {
            DoTimer();
            /*
             * Advance the software clock by exactly one frame time.
             * The increment must stay outside the carry loop: with
             * it inside the loop condition it was applied a second
             * time after every seconds rollover, so one tick per
             * second was skipped.
             */
            tusec_ += 1000 * FRAME_TIME;
            while (tusec_ >= 1000000) {
                  tusec_ -= 1000000;
                  ++tsec_;
            }
      }
      msched(timer_interval_);
}

/*
 * Queue one block of samples for the encoder.  Successive blocks are
 * gathered until at least outmax_ has accumulated and are then handed
 * over in a single encode() call; this amortizes the packet header
 * overhead at the price of extra latency.  The timestamp passed to
 * the encoder is the one supplied with the first block of the group.
 */
void Controller::send_block(u_int32_t ts, u_int8_t* blk, int len)
{
      if (out_ == 0) {
            /* nothing pending: this block opens a new group */
            out_ts_ = ts;
            outlen_ = 0;
            out_ = blk;
      } else {
            const int adjacent = (blk == out_ + outlen_);
            if (!adjacent) {
                  /*
                   * The new block does not directly follow the
                   * pending data (frames wrapped in the ss buffer),
                   * so gather everything in overflow_ to keep the
                   * group contiguous.
                   */
                  if (out_ != overflow_) {
                        /* first wrap of this group: move what we have */
                        memcpy(overflow_, out_, outlen_);
                        out_ = overflow_;
                  }
                  memcpy(out_ + outlen_, blk, len);
            }
      }
      outlen_ += len;
      if (outlen_ < outmax_)
            return;
      encoder_->encode(out_ts_, out_, outlen_);
      out_ = 0;
}

/*
 * Return the current media timestamp (the audio stream's clock) and
 * remember both it and the time of day at which it was read, for
 * later use by ref_ts().
 */
u_int32_t Controller::media_ts()
{
      ::gettimeofday(&last_uts_, 0);
      const u_int32_t stamp = as_->Clock();
      last_mts_ = stamp;
      return (stamp);
}

/*
 * Extrapolate a media timestamp from the wall clock: take the
 * timestamp and time of day recorded by the last media_ts() call and
 * add the time elapsed since then, converted to samples at 8000
 * samples per second (8 samples per millisecond).
 */
u_int32_t Controller::ref_ts()
{
      timeval now;
      ::gettimeofday(&now, 0);
      int t = (now.tv_sec - last_uts_.tv_sec) * 8000;
      /*
       * The microsecond difference is negative whenever the seconds
       * field has rolled over since media_ts().  Multiply instead of
       * shifting: left-shifting a negative value is undefined, while
       * the product yields the same number.
       */
      t += ((now.tv_usec - last_uts_.tv_usec) * 8) / 1000;
      return (last_mts_ + t);
}

/*
 * Check for silence and otherwise send audio frames to the encoder.
 * If this is the start of a talk-spurt, go back TALK_LEAD blocks into
 * the past and send them too (because the silence detector isn't
 * perfect especially near a silence-to-speech transition).  At the
 * end of a talk-spurt, send TALK_TAIL extra blocks because the
 * silence detector is unreliable near a speech-to-silence transition.
 *
 * ostate_ counts consecutive silent blocks: 0 while talking, and it
 * stops growing at TALK_TAIL + TALK_LEAD.
 *
 * Returns 1 if a block was passed to send_block(), 0 otherwise.
 */
int Controller::Output()
{
      /* plain local; the obsolete 'register' specifier is not used */
      int bs = as_->BlkSize();
      if (as_->Silent()) {
            if (ostate_ >= TALK_TAIL) {
                  /* between talk spurts */
                  if (out_ != 0) {
                        /* flush the partial last block */
                        send_block(media_ts(), as_->BlkBack(0), bs);
                        return (1);
                  }
                  if (ostate_ < TALK_TAIL+TALK_LEAD)
                        ++ostate_;
                  return (0);
            }
            /* still inside the tail: keep sending, count the block */
            ++ostate_;
      } else if (ostate_) {
            /*
             * if start of talk after silence, generate packets for
             * any leading speech that we might have missed.
             */
            for (int i = (ostate_ - TALK_TAIL) * bs; i > 0; i -= bs) {
                  u_int32_t ts = as_->Clock() - i;
                  send_block(ts, as_->BlkBack(i), bs);
            }
            ostate_ = 0;
      }
      send_block(media_ts(), as_->BlkBack(0), bs);
      return (1);
}

/*
 * Sample tables with C linkage, defined outside this file.  mixaudio()
 * mixes them into the output stream for the tone loopback modes
 * (loop_tonemax, loop_tone0 and loop_tone6 respectively).
 */
extern "C" {
extern u_char tonemax[];
extern u_char tone0dBm[];
extern u_char tone6dBm[];
}

/*
 * Exchange one block with the audio device: optionally mix loopback
 * data into 'ss', write the current block of 'ss' to the device when
 * the conditions below allow it, and mix the block read from the
 * device into the input stream as_.
 */
void Controller::mixaudio(SampleStream& ss)
{
      /*
       * The audio driver is ready to give us the next packet.
       * This serves as our time base.  We do the following:
       *  - output next chunk to audio driver.
       *    (We do the output *first* so the rest of our
       *    processing is overlapped with the real-time audio
       *    output, otherwise we tend to accumulate
       *    random delays.)
       *  - If we're doing echo cancellation, estimate the
       *    echo resulting from the block just output and mix
       *    the echo inverse into the *input* sample stream
       *    at the estimated echo delay.
       *  - read packet from audio
       *  - Mix the input data into the input sample stream
       *    (this is a mix because we might have an inverse
       *    echo signal or tone that we want summed with the
       *    input).
       */
      u_char* blk = 0;

      int loopback = audio_->GetLoopback();
      /*XXX*/
#ifdef notdef
      int blksize = audio_->BlockSize();
#else
      int blksize = as_->BlkSize();
#endif

      /*
       * In a loopback mode the device is read *before* the write
       * below, because the mike block may be what gets played.
       */
      if (loopback != Audio::loop_none) {
            blk = audio_->Read();
            switch (loopback) {
            case Audio::loop_none:
                  /* not reachable inside this if; kept for completeness */
                  break;
            case Audio::loop_mike:
                  ss.Mix(0, blk, blksize);
                  break;
            case Audio::loop_tone6:
                  ss.Mix(0, tone6dBm, blksize);
                  break;
            case Audio::loop_tone0:
                  ss.Mix(0, tone0dBm, blksize);
                  break;
            case Audio::loop_tonemax:
                  ss.Mix(0, tonemax, blksize);
                  break;
            }
      }

      /*
       * Now write a block of samples to the audio device provided
       * the following conditions hold:
       *
       * (1)      the audio output isn't muted
       *
       * (2)      there won't be an echo problem i.e., we're not sending, OR
       *    we're not in the mode where input has priority over output,
       *    OR the mike is muted.  this last check is not strictly
       *    necessary but allows the user to immediately hear the
       *    far end after muting the mike (rather than waiting for
       *    TALK_TAIL extra audio blocks to drain).
       *
       * (3)      the signal to output isn't silence, or we're running
       *    a loopback test.  i.e., if we're about to write a
       *    completely silent frame (i.e., no packets at all from
       *    the network) then don't do it.  This prevents a backlog
       *    of samples (i.e., a net delay) from building up in the audio
       *    driver.  Note that this problem is completely independent
       *    of the silence suppression solution for the outbound path.
       */
      if (! audio_->PMuted() &&
          (!sending() || audio_->Mode() != Audio::mode_mikemutesnet ||
           audio_->RMuted()) &&
          (loopback || (ss.Max() != 0 && ss.LTMean() != 0))) {
            /*
             * if we haven't written for a while,
             * write an extra block to generate a bit
             * of a backlog between us & the driver.
             */
            u_int32_t sc = as_->Clock();
            if (u_int(sc - lastaudout_) > 4*FRAMESIZE)
                  audio_->Write(ss.BlkBack(blksize));
            audio_->Write(ss.CurBlk());
            lastaudout_ = sc;
            active_ = 1;
#ifdef notyet
            if (mode == mode_ec && !rmute) {
                  int offset = AdjustTime(0);
                  u_char ecblk[MAXAUDIOSIZE+4];
                  int resid = offset & 3;
                  if (resid)
                        ecblk[0] = 0x7f7f7f7f;
                  filter->Compute(os, &ecblk[resid],
                              blksize);
                  as->Mix(offset &~ 3, ecblk, blksize);
            }
#endif
      }
      /* normal (non-loopback) case: read the device after the write */
      if (loopback == Audio::loop_none)
            blk = audio_->Read();

      as_->Mix(0, blk, blksize);
}


/*
 * Seed the software clock (tsec_/tusec_) from the time of day and
 * schedule the first frame timer right away.
 */
HDController::HDController()
{
      timeval now;
      ::gettimeofday(&now, 0);
      tusec_ = now.tv_usec;
      tsec_ = now.tv_sec;
      msched(timer_interval_);
}

/*
 * Observer callback.  Unlike Controller::update(), which starts or
 * cancels the timer, this version changes nothing.
 */
void HDController::update(Observable*)
{
      /*
       * do nothing -- leave the timer running even when
       * we have the audio
       */
}

/*
 * Timer callback for the half-duplex controller.  The time of day is
 * compared with the software clock in tsec_/tusec_ to work out how
 * many frame times have elapsed; for each one the sample streams are
 * advanced and, if the device is held and has a frame ready, the
 * frame is processed.
 */
void HDController::timeout()
{
      timeval tv;
      ::gettimeofday(&tv, 0);
      u_int u = (u_int)tv.tv_usec;
      u_int s = (u_int)tv.tv_sec;
      if (s > tsec_ + 3) {
            /*
             * We're way behind.  Most likely we were suspended and
             * then resumed.  Instead of trying to catch up, just resync.
             */
            tusec_ = u;
            tsec_ = s;
      }
      while ((int(tusec_ - u) <= 0 && s == tsec_) || int(tsec_ - s) < 0) {
            DoTimer();
            if (audio_->HaveAudio() && audio_->FrameReady())
                  DoAudio();

            /*
             * Advance the software clock by exactly one frame time.
             * The increment must stay outside the carry loop: with
             * it inside the loop condition it was applied a second
             * time after every seconds rollover, so one tick per
             * second was skipped.
             */
            tusec_ += 1000 * FRAME_TIME;
            while (tusec_ >= 1000000) {
                  tusec_ -= 1000000;
                  ++tsec_;
            }
      }
      msched(timer_interval_);
}

/*
 * Audio callback for the half-duplex controller.  Frames are handled
 * from timeout() in this class, so this override only prints a trace
 * line.
 */
void HDController::audio_handle()
{
      printf("HDController::audio_handle()\n");
}

/*
 * Tcl command dispatcher for the controller object.
 *
 * With one argument (argc == 2) the commands are queries whose answer
 * is returned as the Tcl result:
 *   ntp-time, unix-time, media-time, active, agc-input, agc-output
 * With two arguments (argc == 3) they attach objects or set values:
 *   audio, encoder, input-meter, output-meter, silence-thresh,
 *   talk-thresh, echothresh, echodelay, blocks-per-packet,
 *   agc-input, agc-input-enable, agc-output, agc-output-enable, active
 * Anything else is passed on to TclObject::command().
 *
 * Returns TCL_OK for every command handled here.
 */
int Controller::command(int argc, const char*const* argv)
{
      Tcl& tcl = Tcl::instance();
      if (argc == 2) {
            if (strcmp(argv[1], "ntp-time") == 0) {
                  sprintf(tcl.buffer(), "%u", ntptime());
                  tcl.result(tcl.buffer());
                  return (TCL_OK);
            }
            if (strcmp(argv[1], "unix-time") == 0) {
                  /*
                   * tv_sec is a time_t, which is wider than
                   * unsigned int on LP64 systems; convert it
                   * explicitly so the argument matches "%u".
                   */
                  sprintf(tcl.buffer(), "%u",
                        (unsigned int)unixtime().tv_sec);
                  tcl.result(tcl.buffer());
                  return (TCL_OK);
            }
            if (strcmp(argv[1], "media-time") == 0) {
                  sprintf(tcl.buffer(), "%u", as_->Clock());
                  tcl.result(tcl.buffer());
                  return (TCL_OK);
            }
            if (strcmp(argv[1], "active") == 0) {
                  tcl.result(active_ ? "1" : "0");
                  return (TCL_OK);
            }
            /* AGC levels are reported as (internal level / 10) - 10 */
            if (strcmp(argv[1], "agc-input") == 0) {
                  sprintf(tcl.buffer(), "%d", as_->AGCLevel() / 10 - 10);
                  tcl.result(tcl.buffer());
                  return (TCL_OK);
            }
            if (strcmp(argv[1], "agc-output") == 0) {
                  sprintf(tcl.buffer(), "%d", ns_->AGCLevel() / 10 - 10);
                  tcl.result(tcl.buffer());
                  return (TCL_OK);
            }
      } else if (argc == 3) {
            if (strcmp(argv[1], "audio") == 0) {
                  /*
                   * Attach the audio object, register this
                   * controller as its observer and handler, then
                   * run update() once to start or stop the timer.
                   */
                  audio_ = (Audio*)TclObject::lookup(argv[2]);
                  audio_->attach(this);
                  audio_->handler(this);
                  update(audio_);
                  return (TCL_OK);
            }
            if (strcmp(argv[1], "encoder") == 0) {
                  encoder_ = (PCM_Encoder*)TclObject::lookup(argv[2]);
                  return (TCL_OK);
            }
            if (strcmp(argv[1], "input-meter") == 0) {
                  rmeter_ = (VUMeter*)TclObject::lookup(argv[2]);
                  return (TCL_OK);
            }
            if (strcmp(argv[1], "output-meter") == 0) {
                  pmeter_ = (VUMeter*)TclObject::lookup(argv[2]);
                  return (TCL_OK);
            }
            if (strcmp(argv[1], "silence-thresh") == 0) {
                  /* the same threshold is applied to both streams */
                  int thresh = atoi(argv[2]);
                  as_->ssthresh(thresh);
                  ns_->ssthresh(thresh);
                  return (TCL_OK);
            }
            if (strcmp(argv[1], "talk-thresh") == 0) {
                  talk_thresh_ = atoi(argv[2]);
                  return (TCL_OK);
            }
            if (strcmp(argv[1], "echothresh") == 0) {
                  echo_thresh_ = atoi(argv[2]);
                  return (TCL_OK);
            }
            if (strcmp(argv[1], "echodelay") == 0) {
                  /* same conversion as used in the constructor */
                  echo_suppress_time_ = atoi(argv[2]) / 20 * FRAMESIZE;
                  return (TCL_OK);
            }
            if (strcmp(argv[1], "blocks-per-packet") == 0) {
                  outmax_ = atoi(argv[2]) * FRAMESIZE;
                  return (TCL_OK);
            }
            /* AGC levels are set as 10 * (argument + 10) */
            if (strcmp(argv[1], "agc-input") == 0) {
                  int level = atoi(argv[2]);
                  level = 10 * (level + 10);
                  as_->SetAGCLevel(level);
                  return (TCL_OK);
            }
            if (strcmp(argv[1], "agc-input-enable") == 0) {
                  as_->DoAGC(atoi(argv[2]));
                  return (TCL_OK);
            }
            if (strcmp(argv[1], "agc-output") == 0) {
                  int level = atoi(argv[2]);
                  level = 10 * (level + 10);
                  ns_->SetAGCLevel(level);
                  return (TCL_OK);
            }
            if (strcmp(argv[1], "agc-output-enable") == 0) {
                  ns_->DoAGC(atoi(argv[2]));
                  return (TCL_OK);
            }
            if (strcmp(argv[1], "active") == 0) {
                  active_ = atoi(argv[2]);
                  return (TCL_OK);
            }
      }
      return (TclObject::command(argc, argv));
}

Generated by  Doxygen 1.6.0   Back to index