Using a pseudo-device for IPC

Thu Mar 10 22:23:59 PST 2005

Hi, 

I am trying to use a network pseudo-device to do (bidirectional) IPC
between 2 processes. Why such a weird thing? Well, one of the
processes (S) is a simulator of a NIC we're implementing in HW. I want
the other process (O) to always interact with a network device,
BPF-style.

My idea is, if I want to test the real NIC, I'll tell O that the 
network device name is "mycard0." Otherwise, I'll fire up S, and let 
O know that the network device name is something like "mysimulatorcard0."
I don't want to create my own pseudo-devices, so I'm trying to use
what is already available in 4.10-RELEASE.

I need a virtual device where both O and S can read and write packets.
FYI, both processes open a pcap interface using a modified version of
pcap_open_live (which opens the descriptor in O_RDWR mode, instead of
O_RDONLY). Processes read packets using plain libpcap, and write them
using "write (p->fd, ...)".

Which pseudo-device should I use? 

- my first thought was using the discard interface. I can read and 
	write traffic to ds0. The problem is, with only 1 device, I don't 
	know which side a packet comes from. This means that, for 
	bidirectional comm, I need 2 discard interfaces, and 4.10 only
	supports one. 

- I've been considering tap (I want to keep Ethernet addresses, so tap is 
	better than tun for me). The problem is, the same piece of code that 
	is able to inject traffic into all my other devices (I've tried ds, 
	em, bge, and lo, and all of them work), fails silently in tap. I 
	enclose the writer code as a PS. I've checked /var/log/messages, and
	every time I try to inject a packet I get the following 2 lines:

...
Mar 10 22:09:48 alpo /kernel: tap1 starting, minor = 0x1
Mar 10 22:09:48 alpo /kernel: tap1 not ready. minor = 0x1, tap_flags = 0x6
...

	After browsing /usr/src/sys/net, it seems tap_flags is TAP_INITED|TAP_RWAIT,
	i.e., the TAP_OPEN flag is not set, so tapifstart() fails. Interestingly 
	enough, the if that makes tapifstart() fail is actually:

	if (((tp->tap_flags & TAP_VMNET) == 0) &&
			((tp->tap_flags & TAP_READY) != TAP_READY)) {
		...
	}

	This seems to refer to vmnet devices. While tap(4) says that the 
	only difference is the minor number and that "VMnet devices do not
	ifconfig(8) themselves down when the control device is closed", it 
	seems that a vmnet device would get past this check, and therefore I
	might be able to use one. Am I right? 

Is there a better approach to do this? 

Thanks for any help. 
-Chema

PS: This is the program that tests interfaces: 

/* Forward declarations: both helpers are defined after main(). */
pcap_t * my_pcap_open_live (const char *device, int snaplen, int promisc,
    int to_ms, char *ebuf);
/*
 * Declared here so the call in main() sees the real (void-returning)
 * signature instead of an implicit int declaration that would conflict
 * with the definition further down.
 */
void test_interface (const char *interface);

/* Entry point: runs the injection test against the hard-coded "tap1". */
int main (void)
{
	test_interface ("tap1");
	return 0;
}

/*
 * Smoke-test packet injection on a network interface.
 *
 * Opens `interface` read/write through my_pcap_open_live(), asks BPF to
 * also deliver locally sent packets and to return them immediately,
 * installs an empty filter, and then writes one 1024-byte test frame to
 * the BPF descriptor.
 *
 * Any failure is reported on stderr and terminates the process with
 * exit(-1); the function only returns if every step succeeded.
 */
void test_interface (const char *interface)
{
  enum { PACKET_LEN = 1024 };
  char ebuf[PCAP_ERRBUF_SIZE];
  int snaplen, promiscuous;
  pcap_t *_pcap;
  int timeout;
  int pcap_fd;
  int outbound, yes;
  bpf_u_int32 netmask, localnet;
  struct bpf_program fcode;
  char *bpf_filter = "";
  unsigned char packet[PACKET_LEN];
  ssize_t nwritten;
  int i;

  fprintf (stdout, "%s: testing interface %s\n", __func__, interface);

  snaplen = 65000;
  promiscuous = 1;
  timeout = 1; // don't wait for packets
  _pcap = my_pcap_open_live(interface, snaplen, promiscuous, timeout, ebuf);
  if ( !_pcap ) {
    fprintf (stderr, "Error opening interface %s: %s\n", interface, ebuf);
    exit (-1);
  }

  // capture outbound traffic
  pcap_fd = pcap_fileno(_pcap);
  outbound = 1;
  if ( ioctl(pcap_fd, BIOCSSEESENT, &outbound) != 0 ) {
    fprintf (stderr, "%s: BIOCSSEESENT failed\n", __func__);
    exit (-1);
  }

  // set immediate capturing
  yes = 1;
  if ( ioctl(pcap_fd, BIOCIMMEDIATE, &yes) != 0 ) {
    fprintf (stderr, "%s: BIOCIMMEDIATE failed\n", __func__);
    exit (-1);
  }

  // get local net info
  if ( pcap_lookupnet((char *)interface, &localnet, &netmask, ebuf) < 0 ) {
    if ( errno != EADDRNOTAVAIL ) {
      fprintf (stderr, "%s: pcap_lookupnet failed (%s)\n", __func__, ebuf);
      exit (-1);
    } else {
      // interface has no address: warn and fall back to defaults so the
      // values printed and passed to pcap_compile() below are defined
      fprintf (stderr, "warning @%s: pcap_lookupnet failed (%s)\n", __func__,
          ebuf);
      netmask = 0xffffff00;
      localnet = 0;
    }
  }
  fprintf (stdout, "%s: pcap_lookupnet produced netmask=0x%08x, "
      "localnet=0x%08x\n",  __func__, netmask, localnet);

  // compile and set a void BPF filter
  if ( pcap_compile(_pcap, &fcode, bpf_filter, 0, netmask) < 0 ) {
    fprintf (stderr, "%s: pcap_compile failed (%s)\n", __func__,
        pcap_geterr(_pcap));
    exit (-1);
  }
  if ( pcap_setfilter(_pcap, &fcode) < 0 ) {
    fprintf (stderr, "%s: pcap_setfilter failed (%s)\n", __func__,
        pcap_geterr(_pcap));
    exit (-1);
  }

  // build the test frame: bytes 0x00..0xff repeated
  for (i = 0; i < PACKET_LEN; i++) {
    packet[i] = (unsigned char)(i % 256);
  }

  // inject it through the descriptor obtained from pcap_fileno() rather
  // than reaching into the pcap_t internals
  nwritten = write(pcap_fd, packet, sizeof(packet));
  if ( nwritten < 0 ) {
    fprintf (stderr, "%s: write failed (%s)\n", __func__,
        strerror(errno));
    exit (-1);
  }
  if ( (size_t)nwritten != sizeof(packet) ) {
    // errno is not meaningful for a short write, so report the byte counts
    fprintf (stderr, "%s: short write (%ld of %lu bytes)\n", __func__,
        (long)nwritten, (unsigned long)sizeof(packet));
    exit (-1);
  }

  // cleanup
  pcap_close(_pcap);

  return;
}

// the original pcap_open_live() doesn't open for writing
// this is just copied from pcap-bpf.c:pcap_open_live()

pcap_t *
my_pcap_open_live (const char *device, int snaplen, int promisc, int to_ms,
    char *ebuf)
{
  int fd;
  struct ifreq ifr;
  struct bpf_version bv;
  u_int v;
  pcap_t *p;
  int i;

  // alloc the object
  p = (pcap_t *) malloc(sizeof(*p));
  if ( p == NULL ) {
    snprintf(ebuf, PCAP_ERRBUF_SIZE, "malloc: %s", pcap_strerror(errno));
    return NULL;
  }
  memset(p, 0, sizeof(*p));

  // open the device
  //fd = bpf_open(p, ebuf);
  fd = -1;
  for ( i = 0; i < 16 && fd < 0; i++ ) {
    char tmp[64];
    sprintf(tmp, "/dev/bpf%d", i);
    fd = open(tmp, O_RDWR);
  }
  if ( fd < 0 ) {
    snprintf(ebuf, PCAP_ERRBUF_SIZE, "error opening /dev/bpf* for write: %s",
        strerror(errno)); 
    return NULL;
  } 

  // fill up the object
  p->fd = fd;
  p->snapshot = snaplen;
  if ( ioctl(fd, BIOCVERSION, (caddr_t)&bv) < 0 ) {
    snprintf(ebuf, PCAP_ERRBUF_SIZE, "BIOCVERSION: %s", pcap_strerror(errno));
    return NULL;
  }

  // check BPF version
  if ( bv.bv_major != BPF_MAJOR_VERSION || bv.bv_minor < BPF_MINOR_VERSION ) {
    snprintf(ebuf, PCAP_ERRBUF_SIZE, "kernel bpf filter out of date");
    return NULL;
  }

  // set kernel buffer size
  if ( (ioctl(fd, BIOCGBLEN, (caddr_t)&v) < 0) || v < 32768 )
    v = 32768;
  for ( ; v != 0; v >>= 1) {
    (void) ioctl(fd, BIOCSBLEN, (caddr_t)&v);
    (void)strncpy(ifr.ifr_name, device, sizeof(ifr.ifr_name));
    if ( ioctl(fd, BIOCSETIF, (caddr_t)&ifr) >= 0 )
      break;  /* that size worked; we're done */

    if (errno != ENOBUFS) {
      snprintf(ebuf, PCAP_ERRBUF_SIZE, "BIOCSETIF: %s: %s", device,
          pcap_strerror(errno));
      return NULL;
    }
  }

  if (v == 0) {
    snprintf(ebuf, PCAP_ERRBUF_SIZE, "BIOCSBLEN: %s: No buffer size worked",
        device);
    return NULL;
  }

  // get the data link layer type
  if ( ioctl(fd, BIOCGDLT, (caddr_t)&v) < 0 ) {
    snprintf(ebuf, PCAP_ERRBUF_SIZE, "BIOCGDLT: %s", pcap_strerror(errno));
    return NULL;
  }

  v = DLT_EN10MB;

  /* set timeout */ 
  if ( to_ms != 0 ) {
    struct timeval to;
    to.tv_sec = to_ms / 1000;
    to.tv_usec = (to_ms * 1000) % 1000000;
    if (ioctl(p->fd, BIOCSRTIMEOUT, (caddr_t)&to) < 0) {
      snprintf(ebuf, PCAP_ERRBUF_SIZE, "BIOCSRTIMEOUT: %s",
          pcap_strerror(errno));
      return NULL;
    }
  }

  v = 1;
  if ( ioctl(p->fd, BIOCIMMEDIATE, &v) < 0 ) {
    snprintf(ebuf, PCAP_ERRBUF_SIZE, "BIOCIMMEDIATE: %s", pcap_strerror(errno));
    return NULL;
  }

  if (promisc) {
    /* set promiscuous mode, okay if it fails */
    if ( ioctl(p->fd, BIOCPROMISC, NULL) < 0 ) {
      snprintf(ebuf, PCAP_ERRBUF_SIZE, "BIOCPROMISC: %s",
          pcap_strerror(errno));
      return NULL;
    }
  }

  if (ioctl(fd, BIOCGBLEN, (caddr_t)&v) < 0) {
    snprintf(ebuf, PCAP_ERRBUF_SIZE, "BIOCGBLEN: %s", pcap_strerror(errno));
    return NULL;
  }

  p->bufsize = v;
  p->buffer = (u_char *)malloc(p->bufsize);
  if (p->buffer == NULL) {
    snprintf(ebuf, PCAP_ERRBUF_SIZE, "malloc: %s", pcap_strerror(errno));
    return NULL;
  }

  return (p);
}