/*
 * Copyright (c) 1996 by Sun Microsystems, Inc.
 * All rights reserved.
 */

/*
 * Note: This source file is for *demonstration purposes only*, and is
 * utterly and completely unsupported.
 *
 * This file is meant to be compiled into a Solaris DSO, to be placed
 * in /usr/lib/vold (as dev_rmscsi.so.1).  Once that's done, and the
 * lines described below are added to /etc/vold.conf (and vold restarted),
 * Volume Management will manage your "removable SCSI" drives.
 *
 * NOTE that Iomega ZIP drives are one example of this type of
 * media, but this in no way implies that SunSoft endorses Iomega,
 * dudes/dudettes.
 *
 * Limitations of this program include (but aren't limited to):
 *
 *	- uses interfaces between vold and itself that aren't public, so
 *		aren't guaranteed to continue to work, and aren't supported
 *
 *	- there is *no* support for this
 *
 *	- only removable SCSI disks are supported, and will only work
 *		with UFS, since (a) FDISK isn't (yet?) supported on SPARC,
 *		and (b) there is (currently) no way on Intel Solaris to
 *		create such a filesystem
 *
 *	- this code works on Solaris SPARC *and* x86 (and probably PPC).
 *		BUT, on x86/PPC, volmgt *won't* find the name (if any)
 *		of the media correctly (it'll be called `unlabeled`),
 *		and rmmount *won't* automagically mount the media
 *		for you.  But the following manual mount will work:
 *
 *			mount -F ufs /vol/dsk/c.../NAME /MOUNT_DIR
 *
 *	- this code will very likely be replaced some time in the Solaris
 *		2.6 time frame by a more general (i.e. not limited to
 *		SCSI) mechanism.  At this time, issues such as x86/PPC/SPARC
 *		interoperability, and auto-mounting on x86/PPC will be
 *		addressed
 *
 * Feel free to play with this, but please keep all comments here as
 * they are.
 *
 * Compile with:
 *	cc -v -K pic -G -o dev_rmscsi.so.1 dev_rmscsi.c -lintl -lc -ztext
 *
 * To get Volume Management to actually use this ...
 *
 *	1. compile this DSO and place result in /usr/lib/vold
 *
 *	2. to get vold to use the new DSO, edit /etc/vold.conf in
 *	   the following manner:
 *
 *	  a. modify the line starting with "label sun ...", adding
 *	     " rmscsi" to its end.  For example, it might look
 *	     like:
 *
 *		label sun label_sun.so floppy rmscsi
 *
 *	  b. add a "use" line, after the others, consisting of:
 *
 *		use rmscsi drive /dev/rdsk/c*s2 dev_rmscsi.so rmscsi%d
 *
 *	3. to get rmmount to mount/unmount your UFS filesystem,
 *	   edit /etc/rmmount.conf, changing the "ident ufs ..." line
 *	   by adding "rmscsi" to its end (see 2.a above)
 *
 *
 * A NOTE FOR TROUBLESHOOTING:
 *
 * If you have any problems, ensure that you can manually mount the
 * removable SCSI disk (i.e. w/o vold running).  If you can't even run
 * newfs on the sucker, then you probably don't yet have the drive
 * geometry set up, and should do so before proceeding.  Get info such
 * as this from the device manufacturer (i.e. not from me).  In short,
 * if the drive doesn't work w/o vold, then it won't work with it.
 * (like, duh!)
 *
 *
 * Lee Duncan, SunSoft, RMTC, Colorado Springs, CO /
 * lduncan@central.sun.com
 *
 * (but email with questions about this code *may* be silently ignored
 * (;-))
 */

/* #pragma ident	"$Id: dev_rmscsi.c,v 1.3 1996/03/13 17:29:56 lduncan Exp $" */


#include	<rpc/types.h>
#include	<sys/types.h>
#include	<sys/stat.h>
#include	<sys/mkdev.h>
#include	<sys/dkio.h>
#include	<sys/vtoc.h>
#if defined(_FIRMWARE_NEEDS_FDISK)
#include	<sys/dktp/fdisk.h>
#endif
#include	<errno.h>
#include	<signal.h>
#include	<string.h>
#include	<dirent.h>
#include	<stdlib.h>
#include	<unistd.h>
#include	<thread.h>
#include	<synch.h>
#include	<sys/scsi/impl/uscsi.h>
#include	<sys/scsi/generic/commands.h>
#include	<sys/scsi/generic/inquiry.h>
#include	<sys/vol.h>
#include	<rpcsvc/nfs_prot.h>
#include	<libintl.h>



/*
 * Types used to communicate with vold
 *
 * XXX: 90% of these data structures aren't used by this file, but
 * the state of the interface requires each and every struct (honest!)
 * I never said this stuff was clean -- *or* supported.
 */

/*
 * generic queue linkage: a forward and a backward pointer, embedded as
 * the first member of several of the structures below
 */
struct q {
	struct q	*q_next;	/* forward link */
	struct q	*q_prev;	/* backward link */
};

/*
 * common object header -- it is the first member of vol_t, dirat_t,
 * symat_t, linkat_t and partat_t
 *
 * this file never touches these fields itself; the layout is here only
 * because the structures that embed it must match what vold was built
 * with
 */
typedef struct obj {
	struct q	q;		/* for future use */
	struct dbops	*o_dbops;	/* database this object is in */
	char		*o_name;	/* name of the object */
	char		*o_dir;		/* directory it lives in */
	u_longlong_t	o_xid;		/* version we have a copy of */
	u_int		o_type;		/* type of object */
	u_longlong_t	o_id;		/* unique id for the object */
	uid_t		o_uid;		/* user id of the owner */
	gid_t		o_gid;		/* group id */
	mode_t		o_mode;		/* unix permissions */
	u_int		o_nlinks;	/* hard link count */
	struct timeval  o_atime;	/* access time */
	struct timeval  o_ctime;	/* creation time */
	struct timeval  o_mtime;	/* modified time */
	u_longlong_t	o_upmask;	/* bitmask of changed fields */
	char		*o_props;	/* property string for object */
	int		o_pad[10];	/* room to grow */
} obj_t;

/*
 * volume label as carried inside a vol_t (see v_label)
 *
 * NOTE(review): l_label is opaque here; presumably its layout depends
 * on l_type and is owned by vold's label code -- confirm before use
 */
typedef struct label {
	int		l_type;		/* kind of label */
	void		*l_label;	/* label data (opaque to this file) */
} label;

/*
 * one entry in a volume's device map (vol_t.v_devmap[]), tying a vol
 * device to the path of the real device the media is in
 *
 * rmscsi_devmap() fills in dm_path; rmscsi_thread_wait() compares
 * dm_voldev against the drive's dev_t when generating eject events
 */
typedef struct devmap {
	dev_t	dm_voldev;	/* from (vol device name) */
	char	*dm_path;	/* to (path of device media is in) */
	dev_t	dm_realdev;	/* cache of the dev_t */
} devmap_t;

/*
 * a volume (piece of media) as seen by vold
 *
 * of all of these fields, this file uses only v_basedev (to find the
 * drive in rmscsi_devmap), v_devmap and v_ndev
 */
typedef struct vol {
	obj_t		v_obj;		/* object stuff */
	char		*v_mtype;	/* volume type (cdrom, floppy, ..) */
	label 		v_label;	/* volume label */
	u_long		v_parts;	/* per-vol partitions (bitmap) */
	bool_t		v_confirmed;	/* it is really there */
	devmap_t	*v_devmap;	/* map of devices (from v_basedev) */
	u_char		v_ndev;		/* number of devmaps */
	dev_t		v_basedev;	/* base device of location */
	char		*v_location;	/* location string */
	struct clue {
		minor_t		c_volume;	/* volume event happened on */
		uid_t		c_uid;		/* uid causing trouble */
		dev_t		c_tty;		/* his controlling tty */
		struct ve_error	*c_error;	/* error info */
	} v_clue; /* Hint for the various user friendly action features */
	u_long		v_eject;	/* count of outstanding eject acts */
	bool_t		v_ejfail;	/* failed the ejection */
	bool_t		v_ej_inprog;	/* ejection in progress */
	bool_t		v_ej_force;	/* already gone! */
	bool_t		v_checkresp;	/* respond to check request */
	u_long		v_flags;	/* per-vol flags (bitmap) */
	long		v_pad[9];	/* room to grow */
} vol_t;

/*
 * mutex/condition-variable pair pointed to by struct devs (dp_lock);
 * not used by this file
 */
typedef struct dp_vol_lock {
	mutex_t		dp_vol_vg_mutex;/* for access to cv */
	cond_t		dp_vol_vg_cv;	/* for signalling "vol gone" */
} dp_vol_lock_t;

/*
 * per-drive state kept by vold; dev_makedp() creates one, dev_getdp()
 * looks one up by dev_t, and dev_freedp() releases it
 *
 * rmscsi_use() fills in dp_priv (our struct rmscsi_priv), dp_symname,
 * dp_bvn and dp_rvn; rmscsi_thread_wait() sets dp_writeprot and reads
 * dp_vol and dp_dev
 */
struct devs {
	struct q	q;		/* hash queue */
	struct devsw	*dp_dsw;	/* devsw that is for this dev */
	dev_t		dp_dev;		/* device this represents */
	char		*dp_path;	/* path to this device */
	void		*dp_priv;	/* driver private info */
	struct vvnode	*dp_rvn;	/* pointer to the char vn */
	struct vvnode	*dp_bvn;	/* pointer to the block vn */
	struct vol	*dp_vol;	/* vol_t that's in this device */
	bool_t		dp_writeprot;	/* dev is write protected */
	char		*dp_symname;	/* symbolic name for this dev */
	struct vvnode	*dp_symvn;	/* pointer to alias vn */
	int		dp_ndgrp;	/* number of devices in group */
	struct devs	**dp_dgrp;	/* pointers to dp's in group */
	bool_t		dp_checkresp;	/* respond to checker */
	dp_vol_lock_t	*dp_lock;	/* for signalling between threads */
	int		dp_pad[6];	/* room to grow */
};

/*
 * device switch: the table of entry points and defaults that a device
 * DSO hands to vold through dev_new()
 *
 * the member order matters -- rmscsidevsw below is initialized
 * positionally
 */
struct devsw {
				/* begin using a device */
	bool_t	(*d_use)(char *, char *);
				/* deal with an error on a device */
	bool_t	(*d_error)(struct ve_error *);
	int	(*d_getfd)(dev_t);	/* return an fd to the dev_t */
	void	(*d_poll)(dev_t);	/* launch the poll again */
				/* build devmap */
	void	(*d_devmap)(struct vol *, int, int);
	void	(*d_close)(char *);	/* stop using device */
				/* special eject support */
	void	(*d_eject)(struct devs *);
				/* find a missing volume */
	dev_t	(*d_find)(dev_t, struct vol *);
				/* check to see if new media has arrived */
	int	(*d_check)(struct devs *);
	char	*d_mtype;	/* type of media this device handles */
	char	*d_dtype;	/* type of device */
	u_long	d_flags;	/* flags for volumes here */
	uid_t	d_uid;		/* uid for new inserts */
	gid_t	d_gid;		/* gid for new inserts */
	mode_t	d_mode;		/* mode for new inserts */
	bool_t	(*d_test)(char *); /* see if a path is okay for this device */
	long	d_pad[9];	/* room to grow */
	struct q d_pathl;	/* for reconfig stuff */
};

/* directory object: the common object header plus padding */
typedef struct dirat {
	obj_t		da_obj;		/* common object header */
	int		da_pad[10];	/* padding */
} dirat_t;

/* symbolic link object: the common object header plus the link target */
typedef struct symat {
	obj_t		sla_obj;	/* common object header */
	char		*sla_ptr;	/* who it points at */
	int		sla_pad[10];	/* padding */
} symat_t;

/* hard link object: the common object header plus the id it refers to */
typedef struct linkat {
	obj_t		la_obj;		/* common object header */
	u_longlong_t	la_id;		/* id of the object we point to */
	int		la_pad[10];	/* padding */
} linkat_t;

/* partition object: the common object header plus padding */
typedef struct partat {
	obj_t		pa_obj;		/* common object header */
	int		pa_pad[10];	/* padding */
} partat_t;

/*
 * a vold "vv-node"
 *
 * this file only ever holds pointers to these: dev_dirpath() returns
 * them, rmscsi_use() stores them in dp_bvn/dp_rvn, and rmscsi_close()
 * passes them to node_unlink() -- the members are never examined here
 */
typedef struct vvnode {
	struct	q	q;
	char		*vn_name;	/* name of the vvnode */
	u_int		vn_type;	/* type of this vvnode */
	u_int		vn_otype;	/* bit of a hack... for parts */
	u_int		vn_num;		/* offset into devmap (if partat_t) */
	union {
		vol_t		*vn_u_vol;	/* volume data */
		dirat_t		*vn_u_dir;	/* directory data */
		symat_t		*vn_u_sym;	/* symlink data */
		linkat_t	*vn_u_lnk;	/* hardlink data */
		partat_t	*vn_u_part;	/* partition data */
		obj_t		*vn_u_obj;	/* object ref */
	} vv_u;
	struct vvnode	*vn_parent;	/* parent */
	struct vvnode	*vn_sib;	/* list of siblings */
	struct vvnode	*vn_twin;	/* twin, for dsk/rdsk mirror */
	struct vvnode	*vn_child;	/* for directories */
	u_int		vn_nlinks;	/* number of links */
	nfs_fh		vn_fh;		/* fhandle */
	char		vn_update;	/* marker */
	char		vn_dirtype;	/* type of dir this lives in */
} vvnode_t;




/*
 * Routines used from vold
 *
 * XXX: NOTE (again): These routines are currently *private*, i.e.
 * shouldn't be used, since they *will* change out from under you.  But,
 * on the other hand, there's no other way to do most of these things,
 * so ol' 22's caught us again!
 */

/* print routines (printf-like; several callers in this file use "%m") */
extern void		debug(int level, const char *fmt, ...);
extern void		warning(const char *fmt, ...);
extern void		info(const char *fmt, ...);
extern void		noise(const char *fmt, ...);
/* support routines: ASSERT() backend, vtoc helper, raw-path helper */
extern int		failass(char *assertion, char *file, int line);
extern int		partition_low(struct vtoc *v);
/* rmscsi_testpath() free()s what this returns */
extern char		*rawpath(char *path);

/* util routines */
extern void		dev_new(struct devsw *new);
extern struct devs	*dev_getdp(dev_t dev);
extern vvnode_t		*dev_dirpath(char *path);
extern struct devs	*dev_makedp(struct devsw *dsw, char *path);
extern void		node_unlink(vvnode_t *vn);
extern void		dev_freedp(struct devs *dp);
extern void		vol_event(struct vioc_event *vie);
extern void		dev_eject(vol_t *v, bool_t ans);


/*
 * data from vold
 *
 * rmscsi_thread_wait() sleeps on running_cv (under running_mutex) until
 * vold_running becomes non-zero before it touches the drive
 */
extern cond_t 		running_cv;
extern mutex_t		running_mutex;
extern int		vold_running;


/*
 * interface routines (to be called by vold via rmscsidevsw)
 *
 * the one exception is rmscsi_thread_wait(), which is not in the switch
 * table: it is the start routine of the thread rmscsi_use() creates
 */
static bool_t	rmscsi_use(char *, char *);
static bool_t	rmscsi_error(struct ve_error *);
static int	rmscsi_getfd(dev_t);
static void	rmscsi_devmap(vol_t *, int, int);
static void	rmscsi_close(char *path);
static void	rmscsi_thread_wait(struct devs *dp);
static bool_t	rmscsi_testpath(char *);



/* media type and device type strings placed in rmscsidevsw */
#define	RMSCSI_MTYPE	"rmscsi"
#define	DRIVE_CLASS	"drive"

/* bits for devsw.d_flags */
#define	D_POLL		0x01			/* uses d_poll entry point */
#define	D_MEJECTABLE	0x08			/* has eject button */


/*
 * the device switch handed to vold by dev_init()
 *
 * the initializer is positional, so it must follow the member order of
 * struct devsw exactly; the trailing members (d_pad, d_pathl) are left
 * to be zero-filled
 *
 * NOTE(review): d_flags advertises D_POLL ("uses d_poll entry point")
 * while d_poll itself is NULL -- confirm that vold never calls through
 * d_poll for this device
 */
static struct devsw rmscsidevsw = {
	rmscsi_use,		/* d_use */
	rmscsi_error,		/* d_error */
	rmscsi_getfd,		/* d_getfd */
	NULL,			/* d_poll */
	rmscsi_devmap,		/* d_devmap */
	rmscsi_close,		/* d_close */
	NULL, 			/* d_eject */
	NULL, 			/* d_find */
	NULL,			/* d_check */
	RMSCSI_MTYPE,		/* d_mtype */
	DRIVE_CLASS,		/* d_dtype */
	D_POLL|D_MEJECTABLE,	/* d_flags */
	(uid_t) 0,		/* d_uid */
	(gid_t) 0,		/* d_gid */
	(mode_t) 0,		/* d_mode */
	rmscsi_testpath		/* d_test */
};


/* V_MAXPART sizes the per-slice arrays in struct rmscsi_priv */
#define	V_MAXPART		8	/* # of partitions supported */
#define	DEFAULT_PARTITION	2	/* default starting "main" partition */

/*
 * our own home-grown assert statement
 *
 * with DEBUG defined a false expression calls failass(); without it
 * ASSERT() expands to nothing, so the expression is not evaluated at all
 */
#ifdef	DEBUG
#define	ASSERT(EX)	((void)((EX) || failass(#EX, __FILE__, __LINE__)))
#else
#define	ASSERT(EX)
#endif

/* the p0 workaround is compiled in only when fdisk support is */
#if defined(_FIRMWARE_NEEDS_FDISK)
#define	P0_WA				/* work around having p0, p1, ... */
#endif


/*
 * dev_init -- entry point called when this DSO is first loaded
 *
 *	registers our device switch (rmscsidevsw) with vold by passing
 *	it to dev_new()
 *
 *	always returns TRUE
 */
bool_t
dev_init()
{
	/* the registration routine lives in vold itself */
	extern void	dev_new(struct devsw *);
	struct devsw	*our_switch = &rmscsidevsw;

	dev_new(our_switch);

	return (TRUE);
}


/*
 * private (this file only) RMSCSI data structure
 */
static struct rmscsi_priv {
	char	*rs_rawpath[V_MAXPART];
	mutex_t	rs_killmutex;			/* mutex for killing thread */
	int	rs_tid;				/* thread id */
	int	rs_fd[V_MAXPART];
	int	rs_defpart;
#ifdef	P0_WA
	char	*rs_blk_p0_path;		/* the p0 blk name */
	int	rs_p0_part;			/* part to substitute p0 for */
#endif
#if defined(_FIRMWARE_NEEDS_FDISK)
	int	rs_raw_pfd[FD_NUMPART+1];	/* char fdisk-partition fds */
#endif
};


#define	RMSCSI_NAMEPROTO_DEFD	"%sd0s%d"
#define	RMSCSI_BASEPART		DEFAULT_PARTITION

#define	RMSCSI_NAMEPROTO	"%ss%d"

#if defined(_FIRMWARE_NEEDS_FDISK)
#define	RMSCSI_NAMEPROTO_P	"%sp%d"
#ifdef	P0_WA
#define	RMSCSI_NAMEPROTO_P_ALL	0
#endif
static void	rmscsi_open_exclusive(struct rmscsi_priv *, char *, char *);
#endif


/* thread stack size */
#define	RMSCSI_STKSIZE		(32 * 1024)	/* 32k! */


/*
 * rmscsi_free_priv -- release everything hanging off of a private data
 *	struct (slice path strings, any open file descriptors) and then
 *	the struct itself
 *
 *	used only on the failure paths of rmscsi_use(), where no thread
 *	is using the struct
 */
static void
rmscsi_free_priv(struct rmscsi_priv *rsp)
{
	int	i;


	for (i = 0; i < V_MAXPART; i++) {
		if (rsp->rs_rawpath[i] != NULL) {
			free(rsp->rs_rawpath[i]);
		}
		if (rsp->rs_fd[i] >= 0) {
			(void) close(rsp->rs_fd[i]);
		}
	}
#ifdef	P0_WA
	if (rsp->rs_blk_p0_path != NULL) {
		free(rsp->rs_blk_p0_path);
	}
#endif
#if defined(_FIRMWARE_NEEDS_FDISK)
	for (i = 0; i < (FD_NUMPART+1); i++) {
		if (rsp->rs_raw_pfd[i] >= 0) {
			(void) close(rsp->rs_raw_pfd[i]);
		}
	}
#endif
	free(rsp);
}


/*
 * rmscsi_use -- this routine expects either a raw or block path that
 *	leads to a removable scsi disk (removability being detected
 *	using a SCSI inquiry command -- see rmscsi_testpath())
 *
 *	it further expects that the supplied
 *	path starts with "/dev/dsk/" for block devices or
 *	"/dev/rdsk/" for character devices
 *
 *	it finds the complementary device by switching this
 *	segment, e.g. if you supply "/dev/dsk/c0t6" for a
 *	group of block devices, then this routine will
 *	expect the raw devices to be at "/dev/rdsk/c0t6"
 *
 *	a thread is created which will handle this new group of
 *	interfaces to a device
 *
 *	a devs struct is filled in and passed on to the thread
 *
 *	NOTE: when "path" names an existing device it is truncated *in
 *	place* at its trailing "sN"
 *
 *	return TRUE implies that the device is one which isn't
 *	currently managed, and needs to be; FALSE is returned if the
 *	path is unusable, the device is already managed, memory runs
 *	out, or the thread can't be created (nothing is left allocated
 *	in any of those cases)
 *
 *	NOTE(review): in the fallback case (path given without "d0sN")
 *	the slice names are still built straight from "path", i.e.
 *	without the "d0" -- that looks wrong, but rmscsi_close() builds
 *	its name the same way, so it is left alone here; confirm
 */
static bool_t
rmscsi_use(char *path, char *symname)
{
	extern void		node_unlink(vvnode_t *);	/* in vold */
	extern void		dev_freedp(struct devs *);	/* in vold */
	struct stat		statbuf;
	char			namebuf1[MAXPATHLEN];
	char			full_path[MAXPATHLEN+1];
	char			*path_trunc = path;
	char			namebuf[MAXNAMELEN];
	struct devs		*dp;
	struct rmscsi_priv	*rsp;
	char			*s;
	char			*p;
	vvnode_t		*bvn;
	vvnode_t		*rvn;
	int			i;
	int			err;
	bool_t			oom;



	info(gettext("rmscsi_use: %s, %s\n"), path, symname);

	/*
	 * every name built below is the supplied path plus at most a
	 * handful of characters ("/dev/rdsk/" prefix swap, "d0sN", "sN",
	 * "pN"), so checking the length once, against the smallest of
	 * our buffers, keeps all of the sprintf/strcpy calls in bounds
	 */
	if (strlen(path) + 16 >= sizeof (namebuf)) {
		warning(gettext("rmscsi: %s is an invalid path\n"), path);
		return (FALSE);
	}

	/*
	 * we don't do an open for the device because it'll probably just
	 * return ENODEV if there isn't a device there
	 *
	 * instead, we just stat the device and make sure the device
	 * node is there and is a reasonable type
	 */

	/* just take a path if they hand it to us. */
	if (stat(path, &statbuf) < 0) {
		/*
		 * we can accept a path of the form:
		 *
		 * 	/dev/{dsk,rdsk}/cNtN
		 *
		 * we fill in the rest by appending "d0sN"
		 */
		(void) sprintf(full_path, RMSCSI_NAMEPROTO_DEFD, path,
		    RMSCSI_BASEPART);
		if (stat(full_path, &statbuf) < 0) {
			/* can't even find it with "d0sN" appended! */
			debug(1, "rmscsi_use: %s; %m\n", full_path);
			return (FALSE);
		}
	} else {
		/*
		 * the supplied path is complete -- truncate at the "slice"
		 * part of the name
		 *
		 * XXX: assume all rmscsi pathnames end in "sN"
		 */
		(void) strcpy(full_path, path);
		s = strrchr(path, 's');
		if ((s == NULL) || (s[1] < '0') || (s[1] > '9')) {
			/*
			 * no "s" at all, or the last one isn't followed by
			 * a slice number (e.g. it's the one in "rdsk") --
			 * leave the caller's string untouched
			 */
			warning(gettext("rmscsi: %s is an invalid path\n"),
			    full_path);
			return (FALSE);
		}
		*s = '\0';		/* truncate at the "sN" */
	}

	/*
	 * check to see if this guy is already configured
	 */
	if (dev_getdp(statbuf.st_rdev)) {
		debug(1, "rmscsi_use: %s already in use\n", full_path);
		return (FALSE);
	}

	/*
	 * check the modes to make sure that the path is either
	 * a block or a character device
	 */
	if (!S_ISCHR(statbuf.st_mode) && !S_ISBLK(statbuf.st_mode)) {
		/* full_path is the name we actually stat'ed */
		warning(gettext(
		    "rmscsi: %s not block or char device (mode 0x%x)\n"),
		    full_path, statbuf.st_mode);
		return (FALSE);
	}

	/* create an "empty" 'rmscsi-private' data struct */
	rsp = (struct rmscsi_priv *)calloc(1, sizeof (struct rmscsi_priv));
	if (rsp == NULL) {
		warning(gettext("rmscsi: out of memory for %s\n"), full_path);
		return (FALSE);
	}
	for (i = 0; i < V_MAXPART; i++) {
		rsp->rs_fd[i] = -1;
	}
	rsp->rs_defpart = -1;
#ifdef	P0_WA
	rsp->rs_p0_part = -1;
#endif
#if defined(_FIRMWARE_NEEDS_FDISK)
	/* so that rmscsi_free_priv() never closes a bogus fd 0 */
	for (i = 0; i < (FD_NUMPART+1); i++) {
		rsp->rs_raw_pfd[i] = -1;
	}
#endif

	/* stick some good stuff in the device hierarchy */
	if ((s = strstr(path_trunc, "rdsk")) != NULL) {

		/* he gave us a raw path (i.e. "rdsk" in it) */

		/* save a pointer to the raw vv-node */
		rvn = dev_dirpath(path_trunc);

		/* create the names for rawpath */
		for (i = 0; i < V_MAXPART; i++) {
			(void) sprintf(namebuf1, RMSCSI_NAMEPROTO,
			    path_trunc, i);
			rsp->rs_rawpath[i] = strdup(namebuf1);
		}

		/* get the block path now from the raw one */

		/* skip past "rdsk/" */
		if ((p = strchr(s, '/')) != NULL) {
			p++;
			(void) sprintf(namebuf, "/dev/dsk/%s", p);
		} else {
			/* no slash after rdsk? */
			debug(1, "rmscsi_use: malformed pathname '%s'\n",
			    path_trunc);
			/* what else can we do? */
			(void) strcpy(namebuf, path_trunc);
		}

		/* get the block vv-node */
		bvn = dev_dirpath(namebuf);

#ifdef	P0_WA
		/* set up the p0 block pathname */
		(void) sprintf(namebuf1, RMSCSI_NAMEPROTO_P, namebuf,
		    RMSCSI_NAMEPROTO_P_ALL);
		rsp->rs_blk_p0_path = strdup(namebuf1);
#endif

	} else if ((s = strstr(path_trunc, "dsk")) != NULL) {

		/* he gave us the block path */

		/* save pointer to block vv-node */
		bvn = dev_dirpath(path_trunc);

#ifdef	P0_WA
		(void) sprintf(namebuf1, RMSCSI_NAMEPROTO_P, path_trunc,
		    RMSCSI_NAMEPROTO_P_ALL);
		rsp->rs_blk_p0_path = strdup(namebuf1);
#endif

		/* skip past "dsk/" */
		if ((p = strchr(s, '/')) != NULL) {
			p++;
			(void) sprintf(namebuf, "/dev/rdsk/%s", p);
		} else {
			/* no slash after "dsk"? */
			debug(1, "rmscsi_use: malformed path name '%s'\n",
			    path);
			/* what else can we do? */
			(void) strcpy(namebuf, path_trunc);
		}

		/* save a pointer to the raw vv-node */
		rvn = dev_dirpath(namebuf);

		/* create the names for rawpath */
		for (i = 0; i < V_MAXPART; i++) {
			(void) sprintf(namebuf1, RMSCSI_NAMEPROTO, namebuf, i);
			rsp->rs_rawpath[i] = strdup(namebuf1);
		}

	} else {
		debug(1, "rmscsi_use: malformed path name '%s'\n", path_trunc);
		rmscsi_free_priv(rsp);
		return (FALSE);
	}

	/* make sure that every strdup() above actually worked */
	oom = FALSE;
	for (i = 0; i < V_MAXPART; i++) {
		if (rsp->rs_rawpath[i] == NULL) {
			oom = TRUE;
		}
	}
#ifdef	P0_WA
	if (rsp->rs_blk_p0_path == NULL) {
		oom = TRUE;
	}
#endif
	if (oom) {
		warning(gettext("rmscsi: out of memory for %s\n"), full_path);
		rmscsi_free_priv(rsp);
		return (FALSE);
	}

#if	defined(P0_WA) && defined(DEBUG)
	debug(6, "rmscsi_use: p0 block path is \"%s\"\n", rsp->rs_blk_p0_path);
#endif

	if ((dp = dev_makedp(&rmscsidevsw,
	    rsp->rs_rawpath[RMSCSI_BASEPART])) == NULL) {
		debug(1, "rmscsi_use: dev_makedp failed for %s\n",
		    rsp->rs_rawpath[RMSCSI_BASEPART]);
		rmscsi_free_priv(rsp);
		return (FALSE);
	}

#if defined(_FIRMWARE_NEEDS_FDISK)
	/*
	 * serious hackery --  open the p? interfaces (so others can't
	 *	get around us)
	 */
	rmscsi_open_exclusive(rsp, namebuf, path);
#endif

	dp->dp_priv = (void *)rsp;		/* ptr to our private data */
	dp->dp_symname = strdup(symname);	/* symbolic name */
	dp->dp_bvn = bvn;			/* ptr to block vv-node */
	dp->dp_rvn = rvn;			/* ptr to raw vv-node */

	(void) mutex_init(&rsp->rs_killmutex, USYNC_THREAD, NULL);

	/*
	 * thr_create() returns zero on success and an error *number*
	 * (not -1) on failure, and it doesn't set errno
	 */
	err = thr_create(0, RMSCSI_STKSIZE,
	    (void *(*)(void *))rmscsi_thread_wait, (void *)dp, THR_BOUND,
	    (thread_t *)&(rsp->rs_tid));
	if (err != 0) {
		errno = err;		/* so that %m has something to say */
		warning(gettext("rmscsi thread create failed; %m\n"));

		/* undo everything, in the same order rmscsi_close() does */
		node_unlink(dp->dp_bvn);
		node_unlink(dp->dp_rvn);
		dp->dp_priv = NULL;
		rmscsi_free_priv(rsp);
		dev_freedp(dp);
		return (FALSE);
	}
#ifdef	DEBUG
	debug(6, "rmscsi_use: rmscsi_thread_wait id %d created\n",
	    rsp->rs_tid);
#endif
	return (TRUE);
}


/*
 * rmscsi_devmap -- d_devmap entry point: fill in the path of devmap
 *	entry "off" of volume "v" for slice "part"
 *
 *	the drive is looked up from v->v_basedev, and the entry gets a
 *	strdup'ed copy of that slice's raw path (or, with the p0
 *	workaround compiled in, of the p0 block path when "part" is the
 *	slice chosen by part_to_hack())
 */
/*ARGSUSED*/
static void
rmscsi_devmap(vol_t *v, int part, int off)
{
	struct devs		*drive;
	struct rmscsi_priv	*priv;
	char			**slot;


	drive = dev_getdp(v->v_basedev);
	priv = (struct rmscsi_priv *)drive->dp_priv;

	/* where the new path string goes */
	slot = &v->v_devmap[off].dm_path;

#ifdef	P0_WA
	/* return P0 path for biggest slice that starts at 0 */
	if (part != priv->rs_p0_part) {
		/* an ordinary slice: hand back its own name */
		*slot = strdup(priv->rs_rawpath[part]);
	} else {
		/* hack! -- use p0 instead of the requested slice */
		*slot = strdup(priv->rs_blk_p0_path);
		debug(6, "rmscsi_devmap: hacking path for p0 workaround\n");
	}
#else
	*slot = strdup(priv->rs_rawpath[part]);
#endif

#ifdef	DEBUG
	debug(9, "rmscsi_devmap: returning (part %d, off %d): \"%s\"\n",
	    part, off, *slot);
#endif
}


static int
rmscsi_getfd(dev_t dev)
{
	struct devs		*dp;
	struct rmscsi_priv	*rsp;
	int			fd;


	dp = dev_getdp(dev);
	ASSERT(dp != NULL);
	rsp = (struct rmscsi_priv *)dp->dp_priv;
	ASSERT(rsp->rs_defpart != -1);
	ASSERT(rsp->rs_fd[rsp->rs_defpart] >= 0);
	fd = rsp->rs_fd[rsp->rs_defpart];
	return (fd);
}


/*
 * rmscsi_error -- d_error entry point
 *
 *	the error information is not looked at; the call is just noted
 *	at debug level 1 and TRUE is returned
 */
/*ARGSUSED*/
static bool_t
rmscsi_error(struct ve_error *vie)
{
	bool_t	answer = TRUE;


	debug(1, "rmscsi_error\n");

	return (answer);
}


/*
 * rmscsi_close -- d_close entry point: stop managing the drive whose
 *	(slice-less) name is "path"
 *
 * State that must be cleaned up:
 *	name in the name space
 *	the "dp"
 *	any pointers to the media
 *	eject any existing media
 *	the priv structure
 *
 * The drive is found by stat'ing "path" with the base slice appended;
 * if that fails, or if the device isn't one we manage, nothing is done.
 */

static void
rmscsi_close(char *path)
{
	char			namebuf[MAXNAMELEN];
	struct	stat		sb;
	struct devs		*dp;
	struct rmscsi_priv	*rsp;
	int			i;



	debug(1, "rmscsi_close %s\n", path);

	/* "s", one slice digit and the NUL must still fit after the path */
	if (strlen(path) + 3 > sizeof (namebuf)) {
		warning(gettext("rmscsi: %s is an invalid path\n"), path);
		return;
	}

	(void) sprintf(namebuf, RMSCSI_NAMEPROTO, path, RMSCSI_BASEPART);
	if (stat(namebuf, &sb) < 0) {
		warning(gettext("rmscsi_close: %s; %m\n"), namebuf);
		return;
	}

	if ((dp = dev_getdp(sb.st_rdev)) == NULL) {
		debug(1, "rmscsi_close: %s not in use\n", path);
		return;
	}

	/* get our private data */
	rsp = (struct rmscsi_priv *)dp->dp_priv;

	/*
	 * take care of the listener thread
	 *
	 * the thread holds rs_killmutex while it is handling a state
	 * change, so taking it here keeps the signal from landing in
	 * the middle of that work
	 */
	(void) mutex_lock(&rsp->rs_killmutex);
	(void) thr_kill(rsp->rs_tid, SIGUSR1);
	(void) mutex_unlock(&rsp->rs_killmutex);
	(void) thr_join(rsp->rs_tid, 0, 0);
	debug(1, "rmscsi_close: thread id %d reaped (killed/joined)\n",
	    rsp->rs_tid);

	/*
	 * if there is a volume inserted in this device ...
	 */
	if (dp->dp_vol) {
		/*
		 * clean up the name space and the device maps
		 * to remove references to any volume that might
		 * be in the device right now
		 *
		 * this crap with the flags is to keep the
		 * "poll" from being relaunched by this function
		 *
		 * yes, its a hack and there should be a better way
		 */
		if (dp->dp_dsw->d_flags & D_POLL) {
			dp->dp_dsw->d_flags &= ~D_POLL;
			dev_eject(dp->dp_vol, TRUE);
			dp->dp_dsw->d_flags |= D_POLL;
		} else {
			dev_eject(dp->dp_vol, TRUE);
		}

		/* do the eject work */
		(void) ioctl(rsp->rs_fd[RMSCSI_BASEPART], DKIOCEJECT, 0);
	}

	/*
	 * clean up the names in the name space
	 */
	node_unlink(dp->dp_bvn);
	node_unlink(dp->dp_rvn);

	/*
	 * free the private data we've allocated
	 */
	for (i = 0; i < V_MAXPART; i++) {
		if (rsp->rs_rawpath[i]) {
			free(rsp->rs_rawpath[i]);
		}
		if (rsp->rs_fd[i] != -1) {
			(void) close(rsp->rs_fd[i]);
		}
	}
#ifdef	P0_WA
	/* rmscsi_use() strdup'ed this one too */
	if (rsp->rs_blk_p0_path != NULL) {
		free(rsp->rs_blk_p0_path);
	}
#endif
#if defined(_FIRMWARE_NEEDS_FDISK)
	for (i = 0; i < (FD_NUMPART+1); i++) {
		if (rsp->rs_raw_pfd[i] >= 0) {
			(void) close(rsp->rs_raw_pfd[i]);
		}
	}
#endif
	dp->dp_priv = NULL;		/* don't leave a dangling pointer */
	free(rsp);

	/*
	 * free the dp, so no one points at us anymore
	 */
	dev_freedp(dp);
}

#ifdef	P0_WA
/*
 * part_to_hack -- pick the slice that the p0 workaround substitutes for
 *
 *	looks at the first V_MAXPART entries of the vtoc, ignoring any
 *	with a size that isn't positive, and returns the number of the
 *	slice with the lowest start; of the slices sharing that start
 *	the largest wins, and on an exact tie the lowest slice number
 *	is kept
 *
 *	returns -1 if no slice has a positive size
 *
 *	NOTE(review): nothing here insists that the winner starts at
 *	block zero, and start values are assumed to be non-negative --
 *	confirm that's what callers expect
 */
static int
part_to_hack(struct vtoc *v)
{
	int	slice;
	int	best = -1;		/* best slice so far (none yet) */
	int	best_start = -1;	/* where it starts */
	int	best_size = 0;		/* how big it is */
	int	size;
	int	start;


	for (slice = 0; slice < V_MAXPART; slice++) {

		size = v->v_part[slice].p_size;
		if (size > 0) {
			start = v->v_part[slice].p_start;

			/*
			 * keep what we have if we have something, and
			 * this slice neither starts earlier nor is a
			 * bigger slice at the same start
			 */
			if ((best_start >= 0) && (start >= best_start) &&
			    ((start != best_start) || (size <= best_size))) {
				continue;
			}

			best = slice;
			best_start = start;
			best_size = size;
		}
	}

	return (best);
}
#endif	/* P0_WA */


/*
 * helpers defined further down (declared at file scope: a block-scope
 * declaration of a function may not be "static")
 */
#ifdef	DEBUG
static char	*dkiostate_to_str(enum dkio_state);
#endif
static int	reopen_rmscsi(struct rmscsi_priv *);


/*
 * rmscsi_thread_wait -- start routine of the per-drive thread created
 *	by rmscsi_use()
 *
 *	- waits until vold says it is running
 *	- opens every slice of the drive (non-blocking, exclusive,
 *	  close-on-exec), saving the fds in rs_fd[]
 *	- makes sure the thing is a SCSI disk
 *	- then loops forever in the (blocking) DKIOCSTATE ioctl, turning
 *	  state changes into VIE_INSERT and VIE_EJECT events for vold
 *
 *	rs_killmutex is held while a state change is being handled;
 *	rmscsi_close() takes the same mutex before it signals this thread
 *
 *	the routine only returns (after closing the fds) if a slice
 *	can't be opened, if the device isn't a SCSI disk, or if the
 *	driver rejects DKIOCSTATE with ENOTTY
 */
static void
rmscsi_thread_wait(struct devs *dp)
{
	extern void		vol_event(struct vioc_event *);
	extern int		vold_running;
	extern cond_t 		running_cv;
	extern mutex_t		running_mutex;
	int			fd = -1;
	struct rmscsi_priv	*rsp = (struct rmscsi_priv *)dp->dp_priv;
	struct vioc_event	vie;
	struct vtoc		vtoc;
	enum dkio_state 	rmscsi_state;
	int			i;
	int			err;
	int			lowpart;
	struct dk_cinfo		dkc;
	struct vol		*v;



	(void) mutex_lock(&running_mutex);
	while (vold_running == 0) {
		(void) cond_wait(&running_cv, &running_mutex);
	}
	(void) mutex_unlock(&running_mutex);

	for (i = 0; i < V_MAXPART; i++) {
		debug(1, "rmscsi_thread_wait: opening \"%s\" RDWR ...\n",
		    rsp->rs_rawpath[i]);
		if ((fd = open(rsp->rs_rawpath[i],
		    O_RDWR|O_NONBLOCK|O_EXCL)) < 0) {
			noise("rmscsi: %s; %m\n", rsp->rs_rawpath[i]);
			goto errout;
		}
		(void) fcntl(fd, F_SETFD, 1);	/* close-on-exec */
		rsp->rs_fd[i] = fd;
	}

	/*
	 * check to make sure device is a SCSI disk
	 *
	 * XXX: isn't this redundant with rmscsi_testpath() ?
	 */
	if (ioctl(rsp->rs_fd[RMSCSI_BASEPART], DKIOCINFO, &dkc) < 0) {
		noise("rmscsi: %s DKIOCINFO failed; %m\n",
		    rsp->rs_rawpath[RMSCSI_BASEPART]);
		goto errout;
	}
	if (dkc.dki_ctype != DKC_SCSI_CCS) {
		noise("rmscsi: %s is not a SCSI disk drive (disk type %d expected, %d found)\n",
		    rsp->rs_rawpath[RMSCSI_BASEPART], DKC_SCSI_CCS,
		    dkc.dki_ctype);
		goto errout;
	}

	rmscsi_state = DKIO_NONE;
	rsp->rs_defpart = DEFAULT_PARTITION;

	/*CONSTCOND*/
	while (1) {

		fd = rsp->rs_fd[rsp->rs_defpart];

		/*
		 * this ioctl blocks until state changes.
		 */
#ifdef	DEBUG
		debug(3,
		"rmscsi_thread_wait: ioctl(DKIOCSTATE, \"%s\") on \"%s\"\n",
		    dkiostate_to_str(rmscsi_state),
		    rsp->rs_rawpath[rsp->rs_defpart]);
#else
		debug(3, "rmscsi_thread_wait: ioctl(DKIOCSTATE) on \"%s\"\n",
		    rsp->rs_rawpath[rsp->rs_defpart]);
#endif
		if (ioctl(fd, DKIOCSTATE, &rmscsi_state) < 0) {
			/* grab errno before debug() gets a chance at it */
			err = errno;
			debug(1,
			    "rmscsi_thread_wait: DKIOCSTATE of \"%s\"; %m\n",
			    rsp->rs_rawpath[rsp->rs_defpart]);
			if (err == ENOTTY) {
				goto errout;
			}
			(void) sleep(1);
			continue;
		}
#ifdef	DEBUG
		debug(5, "rmscsi_thread_wait: new state = \"%s\"\n",
		    dkiostate_to_str(rmscsi_state));
#endif
		if (rmscsi_state == DKIO_NONE) {
			continue;		/* steady state -- ignore */
		}

		(void) memset(&vie, 0, sizeof (struct vioc_event));

		(void) mutex_lock(&rsp->rs_killmutex);
		/*
		 * we have media in the drive
		 */
		if (rmscsi_state == DKIO_INSERTED) {

			/*
			 * if we already know about the media in the
			 * device, just ignore the information
			 */
			if (dp->dp_vol != NULL) {
				(void) mutex_unlock(&rsp->rs_killmutex);
				continue;
			}

			/*
			 * find out the lowest partition that maps the
			 * beginning of the drive
			 *
			 * rs_defpart indexes rs_fd[] and rs_rawpath[], so
			 * an answer that isn't a valid slice number is
			 * ignored (the old default is kept)
			 */
			if (ioctl(fd, DKIOCGVTOC, &vtoc) == 0) {
				lowpart = partition_low(&vtoc);
				if ((lowpart >= 0) && (lowpart < V_MAXPART)) {
					rsp->rs_defpart = lowpart;
				}
				debug(1,
				    "rmscsi_thread_wait: rs_defpart now %d\n",
				    rsp->rs_defpart);
#ifdef	P0_WA
				rsp->rs_p0_part = part_to_hack(&vtoc);
				debug(1,
				    "rmscsi_thread_wait: part to hack = %d\n",
				    rsp->rs_p0_part);
#endif
			}

			/* generate an "insert" event */
			vie.vie_type = VIE_INSERT;
			vie.vie_insert.viei_dev = dp->dp_dev;
			dp->dp_writeprot = reopen_rmscsi(rsp);
			vol_event(&vie);
		}

		/*
		 * we have NO media in the drive (it's just been ejected)
		 */
		if (rmscsi_state == DKIO_EJECTED) {

			/*
			 * if we already know about the ejection,
			 * just continue in our happy loop
			 */
			if (dp->dp_vol == NULL) {
				(void) mutex_unlock(&rsp->rs_killmutex);
				continue;
			}

			/*
			 * generate an eject event (if we have a unit)
			 *
			 * the volume has to be looked up *now*: there was
			 * none in the drive when this thread started
			 *
			 * XXX: this doesn't work because the DKIOCSTATE ioctl
			 * never seems to return DKIO_EJECTED for some
			 * devices, such as the ZIP 100
			 */
			v = dp->dp_vol;
			for (i = 0; i < (int)v->v_ndev; i++) {
				if (v->v_devmap[i].dm_voldev == dp->dp_dev) {
					vie.vie_type = VIE_EJECT;
					vie.vie_eject.viej_force = TRUE;
					vie.vie_eject.viej_unit =
					    minor(v->v_devmap[i].dm_voldev);
					vol_event(&vie);
					break;
				}
			}
		}
		(void) mutex_unlock(&rsp->rs_killmutex);
	}

errout:
	/* close whichever fd's did get opened */
	for (i = 0; i < V_MAXPART; i++) {
		if (rsp->rs_fd[i] >= 0) {
			(void) close(rsp->rs_fd[i]);
		}
		rsp->rs_fd[i] = -1;
	}
}


static int
reopen_rmscsi(struct rmscsi_priv *rsp)
{
	int	rdonly = 0;

	/*
	 * XXX: boy, is this a hack
	 *
	 * this works around a bug in scsi drivers were
	 * you can't seem to read from a file descriptor you've opened
	 * O_NDELAY where there wasn't any media in the drive
	 *
	 * this open can take forever, by the way ...
	 */

#ifdef	DEBUG
	debug(11, "reopen_scsi: closing part %d fd (%d)\n",
	    rsp->rs_defpart, rsp->rs_fd[rsp->rs_defpart]);
#endif
	(void) close(rsp->rs_fd[rsp->rs_defpart]);
	if ((rsp->rs_fd[rsp->rs_defpart] =
	    open(rsp->rs_rawpath[rsp->rs_defpart], O_RDWR|O_EXCL)) < 0) {
		if (errno == EROFS) {
			rsp->rs_fd[rsp->rs_defpart] = open(
			    rsp->rs_rawpath[rsp->rs_defpart],
			    O_RDONLY|O_NDELAY|O_EXCL);
			rdonly = 1;
		} else {
			rsp->rs_fd[rsp->rs_defpart] = open(
			    rsp->rs_rawpath[rsp->rs_defpart],
			    O_RDWR|O_NDELAY|O_EXCL);
		}
	}

	if (rsp->rs_fd[rsp->rs_defpart] < 0) {
		warning(gettext("rmscsi: open error on %s; %m\n"),
		    rsp->rs_rawpath[rsp->rs_defpart]);
	}

	/* set close-on-exec */
	(void) fcntl(rsp->rs_fd[rsp->rs_defpart], F_SETFD, 1);

	debug(1,
	    "reopen_rmscsi: fd = %d (part %d), rdonly = %d (path \"%s\")\n",
	    rsp->rs_fd[rsp->rs_defpart], rsp->rs_defpart, rdonly,
	    rsp->rs_rawpath[rsp->rs_defpart]);

	return (rdonly);
}


#ifdef	DEBUG

/*
 * dkiostate_to_str -- printable name of a DKIOCSTATE state (DEBUG only)
 *
 *	the result lives in a single static buffer, so it is overwritten
 *	by the next call
 */
static char *
dkiostate_to_str(enum dkio_state st)
{
	static char		state_buf[30];


	if (st == DKIO_NONE) {
		(void) strcpy(state_buf, "DKIO_NONE");
	} else if (st == DKIO_INSERTED) {
		(void) strcpy(state_buf, "DKIO_INSERTED");
	} else if (st == DKIO_EJECTED) {
		(void) strcpy(state_buf, "DKIO_EJECTED");
	} else {
		/* not a state we know the name of */
		(void) sprintf(state_buf, "?unknown? (%d)", (int)st);
	}

	return (state_buf);
}
#endif	/* DEBUG */


static bool_t
rmscsi_testpath(char *path)
{
	int			fd = -1;
	struct dk_cinfo		dkc;
	struct stat		sb;
	char			*rp = NULL;
	struct scsi_inquiry	inquiry;
	struct uscsi_cmd	ucmd;
	union scsi_cdb		cdb;
	bool_t			res = FALSE;	/* return result */




	/* check to see if we're already using it */
	if (stat(path, &sb) != 0) {
		/* something's seriously wrong */
		debug(5, "rmscsi(probing): stat of \"%s\"; %m\n", path);
		goto dun;
	}

	if (dev_getdp(sb.st_rdev) != NULL) {
		debug(5, "rmscsi(probing): \"%s\" already in use\n", path);
		goto dun;	/* this one's legit, but already handled */
	}

	/* make sure our path is a raw device */
	if ((rp = rawpath(path)) == NULL) {
		debug(5, "rmscsi(probing): can't get rawpath of \"%s\"\n",
		    path);
		goto dun;
	}

	/*
	 * if we can't open it, assume that it's because it's busy or
	 * something else is wrong
	 *
	 * in any event, dev_use couldn't open it either, so it's
	 * not worth trying to use the device
	 */
	if ((fd = open(rp, O_RDONLY|O_NONBLOCK|O_EXCL)) < 0) {
		debug(5, "rmscsi(probing): open of \"%s\"; %m\n", rp);
		goto dun;
	}

	/* check to make sure device is a SCSI device */
	if (ioctl(fd, DKIOCINFO, &dkc) < 0) {
		debug(5, "rmscsi(probing): DKIOCINFO on \"%s\" failed; %m\n",
		    rp);
		goto dun;
	}
	if (dkc.dki_ctype != DKC_SCSI_CCS) {
		debug(5, "rmscsi(probing): \"%s\" is not a SCSI disk drive\n",
		    rp);
		debug(5,
		    "rmscsi(probing): (disk type %d expected, %d found)\n",
		    DKC_SCSI_CCS, dkc.dki_ctype);
		goto dun;
	}

	/*
	 * if we stop here, we'll end up trying to manage hard disks that
	 * are found, since they also return DKC_SCSI_CCS,
	 * so do a SCSI inquiry here, checking for "removability"
	 */

	/* build command */
	(void) memset(&inquiry, 0, sizeof (inquiry));
	(void) memset(&ucmd, 0, sizeof (ucmd));
	(void) memset(&cdb, 0, sizeof (cdb));
	cdb.scc_cmd = SCMD_INQUIRY;
	cdb.g0_count0 = sizeof (inquiry);
	ucmd.uscsi_cdb = (caddr_t)&cdb;
	ucmd.uscsi_cdblen = CDB_GROUP0;
	ucmd.uscsi_bufaddr = (caddr_t)&inquiry;
	ucmd.uscsi_buflen = sizeof (inquiry);
	ucmd.uscsi_flags = USCSI_READ | USCSI_ISOLATE | USCSI_DIAGNOSE;
	ucmd.uscsi_timeout = 15;

	/* do the SCSI inquiry */
	if (ioctl(fd, USCSICMD, &ucmd) != 0) {
		debug(5,
		    "rmscsi(probing): \"%s\" USCSICMD inquiry failed; %m\n",
		    rp);
		goto dun;
	}

	if (!inquiry.inq_rmb) {
		debug(5, "rmscsi(probing): SCSI device \"%s\" not removable\n",
		    rp);
		goto dun;
	}

	res = TRUE;
#ifdef	DEBUG
	debug(3, "rmscsi(probing): found removable scsi drive at \"%s\"\n",
	    rp);
#endif

dun:
	/* all done */
	if (fd >= 0) {
		(void) close(fd);
	}
	if (rp != NULL) {
		free(rp);
	}
	return (res);
}


#if defined(_FIRMWARE_NEEDS_FDISK)

/*
 * serious hackery -- attempt to open the p? interfaces of the specified
 *		device -- just to keep users from getting around volmgt
 *		(e.g. "eject /dev/dsk/c0t6d0p0" -- oops)
 *
 *		If this fails, just ignore it.
 *
 *		The supplied params path1 and path2 will be the block
 *		and char prototype paths (but not necessarily in that
 *		order).
 *
 *		Every rs_raw_pfd[] entry is set: to the new descriptor
 *		for each p device that could be opened, and to -1 for
 *		the rest.  The descriptors are marked close-on-exec,
 *		like every other descriptor this file opens.
 */
static void
rmscsi_open_exclusive(struct rmscsi_priv *rsp, char *path1, char *path2)
{
	char	namebuf[MAXNAMELEN];
	int	i;
	char	*raw_proto;			/* for the "rdsk" path */


	/* initialize all of the fds */
	for (i = 0; i < (FD_NUMPART+1); i++) {
		rsp->rs_raw_pfd[i] = -1;
	}

	/* find out which one is the raw path prototype */
	if (strstr(path1, "rdsk")) {
		raw_proto = path1;
	} else if (strstr(path1, "dsk")) {
		raw_proto = path2;
	} else {
		return;
	}

	/* "p", the partition number and the NUL must fit after the name */
	if (strlen(raw_proto) + 13 > sizeof (namebuf)) {
		return;
	}

	/* (attempt to) open each p device */
	for (i = 0; i < (FD_NUMPART+1); i++) {
		/* do the raw device */
		(void) sprintf(namebuf, RMSCSI_NAMEPROTO_P, raw_proto, i);
		rsp->rs_raw_pfd[i] = open(namebuf, O_RDONLY|O_EXCL|O_NDELAY);
		if (rsp->rs_raw_pfd[i] >= 0) {
			/* close-on-exec */
			(void) fcntl(rsp->rs_raw_pfd[i], F_SETFD, 1);
		}
#ifdef	DEBUG
		debug(6, "rmscsi_open_exclusive: open(\"%s\") -> %d\n",
		    namebuf, rsp->rs_raw_pfd[i]);
#endif
	}

}

#endif	/* _FIRMWARE_NEEDS_FDISK */
