Sophie

Sophie

distrib > Scientific%20Linux > 5x > x86_64 > by-pkgid > fc11cd6e1c513a17304da94a5390f3cd > files > 3069

kernel-2.6.18-194.11.1.el5.src.rpm

From: Hans-Joachim Picht <hpicht@redhat.com>
Date: Tue, 22 Sep 2009 22:20:13 +0200
Subject: [s390] AF_IUCV SOCK_SEQPACKET support
Message-id: 20090922202013.GF32244@blc4eb509856389.ibm.com
O-Subject: [RHEL5 U5 PATCH 1/1] FEAT: s390 - AF_IUCV SOCK_SEQPACKET support
Bugzilla: 512006
RH-Acked-by: Pete Zaitcev <zaitcev@redhat.com>

Description
===========

Offer AF_IUCV connection-oriented datagram sockets in addition to the
existing AF_IUCV byte stream-oriented sockets.

IUCV is an important z/VM communication facility. The Linux AF-IUCV protocol
support facilitates exploitation of IUCV-communication through a socket
interface.

Applications can benefit from the new SOCK_SEQPACKET socket type if their
socket data to be transferred must not be fragmented.
Further, the new socket type facilitates applications that use AF_IUCV to
communicate to other applications using native IUCV functions.

One such future application in z/VSE is already identified. In general,
this feature improves the close collaboration between Linux on System z
and z/VM in the networking area.

Bugzilla
========

BZ 512006
https://bugzilla.redhat.com/show_bug.cgi?id=512006

Upstream status of the patch
============================

The feature is upstream as of kernel version 2.6.31

The feature consists of the following commits in linux-2.6.git:

0ea920d af_iucv: Return -EAGAIN if iucv msg limit is exceeded
bb664f4 af_iucv: Change if condition in sendmsg() for more readability
09488e2 af_iucv: New socket option for setting IUCV MSGLIMITs
802788b af_iucv: cleanup and refactor recvmsg() EFAULT handling
aa8e71f af_iucv: Provide new socket type SOCK_SEQPACKET
44b1e6b af_iucv: Modify iucv msg target class using control msghdr
b8942e3 af_iucv: Support data in IUCV msg parameter lists (IPRMDATA)
9d5c5d8 af_iucv: add sockopt() to enable/disable use of IPRM_DATA msgs
af88b52 af_iucv: sync sk shutdown flag if iucv path is quiesced
e14ad5f af_iucv: Test additional sk states in iucv_sock_shutdown
fe86e54 af_iucv: Reject incoming msgs if RECV_SHUTDOWN is set
bbe188c af_iucv: consider state IUCV_CLOSING when closing a socket

Test status
===========

The patch has been tested and fixes the problem.
The fix has been verified by the IBM test department.

Please ACK.

With best regards,

	Hans

diff --git a/include/linux/socket.h b/include/linux/socket.h
index 9a4cfb0..b30409f 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -283,6 +283,7 @@ struct ucred {
 #define SOL_DCCP	269
 #define SOL_NETLINK	270
 #define SOL_TIPC	271
+#define SOL_IUCV	277
 
 /* IPX options */
 #define IPX_TYPE	1
diff --git a/include/net/iucv/af_iucv.h b/include/net/iucv/af_iucv.h
index 85f80ea..f82a1e8 100644
--- a/include/net/iucv/af_iucv.h
+++ b/include/net/iucv/af_iucv.h
@@ -73,8 +73,17 @@ struct iucv_sock {
 	struct sk_buff_head	backlog_skb_q;
 	struct sock_msg_q	message_q;
 	unsigned int		send_tag;
+	u8			flags;
+	u16			msglimit;
 };
 
+/* iucv socket options (SOL_IUCV) */
+#define SO_IPRMDATA_MSG	0x0080		/* send/recv IPRM_DATA msgs */
+#define SO_MSGLIMIT	0x1000		/* get/set IUCV MSGLIMIT */
+
+/* iucv related control messages (scm) */
+#define SCM_IUCV_TRGCLS	0x0001		/* target class control message */
+
 struct iucv_sock_list {
 	struct hlist_head head;
 	rwlock_t	  lock;
@@ -85,8 +94,6 @@ unsigned int iucv_sock_poll(struct file *file, struct socket *sock,
 			    poll_table *wait);
 void iucv_sock_link(struct iucv_sock_list *l, struct sock *s);
 void iucv_sock_unlink(struct iucv_sock_list *l, struct sock *s);
-int  iucv_sock_wait_state(struct sock *sk, int state, int state2,
-			  unsigned long timeo);
 int  iucv_sock_wait_cnt(struct sock *sk, unsigned long timeo);
 void iucv_accept_enqueue(struct sock *parent, struct sock *sk);
 void iucv_accept_unlink(struct sock *sk);
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 18a809b..1a97c7a 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -3,7 +3,7 @@
  *
  *  IUCV protocol stack for Linux on zSeries
  *
- *  Copyright 2006 IBM Corporation
+ *  Copyright IBM Corp. 2006, 2009
  *
  *  Author(s):	Jennifer Hunt <jenhunt@us.ibm.com>
  */
@@ -28,8 +28,7 @@
 
 #define CONFIG_IUCV_SOCK_DEBUG 1
 
-#define IPRMDATA 0x80
-#define VERSION "1.0"
+#define VERSION "1.1"
 
 static char iucv_userid[80];
 
@@ -41,6 +40,51 @@ static struct proto iucv_proto = {
 	.obj_size	= sizeof(struct iucv_sock),
 };
 
+/* special AF_IUCV IPRM messages */
+static const u8 iprm_shutdown[8] =
+	{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01};
+
+#define TRGCLS_SIZE	(sizeof(((struct iucv_message *)0)->class))
+
+/* macros to set/get socket control buffer at correct offset */
+#define CB_TAG(skb)	((skb)->cb)		/* iucv message tag */
+#define CB_TAG_LEN	(sizeof(((struct iucv_message *) 0)->tag))
+#define CB_TRGCLS(skb)	((skb)->cb + CB_TAG_LEN) /* iucv msg target class */
+#define CB_TRGCLS_LEN	(TRGCLS_SIZE)
+
+#define __iucv_sock_wait(sk, condition, timeo, ret)			\
+do {									\
+	DEFINE_WAIT(__wait);						\
+	long __timeo = timeo;						\
+	ret = 0;							\
+	prepare_to_wait(sk->sk_sleep, &__wait, TASK_INTERRUPTIBLE);	\
+	while (!(condition)) {						\
+		if (!__timeo) {						\
+			ret = -EAGAIN;					\
+			break;						\
+		}							\
+		if (signal_pending(current)) {				\
+			ret = sock_intr_errno(__timeo);			\
+			break;						\
+		}							\
+		release_sock(sk);					\
+		__timeo = schedule_timeout(__timeo);			\
+		lock_sock(sk);						\
+		ret = sock_error(sk);					\
+		if (ret)						\
+			break;						\
+	}								\
+	finish_wait(sk->sk_sleep, &__wait);				\
+} while (0)
+
+#define iucv_sock_wait(sk, condition, timeo)				\
+({									\
+	int __ret = 0;							\
+	if (!(condition))						\
+		__iucv_sock_wait(sk, condition, timeo, __ret);		\
+	__ret;								\
+})
+
 static void iucv_sock_kill(struct sock *sk);
 static void iucv_sock_close(struct sock *sk);
 
@@ -51,6 +95,7 @@ static void iucv_callback_connack(struct iucv_path *, u8 ipuser[16]);
 static int iucv_callback_connreq(struct iucv_path *, u8 ipvmid[8],
 				 u8 ipuser[16]);
 static void iucv_callback_connrej(struct iucv_path *, u8 ipuser[16]);
+static void iucv_callback_shutdown(struct iucv_path *, u8 ipuser[16]);
 
 static struct iucv_sock_list iucv_sk_list = {
 	.lock = RW_LOCK_UNLOCKED,
@@ -62,7 +107,8 @@ static struct iucv_handler af_iucv_handler = {
 	.path_complete	  = iucv_callback_connack,
 	.path_severed	  = iucv_callback_connrej,
 	.message_pending  = iucv_callback_rx,
-	.message_complete = iucv_callback_txdone
+	.message_complete = iucv_callback_txdone,
+	.path_quiesced	  = iucv_callback_shutdown,
 };
 
 static inline void high_nmcpy(unsigned char *dst, char *src)
@@ -75,6 +121,79 @@ static inline void low_nmcpy(unsigned char *dst, char *src)
        memcpy(&dst[8], src, 8);
 }
 
+/**
+ * iucv_msg_length() - Returns the length of an iucv message.
+ * @msg:	Pointer to struct iucv_message, MUST NOT be NULL
+ *
+ * The function returns the length of the specified iucv message @msg of data
+ * stored in a buffer and of data stored in the parameter list (PRMDATA).
+ *
+ * For IUCV_IPRMDATA, AF_IUCV uses the following convention to transport socket
+ * data:
+ *	PRMDATA[0..6]	socket data (max 7 bytes);
+ *	PRMDATA[7]	socket data length value (len is 0xff - PRMDATA[7])
+ *
+ * The socket data length is computed by substracting the socket data length
+ * value from 0xFF.
+ * If the socket data len is greater 7, then PRMDATA can be used for special
+ * notifications (see iucv_sock_shutdown); and further,
+ * if the socket data len is > 7, the function returns 8.
+ *
+ * Use this function to allocate socket buffers to store iucv message data.
+ */
+static inline size_t iucv_msg_length(struct iucv_message *msg)
+{
+	size_t datalen;
+
+	if (msg->flags & IUCV_IPRMDATA) {
+		datalen = 0xff - msg->rmmsg[7];
+		return (datalen < 8) ? datalen : 8;
+	}
+	return msg->length;
+}
+
+/**
+ * iucv_sock_in_state() - check for specific states
+ * @sk:		sock structure
+ * @state:	first iucv sk state
+ * @state:	second iucv sk state
+ *
+ * Returns true if the socket in either in the first or second state.
+ */
+static int iucv_sock_in_state(struct sock *sk, int state, int state2)
+{
+	return (sk->sk_state == state || sk->sk_state == state2);
+}
+
+/**
+ * iucv_below_msglim() - function to check if messages can be sent
+ * @sk:		sock structure
+ *
+ * Returns true if the send queue length is lower than the message limit.
+ * Always returns true if the socket is not connected (no iucv path for
+ * checking the message limit).
+ */
+static inline int iucv_below_msglim(struct sock *sk)
+{
+	struct iucv_sock *iucv = iucv_sk(sk);
+
+	if (sk->sk_state != IUCV_CONNECTED)
+		return 1;
+	return (skb_queue_len(&iucv->send_skb_q) < iucv->path->msglim);
+}
+
+/**
+ * iucv_sock_wake_msglim() - Wake up thread waiting on msg limit
+ */
+static void iucv_sock_wake_msglim(struct sock *sk)
+{
+	read_lock(&sk->sk_callback_lock);
+	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
+		wake_up_interruptible_all(sk->sk_sleep);
+	sk_wake_async(sk, 2, POLL_OUT);
+	read_unlock(&sk->sk_callback_lock);
+}
+
 /* Timers */
 static void iucv_sock_timeout(unsigned long arg)
 {
@@ -173,9 +292,12 @@ static void iucv_sock_close(struct sock *sk)
 				timeo = sk->sk_lingertime;
 			else
 				timeo = IUCV_DISCONN_TIMEOUT;
-			err = iucv_sock_wait_state(sk, IUCV_CLOSED, 0, timeo);
+			err = iucv_sock_wait(sk,
+					     iucv_sock_in_state(sk, IUCV_CLOSED, 0),
+					     timeo);
 		}
 
+	case IUCV_CLOSING:   /* fall through */
 		sk->sk_state = IUCV_CLOSED;
 		sk->sk_state_change(sk);
 
@@ -228,6 +350,10 @@ static struct sock *iucv_sock_alloc(struct socket *sock, int proto, gfp_t prio)
 	spin_lock_init(&iucv_sk(sk)->message_q.lock);
 	skb_queue_head_init(&iucv_sk(sk)->backlog_skb_q);
 	iucv_sk(sk)->send_tag = 0;
+	iucv_sk(sk)->flags = 0;
+	iucv_sk(sk)->msglimit = IUCV_QUEUELEN_DEFAULT;
+	iucv_sk(sk)->path = NULL;
+	memset(&iucv_sk(sk)->src_user_id , 0, 32);
 
 	sk->sk_destruct = iucv_sock_destruct;
 	sk->sk_sndtimeo = IUCV_CONN_TIMEOUT;
@@ -249,11 +375,22 @@ static int iucv_sock_create(struct socket *sock, int protocol)
 {
 	struct sock *sk;
 
-	if (sock->type != SOCK_STREAM)
-		return -ESOCKTNOSUPPORT;
+	if (protocol && protocol != PF_IUCV)
+		return -EPROTONOSUPPORT;
 
 	sock->state = SS_UNCONNECTED;
-	sock->ops = &iucv_sock_ops;
+
+	switch (sock->type) {
+	case SOCK_STREAM:
+		sock->ops = &iucv_sock_ops;
+		break;
+	case SOCK_SEQPACKET:
+		/* currently, proto ops can handle both sk types */
+		sock->ops = &iucv_sock_ops;
+		break;
+	default:
+		return -ESOCKTNOSUPPORT;
+	}
 
 	sk = iucv_sock_alloc(sock, protocol, GFP_KERNEL);
 	if (!sk)
@@ -338,38 +475,6 @@ struct sock *iucv_accept_dequeue(struct sock *parent, struct socket *newsock)
 	return NULL;
 }
 
-int iucv_sock_wait_state(struct sock *sk, int state, int state2,
-			 unsigned long timeo)
-{
-	DECLARE_WAITQUEUE(wait, current);
-	int err = 0;
-
-	add_wait_queue(sk->sk_sleep, &wait);
-	while (sk->sk_state != state && sk->sk_state != state2) {
-		set_current_state(TASK_INTERRUPTIBLE);
-
-		if (!timeo) {
-			err = -EAGAIN;
-			break;
-		}
-
-		if (signal_pending(current)) {
-			err = sock_intr_errno(timeo);
-			break;
-		}
-
-		release_sock(sk);
-		timeo = schedule_timeout(timeo);
-		lock_sock(sk);
-
-		err = sock_error(sk);
-		if (err)
-			break;
-	}
-	set_current_state(TASK_RUNNING);
-	remove_wait_queue(sk->sk_sleep, &wait);
-	return err;
-}
 
 /* Bind an unbound socket */
 static int iucv_sock_bind(struct socket *sock, struct sockaddr *addr,
@@ -464,11 +569,9 @@ static int iucv_sock_connect(struct socket *sock, struct sockaddr *addr,
 	if (sk->sk_state != IUCV_OPEN && sk->sk_state != IUCV_BOUND)
 		return -EBADFD;
 
-	if (sk->sk_type != SOCK_STREAM)
+	if (sk->sk_type != SOCK_STREAM && sk->sk_type != SOCK_SEQPACKET)
 		return -EINVAL;
 
-	iucv = iucv_sk(sk);
-
 	if (sk->sk_state == IUCV_OPEN) {
 		err = iucv_sock_autobind(sk);
 		if (unlikely(err))
@@ -487,8 +590,8 @@ static int iucv_sock_connect(struct socket *sock, struct sockaddr *addr,
 
 	iucv = iucv_sk(sk);
 	/* Create path. */
-	iucv->path = iucv_path_alloc(IUCV_QUEUELEN_DEFAULT,
-				     IPRMDATA, GFP_KERNEL);
+	iucv->path = iucv_path_alloc(iucv->msglimit,
+				     IUCV_IPRMDATA, GFP_KERNEL);
 	if (!iucv->path) {
 		err = -ENOMEM;
 		goto done;
@@ -517,13 +620,13 @@ static int iucv_sock_connect(struct socket *sock, struct sockaddr *addr,
 	}
 
 	if (sk->sk_state != IUCV_CONNECTED) {
-		err = iucv_sock_wait_state(sk, IUCV_CONNECTED, IUCV_DISCONN,
-				sock_sndtimeo(sk, flags & O_NONBLOCK));
+		err = iucv_sock_wait(sk, iucv_sock_in_state(sk, IUCV_CONNECTED,
+							    IUCV_DISCONN),
+				     sock_sndtimeo(sk, flags & O_NONBLOCK));
 	}
 
 	if (sk->sk_state == IUCV_DISCONN) {
-		release_sock(sk);
-		return -ECONNREFUSED;
+		err = -ECONNREFUSED;
 	}
 
 	if (err) {
@@ -546,7 +649,10 @@ static int iucv_sock_listen(struct socket *sock, int backlog)
 	lock_sock(sk);
 
 	err = -EINVAL;
-	if (sk->sk_state != IUCV_BOUND || sock->type != SOCK_STREAM)
+	if (sk->sk_state != IUCV_BOUND)
+		goto done;
+
+	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
 		goto done;
 
 	sk->sk_max_ack_backlog = backlog;
@@ -637,6 +743,30 @@ static int iucv_sock_getname(struct socket *sock, struct sockaddr *addr,
 	return 0;
 }
 
+/**
+ * iucv_send_iprm() - Send socket data in parameter list of an iucv message.
+ * @path:	IUCV path
+ * @msg:	Pointer to a struct iucv_message
+ * @skb:	The socket data to send, skb->len MUST BE <= 7
+ *
+ * Send the socket data in the parameter list in the iucv message
+ * (IUCV_IPRMDATA). The socket data is stored at index 0 to 6 in the parameter
+ * list and the socket data len at index 7 (last byte).
+ * See also iucv_msg_length().
+ *
+ * Returns the error code from the iucv_message_send() call.
+ */
+static int iucv_send_iprm(struct iucv_path *path, struct iucv_message *msg,
+			  struct sk_buff *skb)
+{
+	u8 prmdata[8];
+
+	memcpy(prmdata, (void *) skb->data, skb->len);
+	prmdata[7] = 0xff - (u8) skb->len;
+	return iucv_message_send(path, msg, IUCV_IPRMDATA, 0,
+				 (void *) prmdata, 8);
+}
+
 static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
 			     struct msghdr *msg, size_t len)
 {
@@ -644,7 +774,11 @@ static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
 	struct iucv_sock *iucv = iucv_sk(sk);
 	struct sk_buff *skb;
 	struct iucv_message txmsg;
+	struct cmsghdr *cmsg;
+	int cmsg_done;
+	long timeo;
 	int err;
+	int noblock = msg->msg_flags & MSG_DONTWAIT;
 
 	err = sock_error(sk);
 	if (err)
@@ -653,6 +787,10 @@ static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
 	if (msg->msg_flags & MSG_OOB)
 		return -EOPNOTSUPP;
 
+	/* SOCK_SEQPACKET: we do not support segmented records */
+	if (sk->sk_type == SOCK_SEQPACKET && !(msg->msg_flags & MSG_EOR))
+		return -EOPNOTSUPP;
+
 	lock_sock(sk);
 
 	if (sk->sk_shutdown & SEND_SHUTDOWN) {
@@ -660,34 +798,112 @@ static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
 		goto out;
 	}
 
-	if (sk->sk_state == IUCV_CONNECTED) {
-		if (!(skb = sock_alloc_send_skb(sk, len,
-						msg->msg_flags & MSG_DONTWAIT,
-						&err)))
+	/* Return if the socket is not in connected state */
+	if (sk->sk_state != IUCV_CONNECTED) {
+		err = -ENOTCONN;
+		goto out;
+	}
+
+	/* initialize defaults */
+	cmsg_done   = 0;	/* check for duplicate headers */
+	txmsg.class = 0;
+
+	/* iterate over control messages */
+	for (cmsg = CMSG_FIRSTHDR(msg); cmsg;
+		cmsg = CMSG_NXTHDR(msg, cmsg)) {
+
+		if (!CMSG_OK(msg, cmsg)) {
+			err = -EINVAL;
 			goto out;
+		}
 
-		if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) {
-			err = -EFAULT;
-			goto fail;
+		if (cmsg->cmsg_level != SOL_IUCV)
+			continue;
+
+		if (cmsg->cmsg_type & cmsg_done) {
+			err = -EINVAL;
+			goto out;
 		}
+		cmsg_done |= cmsg->cmsg_type;
 
-		txmsg.class = 0;
-		txmsg.tag = iucv->send_tag++;
-		memcpy(skb->cb, &txmsg.tag, 4);
-		skb_queue_tail(&iucv->send_skb_q, skb);
-		err = iucv_message_send(iucv->path, &txmsg, 0, 0,
-					(void *) skb->data, skb->len);
-		if (err) {
-			if (err == 3)
-				printk(KERN_ERR "AF_IUCV msg limit exceeded\n");
+		switch (cmsg->cmsg_type) {
+		case SCM_IUCV_TRGCLS:
+			if (cmsg->cmsg_len != CMSG_LEN(TRGCLS_SIZE)) {
+				err = -EINVAL;
+				goto out;
+			}
+
+			/* set iucv message target class */
+			memcpy(&txmsg.class,
+				(void *) CMSG_DATA(cmsg), TRGCLS_SIZE);
+
+			break;
+
+		default:
+			err = -EINVAL;
+			goto out;
+		}
+	}
+
+	/* allocate one skb for each iucv message:
+	 * this is fine for SOCK_SEQPACKET (unless we want to support
+	 * segmented records using the MSG_EOR flag), but
+	 * for SOCK_STREAM we might want to improve it in future */
+	skb = sock_alloc_send_skb(sk, len, noblock, &err);
+	if (!skb)
+		goto out;
+	if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) {
+		err = -EFAULT;
+		goto fail;
+	}
+
+	/* wait if outstanding messages for iucv path has reached */
+	timeo = sock_sndtimeo(sk, noblock);
+	err = iucv_sock_wait(sk, iucv_below_msglim(sk), timeo);
+	if (err)
+		goto fail;
+
+	/* return -ECONNRESET if the socket is no longer connected */
+	if (sk->sk_state != IUCV_CONNECTED) {
+		err = -ECONNRESET;
+		goto fail;
+	}
+
+	/* increment and save iucv message tag for msg_completion cbk */
+	txmsg.tag = iucv->send_tag++;
+	memcpy(CB_TAG(skb), &txmsg.tag, CB_TAG_LEN);
+	skb_queue_tail(&iucv->send_skb_q, skb);
+
+	if (((iucv->path->flags & IUCV_IPRMDATA) & iucv->flags)
+	      && skb->len <= 7) {
+		err = iucv_send_iprm(iucv->path, &txmsg, skb);
+
+		/* on success: there is no message_complete callback
+		 * for an IPRMDATA msg; remove skb from send queue */
+		if (err == 0) {
+			skb_unlink(skb, &iucv->send_skb_q);
+			kfree_skb(skb);
+		}
+
+		/* this error should never happen since the
+		 * IUCV_IPRMDATA path flag is set... sever path */
+		if (err == 0x15) {
+			iucv_path_sever(iucv->path, NULL);
 			skb_unlink(skb, &iucv->send_skb_q);
 			err = -EPIPE;
 			goto fail;
 		}
-
-	} else {
-		err = -ENOTCONN;
-		goto out;
+	} else
+		err = iucv_message_send(iucv->path, &txmsg, 0, 0,
+					(void *) skb->data, skb->len);
+	if (err) {
+		if (err == 3) {
+			printk(KERN_ERR "AF_IUCV msg limit exceeded\n");
+			err = -EAGAIN;
+		} else
+			err = -EPIPE;
+		skb_unlink(skb, &iucv->send_skb_q);
+		goto fail;
 	}
 
 	release_sock(sk);
@@ -716,6 +932,10 @@ static int iucv_fragment_skb(struct sock *sk, struct sk_buff *skb, int len)
 		if (!nskb)
 			return -ENOMEM;
 
+		/* copy target class to control buffer of new skb */
+		memcpy(CB_TRGCLS(nskb), CB_TRGCLS(skb), CB_TRGCLS_LEN);
+
+		/* copy data fragment */
 		memcpy(nskb->data, skb->data + copied, size);
 		copied += size;
 		dataleft -= size;
@@ -735,19 +955,33 @@ static void iucv_process_message(struct sock *sk, struct sk_buff *skb,
 				 struct iucv_message *msg)
 {
 	int rc;
+	unsigned int len;
+
+	len = iucv_msg_length(msg);
 
-	if (msg->flags & IPRMDATA) {
-		skb->data = NULL;
-		skb->len = 0;
+	/* store msg target class in the second 4 bytes of skb ctrl buffer */
+	/* Note: the first 4 bytes are reserved for msg tag */
+	memcpy(CB_TRGCLS(skb), &msg->class, CB_TRGCLS_LEN);
+
+	/* check for special IPRM messages (e.g. iucv_sock_shutdown) */
+	if ((msg->flags & IUCV_IPRMDATA) && len > 7) {
+		if (memcmp(msg->rmmsg, iprm_shutdown, 8) == 0) {
+			skb->data = NULL;
+			skb->len = 0;
+		}
 	} else {
-		rc = iucv_message_receive(path, msg, 0, skb->data,
-					  msg->length, NULL);
+		rc = iucv_message_receive(path, msg, msg->flags & IUCV_IPRMDATA,
+					  skb->data, len, NULL);
 		if (rc) {
 			kfree_skb(skb);
 			return;
 		}
-		if (skb->truesize >= sk->sk_rcvbuf / 4) {
-			rc = iucv_fragment_skb(sk, skb, msg->length);
+		/* we need to fragment iucv messages for SOCK_STREAM only;
+		 * for SOCK_SEQPACKET, it is only relevant if we support
+		 * record segmentation using MSG_EOR (see also recvmsg()) */
+		if (sk->sk_type == SOCK_STREAM &&
+		    skb->truesize >= sk->sk_rcvbuf / 4) {
+			rc = iucv_fragment_skb(sk, skb, len);
 			kfree_skb(skb);
 			skb = NULL;
 			if (rc) {
@@ -758,7 +992,7 @@ static void iucv_process_message(struct sock *sk, struct sk_buff *skb,
 		} else {
 			skb->h.raw = skb->data;
 			skb->nh.raw = skb->data;
-			skb->len = msg->length;
+			skb->len = len;
 		}
 	}
 
@@ -773,7 +1007,7 @@ static void iucv_process_message_q(struct sock *sk)
 	struct sock_msg_q *p, *n;
 
 	list_for_each_entry_safe(p, n, &iucv->message_q.list, list) {
-		skb = alloc_skb(p->msg.length, GFP_ATOMIC | GFP_DMA);
+		skb = alloc_skb(iucv_msg_length(&p->msg), GFP_ATOMIC | GFP_DMA);
 		if (!skb)
 			break;
 		iucv_process_message(sk, skb, p->path, &p->msg);
@@ -790,7 +1024,7 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
 	int noblock = flags & MSG_DONTWAIT;
 	struct sock *sk = sock->sk;
 	struct iucv_sock *iucv = iucv_sk(sk);
-	int target, copied = 0;
+	unsigned int copied, rlen;
 	struct sk_buff *skb, *rskb, *cskb;
 	int err = 0;
 
@@ -803,8 +1037,6 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
 	if (flags & (MSG_OOB))
 		return -EOPNOTSUPP;
 
-	target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
-
 	/* receive/dequeue next skb:
 	 * the function understands MSG_PEEK and, thus, does not dequeue skb */
 	skb = skb_recv_datagram(sk, flags, noblock, &err);
@@ -814,25 +1046,45 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
 		return err;
 	}
 
-	copied = min_t(unsigned int, skb->len, len);
+	rlen   = skb->len;		/* real length of skb */
+	copied = min_t(unsigned int, rlen, len);
 
 	cskb = skb;
 	if (memcpy_toiovec(msg->msg_iov, cskb->data, copied)) {
-		skb_queue_head(&sk->sk_receive_queue, skb);
-		if (copied == 0)
-			return -EFAULT;
-		goto done;
+		if (!(flags & MSG_PEEK))
+			skb_queue_head(&sk->sk_receive_queue, skb);
+		return -EFAULT;
 	}
 
-	len -= copied;
+	/* SOCK_SEQPACKET: set MSG_TRUNC if recv buf size is too small */
+	if (sk->sk_type == SOCK_SEQPACKET) {
+		if (copied < rlen)
+			msg->msg_flags |= MSG_TRUNC;
+		/* each iucv message contains a complete record */
+		msg->msg_flags |= MSG_EOR;
+	}
+
+	/* create control message to store iucv msg target class:
+	 * get the trgcls from the control buffer of the skb due to
+	 * fragmentation of original iucv message. */
+	err = put_cmsg(msg, SOL_IUCV, SCM_IUCV_TRGCLS,
+			CB_TRGCLS_LEN, CB_TRGCLS(skb));
+	if (err) {
+		if (!(flags & MSG_PEEK))
+			skb_queue_head(&sk->sk_receive_queue, skb);
+		return err;
+	}
 
 	/* Mark read part of skb as used */
 	if (!(flags & MSG_PEEK)) {
-		skb_pull(skb, copied);
 
-		if (skb->len) {
-			skb_queue_head(&sk->sk_receive_queue, skb);
-			goto done;
+		/* SOCK_STREAM: re-queue skb if it contains unreceived data */
+		if (sk->sk_type == SOCK_STREAM) {
+			skb_pull(skb, copied);
+			if (skb->len) {
+				skb_queue_head(&sk->sk_receive_queue, skb);
+				goto done;
+			}
 		}
 
 		kfree_skb(skb);
@@ -857,7 +1109,11 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
 	}
 
 done:
-	return err ? : copied;
+	/* SOCK_SEQPACKET: return real length if MSG_TRUNC is set */
+	if (sk->sk_type == SOCK_SEQPACKET && (flags & MSG_TRUNC))
+		copied = rlen;
+
+	return copied;
 }
 
 static inline unsigned int iucv_accept_poll(struct sock *parent)
@@ -919,7 +1175,6 @@ static int iucv_sock_shutdown(struct socket *sock, int how)
 	struct iucv_sock *iucv = iucv_sk(sk);
 	struct iucv_message txmsg;
 	int err = 0;
-	u8 prmmsg[8] = {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01};
 
 	how++;
 
@@ -928,6 +1183,9 @@ static int iucv_sock_shutdown(struct socket *sock, int how)
 
 	lock_sock(sk);
 	switch (sk->sk_state) {
+	case IUCV_DISCONN:
+	case IUCV_CLOSING:
+	case IUCV_SEVERED:
 	case IUCV_CLOSED:
 		err = -ENOTCONN;
 		goto fail;
@@ -941,7 +1199,7 @@ static int iucv_sock_shutdown(struct socket *sock, int how)
 		txmsg.class = 0;
 		txmsg.tag = 0;
 		err = iucv_message_send(iucv->path, &txmsg, IUCV_IPRMDATA, 0,
-					(void *) prmmsg, 8);
+					(void *) iprm_shutdown, 8);
 		if (err) {
 			switch (err) {
 			case 1:
@@ -995,6 +1253,98 @@ static int iucv_sock_release(struct socket *sock)
 	return err;
 }
 
+/* getsockopt and setsockopt */
+static int iucv_sock_setsockopt(struct socket *sock, int level, int optname,
+				char __user *optval, int optlen)
+{
+	struct sock *sk = sock->sk;
+	struct iucv_sock *iucv = iucv_sk(sk);
+	int val;
+	int rc;
+
+	if (level != SOL_IUCV)
+		return -ENOPROTOOPT;
+
+	if (optlen < sizeof(int))
+		return -EINVAL;
+
+	if (get_user(val, (int __user *) optval))
+		return -EFAULT;
+
+	rc = 0;
+
+	lock_sock(sk);
+	switch (optname) {
+	case SO_IPRMDATA_MSG:
+		if (val)
+			iucv->flags |= IUCV_IPRMDATA;
+		else
+			iucv->flags &= ~IUCV_IPRMDATA;
+		break;
+	case SO_MSGLIMIT:
+		switch (sk->sk_state) {
+		case IUCV_OPEN:
+		case IUCV_BOUND:
+			if (val < 1 || val > (u16)(~0))
+				rc = -EINVAL;
+			else
+				iucv->msglimit = val;
+			break;
+		default:
+			rc = -EINVAL;
+			break;
+		}
+		break;
+	default:
+		rc = -ENOPROTOOPT;
+		break;
+	}
+	release_sock(sk);
+
+	return rc;
+}
+
+static int iucv_sock_getsockopt(struct socket *sock, int level, int optname,
+				char __user *optval, int __user *optlen)
+{
+	struct sock *sk = sock->sk;
+	struct iucv_sock *iucv = iucv_sk(sk);
+	int val, len;
+
+	if (level != SOL_IUCV)
+		return -ENOPROTOOPT;
+
+	if (get_user(len, optlen))
+		return -EFAULT;
+
+	if (len < 0)
+		return -EINVAL;
+
+	len = min_t(unsigned int, len, sizeof(int));
+
+	switch (optname) {
+	case SO_IPRMDATA_MSG:
+		val = (iucv->flags & IUCV_IPRMDATA) ? 1 : 0;
+		break;
+	case SO_MSGLIMIT:
+		lock_sock(sk);
+		val = (iucv->path != NULL) ? iucv->path->msglim	/* connected */
+					   : iucv->msglimit;	/* default */
+		release_sock(sk);
+		break;
+	default:
+		return -ENOPROTOOPT;
+	}
+
+	if (put_user(len, optlen))
+		return -EFAULT;
+	if (copy_to_user(optval, &val, len))
+		return -EFAULT;
+
+	return 0;
+}
+
+
 /* Callback wrappers - called from iucv base support */
 static int iucv_callback_connreq(struct iucv_path *path,
 				 u8 ipvmid[8], u8 ipuser[16])
@@ -1048,7 +1398,7 @@ static int iucv_callback_connreq(struct iucv_path *path,
 	}
 
 	/* Create the new socket */
-	nsk = iucv_sock_alloc(NULL, SOCK_STREAM, GFP_ATOMIC);
+	nsk = iucv_sock_alloc(NULL, sk->sk_type, GFP_ATOMIC);
 	if (!nsk) {
 		err = iucv_path_sever(path, user_data);
 		iucv_path_free(path);
@@ -1071,7 +1421,9 @@ static int iucv_callback_connreq(struct iucv_path *path,
 	memcpy(nuser_data + 8, niucv->src_name, 8);
 	ASCEBC(nuser_data + 8, 8);
 
-	path->msglim = IUCV_QUEUELEN_DEFAULT;
+	/* set message limit for path based on msglimit of accepting socket */
+	niucv->msglimit = iucv->msglimit;
+	path->msglim = iucv->msglimit;
 	err = iucv_path_accept(path, &af_iucv_handler, nuser_data, nsk);
 	if (err) {
 		err = iucv_path_sever(path, user_data);
@@ -1107,8 +1459,10 @@ static void iucv_callback_rx(struct iucv_path *path, struct iucv_message *msg)
 	struct sock_msg_q *save_msg;
 	int len;
 
-	if (sk->sk_shutdown & RCV_SHUTDOWN)
+	if (sk->sk_shutdown & RCV_SHUTDOWN) {
+		iucv_message_reject(path, msg);
 		return;
+	}
 
 	spin_lock(&iucv->message_q.lock);
 
@@ -1117,11 +1471,11 @@ static void iucv_callback_rx(struct iucv_path *path, struct iucv_message *msg)
 		goto save_message;
 
 	len = atomic_read(&sk->sk_rmem_alloc);
-	len += msg->length + sizeof(struct sk_buff);
+	len += iucv_msg_length(msg) + sizeof(struct sk_buff);
 	if (len > sk->sk_rcvbuf)
 		goto save_message;
 
-	skb = alloc_skb(msg->length, GFP_ATOMIC | GFP_DMA);
+	skb = alloc_skb(iucv_msg_length(msg), GFP_ATOMIC | GFP_DMA);
 	if (!skb)
 		goto save_message;
 
@@ -1154,7 +1508,7 @@ static void iucv_callback_txdone(struct iucv_path *path,
 		spin_lock_irqsave(&list->lock, flags);
 
 		while (list_skb != (struct sk_buff *)list) {
-			if (!memcmp(&msg->tag, list_skb->cb, 4)) {
+			if (!memcmp(&msg->tag, CB_TAG(list_skb), CB_TAG_LEN)) {
 				this = list_skb;
 				break;
 			}
@@ -1165,8 +1519,11 @@ static void iucv_callback_txdone(struct iucv_path *path,
 
 		spin_unlock_irqrestore(&list->lock, flags);
 
-		if (this)
+		if (this) {
 			kfree_skb(this);
+			/* wake up any process waiting for sending */
+			iucv_sock_wake_msglim(sk);
+		}
 	}
 	if (!this)
 		printk(KERN_ERR "AF_IUCV msg tag %u not found\n", msg->tag);
@@ -1192,6 +1549,21 @@ static void iucv_callback_connrej(struct iucv_path *path, u8 ipuser[16])
 	sk->sk_state_change(sk);
 }
 
+/* called if the other communication side shuts down its RECV direction;
+ * in turn, the callback sets SEND_SHUTDOWN to disable sending of data.
+ */
+static void iucv_callback_shutdown(struct iucv_path *path, u8 ipuser[16])
+{
+	struct sock *sk = path->private;
+
+	bh_lock_sock(sk);
+	if (sk->sk_state != IUCV_CLOSED) {
+		sk->sk_shutdown |= SEND_SHUTDOWN;
+		sk->sk_state_change(sk);
+	}
+	bh_unlock_sock(sk);
+}
+
 static struct proto_ops iucv_sock_ops = {
 	.family		= PF_IUCV,
 	.owner		= THIS_MODULE,
@@ -1208,8 +1580,8 @@ static struct proto_ops iucv_sock_ops = {
 	.mmap		= sock_no_mmap,
 	.socketpair	= sock_no_socketpair,
 	.shutdown	= iucv_sock_shutdown,
-	.setsockopt	= sock_no_setsockopt,
-	.getsockopt	= sock_no_getsockopt
+	.setsockopt	= iucv_sock_setsockopt,
+	.getsockopt	= iucv_sock_getsockopt,
 };
 
 static struct net_proto_family iucv_sock_family_ops = {
diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c
index 569c6e4..b14e081 100644
--- a/net/iucv/iucv.c
+++ b/net/iucv/iucv.c
@@ -1511,6 +1511,8 @@ static void iucv_path_complete(struct iucv_irq_data *data)
 	struct iucv_path_complete *ipc = (void *) data;
 	struct iucv_path *path = iucv_path_table[ipc->ippathid];
 
+	if (path)
+		path->flags = ipc->ipflags1;
 	if (path && path->handler && path->handler->path_complete)
 		path->handler->path_complete(path, ipc->ipuser);
 }