From: Hans-Joachim Picht <hpicht@redhat.com> Date: Tue, 22 Sep 2009 22:20:13 +0200 Subject: [s390] AF_IUCV SOCK_SEQPACKET support Message-id: 20090922202013.GF32244@blc4eb509856389.ibm.com O-Subject: [RHEL5 U5 PATCH 1/1] FEAT: s390 - AF_IUCV SOCK_SEQPACKET support Bugzilla: 512006 RH-Acked-by: Pete Zaitcev <zaitcev@redhat.com> Description =========== Offer AF_IUCV connection-oriented datagram sockets in addition to the existing AF_IUCV byte stream-oriented sockets. IUCV is an important z/VM communication facility. The Linux AF-IUCV protocol support facilitates exploitation of IUCV-communication through a socket interface. Applications can benefit from the new SOCK_SEQPACKET socket type if their socket data to be transferred must not be fragmented. Further, the new socket type facilitates applications that use AF_IUCV to communicate to other applications using native IUCV functions. One such future application in z/VSE is already identified. In general, this feature improves the close collaboration between Linux on System z and z/VM in the networking area. Bugzilla ======== BZ 512006 https://bugzilla.redhat.com/show_bug.cgi?id=512006 Upstream status of the patch ============================ The feature is upstream as of kernel version 2.6.31 The feature consists of the following commits in linux-2.6.git: 0ea920d af_iucv: Return -EAGAIN if iucv msg limit is exceeded bb664f4 af_iucv: Change if condition in sendmsg() for more readability 09488e2 af_iucv: New socket option for setting IUCV MSGLIMITs 802788b af_iucv: cleanup and refactor recvmsg() EFAULT handling aa8e71f af_iucv: Provide new socket type SOCK_SEQPACKET 44b1e6b af_iucv: Modify iucv msg target class using control msghdr b8942e3 af_iucv: Support data in IUCV msg parameter lists (IPRMDATA) 9d5c5d8 af_iucv: add sockopt() to enable/disable use of IPRM_DATA msgs af88b52 af_iucv: sync sk shutdown flag if iucv path is quiesced e14ad5f af_iucv: Test additional sk states in iucv_sock_shutdown fe86e54 af_iucv: Reject incoming msgs if RECV_SHUTDOWN is set bbe188c af_iucv: consider state IUCV_CLOSING when closing a socket Test status =========== The patch has been tested and fixes the problem. The fix has been verified by the IBM test department. Please ACK. With best regards, Hans diff --git a/include/linux/socket.h b/include/linux/socket.h index 9a4cfb0..b30409f 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -283,6 +283,7 @@ struct ucred { #define SOL_DCCP 269 #define SOL_NETLINK 270 #define SOL_TIPC 271 +#define SOL_IUCV 277 /* IPX options */ #define IPX_TYPE 1 diff --git a/include/net/iucv/af_iucv.h b/include/net/iucv/af_iucv.h index 85f80ea..f82a1e8 100644 --- a/include/net/iucv/af_iucv.h +++ b/include/net/iucv/af_iucv.h @@ -73,8 +73,17 @@ struct iucv_sock { struct sk_buff_head backlog_skb_q; struct sock_msg_q message_q; unsigned int send_tag; + u8 flags; + u16 msglimit; }; +/* iucv socket options (SOL_IUCV) */ +#define SO_IPRMDATA_MSG 0x0080 /* send/recv IPRM_DATA msgs */ +#define SO_MSGLIMIT 0x1000 /* get/set IUCV MSGLIMIT */ + +/* iucv related control messages (scm) */ +#define SCM_IUCV_TRGCLS 0x0001 /* target class control message */ + struct iucv_sock_list { struct hlist_head head; rwlock_t lock; @@ -85,8 +94,6 @@ unsigned int iucv_sock_poll(struct file *file, struct socket *sock, poll_table *wait); void iucv_sock_link(struct iucv_sock_list *l, struct sock *s); void iucv_sock_unlink(struct iucv_sock_list *l, struct sock *s); -int iucv_sock_wait_state(struct sock *sk, int state, int state2, - unsigned long timeo); int iucv_sock_wait_cnt(struct sock *sk, unsigned long timeo); void iucv_accept_enqueue(struct sock *parent, struct sock *sk); void iucv_accept_unlink(struct sock *sk); diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 18a809b..1a97c7a 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -3,7 +3,7 @@ * * IUCV protocol stack for Linux on zSeries * - * Copyright 2006 IBM Corporation + * Copyright IBM Corp. 2006, 2009 * * Author(s): Jennifer Hunt <jenhunt@us.ibm.com> */ @@ -28,8 +28,7 @@ #define CONFIG_IUCV_SOCK_DEBUG 1 -#define IPRMDATA 0x80 -#define VERSION "1.0" +#define VERSION "1.1" static char iucv_userid[80]; @@ -41,6 +40,51 @@ static struct proto iucv_proto = { .obj_size = sizeof(struct iucv_sock), }; +/* special AF_IUCV IPRM messages */ +static const u8 iprm_shutdown[8] = + {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01}; + +#define TRGCLS_SIZE (sizeof(((struct iucv_message *)0)->class)) + +/* macros to set/get socket control buffer at correct offset */ +#define CB_TAG(skb) ((skb)->cb) /* iucv message tag */ +#define CB_TAG_LEN (sizeof(((struct iucv_message *) 0)->tag)) +#define CB_TRGCLS(skb) ((skb)->cb + CB_TAG_LEN) /* iucv msg target class */ +#define CB_TRGCLS_LEN (TRGCLS_SIZE) + +#define __iucv_sock_wait(sk, condition, timeo, ret) \ +do { \ + DEFINE_WAIT(__wait); \ + long __timeo = timeo; \ + ret = 0; \ + prepare_to_wait(sk->sk_sleep, &__wait, TASK_INTERRUPTIBLE); \ + while (!(condition)) { \ + if (!__timeo) { \ + ret = -EAGAIN; \ + break; \ + } \ + if (signal_pending(current)) { \ + ret = sock_intr_errno(__timeo); \ + break; \ + } \ + release_sock(sk); \ + __timeo = schedule_timeout(__timeo); \ + lock_sock(sk); \ + ret = sock_error(sk); \ + if (ret) \ + break; \ + } \ + finish_wait(sk->sk_sleep, &__wait); \ +} while (0) + +#define iucv_sock_wait(sk, condition, timeo) \ +({ \ + int __ret = 0; \ + if (!(condition)) \ + __iucv_sock_wait(sk, condition, timeo, __ret); \ + __ret; \ +}) + static void iucv_sock_kill(struct sock *sk); static void iucv_sock_close(struct sock *sk); @@ -51,6 +95,7 @@ static void iucv_callback_connack(struct iucv_path *, u8 ipuser[16]); static int iucv_callback_connreq(struct iucv_path *, u8 ipvmid[8], u8 ipuser[16]); static void iucv_callback_connrej(struct iucv_path *, u8 ipuser[16]); +static void iucv_callback_shutdown(struct iucv_path *, u8 ipuser[16]); static struct iucv_sock_list iucv_sk_list = { .lock = RW_LOCK_UNLOCKED, @@ -62,7 +107,8 @@ static struct iucv_handler af_iucv_handler = { .path_complete = iucv_callback_connack, .path_severed = iucv_callback_connrej, .message_pending = iucv_callback_rx, - .message_complete = iucv_callback_txdone + .message_complete = iucv_callback_txdone, + .path_quiesced = iucv_callback_shutdown, }; static inline void high_nmcpy(unsigned char *dst, char *src) @@ -75,6 +121,79 @@ static inline void low_nmcpy(unsigned char *dst, char *src) memcpy(&dst[8], src, 8); } +/** + * iucv_msg_length() - Returns the length of an iucv message. + * @msg: Pointer to struct iucv_message, MUST NOT be NULL + * + * The function returns the length of the specified iucv message @msg of data + * stored in a buffer and of data stored in the parameter list (PRMDATA). + * + * For IUCV_IPRMDATA, AF_IUCV uses the following convention to transport socket + * data: + * PRMDATA[0..6] socket data (max 7 bytes); + * PRMDATA[7] socket data length value (len is 0xff - PRMDATA[7]) + * + * The socket data length is computed by substracting the socket data length + * value from 0xFF. + * If the socket data len is greater 7, then PRMDATA can be used for special + * notifications (see iucv_sock_shutdown); and further, + * if the socket data len is > 7, the function returns 8. + * + * Use this function to allocate socket buffers to store iucv message data. + */ +static inline size_t iucv_msg_length(struct iucv_message *msg) +{ + size_t datalen; + + if (msg->flags & IUCV_IPRMDATA) { + datalen = 0xff - msg->rmmsg[7]; + return (datalen < 8) ? datalen : 8; + } + return msg->length; +} + +/** + * iucv_sock_in_state() - check for specific states + * @sk: sock structure + * @state: first iucv sk state + * @state: second iucv sk state + * + * Returns true if the socket in either in the first or second state. + */ +static int iucv_sock_in_state(struct sock *sk, int state, int state2) +{ + return (sk->sk_state == state || sk->sk_state == state2); +} + +/** + * iucv_below_msglim() - function to check if messages can be sent + * @sk: sock structure + * + * Returns true if the send queue length is lower than the message limit. + * Always returns true if the socket is not connected (no iucv path for + * checking the message limit). + */ +static inline int iucv_below_msglim(struct sock *sk) +{ + struct iucv_sock *iucv = iucv_sk(sk); + + if (sk->sk_state != IUCV_CONNECTED) + return 1; + return (skb_queue_len(&iucv->send_skb_q) < iucv->path->msglim); +} + +/** + * iucv_sock_wake_msglim() - Wake up thread waiting on msg limit + */ +static void iucv_sock_wake_msglim(struct sock *sk) +{ + read_lock(&sk->sk_callback_lock); + if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) + wake_up_interruptible_all(sk->sk_sleep); + sk_wake_async(sk, 2, POLL_OUT); + read_unlock(&sk->sk_callback_lock); +} + /* Timers */ static void iucv_sock_timeout(unsigned long arg) { @@ -173,9 +292,12 @@ static void iucv_sock_close(struct sock *sk) timeo = sk->sk_lingertime; else timeo = IUCV_DISCONN_TIMEOUT; - err = iucv_sock_wait_state(sk, IUCV_CLOSED, 0, timeo); + err = iucv_sock_wait(sk, + iucv_sock_in_state(sk, IUCV_CLOSED, 0), + timeo); } + case IUCV_CLOSING: /* fall through */ sk->sk_state = IUCV_CLOSED; sk->sk_state_change(sk); @@ -228,6 +350,10 @@ static struct sock *iucv_sock_alloc(struct socket *sock, int proto, gfp_t prio) spin_lock_init(&iucv_sk(sk)->message_q.lock); skb_queue_head_init(&iucv_sk(sk)->backlog_skb_q); iucv_sk(sk)->send_tag = 0; + iucv_sk(sk)->flags = 0; + iucv_sk(sk)->msglimit = IUCV_QUEUELEN_DEFAULT; + iucv_sk(sk)->path = NULL; + memset(&iucv_sk(sk)->src_user_id , 0, 32); sk->sk_destruct = iucv_sock_destruct; sk->sk_sndtimeo = IUCV_CONN_TIMEOUT; @@ -249,11 +375,22 @@ static int iucv_sock_create(struct socket *sock, int protocol) { struct sock *sk; - if (sock->type != SOCK_STREAM) - return -ESOCKTNOSUPPORT; + if (protocol && protocol != PF_IUCV) + return -EPROTONOSUPPORT; sock->state = SS_UNCONNECTED; - sock->ops = &iucv_sock_ops; + + switch (sock->type) { + case SOCK_STREAM: + sock->ops = &iucv_sock_ops; + break; + case SOCK_SEQPACKET: + /* currently, proto ops can handle both sk types */ + sock->ops = &iucv_sock_ops; + break; + default: + return -ESOCKTNOSUPPORT; + } sk = iucv_sock_alloc(sock, protocol, GFP_KERNEL); if (!sk) @@ -338,38 +475,6 @@ struct sock *iucv_accept_dequeue(struct sock *parent, struct socket *newsock) return NULL; } -int iucv_sock_wait_state(struct sock *sk, int state, int state2, - unsigned long timeo) -{ - DECLARE_WAITQUEUE(wait, current); - int err = 0; - - add_wait_queue(sk->sk_sleep, &wait); - while (sk->sk_state != state && sk->sk_state != state2) { - set_current_state(TASK_INTERRUPTIBLE); - - if (!timeo) { - err = -EAGAIN; - break; - } - - if (signal_pending(current)) { - err = sock_intr_errno(timeo); - break; - } - - release_sock(sk); - timeo = schedule_timeout(timeo); - lock_sock(sk); - - err = sock_error(sk); - if (err) - break; - } - set_current_state(TASK_RUNNING); - remove_wait_queue(sk->sk_sleep, &wait); - return err; -} /* Bind an unbound socket */ static int iucv_sock_bind(struct socket *sock, struct sockaddr *addr, @@ -464,11 +569,9 @@ static int iucv_sock_connect(struct socket *sock, struct sockaddr *addr, if (sk->sk_state != IUCV_OPEN && sk->sk_state != IUCV_BOUND) return -EBADFD; - if (sk->sk_type != SOCK_STREAM) + if (sk->sk_type != SOCK_STREAM && sk->sk_type != SOCK_SEQPACKET) return -EINVAL; - iucv = iucv_sk(sk); - if (sk->sk_state == IUCV_OPEN) { err = iucv_sock_autobind(sk); if (unlikely(err)) @@ -487,8 +590,8 @@ static int iucv_sock_connect(struct socket *sock, struct sockaddr *addr, iucv = iucv_sk(sk); /* Create path. */ - iucv->path = iucv_path_alloc(IUCV_QUEUELEN_DEFAULT, - IPRMDATA, GFP_KERNEL); + iucv->path = iucv_path_alloc(iucv->msglimit, + IUCV_IPRMDATA, GFP_KERNEL); if (!iucv->path) { err = -ENOMEM; goto done; @@ -517,13 +620,13 @@ static int iucv_sock_connect(struct socket *sock, struct sockaddr *addr, } if (sk->sk_state != IUCV_CONNECTED) { - err = iucv_sock_wait_state(sk, IUCV_CONNECTED, IUCV_DISCONN, - sock_sndtimeo(sk, flags & O_NONBLOCK)); + err = iucv_sock_wait(sk, iucv_sock_in_state(sk, IUCV_CONNECTED, + IUCV_DISCONN), + sock_sndtimeo(sk, flags & O_NONBLOCK)); } if (sk->sk_state == IUCV_DISCONN) { - release_sock(sk); - return -ECONNREFUSED; + err = -ECONNREFUSED; } if (err) { @@ -546,7 +649,10 @@ static int iucv_sock_listen(struct socket *sock, int backlog) lock_sock(sk); err = -EINVAL; - if (sk->sk_state != IUCV_BOUND || sock->type != SOCK_STREAM) + if (sk->sk_state != IUCV_BOUND) + goto done; + + if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET) goto done; sk->sk_max_ack_backlog = backlog; @@ -637,6 +743,30 @@ static int iucv_sock_getname(struct socket *sock, struct sockaddr *addr, return 0; } +/** + * iucv_send_iprm() - Send socket data in parameter list of an iucv message. + * @path: IUCV path + * @msg: Pointer to a struct iucv_message + * @skb: The socket data to send, skb->len MUST BE <= 7 + * + * Send the socket data in the parameter list in the iucv message + * (IUCV_IPRMDATA). The socket data is stored at index 0 to 6 in the parameter + * list and the socket data len at index 7 (last byte). + * See also iucv_msg_length(). + * + * Returns the error code from the iucv_message_send() call. + */ +static int iucv_send_iprm(struct iucv_path *path, struct iucv_message *msg, + struct sk_buff *skb) +{ + u8 prmdata[8]; + + memcpy(prmdata, (void *) skb->data, skb->len); + prmdata[7] = 0xff - (u8) skb->len; + return iucv_message_send(path, msg, IUCV_IPRMDATA, 0, + (void *) prmdata, 8); +} + static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t len) { @@ -644,7 +774,11 @@ static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct iucv_sock *iucv = iucv_sk(sk); struct sk_buff *skb; struct iucv_message txmsg; + struct cmsghdr *cmsg; + int cmsg_done; + long timeo; int err; + int noblock = msg->msg_flags & MSG_DONTWAIT; err = sock_error(sk); if (err) @@ -653,6 +787,10 @@ static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock, if (msg->msg_flags & MSG_OOB) return -EOPNOTSUPP; + /* SOCK_SEQPACKET: we do not support segmented records */ + if (sk->sk_type == SOCK_SEQPACKET && !(msg->msg_flags & MSG_EOR)) + return -EOPNOTSUPP; + lock_sock(sk); if (sk->sk_shutdown & SEND_SHUTDOWN) { @@ -660,34 +798,112 @@ static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock, goto out; } - if (sk->sk_state == IUCV_CONNECTED) { - if (!(skb = sock_alloc_send_skb(sk, len, - msg->msg_flags & MSG_DONTWAIT, - &err))) + /* Return if the socket is not in connected state */ + if (sk->sk_state != IUCV_CONNECTED) { + err = -ENOTCONN; + goto out; + } + + /* initialize defaults */ + cmsg_done = 0; /* check for duplicate headers */ + txmsg.class = 0; + + /* iterate over control messages */ + for (cmsg = CMSG_FIRSTHDR(msg); cmsg; + cmsg = CMSG_NXTHDR(msg, cmsg)) { + + if (!CMSG_OK(msg, cmsg)) { + err = -EINVAL; goto out; + } - if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) { - err = -EFAULT; - goto fail; + if (cmsg->cmsg_level != SOL_IUCV) + continue; + + if (cmsg->cmsg_type & cmsg_done) { + err = -EINVAL; + goto out; } + cmsg_done |= cmsg->cmsg_type; - txmsg.class = 0; - txmsg.tag = iucv->send_tag++; - memcpy(skb->cb, &txmsg.tag, 4); - skb_queue_tail(&iucv->send_skb_q, skb); - err = iucv_message_send(iucv->path, &txmsg, 0, 0, - (void *) skb->data, skb->len); - if (err) { - if (err == 3) - printk(KERN_ERR "AF_IUCV msg limit exceeded\n"); + switch (cmsg->cmsg_type) { + case SCM_IUCV_TRGCLS: + if (cmsg->cmsg_len != CMSG_LEN(TRGCLS_SIZE)) { + err = -EINVAL; + goto out; + } + + /* set iucv message target class */ + memcpy(&txmsg.class, + (void *) CMSG_DATA(cmsg), TRGCLS_SIZE); + + break; + + default: + err = -EINVAL; + goto out; + } + } + + /* allocate one skb for each iucv message: + * this is fine for SOCK_SEQPACKET (unless we want to support + * segmented records using the MSG_EOR flag), but + * for SOCK_STREAM we might want to improve it in future */ + skb = sock_alloc_send_skb(sk, len, noblock, &err); + if (!skb) + goto out; + if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) { + err = -EFAULT; + goto fail; + } + + /* wait if outstanding messages for iucv path has reached */ + timeo = sock_sndtimeo(sk, noblock); + err = iucv_sock_wait(sk, iucv_below_msglim(sk), timeo); + if (err) + goto fail; + + /* return -ECONNRESET if the socket is no longer connected */ + if (sk->sk_state != IUCV_CONNECTED) { + err = -ECONNRESET; + goto fail; + } + + /* increment and save iucv message tag for msg_completion cbk */ + txmsg.tag = iucv->send_tag++; + memcpy(CB_TAG(skb), &txmsg.tag, CB_TAG_LEN); + skb_queue_tail(&iucv->send_skb_q, skb); + + if (((iucv->path->flags & IUCV_IPRMDATA) & iucv->flags) + && skb->len <= 7) { + err = iucv_send_iprm(iucv->path, &txmsg, skb); + + /* on success: there is no message_complete callback + * for an IPRMDATA msg; remove skb from send queue */ + if (err == 0) { + skb_unlink(skb, &iucv->send_skb_q); + kfree_skb(skb); + } + + /* this error should never happen since the + * IUCV_IPRMDATA path flag is set... sever path */ + if (err == 0x15) { + iucv_path_sever(iucv->path, NULL); skb_unlink(skb, &iucv->send_skb_q); err = -EPIPE; goto fail; } - - } else { - err = -ENOTCONN; - goto out; + } else + err = iucv_message_send(iucv->path, &txmsg, 0, 0, + (void *) skb->data, skb->len); + if (err) { + if (err == 3) { + printk(KERN_ERR "AF_IUCV msg limit exceeded\n"); + err = -EAGAIN; + } else + err = -EPIPE; + skb_unlink(skb, &iucv->send_skb_q); + goto fail; } release_sock(sk); @@ -716,6 +932,10 @@ static int iucv_fragment_skb(struct sock *sk, struct sk_buff *skb, int len) if (!nskb) return -ENOMEM; + /* copy target class to control buffer of new skb */ + memcpy(CB_TRGCLS(nskb), CB_TRGCLS(skb), CB_TRGCLS_LEN); + + /* copy data fragment */ memcpy(nskb->data, skb->data + copied, size); copied += size; dataleft -= size; @@ -735,19 +955,33 @@ static void iucv_process_message(struct sock *sk, struct sk_buff *skb, struct iucv_message *msg) { int rc; + unsigned int len; + + len = iucv_msg_length(msg); - if (msg->flags & IPRMDATA) { - skb->data = NULL; - skb->len = 0; + /* store msg target class in the second 4 bytes of skb ctrl buffer */ + /* Note: the first 4 bytes are reserved for msg tag */ + memcpy(CB_TRGCLS(skb), &msg->class, CB_TRGCLS_LEN); + + /* check for special IPRM messages (e.g. iucv_sock_shutdown) */ + if ((msg->flags & IUCV_IPRMDATA) && len > 7) { + if (memcmp(msg->rmmsg, iprm_shutdown, 8) == 0) { + skb->data = NULL; + skb->len = 0; + } } else { - rc = iucv_message_receive(path, msg, 0, skb->data, - msg->length, NULL); + rc = iucv_message_receive(path, msg, msg->flags & IUCV_IPRMDATA, + skb->data, len, NULL); if (rc) { kfree_skb(skb); return; } - if (skb->truesize >= sk->sk_rcvbuf / 4) { - rc = iucv_fragment_skb(sk, skb, msg->length); + /* we need to fragment iucv messages for SOCK_STREAM only; + * for SOCK_SEQPACKET, it is only relevant if we support + * record segmentation using MSG_EOR (see also recvmsg()) */ + if (sk->sk_type == SOCK_STREAM && + skb->truesize >= sk->sk_rcvbuf / 4) { + rc = iucv_fragment_skb(sk, skb, len); kfree_skb(skb); skb = NULL; if (rc) { @@ -758,7 +992,7 @@ static void iucv_process_message(struct sock *sk, struct sk_buff *skb, } else { skb->h.raw = skb->data; skb->nh.raw = skb->data; - skb->len = msg->length; + skb->len = len; } } @@ -773,7 +1007,7 @@ static void iucv_process_message_q(struct sock *sk) struct sock_msg_q *p, *n; list_for_each_entry_safe(p, n, &iucv->message_q.list, list) { - skb = alloc_skb(p->msg.length, GFP_ATOMIC | GFP_DMA); + skb = alloc_skb(iucv_msg_length(&p->msg), GFP_ATOMIC | GFP_DMA); if (!skb) break; iucv_process_message(sk, skb, p->path, &p->msg); @@ -790,7 +1024,7 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, int noblock = flags & MSG_DONTWAIT; struct sock *sk = sock->sk; struct iucv_sock *iucv = iucv_sk(sk); - int target, copied = 0; + unsigned int copied, rlen; struct sk_buff *skb, *rskb, *cskb; int err = 0; @@ -803,8 +1037,6 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, if (flags & (MSG_OOB)) return -EOPNOTSUPP; - target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); - /* receive/dequeue next skb: * the function understands MSG_PEEK and, thus, does not dequeue skb */ skb = skb_recv_datagram(sk, flags, noblock, &err); @@ -814,25 +1046,45 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, return err; } - copied = min_t(unsigned int, skb->len, len); + rlen = skb->len; /* real length of skb */ + copied = min_t(unsigned int, rlen, len); cskb = skb; if (memcpy_toiovec(msg->msg_iov, cskb->data, copied)) { - skb_queue_head(&sk->sk_receive_queue, skb); - if (copied == 0) - return -EFAULT; - goto done; + if (!(flags & MSG_PEEK)) + skb_queue_head(&sk->sk_receive_queue, skb); + return -EFAULT; } - len -= copied; + /* SOCK_SEQPACKET: set MSG_TRUNC if recv buf size is too small */ + if (sk->sk_type == SOCK_SEQPACKET) { + if (copied < rlen) + msg->msg_flags |= MSG_TRUNC; + /* each iucv message contains a complete record */ + msg->msg_flags |= MSG_EOR; + } + + /* create control message to store iucv msg target class: + * get the trgcls from the control buffer of the skb due to + * fragmentation of original iucv message. */ + err = put_cmsg(msg, SOL_IUCV, SCM_IUCV_TRGCLS, + CB_TRGCLS_LEN, CB_TRGCLS(skb)); + if (err) { + if (!(flags & MSG_PEEK)) + skb_queue_head(&sk->sk_receive_queue, skb); + return err; + } /* Mark read part of skb as used */ if (!(flags & MSG_PEEK)) { - skb_pull(skb, copied); - if (skb->len) { - skb_queue_head(&sk->sk_receive_queue, skb); - goto done; + /* SOCK_STREAM: re-queue skb if it contains unreceived data */ + if (sk->sk_type == SOCK_STREAM) { + skb_pull(skb, copied); + if (skb->len) { + skb_queue_head(&sk->sk_receive_queue, skb); + goto done; + } } kfree_skb(skb); @@ -857,7 +1109,11 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, } done: - return err ? : copied; + /* SOCK_SEQPACKET: return real length if MSG_TRUNC is set */ + if (sk->sk_type == SOCK_SEQPACKET && (flags & MSG_TRUNC)) + copied = rlen; + + return copied; } static inline unsigned int iucv_accept_poll(struct sock *parent) @@ -919,7 +1175,6 @@ static int iucv_sock_shutdown(struct socket *sock, int how) struct iucv_sock *iucv = iucv_sk(sk); struct iucv_message txmsg; int err = 0; - u8 prmmsg[8] = {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01}; how++; @@ -928,6 +1183,9 @@ static int iucv_sock_shutdown(struct socket *sock, int how) lock_sock(sk); switch (sk->sk_state) { + case IUCV_DISCONN: + case IUCV_CLOSING: + case IUCV_SEVERED: case IUCV_CLOSED: err = -ENOTCONN; goto fail; @@ -941,7 +1199,7 @@ static int iucv_sock_shutdown(struct socket *sock, int how) txmsg.class = 0; txmsg.tag = 0; err = iucv_message_send(iucv->path, &txmsg, IUCV_IPRMDATA, 0, - (void *) prmmsg, 8); + (void *) iprm_shutdown, 8); if (err) { switch (err) { case 1: @@ -995,6 +1253,98 @@ static int iucv_sock_release(struct socket *sock) return err; } +/* getsockopt and setsockopt */ +static int iucv_sock_setsockopt(struct socket *sock, int level, int optname, + char __user *optval, int optlen) +{ + struct sock *sk = sock->sk; + struct iucv_sock *iucv = iucv_sk(sk); + int val; + int rc; + + if (level != SOL_IUCV) + return -ENOPROTOOPT; + + if (optlen < sizeof(int)) + return -EINVAL; + + if (get_user(val, (int __user *) optval)) + return -EFAULT; + + rc = 0; + + lock_sock(sk); + switch (optname) { + case SO_IPRMDATA_MSG: + if (val) + iucv->flags |= IUCV_IPRMDATA; + else + iucv->flags &= ~IUCV_IPRMDATA; + break; + case SO_MSGLIMIT: + switch (sk->sk_state) { + case IUCV_OPEN: + case IUCV_BOUND: + if (val < 1 || val > (u16)(~0)) + rc = -EINVAL; + else + iucv->msglimit = val; + break; + default: + rc = -EINVAL; + break; + } + break; + default: + rc = -ENOPROTOOPT; + break; + } + release_sock(sk); + + return rc; +} + +static int iucv_sock_getsockopt(struct socket *sock, int level, int optname, + char __user *optval, int __user *optlen) +{ + struct sock *sk = sock->sk; + struct iucv_sock *iucv = iucv_sk(sk); + int val, len; + + if (level != SOL_IUCV) + return -ENOPROTOOPT; + + if (get_user(len, optlen)) + return -EFAULT; + + if (len < 0) + return -EINVAL; + + len = min_t(unsigned int, len, sizeof(int)); + + switch (optname) { + case SO_IPRMDATA_MSG: + val = (iucv->flags & IUCV_IPRMDATA) ? 1 : 0; + break; + case SO_MSGLIMIT: + lock_sock(sk); + val = (iucv->path != NULL) ? iucv->path->msglim /* connected */ + : iucv->msglimit; /* default */ + release_sock(sk); + break; + default: + return -ENOPROTOOPT; + } + + if (put_user(len, optlen)) + return -EFAULT; + if (copy_to_user(optval, &val, len)) + return -EFAULT; + + return 0; +} + + /* Callback wrappers - called from iucv base support */ static int iucv_callback_connreq(struct iucv_path *path, u8 ipvmid[8], u8 ipuser[16]) @@ -1048,7 +1398,7 @@ static int iucv_callback_connreq(struct iucv_path *path, } /* Create the new socket */ - nsk = iucv_sock_alloc(NULL, SOCK_STREAM, GFP_ATOMIC); + nsk = iucv_sock_alloc(NULL, sk->sk_type, GFP_ATOMIC); if (!nsk) { err = iucv_path_sever(path, user_data); iucv_path_free(path); @@ -1071,7 +1421,9 @@ static int iucv_callback_connreq(struct iucv_path *path, memcpy(nuser_data + 8, niucv->src_name, 8); ASCEBC(nuser_data + 8, 8); - path->msglim = IUCV_QUEUELEN_DEFAULT; + /* set message limit for path based on msglimit of accepting socket */ + niucv->msglimit = iucv->msglimit; + path->msglim = iucv->msglimit; err = iucv_path_accept(path, &af_iucv_handler, nuser_data, nsk); if (err) { err = iucv_path_sever(path, user_data); @@ -1107,8 +1459,10 @@ static void iucv_callback_rx(struct iucv_path *path, struct iucv_message *msg) struct sock_msg_q *save_msg; int len; - if (sk->sk_shutdown & RCV_SHUTDOWN) + if (sk->sk_shutdown & RCV_SHUTDOWN) { + iucv_message_reject(path, msg); return; + } spin_lock(&iucv->message_q.lock); @@ -1117,11 +1471,11 @@ static void iucv_callback_rx(struct iucv_path *path, struct iucv_message *msg) goto save_message; len = atomic_read(&sk->sk_rmem_alloc); - len += msg->length + sizeof(struct sk_buff); + len += iucv_msg_length(msg) + sizeof(struct sk_buff); if (len > sk->sk_rcvbuf) goto save_message; - skb = alloc_skb(msg->length, GFP_ATOMIC | GFP_DMA); + skb = alloc_skb(iucv_msg_length(msg), GFP_ATOMIC | GFP_DMA); if (!skb) goto save_message; @@ -1154,7 +1508,7 @@ static void iucv_callback_txdone(struct iucv_path *path, spin_lock_irqsave(&list->lock, flags); while (list_skb != (struct sk_buff *)list) { - if (!memcmp(&msg->tag, list_skb->cb, 4)) { + if (!memcmp(&msg->tag, CB_TAG(list_skb), CB_TAG_LEN)) { this = list_skb; break; } @@ -1165,8 +1519,11 @@ static void iucv_callback_txdone(struct iucv_path *path, spin_unlock_irqrestore(&list->lock, flags); - if (this) + if (this) { kfree_skb(this); + /* wake up any process waiting for sending */ + iucv_sock_wake_msglim(sk); + } } if (!this) printk(KERN_ERR "AF_IUCV msg tag %u not found\n", msg->tag); @@ -1192,6 +1549,21 @@ static void iucv_callback_connrej(struct iucv_path *path, u8 ipuser[16]) sk->sk_state_change(sk); } +/* called if the other communication side shuts down its RECV direction; + * in turn, the callback sets SEND_SHUTDOWN to disable sending of data. + */ +static void iucv_callback_shutdown(struct iucv_path *path, u8 ipuser[16]) +{ + struct sock *sk = path->private; + + bh_lock_sock(sk); + if (sk->sk_state != IUCV_CLOSED) { + sk->sk_shutdown |= SEND_SHUTDOWN; + sk->sk_state_change(sk); + } + bh_unlock_sock(sk); +} + static struct proto_ops iucv_sock_ops = { .family = PF_IUCV, .owner = THIS_MODULE, @@ -1208,8 +1580,8 @@ static struct proto_ops iucv_sock_ops = { .mmap = sock_no_mmap, .socketpair = sock_no_socketpair, .shutdown = iucv_sock_shutdown, - .setsockopt = sock_no_setsockopt, - .getsockopt = sock_no_getsockopt + .setsockopt = iucv_sock_setsockopt, + .getsockopt = iucv_sock_getsockopt, }; static struct net_proto_family iucv_sock_family_ops = { diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index 569c6e4..b14e081 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -1511,6 +1511,8 @@ static void iucv_path_complete(struct iucv_irq_data *data) struct iucv_path_complete *ipc = (void *) data; struct iucv_path *path = iucv_path_table[ipc->ippathid]; + if (path) + path->flags = ipc->ipflags1; if (path && path->handler && path->handler->path_complete) path->handler->path_complete(path, ipc->ipuser); }