walters / rpms / kernel

Forked from rpms/kernel 5 years ago
Clone
Kyle McMartin c8d2616
Author: Neil Horman <nhorman@tuxdriver.com>
Kyle McMartin c8d2616
Date:   Fri Dec 17 13:35:36 2010 -0500
Kyle McMartin c8d2616
Kyle McMartin c8d2616
Enhance AF_PACKET to support using non-contiguous memory when allocating ring
Kyle McMartin c8d2616
buffer space.  This is a combined backport of the following commits from
Kyle McMartin c8d2616
net-next-2.6:
Kyle McMartin c8d2616
0e3125c755445664f00ad036e4fc2cd32fd52877
Kyle McMartin c8d2616
bbce5a59e4e0e6e1dbc85492caaf310ff6611309
Kyle McMartin c8d2616
0af55bb58f8fa7865004ac48d16affe125ac1b7f
Kyle McMartin c8d2616
920b8d913bd3d963d5c88bca160a272b71e0c95a
Kyle McMartin c8d2616
Kyle McMartin c8d2616
diff -up linux-2.6.34.x86_64/net/packet/af_packet.c.orig linux-2.6.34.x86_64/net/packet/af_packet.c
Kyle McMartin c8d2616
--- linux-2.6.34.x86_64/net/packet/af_packet.c.orig	2010-12-17 12:16:58.000000000 -0500
Kyle McMartin c8d2616
+++ linux-2.6.34.x86_64/net/packet/af_packet.c	2010-12-17 12:30:14.000000000 -0500
Kyle McMartin c8d2616
@@ -61,6 +61,7 @@
Kyle McMartin c8d2616
 #include <linux/kernel.h>
Kyle McMartin c8d2616
 #include <linux/kmod.h>
Kyle McMartin c8d2616
 #include <linux/slab.h>
Kyle McMartin c8d2616
+#include <linux/vmalloc.h>
Kyle McMartin c8d2616
 #include <net/net_namespace.h>
Kyle McMartin c8d2616
 #include <net/ip.h>
Kyle McMartin c8d2616
 #include <net/protocol.h>
Kyle McMartin c8d2616
@@ -161,8 +162,14 @@ struct packet_mreq_max {
Kyle McMartin c8d2616
 static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
Kyle McMartin c8d2616
 		int closing, int tx_ring);
Kyle McMartin c8d2616
 
Kyle McMartin c8d2616
+#define PGV_FROM_VMALLOC 1
Kyle McMartin c8d2616
+struct pgv {
Kyle McMartin c8d2616
+	char *buffer;
Kyle McMartin c8d2616
+	unsigned char flags;
Kyle McMartin c8d2616
+};
Kyle McMartin c8d2616
+
Kyle McMartin c8d2616
 struct packet_ring_buffer {
Kyle McMartin c8d2616
-	char			**pg_vec;
Kyle McMartin c8d2616
+	struct pgv		*pg_vec;
Kyle McMartin c8d2616
 	unsigned int		head;
Kyle McMartin c8d2616
 	unsigned int		frames_per_block;
Kyle McMartin c8d2616
 	unsigned int		frame_size;
Kyle McMartin c8d2616
@@ -214,6 +221,13 @@ struct packet_skb_cb {
Kyle McMartin c8d2616
 
Kyle McMartin c8d2616
 #define PACKET_SKB_CB(__skb)	((struct packet_skb_cb *)((__skb)->cb))
Kyle McMartin c8d2616
 
Kyle McMartin c8d2616
+static inline struct page *pgv_to_page(void *addr)
Kyle McMartin c8d2616
+{
Kyle McMartin c8d2616
+	if (is_vmalloc_addr(addr))
Kyle McMartin c8d2616
+		return vmalloc_to_page(addr);
Kyle McMartin c8d2616
+	return virt_to_page(addr);
Kyle McMartin c8d2616
+}
Kyle McMartin c8d2616
+
Kyle McMartin c8d2616
 static void __packet_set_status(struct packet_sock *po, void *frame, int status)
Kyle McMartin c8d2616
 {
Kyle McMartin c8d2616
 	union {
Kyle McMartin c8d2616
@@ -226,11 +240,11 @@ static void __packet_set_status(struct p
Kyle McMartin c8d2616
 	switch (po->tp_version) {
Kyle McMartin c8d2616
 	case TPACKET_V1:
Kyle McMartin c8d2616
 		h.h1->tp_status = status;
Kyle McMartin c8d2616
-		flush_dcache_page(virt_to_page(&h.h1->tp_status));
Kyle McMartin c8d2616
+		flush_dcache_page(pgv_to_page(&h.h1->tp_status));
Kyle McMartin c8d2616
 		break;
Kyle McMartin c8d2616
 	case TPACKET_V2:
Kyle McMartin c8d2616
 		h.h2->tp_status = status;
Kyle McMartin c8d2616
-		flush_dcache_page(virt_to_page(&h.h2->tp_status));
Kyle McMartin c8d2616
+		flush_dcache_page(pgv_to_page(&h.h2->tp_status));
Kyle McMartin c8d2616
 		break;
Kyle McMartin c8d2616
 	default:
Kyle McMartin c8d2616
 		pr_err("TPACKET version not supported\n");
Kyle McMartin c8d2616
@@ -253,10 +267,10 @@ static int __packet_get_status(struct pa
Kyle McMartin c8d2616
 	h.raw = frame;
Kyle McMartin c8d2616
 	switch (po->tp_version) {
Kyle McMartin c8d2616
 	case TPACKET_V1:
Kyle McMartin c8d2616
-		flush_dcache_page(virt_to_page(&h.h1->tp_status));
Kyle McMartin c8d2616
+		flush_dcache_page(pgv_to_page(&h.h1->tp_status));
Kyle McMartin c8d2616
 		return h.h1->tp_status;
Kyle McMartin c8d2616
 	case TPACKET_V2:
Kyle McMartin c8d2616
-		flush_dcache_page(virt_to_page(&h.h2->tp_status));
Kyle McMartin c8d2616
+		flush_dcache_page(pgv_to_page(&h.h2->tp_status));
Kyle McMartin c8d2616
 		return h.h2->tp_status;
Kyle McMartin c8d2616
 	default:
Kyle McMartin c8d2616
 		pr_err("TPACKET version not supported\n");
Kyle McMartin c8d2616
@@ -280,7 +294,8 @@ static void *packet_lookup_frame(struct 
Kyle McMartin c8d2616
 	pg_vec_pos = position / rb->frames_per_block;
Kyle McMartin c8d2616
 	frame_offset = position % rb->frames_per_block;
Kyle McMartin c8d2616
 
Kyle McMartin c8d2616
-	h.raw = rb->pg_vec[pg_vec_pos] + (frame_offset * rb->frame_size);
Kyle McMartin c8d2616
+	h.raw = rb->pg_vec[pg_vec_pos].buffer +
Kyle McMartin c8d2616
+		(frame_offset * rb->frame_size);
Kyle McMartin c8d2616
 
Kyle McMartin c8d2616
 	if (status != __packet_get_status(po, h.raw))
Kyle McMartin c8d2616
 		return NULL;
Kyle McMartin c8d2616
@@ -771,15 +786,11 @@ static int tpacket_rcv(struct sk_buff *s
Kyle McMartin c8d2616
 	__packet_set_status(po, h.raw, status);
Kyle McMartin c8d2616
 	smp_mb();
Kyle McMartin c8d2616
 	{
Kyle McMartin c8d2616
-		struct page *p_start, *p_end;
Kyle McMartin c8d2616
-		u8 *h_end = h.raw + macoff + snaplen - 1;
Kyle McMartin c8d2616
+		u8 *start, *end;
Kyle McMartin c8d2616
 
Kyle McMartin c8d2616
-		p_start = virt_to_page(h.raw);
Kyle McMartin c8d2616
-		p_end = virt_to_page(h_end);
Kyle McMartin c8d2616
-		while (p_start <= p_end) {
Kyle McMartin c8d2616
-			flush_dcache_page(p_start);
Kyle McMartin c8d2616
-			p_start++;
Kyle McMartin c8d2616
-		}
Kyle McMartin c8d2616
+		end = (u8 *)PAGE_ALIGN((unsigned long)h.raw + macoff + snaplen);
Kyle McMartin c8d2616
+		for (start = h.raw; start < end; start += PAGE_SIZE)
Kyle McMartin c8d2616
+			flush_dcache_page(pgv_to_page(start));
Kyle McMartin c8d2616
 	}
Kyle McMartin c8d2616
 
Kyle McMartin c8d2616
 	sk->sk_data_ready(sk, 0);
Kyle McMartin c8d2616
@@ -886,7 +897,6 @@ static int tpacket_fill_skb(struct packe
Kyle McMartin c8d2616
 	}
Kyle McMartin c8d2616
 
Kyle McMartin c8d2616
 	err = -EFAULT;
Kyle McMartin c8d2616
-	page = virt_to_page(data);
Kyle McMartin c8d2616
 	offset = offset_in_page(data);
Kyle McMartin c8d2616
 	len_max = PAGE_SIZE - offset;
Kyle McMartin c8d2616
 	len = ((to_write > len_max) ? len_max : to_write);
Kyle McMartin c8d2616
@@ -905,11 +915,11 @@ static int tpacket_fill_skb(struct packe
Kyle McMartin c8d2616
 			return -EFAULT;
Kyle McMartin c8d2616
 		}
Kyle McMartin c8d2616
 
Kyle McMartin c8d2616
+		page = pgv_to_page(data);
Kyle McMartin c8d2616
+		data += len;
Kyle McMartin c8d2616
 		flush_dcache_page(page);
Kyle McMartin c8d2616
 		get_page(page);
Kyle McMartin c8d2616
-		skb_fill_page_desc(skb,
Kyle McMartin c8d2616
-				nr_frags,
Kyle McMartin c8d2616
-				page++, offset, len);
Kyle McMartin c8d2616
+		skb_fill_page_desc(skb, nr_frags, page, offset, len);
Kyle McMartin c8d2616
 		to_write -= len;
Kyle McMartin c8d2616
 		offset = 0;
Kyle McMartin c8d2616
 		len_max = PAGE_SIZE;
Kyle McMartin c8d2616
@@ -2230,37 +2240,76 @@ static const struct vm_operations_struct
Kyle McMartin c8d2616
 	.close	=	packet_mm_close,
Kyle McMartin c8d2616
 };
Kyle McMartin c8d2616
 
Kyle McMartin c8d2616
-static void free_pg_vec(char **pg_vec, unsigned int order, unsigned int len)
Kyle McMartin c8d2616
+static void free_pg_vec(struct pgv *pg_vec, unsigned int order,
Kyle McMartin c8d2616
+			unsigned int len)
Kyle McMartin c8d2616
 {
Kyle McMartin c8d2616
 	int i;
Kyle McMartin c8d2616
 
Kyle McMartin c8d2616
 	for (i = 0; i < len; i++) {
Kyle McMartin c8d2616
-		if (likely(pg_vec[i]))
Kyle McMartin c8d2616
-			free_pages((unsigned long) pg_vec[i], order);
Kyle McMartin c8d2616
+		if (likely(pg_vec[i].buffer)) {
Kyle McMartin c8d2616
+			if (pg_vec[i].flags & PGV_FROM_VMALLOC)
Kyle McMartin c8d2616
+				vfree(pg_vec[i].buffer);
Kyle McMartin c8d2616
+			else
Kyle McMartin c8d2616
+				free_pages((unsigned long)pg_vec[i].buffer,
Kyle McMartin c8d2616
+					   order);
Kyle McMartin c8d2616
+			pg_vec[i].buffer = NULL;
Kyle McMartin c8d2616
+		}
Kyle McMartin c8d2616
 	}
Kyle McMartin c8d2616
 	kfree(pg_vec);
Kyle McMartin c8d2616
 }
Kyle McMartin c8d2616
 
Kyle McMartin c8d2616
-static inline char *alloc_one_pg_vec_page(unsigned long order)
Kyle McMartin c8d2616
+static inline char *alloc_one_pg_vec_page(unsigned long order,
Kyle McMartin c8d2616
+					  unsigned char *flags)
Kyle McMartin c8d2616
 {
Kyle McMartin c8d2616
-	gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP | __GFP_ZERO | __GFP_NOWARN;
Kyle McMartin c8d2616
+	char *buffer = NULL;
Kyle McMartin c8d2616
+	gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP |
Kyle McMartin c8d2616
+			  __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY;
Kyle McMartin c8d2616
+
Kyle McMartin c8d2616
+	buffer = (char *) __get_free_pages(gfp_flags, order);
Kyle McMartin c8d2616
 
Kyle McMartin c8d2616
-	return (char *) __get_free_pages(gfp_flags, order);
Kyle McMartin c8d2616
+	if (buffer)
Kyle McMartin c8d2616
+		return buffer;
Kyle McMartin c8d2616
+
Kyle McMartin c8d2616
+	/*
Kyle McMartin c8d2616
+	 * __get_free_pages failed, fall back to vmalloc
Kyle McMartin c8d2616
+	 */
Kyle McMartin c8d2616
+	*flags |= PGV_FROM_VMALLOC;
Kyle McMartin c8d2616
+	buffer = vmalloc((1 << order) * PAGE_SIZE);
Kyle McMartin c8d2616
+
Kyle McMartin c8d2616
+	if (buffer) {
Kyle McMartin c8d2616
+		memset(buffer, 0, (1 << order) * PAGE_SIZE);
Kyle McMartin c8d2616
+		return buffer;
Kyle McMartin c8d2616
+	}
Kyle McMartin c8d2616
+
Kyle McMartin c8d2616
+	/*
Kyle McMartin c8d2616
+	 * vmalloc failed, let's dig into swap here
Kyle McMartin c8d2616
+	 */
Kyle McMartin c8d2616
+	*flags = 0;
Kyle McMartin c8d2616
+	gfp_flags &= ~__GFP_NORETRY;
Kyle McMartin c8d2616
+	buffer = (char *)__get_free_pages(gfp_flags, order);
Kyle McMartin c8d2616
+	if (buffer)
Kyle McMartin c8d2616
+		return buffer;
Kyle McMartin c8d2616
+
Kyle McMartin c8d2616
+	/*
Kyle McMartin c8d2616
+	 * complete and utter failure
Kyle McMartin c8d2616
+	 */
Kyle McMartin c8d2616
+	return NULL;
Kyle McMartin c8d2616
 }
Kyle McMartin c8d2616
 
Kyle McMartin c8d2616
-static char **alloc_pg_vec(struct tpacket_req *req, int order)
Kyle McMartin c8d2616
+static struct pgv *alloc_pg_vec(struct tpacket_req *req, int order)
Kyle McMartin c8d2616
 {
Kyle McMartin c8d2616
 	unsigned int block_nr = req->tp_block_nr;
Kyle McMartin c8d2616
-	char **pg_vec;
Kyle McMartin c8d2616
+	struct pgv *pg_vec;
Kyle McMartin c8d2616
 	int i;
Kyle McMartin c8d2616
 
Kyle McMartin c8d2616
-	pg_vec = kzalloc(block_nr * sizeof(char *), GFP_KERNEL);
Kyle McMartin c8d2616
+	pg_vec = kcalloc(block_nr, sizeof(struct pgv), GFP_KERNEL);
Kyle McMartin c8d2616
 	if (unlikely(!pg_vec))
Kyle McMartin c8d2616
 		goto out;
Kyle McMartin c8d2616
 
Kyle McMartin c8d2616
 	for (i = 0; i < block_nr; i++) {
Kyle McMartin c8d2616
-		pg_vec[i] = alloc_one_pg_vec_page(order);
Kyle McMartin c8d2616
-		if (unlikely(!pg_vec[i]))
Kyle McMartin c8d2616
+		pg_vec[i].buffer = alloc_one_pg_vec_page(order,
Kyle McMartin c8d2616
+							 &pg_vec[i].flags);
Kyle McMartin c8d2616
+		if (unlikely(!pg_vec[i].buffer))
Kyle McMartin c8d2616
 			goto out_free_pgvec;
Kyle McMartin c8d2616
 	}
Kyle McMartin c8d2616
 
Kyle McMartin c8d2616
@@ -2276,7 +2325,7 @@ out_free_pgvec:
Kyle McMartin c8d2616
 static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
Kyle McMartin c8d2616
 		int closing, int tx_ring)
Kyle McMartin c8d2616
 {
Kyle McMartin c8d2616
-	char **pg_vec = NULL;
Kyle McMartin c8d2616
+	struct pgv *pg_vec = NULL;
Kyle McMartin c8d2616
 	struct packet_sock *po = pkt_sk(sk);
Kyle McMartin c8d2616
 	int was_running, order = 0;
Kyle McMartin c8d2616
 	struct packet_ring_buffer *rb;
Kyle McMartin c8d2616
@@ -2438,15 +2487,22 @@ static int packet_mmap(struct file *file
Kyle McMartin c8d2616
 			continue;
Kyle McMartin c8d2616
 
Kyle McMartin c8d2616
 		for (i = 0; i < rb->pg_vec_len; i++) {
Kyle McMartin c8d2616
-			struct page *page = virt_to_page(rb->pg_vec[i]);
Kyle McMartin c8d2616
+			struct page *page;
Kyle McMartin c8d2616
+			void *kaddr = rb->pg_vec[i].buffer;
Kyle McMartin c8d2616
 			int pg_num;
Kyle McMartin c8d2616
 
Kyle McMartin c8d2616
 			for (pg_num = 0; pg_num < rb->pg_vec_pages;
Kyle McMartin c8d2616
-					pg_num++, page++) {
Kyle McMartin c8d2616
+					pg_num++) {
Kyle McMartin c8d2616
+				if (rb->pg_vec[i].flags & PGV_FROM_VMALLOC)
Kyle McMartin c8d2616
+					page = vmalloc_to_page(kaddr);
Kyle McMartin c8d2616
+				else
Kyle McMartin c8d2616
+					page = virt_to_page(kaddr);
Kyle McMartin c8d2616
+
Kyle McMartin c8d2616
 				err = vm_insert_page(vma, start, page);
Kyle McMartin c8d2616
 				if (unlikely(err))
Kyle McMartin c8d2616
 					goto out;
Kyle McMartin c8d2616
 				start += PAGE_SIZE;
Kyle McMartin c8d2616
+				kaddr += PAGE_SIZE;
Kyle McMartin c8d2616
 			}
Kyle McMartin c8d2616
 		}
Kyle McMartin c8d2616
 	}