iPXE
pcimsix.c
Go to the documentation of this file.
1/*
2 * Copyright (C) 2019 Michael Brown <mbrown@fensystems.co.uk>.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License as
6 * published by the Free Software Foundation; either version 2 of the
7 * License, or (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17 * 02110-1301, USA.
18 *
19 * You can also choose to distribute this program under the terms of
20 * the Unmodified Binary Distribution Licence (as given in the file
21 * COPYING.UBDL), provided that you have satisfied its requirements.
22 */
23
24FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
25FILE_SECBOOT ( PERMITTED );
26
27#include <stdint.h>
28#include <errno.h>
29#include <assert.h>
30#include <ipxe/pci.h>
31#include <ipxe/pcimsix.h>
32
33/** @file
34 *
35 * PCI MSI-X interrupts
36 *
37 * Interrupts as such are not used in iPXE, which operates in polling
38 * mode. However, some network cards (such as the Intel 40GbE and
39 * 100GbE NICs) will defer writing out completions until the point of
40 * asserting an MSI-X interrupt.
41 *
42 * From the point of view of the PCI device, asserting an MSI-X
43 * interrupt is just a 32-bit DMA write of an opaque value to an
44 * opaque target address. The PCI device has no way to know whether
45 * or not the target address corresponds to a real APIC.
46 *
47 * We can therefore trick the PCI device into believing that it is
48 * asserting an MSI-X interrupt, by configuring it to write an opaque
49 * 32-bit value to a dummy target address in host memory. This is
50 * sufficient to trigger the associated write of the completions to
51 * host memory.
52 *
53 * When running in a virtual machine, the hypervisor will intercept
54 * our attempt to configure MSI-X on the PCI device. The physical
55 * hardware will be configured to raise an interrupt under the
56 * hypervisor's control, which will then be reflected back into the
57 * virtual machine. The opaque value that we write will be assumed to
58 * indicate an interrupt vector number (as would normally be the case
59 * when configuring MSI-X), and the opaque address will generally be
60 * ignored. The reflected interrupt will be ignored (since it is not
61 * enabled within the virtual machine), but the device still asserts
62 * an MSI-X interrupt and so still triggers the associated write of
63 * the completions to host memory.
64 *
65 * Note that since the opaque target address will generally be ignored
66 * by the hypervisor, we cannot examine the value present at the dummy
67 * target address to find out whether or not an interrupt has been
68 * raised.
69 */
70
71/**
72 * Get MSI-X descriptor name (for debugging)
73 *
74 * @v cfg Configuration space offset
75 * @ret name Descriptor name
76 */
77static const char * pci_msix_name ( unsigned int cfg ) {
78
79 switch ( cfg ) {
80 case PCI_MSIX_DESC_TABLE: return "table";
81 case PCI_MSIX_DESC_PBA: return "PBA";
82 default: return "<UNKNOWN>";
83 }
84}
85
86/**
87 * Map MSI-X BAR portion
88 *
89 * @v pci PCI device
90 * @v msix MSI-X capability
91 * @v cfg Configuration space offset
92 * @ret io I/O address
93 */
94static void * pci_msix_ioremap ( struct pci_device *pci, struct pci_msix *msix,
95 unsigned int cfg ) {
97 unsigned int bar;
98 unsigned long start;
99 unsigned long offset;
100 unsigned long base;
101 void *io;
102
103 /* Read descriptor */
104 pci_read_config_dword ( pci, ( msix->cap + cfg ), &desc );
105
106 /* Get BAR */
107 bar = PCI_MSIX_DESC_BIR ( desc );
109 start = pci_bar_start ( pci, PCI_BASE_ADDRESS ( bar ) );
110 if ( ! start ) {
111 DBGC ( msix, "MSI-X %p %s could not find BAR%d\n",
112 msix, pci_msix_name ( cfg ), bar );
113 return NULL;
114 }
115 base = ( start + offset );
116 DBGC ( msix, "MSI-X %p %s at %#08lx (BAR%d+%#lx)\n",
117 msix, pci_msix_name ( cfg ), base, bar, offset );
118
119 /* Map BAR portion */
120 io = pci_ioremap ( pci, ( start + offset ), PCI_MSIX_LEN );
121 if ( ! io ) {
122 DBGC ( msix, "MSI-X %p %s could not map %#08lx\n",
123 msix, pci_msix_name ( cfg ), base );
124 return NULL;
125 }
126
127 return io;
128}
129
130/**
131 * Enable MSI-X interrupts
132 *
133 * @v pci PCI device
134 * @v msix MSI-X capability
135 * @ret rc Return status code
136 */
137int pci_msix_enable ( struct pci_device *pci, struct pci_msix *msix ) {
140 unsigned int i;
141 int rc;
142
143 /* Locate capability */
144 msix->cap = pci_find_capability ( pci, PCI_CAP_ID_MSIX );
145 if ( ! msix->cap ) {
146 DBGC ( msix, "MSI-X %p found no MSI-X capability in "
147 PCI_FMT "\n", msix, PCI_ARGS ( pci ) );
148 rc = -ENOENT;
149 goto err_cap;
150 }
151
152 /* Extract interrupt count */
153 pci_read_config_word ( pci, ( msix->cap + PCI_MSIX_CTRL ), &ctrl );
154 msix->count = ( PCI_MSIX_CTRL_SIZE ( ctrl ) + 1 );
155 DBGC ( msix, "MSI-X %p has %d vectors for " PCI_FMT "\n",
156 msix, msix->count, PCI_ARGS ( pci ) );
157
158 /* Map MSI-X table */
159 msix->table = pci_msix_ioremap ( pci, msix, PCI_MSIX_DESC_TABLE );
160 if ( ! msix->table ) {
161 rc = -ENOENT;
162 goto err_table;
163 }
164
165 /* Map pending bit array */
166 msix->pba = pci_msix_ioremap ( pci, msix, PCI_MSIX_DESC_PBA );
167 if ( ! msix->pba ) {
168 rc = -ENOENT;
169 goto err_pba;
170 }
171
172 /* Allocate dummy target */
173 msix->msg = dma_alloc ( &pci->dma, &msix->map, sizeof ( *msix->msg ),
174 sizeof ( *msix->msg ) );
175 if ( ! msix->msg ) {
176 rc = -ENOMEM;
177 goto err_msg;
178 }
179
180 /* Map all interrupts to dummy target by default */
181 msg = dma ( &msix->map, msix->msg );
182 for ( i = 0 ; i < msix->count ; i++ )
183 pci_msix_map ( msix, i, msg, 0 );
184
185 /* Enable MSI-X */
188 pci_write_config_word ( pci, ( msix->cap + PCI_MSIX_CTRL ), ctrl );
189
190 return 0;
191
192 dma_free ( &msix->map, msix->msg, sizeof ( *msix->msg ) );
193 err_msg:
194 iounmap ( msix->pba );
195 err_pba:
196 iounmap ( msix->table );
197 err_table:
198 err_cap:
199 return rc;
200}
201
202/**
203 * Disable MSI-X interrupts
204 *
205 * @v pci PCI device
206 * @v msix MSI-X capability
207 */
208void pci_msix_disable ( struct pci_device *pci, struct pci_msix *msix ) {
210
211 /* Disable MSI-X */
212 pci_read_config_word ( pci, ( msix->cap + PCI_MSIX_CTRL ), &ctrl );
214 pci_write_config_word ( pci, ( msix->cap + PCI_MSIX_CTRL ), ctrl );
215
216 /* Free dummy target */
217 dma_free ( &msix->map, msix->msg, sizeof ( *msix->msg ) );
218
219 /* Unmap pending bit array */
220 iounmap ( msix->pba );
221
222 /* Unmap MSI-X table */
223 iounmap ( msix->table );
224}
225
226/**
227 * Map MSI-X interrupt vector
228 *
229 * @v msix MSI-X capability
230 * @v vector MSI-X vector
231 * @v address Message address
232 * @v data Message data
233 */
234void pci_msix_map ( struct pci_msix *msix, unsigned int vector,
236 void *base;
237
238 /* Sanity check */
240
241 /* Map interrupt vector */
242 base = ( msix->table + PCI_MSIX_VECTOR ( vector ) );
243 writel ( ( address & 0xffffffffUL ), ( base + PCI_MSIX_ADDRESS_LO ) );
244 if ( sizeof ( address ) > sizeof ( uint32_t ) ) {
245 writel ( ( ( ( uint64_t ) address ) >> 32 ),
247 } else {
248 writel ( 0, ( base + PCI_MSIX_ADDRESS_HI ) );
249 }
250 writel ( data, ( base + PCI_MSIX_DATA ) );
251}
252
253/**
254 * Control MSI-X interrupt vector
255 *
256 * @v msix MSI-X capability
257 * @v vector MSI-X vector
258 * @v mask Control mask
259 */
260void pci_msix_control ( struct pci_msix *msix, unsigned int vector,
261 uint32_t mask ) {
262 void *base;
264
265 /* Mask/unmask interrupt vector */
266 base = ( msix->table + PCI_MSIX_VECTOR ( vector ) );
269 ctrl |= mask;
270 writel ( ctrl, ( base + PCI_MSIX_CONTROL ) );
271}
272
273/**
274 * Dump MSI-X interrupt state (for debugging)
275 *
276 * @v msix MSI-X capability
277 * @v vector MSI-X vector
278 */
279void pci_msix_dump ( struct pci_msix *msix, unsigned int vector ) {
280 void *base;
281 uint32_t address_hi;
282 uint32_t address_lo;
286 uint32_t pba;
287
288 /* Do nothing in non-debug builds */
289 if ( ! DBG_LOG )
290 return;
291
292 /* Mask/unmask interrupt vector */
293 base = ( msix->table + PCI_MSIX_VECTOR ( vector ) );
294 address_hi = readl ( base + PCI_MSIX_ADDRESS_HI );
295 address_lo = readl ( base + PCI_MSIX_ADDRESS_LO );
298 pba = readl ( msix->pba );
299 address = ( ( ( ( uint64_t ) address_hi ) << 32 ) | address_lo );
300 DBGC ( msix, "MSI-X %p vector %d %#08x => %#08lx%s%s\n",
301 msix, vector, data, address,
302 ( ( ctrl & PCI_MSIX_CONTROL_MASK ) ? " (masked)" : "" ),
303 ( ( pba & ( 1 << vector ) ) ? " (pending)" : "" ) );
304}
#define NULL
NULL pointer (VOID *)
Definition Base.h:322
struct arbelprm_rc_send_wqe rc
Definition arbel.h:3
unsigned short uint16_t
Definition stdint.h:11
unsigned int uint32_t
Definition stdint.h:12
unsigned long physaddr_t
Definition stdint.h:20
unsigned long long uint64_t
Definition stdint.h:13
Assertions.
#define assert(condition)
Assert a condition at run-time.
Definition assert.h:50
uint16_t offset
Offset to command line.
Definition bzimage.h:3
uint8_t ctrl
Ring control.
Definition dwmac.h:7
uint8_t data[48]
Additional event data.
Definition ena.h:11
uint32_t vector
MSI-X vector.
Definition ena.h:9
uint64_t address
Base address.
Definition ena.h:13
struct ena_llq_option desc
Descriptor counts.
Definition ena.h:9
Error codes.
#define DBGC(...)
Definition compiler.h:505
#define DBG_LOG
Definition compiler.h:317
uint32_t start
Starting offset.
Definition netvsc.h:1
static unsigned int count
Number of entries.
Definition dwmac.h:220
#define FILE_LICENCE(_licence)
Declare a particular licence as applying to a file.
Definition compiler.h:896
#define ENOENT
No such file or directory.
Definition errno.h:515
#define ENOMEM
Not enough space.
Definition errno.h:535
#define FILE_SECBOOT(_status)
Declare a file's UEFI Secure Boot permission status.
Definition compiler.h:926
void iounmap(volatile const void *io_addr)
Unmap I/O address.
int pci_read_config_dword(struct pci_device *pci, unsigned int where, uint32_t *value)
Read 32-bit dword from PCI configuration space.
int pci_read_config_word(struct pci_device *pci, unsigned int where, uint16_t *value)
Read 16-bit word from PCI configuration space.
void * pci_ioremap(struct pci_device *pci, unsigned long bus_addr, size_t len)
Map PCI bus address as an I/O address.
int pci_write_config_word(struct pci_device *pci, unsigned int where, uint16_t value)
Write 16-bit word to PCI configuration space.
void dma_free(struct dma_mapping *map, void *addr, size_t len)
Unmap and free DMA-coherent buffer.
void * dma_alloc(struct dma_device *dma, struct dma_mapping *map, size_t len, size_t align)
Allocate and map DMA-coherent buffer.
physaddr_t dma(struct dma_mapping *map, void *addr)
Get DMA address from virtual address.
uint32_t base
Base.
Definition librm.h:3
void msg(unsigned int row, const char *fmt,...)
Print message centred on specified row.
Definition message.c:62
unsigned long pci_bar_start(struct pci_device *pci, unsigned int reg)
Find the start of a PCI BAR.
Definition pci.c:97
PCI bus.
#define PCI_MSIX_CTRL
MSI-X interrupts.
Definition pci.h:116
#define PCI_FMT
PCI device debug message format.
Definition pci.h:312
#define PCI_MSIX_DESC_TABLE
Definition pci.h:120
#define PCI_MSIX_CTRL_ENABLE
Enable MSI-X.
Definition pci.h:117
#define PCI_CAP_ID_MSIX
MSI-X.
Definition pci.h:99
#define PCI_ARGS(pci)
PCI device debug message arguments.
Definition pci.h:315
#define PCI_BASE_ADDRESS(n)
PCI base address registers.
Definition pci.h:62
#define PCI_MSIX_DESC_PBA
Definition pci.h:121
#define PCI_MSIX_DESC_BIR(x)
BAR index.
Definition pci.h:122
#define PCI_MSIX_CTRL_SIZE(x)
Table size.
Definition pci.h:119
#define PCI_MSIX_CTRL_MASK
Mask all interrupts.
Definition pci.h:118
#define PCI_MSIX_DESC_OFFSET(x)
BAR offset.
Definition pci.h:123
int pci_find_capability(struct pci_device *pci, int cap)
Look for a PCI capability.
Definition pciextra.c:39
int pci_msix_enable(struct pci_device *pci, struct pci_msix *msix)
Enable MSI-X interrupts.
Definition pcimsix.c:137
static void * pci_msix_ioremap(struct pci_device *pci, struct pci_msix *msix, unsigned int cfg)
Map MSI-X BAR portion.
Definition pcimsix.c:94
void pci_msix_dump(struct pci_msix *msix, unsigned int vector)
Dump MSI-X interrupt state (for debugging)
Definition pcimsix.c:279
void pci_msix_disable(struct pci_device *pci, struct pci_msix *msix)
Disable MSI-X interrupts.
Definition pcimsix.c:208
static const char * pci_msix_name(unsigned int cfg)
Get MSI-X descriptor name (for debugging)
Definition pcimsix.c:77
void pci_msix_control(struct pci_msix *msix, unsigned int vector, uint32_t mask)
Control MSI-X interrupt vector.
Definition pcimsix.c:260
void pci_msix_map(struct pci_msix *msix, unsigned int vector, physaddr_t address, uint32_t data)
Map MSI-X interrupt vector.
Definition pcimsix.c:234
PCI MSI-X interrupts.
#define PCI_MSIX_CONTROL
MSI-X vector control.
Definition pcimsix.h:31
#define PCI_MSIX_LEN
MSI-X BAR mapped length.
Definition pcimsix.h:16
#define PCI_MSIX_ADDRESS_LO
MSI-X vector address low 32 bits.
Definition pcimsix.h:22
#define PCI_MSIX_VECTOR(n)
MSI-X vector offset.
Definition pcimsix.h:19
#define PCI_MSIX_CONTROL_MASK
Vector is masked.
Definition pcimsix.h:32
#define PCI_MSIX_DATA
MSI-X vector data.
Definition pcimsix.h:28
#define PCI_MSIX_ADDRESS_HI
MSI-X vector address high 32 bits.
Definition pcimsix.h:25
@ cfg
Definition sis900.h:23
A PCI device.
Definition pci.h:211
struct dma_device dma
DMA device.
Definition pci.h:215
PCI MSI-X capability.
Definition pcimsix.h:35
void * pba
Pending bit array.
Definition pcimsix.h:43
unsigned int cap
Capability offset.
Definition pcimsix.h:37
uint32_t * msg
Dummy message target.
Definition pcimsix.h:45
struct dma_mapping map
Dummy message target mapping.
Definition pcimsix.h:47
unsigned int count
Number of vectors.
Definition pcimsix.h:39
void * table
MSI-X table.
Definition pcimsix.h:41
#define readl
Definition w89c840.c:157
#define writel
Definition w89c840.c:160