iPXE
pcimsix.c File Reference

PCI MSI-X interrupts. More...

#include <stdint.h>
#include <errno.h>
#include <assert.h>
#include <ipxe/pci.h>
#include <ipxe/pcimsix.h>

Go to the source code of this file.

Functions

 FILE_LICENCE (GPL2_OR_LATER_OR_UBDL)
 FILE_SECBOOT (PERMITTED)
static const char * pci_msix_name (unsigned int cfg)
 Get MSI-X descriptor name (for debugging)
static void * pci_msix_ioremap (struct pci_device *pci, struct pci_msix *msix, unsigned int cfg)
 Map MSI-X BAR portion.
int pci_msix_enable (struct pci_device *pci, struct pci_msix *msix)
 Enable MSI-X interrupts.
void pci_msix_disable (struct pci_device *pci, struct pci_msix *msix)
 Disable MSI-X interrupts.
void pci_msix_map (struct pci_msix *msix, unsigned int vector, physaddr_t address, uint32_t data)
 Map MSI-X interrupt vector.
void pci_msix_control (struct pci_msix *msix, unsigned int vector, uint32_t mask)
 Control MSI-X interrupt vector.
void pci_msix_dump (struct pci_msix *msix, unsigned int vector)
 Dump MSI-X interrupt state (for debugging)

Detailed Description

PCI MSI-X interrupts.

Interrupts as such are not used in iPXE, which operates in polling mode. However, some network cards (such as the Intel 40GbE and 100GbE NICs) will defer writing out completions until the point of asserting an MSI-X interrupt.

From the point of view of the PCI device, asserting an MSI-X interrupt is just a 32-bit DMA write of an opaque value to an opaque target address. The PCI device has no way to know whether or not the target address corresponds to a real APIC.

We can therefore trick the PCI device into believing that it is asserting an MSI-X interrupt, by configuring it to write an opaque 32-bit value to a dummy target address in host memory. This is sufficient to trigger the associated write of the completions to host memory.

When running in a virtual machine, the hypervisor will intercept our attempt to configure MSI-X on the PCI device. The physical hardware will be configured to raise an interrupt under the hypervisor's control, which will then be reflected back into the virtual machine. The opaque value that we write will be assumed to indicate an interrupt vector number (as would normally be the case when configuring MSI-X), and the opaque address will generally be ignored. The reflected interrupt will be ignored (since it is not enabled within the virtual machine), but the device still asserts an MSI-X interrupt and so still triggers the associated write of the completions to host memory.

Note that since the opaque target address will generally be ignored by the hypervisor, we cannot examine the value present at the dummy target address to find out whether or not an interrupt has been raised.

Definition in file pcimsix.c.

Function Documentation

◆ FILE_LICENCE()

FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL )

◆ FILE_SECBOOT()

FILE_SECBOOT ( PERMITTED )

◆ pci_msix_name()

const char * pci_msix_name ( unsigned int cfg)
static

Get MSI-X descriptor name (for debugging)

Parameters
cfg: Configuration space offset
Return values
name: Descriptor name

Definition at line 77 of file pcimsix.c.

77 {
78
79 switch ( cfg ) {
80 case PCI_MSIX_DESC_TABLE: return "table";
81 case PCI_MSIX_DESC_PBA: return "PBA";
82 default: return "<UNKNOWN>";
83 }
84}
#define PCI_MSIX_DESC_TABLE
Definition pci.h:120
#define PCI_MSIX_DESC_PBA
Definition pci.h:121
@ cfg
Definition sis900.h:23

References cfg, PCI_MSIX_DESC_PBA, and PCI_MSIX_DESC_TABLE.

Referenced by pci_msix_ioremap().

◆ pci_msix_ioremap()

void * pci_msix_ioremap ( struct pci_device * pci,
struct pci_msix * msix,
unsigned int cfg )
static

Map MSI-X BAR portion.

Parameters
pci: PCI device
msix: MSI-X capability
cfg: Configuration space offset
Return values
io: I/O address

Definition at line 94 of file pcimsix.c.

95 {
97 unsigned int bar;
98 unsigned long start;
99 unsigned long offset;
100 unsigned long base;
101 void *io;
102
103 /* Read descriptor */
104 pci_read_config_dword ( pci, ( msix->cap + cfg ), &desc );
105
106 /* Get BAR */
107 bar = PCI_MSIX_DESC_BIR ( desc );
109 start = pci_bar_start ( pci, PCI_BASE_ADDRESS ( bar ) );
110 if ( ! start ) {
111 DBGC ( msix, "MSI-X %p %s could not find BAR%d\n",
112 msix, pci_msix_name ( cfg ), bar );
113 return NULL;
114 }
115 base = ( start + offset );
116 DBGC ( msix, "MSI-X %p %s at %#08lx (BAR%d+%#lx)\n",
117 msix, pci_msix_name ( cfg ), base, bar, offset );
118
119 /* Map BAR portion */
120 io = pci_ioremap ( pci, ( start + offset ), PCI_MSIX_LEN );
121 if ( ! io ) {
122 DBGC ( msix, "MSI-X %p %s could not map %#08lx\n",
123 msix, pci_msix_name ( cfg ), base );
124 return NULL;
125 }
126
127 return io;
128}
#define NULL
NULL pointer (VOID *)
Definition Base.h:322
unsigned int uint32_t
Definition stdint.h:12
uint16_t offset
Offset to command line.
Definition bzimage.h:3
struct ena_llq_option desc
Descriptor counts.
Definition ena.h:9
#define DBGC(...)
Definition compiler.h:505
uint32_t start
Starting offset.
Definition netvsc.h:1
int pci_read_config_dword(struct pci_device *pci, unsigned int where, uint32_t *value)
Read 32-bit dword from PCI configuration space.
void * pci_ioremap(struct pci_device *pci, unsigned long bus_addr, size_t len)
Map PCI bus address as an I/O address.
uint32_t base
Base.
Definition librm.h:3
unsigned long pci_bar_start(struct pci_device *pci, unsigned int reg)
Find the start of a PCI BAR.
Definition pci.c:97
#define PCI_BASE_ADDRESS(n)
PCI base address registers.
Definition pci.h:62
#define PCI_MSIX_DESC_BIR(x)
BAR index.
Definition pci.h:122
#define PCI_MSIX_DESC_OFFSET(x)
BAR offset.
Definition pci.h:123
static const char * pci_msix_name(unsigned int cfg)
Get MSI-X descriptor name (for debugging)
Definition pcimsix.c:77
#define PCI_MSIX_LEN
MSI-X BAR mapped length.
Definition pcimsix.h:16
unsigned int cap
Capability offset.
Definition pcimsix.h:37

References base, pci_msix::cap, cfg, DBGC, desc, NULL, offset, pci_bar_start(), PCI_BASE_ADDRESS, pci_ioremap(), PCI_MSIX_DESC_BIR, PCI_MSIX_DESC_OFFSET, PCI_MSIX_LEN, pci_msix_name(), pci_read_config_dword(), and start.

Referenced by pci_msix_enable().

◆ pci_msix_enable()

int pci_msix_enable ( struct pci_device * pci,
struct pci_msix * msix )

Enable MSI-X interrupts.

Parameters
pci: PCI device
msix: MSI-X capability
Return values
rc: Return status code

Definition at line 137 of file pcimsix.c.

137 {
140 unsigned int i;
141 int rc;
142
143 /* Locate capability */
144 msix->cap = pci_find_capability ( pci, PCI_CAP_ID_MSIX );
145 if ( ! msix->cap ) {
146 DBGC ( msix, "MSI-X %p found no MSI-X capability in "
147 PCI_FMT "\n", msix, PCI_ARGS ( pci ) );
148 rc = -ENOENT;
149 goto err_cap;
150 }
151
152 /* Extract interrupt count */
153 pci_read_config_word ( pci, ( msix->cap + PCI_MSIX_CTRL ), &ctrl );
154 msix->count = ( PCI_MSIX_CTRL_SIZE ( ctrl ) + 1 );
155 DBGC ( msix, "MSI-X %p has %d vectors for " PCI_FMT "\n",
156 msix, msix->count, PCI_ARGS ( pci ) );
157
158 /* Map MSI-X table */
159 msix->table = pci_msix_ioremap ( pci, msix, PCI_MSIX_DESC_TABLE );
160 if ( ! msix->table ) {
161 rc = -ENOENT;
162 goto err_table;
163 }
164
165 /* Map pending bit array */
166 msix->pba = pci_msix_ioremap ( pci, msix, PCI_MSIX_DESC_PBA );
167 if ( ! msix->pba ) {
168 rc = -ENOENT;
169 goto err_pba;
170 }
171
172 /* Allocate dummy target */
173 msix->msg = dma_alloc ( &pci->dma, &msix->map, sizeof ( *msix->msg ),
174 sizeof ( *msix->msg ) );
175 if ( ! msix->msg ) {
176 rc = -ENOMEM;
177 goto err_msg;
178 }
179
180 /* Map all interrupts to dummy target by default */
181 msg = dma ( &msix->map, msix->msg );
182 for ( i = 0 ; i < msix->count ; i++ )
183 pci_msix_map ( msix, i, msg, 0 );
184
185 /* Enable MSI-X */
188 pci_write_config_word ( pci, ( msix->cap + PCI_MSIX_CTRL ), ctrl );
189
190 return 0;
191
192 dma_free ( &msix->map, msix->msg, sizeof ( *msix->msg ) );
193 err_msg:
194 iounmap ( msix->pba );
195 err_pba:
196 iounmap ( msix->table );
197 err_table:
198 err_cap:
199 return rc;
200}
struct arbelprm_rc_send_wqe rc
Definition arbel.h:3
unsigned short uint16_t
Definition stdint.h:11
unsigned long physaddr_t
Definition stdint.h:20
uint8_t ctrl
Ring control.
Definition dwmac.h:7
#define ENOENT
No such file or directory.
Definition errno.h:515
#define ENOMEM
Not enough space.
Definition errno.h:535
void iounmap(volatile const void *io_addr)
Unmap I/O address.
int pci_read_config_word(struct pci_device *pci, unsigned int where, uint16_t *value)
Read 16-bit word from PCI configuration space.
int pci_write_config_word(struct pci_device *pci, unsigned int where, uint16_t value)
Write 16-bit word to PCI configuration space.
void dma_free(struct dma_mapping *map, void *addr, size_t len)
Unmap and free DMA-coherent buffer.
void * dma_alloc(struct dma_device *dma, struct dma_mapping *map, size_t len, size_t align)
Allocate and map DMA-coherent buffer.
physaddr_t dma(struct dma_mapping *map, void *addr)
Get DMA address from virtual address.
void msg(unsigned int row, const char *fmt,...)
Print message centred on specified row.
Definition message.c:62
#define PCI_MSIX_CTRL
MSI-X interrupts.
Definition pci.h:116
#define PCI_FMT
PCI device debug message format.
Definition pci.h:312
#define PCI_MSIX_CTRL_ENABLE
Enable MSI-X.
Definition pci.h:117
#define PCI_CAP_ID_MSIX
MSI-X.
Definition pci.h:99
#define PCI_ARGS(pci)
PCI device debug message arguments.
Definition pci.h:315
#define PCI_MSIX_CTRL_SIZE(x)
Table size.
Definition pci.h:119
#define PCI_MSIX_CTRL_MASK
Mask all interrupts.
Definition pci.h:118
int pci_find_capability(struct pci_device *pci, int cap)
Look for a PCI capability.
Definition pciextra.c:39
static void * pci_msix_ioremap(struct pci_device *pci, struct pci_msix *msix, unsigned int cfg)
Map MSI-X BAR portion.
Definition pcimsix.c:94
void pci_msix_map(struct pci_msix *msix, unsigned int vector, physaddr_t address, uint32_t data)
Map MSI-X interrupt vector.
Definition pcimsix.c:234
struct dma_device dma
DMA device.
Definition pci.h:215
void * pba
Pending bit array.
Definition pcimsix.h:43
uint32_t * msg
Dummy message target.
Definition pcimsix.h:45
struct dma_mapping map
Dummy message target mapping.
Definition pcimsix.h:47
unsigned int count
Number of vectors.
Definition pcimsix.h:39
void * table
MSI-X table.
Definition pcimsix.h:41

References pci_msix::cap, pci_msix::count, ctrl, DBGC, dma(), pci_device::dma, dma_alloc(), dma_free(), ENOENT, ENOMEM, iounmap(), pci_msix::map, msg(), pci_msix::msg, pci_msix::pba, PCI_ARGS, PCI_CAP_ID_MSIX, pci_find_capability(), PCI_FMT, PCI_MSIX_CTRL, PCI_MSIX_CTRL_ENABLE, PCI_MSIX_CTRL_MASK, PCI_MSIX_CTRL_SIZE, PCI_MSIX_DESC_PBA, PCI_MSIX_DESC_TABLE, pci_msix_ioremap(), pci_msix_map(), pci_read_config_word(), pci_write_config_word(), rc, and pci_msix::table.

Referenced by gve_probe(), and intelxl_msix_enable().

◆ pci_msix_disable()

void pci_msix_disable ( struct pci_device * pci,
struct pci_msix * msix )

Disable MSI-X interrupts.

Parameters
pci: PCI device
msix: MSI-X capability

Definition at line 208 of file pcimsix.c.

208 {
210
211 /* Disable MSI-X */
212 pci_read_config_word ( pci, ( msix->cap + PCI_MSIX_CTRL ), &ctrl );
214 pci_write_config_word ( pci, ( msix->cap + PCI_MSIX_CTRL ), ctrl );
215
216 /* Free dummy target */
217 dma_free ( &msix->map, msix->msg, sizeof ( *msix->msg ) );
218
219 /* Unmap pending bit array */
220 iounmap ( msix->pba );
221
222 /* Unmap MSI-X table */
223 iounmap ( msix->table );
224}

References pci_msix::cap, ctrl, dma_free(), iounmap(), pci_msix::map, pci_msix::msg, pci_msix::pba, PCI_MSIX_CTRL, PCI_MSIX_CTRL_ENABLE, pci_read_config_word(), pci_write_config_word(), and pci_msix::table.

Referenced by gve_probe(), gve_remove(), intelxl_msix_disable(), and intelxl_msix_enable().

◆ pci_msix_map()

void pci_msix_map ( struct pci_msix * msix,
unsigned int vector,
physaddr_t address,
uint32_t data )

Map MSI-X interrupt vector.

Parameters
msix: MSI-X capability
vector: MSI-X vector
address: Message address
data: Message data

Definition at line 234 of file pcimsix.c.

235 {
236 void *base;
237
238 /* Sanity check */
240
241 /* Map interrupt vector */
242 base = ( msix->table + PCI_MSIX_VECTOR ( vector ) );
243 writel ( ( address & 0xffffffffUL ), ( base + PCI_MSIX_ADDRESS_LO ) );
244 if ( sizeof ( address ) > sizeof ( uint32_t ) ) {
245 writel ( ( ( ( uint64_t ) address ) >> 32 ),
247 } else {
248 writel ( 0, ( base + PCI_MSIX_ADDRESS_HI ) );
249 }
250 writel ( data, ( base + PCI_MSIX_DATA ) );
251}
unsigned long long uint64_t
Definition stdint.h:13
#define assert(condition)
Assert a condition at run-time.
Definition assert.h:50
uint8_t data[48]
Additional event data.
Definition ena.h:11
uint32_t vector
MSI-X vector.
Definition ena.h:9
uint64_t address
Base address.
Definition ena.h:13
static unsigned int count
Number of entries.
Definition dwmac.h:220
#define PCI_MSIX_ADDRESS_LO
MSI-X vector address low 32 bits.
Definition pcimsix.h:22
#define PCI_MSIX_VECTOR(n)
MSI-X vector offset.
Definition pcimsix.h:19
#define PCI_MSIX_DATA
MSI-X vector data.
Definition pcimsix.h:28
#define PCI_MSIX_ADDRESS_HI
MSI-X vector address high 32 bits.
Definition pcimsix.h:25
#define writel
Definition w89c840.c:160

References address, assert, base, count, data, PCI_MSIX_ADDRESS_HI, PCI_MSIX_ADDRESS_LO, PCI_MSIX_DATA, PCI_MSIX_VECTOR, pci_msix::table, vector, and writel.

Referenced by pci_msix_enable().

◆ pci_msix_control()

void pci_msix_control ( struct pci_msix * msix,
unsigned int vector,
uint32_t mask )

Control MSI-X interrupt vector.

Parameters
msix: MSI-X capability
vector: MSI-X vector
mask: Control mask

Definition at line 260 of file pcimsix.c.

261 {
262 void *base;
264
265 /* Mask/unmask interrupt vector */
266 base = ( msix->table + PCI_MSIX_VECTOR ( vector ) );
269 ctrl |= mask;
270 writel ( ctrl, ( base + PCI_MSIX_CONTROL ) );
271}
#define PCI_MSIX_CONTROL
MSI-X vector control.
Definition pcimsix.h:31
#define PCI_MSIX_CONTROL_MASK
Vector is masked.
Definition pcimsix.h:32
#define readl
Definition w89c840.c:157

References base, ctrl, PCI_MSIX_CONTROL, PCI_MSIX_CONTROL_MASK, PCI_MSIX_VECTOR, readl, pci_msix::table, vector, and writel.

Referenced by pci_msix_mask(), and pci_msix_unmask().

◆ pci_msix_dump()

void pci_msix_dump ( struct pci_msix * msix,
unsigned int vector )

Dump MSI-X interrupt state (for debugging)

Parameters
msix: MSI-X capability
vector: MSI-X vector

Definition at line 279 of file pcimsix.c.

279 {
280 void *base;
281 uint32_t address_hi;
282 uint32_t address_lo;
286 uint32_t pba;
287
288 /* Do nothing in non-debug builds */
289 if ( ! DBG_LOG )
290 return;
291
292 /* Read interrupt vector state */
293 base = ( msix->table + PCI_MSIX_VECTOR ( vector ) );
294 address_hi = readl ( base + PCI_MSIX_ADDRESS_HI );
295 address_lo = readl ( base + PCI_MSIX_ADDRESS_LO );
298 pba = readl ( msix->pba );
299 address = ( ( ( ( uint64_t ) address_hi ) << 32 ) | address_lo );
300 DBGC ( msix, "MSI-X %p vector %d %#08x => %#08lx%s%s\n",
301 msix, vector, data, address,
302 ( ( ctrl & PCI_MSIX_CONTROL_MASK ) ? " (masked)" : "" ),
303 ( ( pba & ( 1 << vector ) ) ? " (pending)" : "" ) );
304}
#define DBG_LOG
Definition compiler.h:317

References address, base, ctrl, data, DBG_LOG, DBGC, pci_msix::pba, PCI_MSIX_ADDRESS_HI, PCI_MSIX_ADDRESS_LO, PCI_MSIX_CONTROL, PCI_MSIX_CONTROL_MASK, PCI_MSIX_DATA, PCI_MSIX_VECTOR, readl, pci_msix::table, and vector.