iPXE
Functions
pcimsix.c File Reference

PCI MSI-X interrupts. More...

#include <stdint.h>
#include <errno.h>
#include <assert.h>
#include <ipxe/pci.h>
#include <ipxe/pcimsix.h>

Go to the source code of this file.

Functions

 FILE_LICENCE (GPL2_OR_LATER_OR_UBDL)
 
static const char * pci_msix_name (unsigned int cfg)
 Get MSI-X descriptor name (for debugging) More...
 
static void * pci_msix_ioremap (struct pci_device *pci, struct pci_msix *msix, unsigned int cfg)
 Map MSI-X BAR portion. More...
 
int pci_msix_enable (struct pci_device *pci, struct pci_msix *msix)
 Enable MSI-X interrupts. More...
 
void pci_msix_disable (struct pci_device *pci, struct pci_msix *msix)
 Disable MSI-X interrupts. More...
 
void pci_msix_map (struct pci_msix *msix, unsigned int vector, physaddr_t address, uint32_t data)
 Map MSI-X interrupt vector. More...
 
void pci_msix_control (struct pci_msix *msix, unsigned int vector, uint32_t mask)
 Control MSI-X interrupt vector. More...
 
void pci_msix_dump (struct pci_msix *msix, unsigned int vector)
 Dump MSI-X interrupt state (for debugging) More...
 

Detailed Description

PCI MSI-X interrupts.

Interrupts as such are not used in iPXE, which operates in polling mode. However, some network cards (such as the Intel 40GbE and 100GbE NICs) will defer writing out completions until the point of asserting an MSI-X interrupt.

From the point of view of the PCI device, asserting an MSI-X interrupt is just a 32-bit DMA write of an opaque value to an opaque target address. The PCI device has no way to know whether or not the target address corresponds to a real APIC.

We can therefore trick the PCI device into believing that it is asserting an MSI-X interrupt, by configuring it to write an opaque 32-bit value to a dummy target address in host memory. This is sufficient to trigger the associated write of the completions to host memory.

When running in a virtual machine, the hypervisor will intercept our attempt to configure MSI-X on the PCI device. The physical hardware will be configured to raise an interrupt under the hypervisor's control, which will then be reflected back into the virtual machine. The opaque value that we write will be assumed to indicate an interrupt vector number (as would normally be the case when configuring MSI-X), and the opaque address will generally be ignored. The reflected interrupt will be ignored (since it is not enabled within the virtual machine), but the device still asserts an MSI-X interrupt and so still triggers the associated write of the completions to host memory.

Note that since the opaque target address will generally be ignored by the hypervisor, we cannot examine the value present at the dummy target address to find out whether or not an interrupt has been raised.

Definition in file pcimsix.c.

Function Documentation

◆ FILE_LICENCE()

FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL  )

◆ pci_msix_name()

static const char* pci_msix_name ( unsigned int  cfg)
static

Get MSI-X descriptor name (for debugging)

Parameters
cfg: Configuration space offset
Return values
name: Descriptor name

Definition at line 76 of file pcimsix.c.

76  {
77 
78  switch ( cfg ) {
79  case PCI_MSIX_DESC_TABLE: return "table";
80  case PCI_MSIX_DESC_PBA: return "PBA";
81  default: return "<UNKNOWN>";
82  }
83 }
Definition: sis900.h:23
#define PCI_MSIX_DESC_PBA
Definition: pci.h:120
#define PCI_MSIX_DESC_TABLE
Definition: pci.h:119

References cfg, PCI_MSIX_DESC_PBA, and PCI_MSIX_DESC_TABLE.

Referenced by pci_msix_ioremap().

◆ pci_msix_ioremap()

static void* pci_msix_ioremap ( struct pci_device * pci,
struct pci_msix * msix,
unsigned int  cfg 
)
static

Map MSI-X BAR portion.

Parameters
pci: PCI device
msix: MSI-X capability
cfg: Configuration space offset
Return values
io: I/O address

Definition at line 93 of file pcimsix.c.

94  {
95  uint32_t desc;
96  unsigned int bar;
97  unsigned long start;
98  unsigned long offset;
99  unsigned long base;
100  void *io;
101 
102  /* Read descriptor */
103  pci_read_config_dword ( pci, ( msix->cap + cfg ), &desc );
104 
105  /* Get BAR */
106  bar = PCI_MSIX_DESC_BIR ( desc );
108  start = pci_bar_start ( pci, PCI_BASE_ADDRESS ( bar ) );
109  if ( ! start ) {
110  DBGC ( msix, "MSI-X %p %s could not find BAR%d\n",
111  msix, pci_msix_name ( cfg ), bar );
112  return NULL;
113  }
114  base = ( start + offset );
115  DBGC ( msix, "MSI-X %p %s at %#08lx (BAR%d+%#lx)\n",
116  msix, pci_msix_name ( cfg ), base, bar, offset );
117 
118  /* Map BAR portion */
119  io = pci_ioremap ( pci, ( start + offset ), PCI_MSIX_LEN );
120  if ( ! io ) {
121  DBGC ( msix, "MSI-X %p %s could not map %#08lx\n",
122  msix, pci_msix_name ( cfg ), base );
123  return NULL;
124  }
125 
126  return io;
127 }
uint32_t base
Base.
Definition: librm.h:138
#define DBGC(...)
Definition: compiler.h:505
#define PCI_MSIX_DESC_BIR(x)
BAR index.
Definition: pci.h:121
#define PCI_BASE_ADDRESS(n)
PCI base address registers.
Definition: pci.h:61
uint32_t start
Starting offset.
Definition: netvsc.h:12
struct ena_llq_option desc
Descriptor counts.
Definition: ena.h:20
int pci_read_config_dword(struct pci_device *pci, unsigned int where, uint32_t *value)
Read 32-bit dword from PCI configuration space.
unsigned long pci_bar_start(struct pci_device *pci, unsigned int reg)
Find the start of a PCI BAR.
Definition: pci.c:96
unsigned int cap
Capability offset.
Definition: pcimsix.h:36
unsigned int uint32_t
Definition: stdint.h:12
#define PCI_MSIX_LEN
MSI-X BAR mapped length.
Definition: pcimsix.h:15
Definition: sis900.h:23
#define PCI_MSIX_DESC_OFFSET(x)
BAR offset.
Definition: pci.h:122
uint16_t offset
Offset to command line.
Definition: bzimage.h:8
void * pci_ioremap(struct pci_device *pci, unsigned long bus_addr, size_t len)
Map PCI bus address as an I/O address.
static const char * pci_msix_name(unsigned int cfg)
Get MSI-X descriptor name (for debugging)
Definition: pcimsix.c:76
#define NULL
NULL pointer (VOID *)
Definition: Base.h:321

References base, pci_msix::cap, cfg, DBGC, desc, NULL, offset, pci_bar_start(), PCI_BASE_ADDRESS, pci_ioremap(), PCI_MSIX_DESC_BIR, PCI_MSIX_DESC_OFFSET, PCI_MSIX_LEN, pci_msix_name(), pci_read_config_dword(), and start.

Referenced by pci_msix_enable().

◆ pci_msix_enable()

int pci_msix_enable ( struct pci_device * pci,
struct pci_msix * msix 
)

Enable MSI-X interrupts.

Parameters
pci: PCI device
msix: MSI-X capability
Return values
rc: Return status code

Definition at line 136 of file pcimsix.c.

136  {
137  uint16_t ctrl;
138  physaddr_t msg;
139  unsigned int i;
140  int rc;
141 
142  /* Locate capability */
143  msix->cap = pci_find_capability ( pci, PCI_CAP_ID_MSIX );
144  if ( ! msix->cap ) {
145  DBGC ( msix, "MSI-X %p found no MSI-X capability in "
146  PCI_FMT "\n", msix, PCI_ARGS ( pci ) );
147  rc = -ENOENT;
148  goto err_cap;
149  }
150 
151  /* Extract interrupt count */
152  pci_read_config_word ( pci, ( msix->cap + PCI_MSIX_CTRL ), &ctrl );
153  msix->count = ( PCI_MSIX_CTRL_SIZE ( ctrl ) + 1 );
154  DBGC ( msix, "MSI-X %p has %d vectors for " PCI_FMT "\n",
155  msix, msix->count, PCI_ARGS ( pci ) );
156 
157  /* Map MSI-X table */
158  msix->table = pci_msix_ioremap ( pci, msix, PCI_MSIX_DESC_TABLE );
159  if ( ! msix->table ) {
160  rc = -ENOENT;
161  goto err_table;
162  }
163 
164  /* Map pending bit array */
165  msix->pba = pci_msix_ioremap ( pci, msix, PCI_MSIX_DESC_PBA );
166  if ( ! msix->pba ) {
167  rc = -ENOENT;
168  goto err_pba;
169  }
170 
171  /* Allocate dummy target */
172  msix->msg = dma_alloc ( &pci->dma, &msix->map, sizeof ( *msix->msg ),
173  sizeof ( *msix->msg ) );
174  if ( ! msix->msg ) {
175  rc = -ENOMEM;
176  goto err_msg;
177  }
178 
179  /* Map all interrupts to dummy target by default */
180  msg = dma ( &msix->map, msix->msg );
181  for ( i = 0 ; i < msix->count ; i++ )
182  pci_msix_map ( msix, i, msg, 0 );
183 
184  /* Enable MSI-X */
187  pci_write_config_word ( pci, ( msix->cap + PCI_MSIX_CTRL ), ctrl );
188 
189  return 0;
190 
191  dma_free ( &msix->map, msix->msg, sizeof ( *msix->msg ) );
192  err_msg:
193  iounmap ( msix->pba );
194  err_pba:
195  iounmap ( msix->table );
196  err_table:
197  err_cap:
198  return rc;
199 }
struct arbelprm_rc_send_wqe rc
Definition: arbel.h:14
unsigned short uint16_t
Definition: stdint.h:11
struct dma_device dma
DMA device.
Definition: pci.h:214
void msg(unsigned int row, const char *fmt,...)
Print message centred on specified row.
Definition: message.c:61
int pci_find_capability(struct pci_device *pci, int cap)
Look for a PCI capability.
Definition: pciextra.c:38
int pci_write_config_word(struct pci_device *pci, unsigned int where, uint16_t value)
Write 16-bit word to PCI configuration space.
#define DBGC(...)
Definition: compiler.h:505
#define ENOENT
No such file or directory.
Definition: errno.h:514
int pci_read_config_word(struct pci_device *pci, unsigned int where, uint16_t *value)
Read 16-bit word from PCI configuration space.
void dma_free(struct dma_mapping *map, void *addr, size_t len)
Unmap and free DMA-coherent buffer.
#define ENOMEM
Not enough space.
Definition: errno.h:534
#define PCI_MSIX_CTRL
MSI-X interrupts.
Definition: pci.h:115
unsigned int cap
Capability offset.
Definition: pcimsix.h:36
uint32_t * msg
Dummy message target.
Definition: pcimsix.h:44
#define PCI_MSIX_CTRL_SIZE(x)
Table size.
Definition: pci.h:118
#define PCI_MSIX_CTRL_MASK
Mask all interrupts.
Definition: pci.h:117
#define PCI_FMT
PCI device debug message format.
Definition: pci.h:311
void * table
MSI-X table.
Definition: pcimsix.h:40
void * dma_alloc(struct dma_device *dma, struct dma_mapping *map, size_t len, size_t align)
Allocate and map DMA-coherent buffer.
unsigned long physaddr_t
Definition: stdint.h:20
static void * pci_msix_ioremap(struct pci_device *pci, struct pci_msix *msix, unsigned int cfg)
Map MSI-X BAR portion.
Definition: pcimsix.c:93
#define PCI_CAP_ID_MSIX
MSI-X.
Definition: pci.h:98
#define PCI_ARGS(pci)
PCI device debug message arguments.
Definition: pci.h:314
uint8_t ctrl
Ring control.
Definition: dwmac.h:18
void iounmap(volatile const void *io_addr)
Unmap I/O address.
#define PCI_MSIX_CTRL_ENABLE
Enable MSI-X.
Definition: pci.h:116
void pci_msix_map(struct pci_msix *msix, unsigned int vector, physaddr_t address, uint32_t data)
Map MSI-X interrupt vector.
Definition: pcimsix.c:233
#define PCI_MSIX_DESC_PBA
Definition: pci.h:120
struct dma_mapping map
Dummy message target mapping.
Definition: pcimsix.h:46
void * pba
Pending bit array.
Definition: pcimsix.h:42
physaddr_t dma(struct dma_mapping *map, void *addr)
Get DMA address from virtual address.
unsigned int count
Number of vectors.
Definition: pcimsix.h:38
#define PCI_MSIX_DESC_TABLE
Definition: pci.h:119

References pci_msix::cap, pci_msix::count, ctrl, DBGC, pci_device::dma, dma(), dma_alloc(), dma_free(), ENOENT, ENOMEM, iounmap(), pci_msix::map, pci_msix::msg, msg(), pci_msix::pba, PCI_ARGS, PCI_CAP_ID_MSIX, pci_find_capability(), PCI_FMT, PCI_MSIX_CTRL, PCI_MSIX_CTRL_ENABLE, PCI_MSIX_CTRL_MASK, PCI_MSIX_CTRL_SIZE, PCI_MSIX_DESC_PBA, PCI_MSIX_DESC_TABLE, pci_msix_ioremap(), pci_msix_map(), pci_read_config_word(), pci_write_config_word(), rc, and pci_msix::table.

Referenced by gve_probe(), and intelxl_msix_enable().

◆ pci_msix_disable()

void pci_msix_disable ( struct pci_device * pci,
struct pci_msix * msix 
)

Disable MSI-X interrupts.

Parameters
pci: PCI device
msix: MSI-X capability

Definition at line 207 of file pcimsix.c.

207  {
208  uint16_t ctrl;
209 
210  /* Disable MSI-X */
211  pci_read_config_word ( pci, ( msix->cap + PCI_MSIX_CTRL ), &ctrl );
213  pci_write_config_word ( pci, ( msix->cap + PCI_MSIX_CTRL ), ctrl );
214 
215  /* Free dummy target */
216  dma_free ( &msix->map, msix->msg, sizeof ( *msix->msg ) );
217 
218  /* Unmap pending bit array */
219  iounmap ( msix->pba );
220 
221  /* Unmap MSI-X table */
222  iounmap ( msix->table );
223 }
unsigned short uint16_t
Definition: stdint.h:11
int pci_write_config_word(struct pci_device *pci, unsigned int where, uint16_t value)
Write 16-bit word to PCI configuration space.
int pci_read_config_word(struct pci_device *pci, unsigned int where, uint16_t *value)
Read 16-bit word from PCI configuration space.
void dma_free(struct dma_mapping *map, void *addr, size_t len)
Unmap and free DMA-coherent buffer.
#define PCI_MSIX_CTRL
MSI-X interrupts.
Definition: pci.h:115
unsigned int cap
Capability offset.
Definition: pcimsix.h:36
uint32_t * msg
Dummy message target.
Definition: pcimsix.h:44
void * table
MSI-X table.
Definition: pcimsix.h:40
uint8_t ctrl
Ring control.
Definition: dwmac.h:18
void iounmap(volatile const void *io_addr)
Unmap I/O address.
#define PCI_MSIX_CTRL_ENABLE
Enable MSI-X.
Definition: pci.h:116
struct dma_mapping map
Dummy message target mapping.
Definition: pcimsix.h:46
void * pba
Pending bit array.
Definition: pcimsix.h:42

References pci_msix::cap, ctrl, dma_free(), iounmap(), pci_msix::map, pci_msix::msg, pci_msix::pba, PCI_MSIX_CTRL, PCI_MSIX_CTRL_ENABLE, pci_read_config_word(), pci_write_config_word(), and pci_msix::table.

Referenced by gve_probe(), gve_remove(), intelxl_msix_disable(), and intelxl_msix_enable().

◆ pci_msix_map()

void pci_msix_map ( struct pci_msix * msix,
unsigned int  vector,
physaddr_t  address,
uint32_t  data 
)

Map MSI-X interrupt vector.

Parameters
msix: MSI-X capability
vector: MSI-X vector
address: Message address
data: Message data

Definition at line 233 of file pcimsix.c.

234  {
235  void *base;
236 
237  /* Sanity check */
238  assert ( vector < msix->count );
239 
240  /* Map interrupt vector */
241  base = ( msix->table + PCI_MSIX_VECTOR ( vector ) );
242  writel ( ( address & 0xffffffffUL ), ( base + PCI_MSIX_ADDRESS_LO ) );
243  if ( sizeof ( address ) > sizeof ( uint32_t ) ) {
244  writel ( ( ( ( uint64_t ) address ) >> 32 ),
245  ( base + PCI_MSIX_ADDRESS_HI ) );
246  } else {
247  writel ( 0, ( base + PCI_MSIX_ADDRESS_HI ) );
248  }
249  writel ( data, ( base + PCI_MSIX_DATA ) );
250 }
uint32_t base
Base.
Definition: librm.h:138
uint32_t vector
MSI-X vector.
Definition: ena.h:20
uint64_t address
Base address.
Definition: ena.h:24
unsigned long long uint64_t
Definition: stdint.h:13
assert((readw(&hdr->flags) &(GTF_reading|GTF_writing))==0)
void writel(uint32_t data, volatile uint32_t *io_addr)
Write 32-bit dword to memory-mapped device.
static unsigned int count
Number of entries.
Definition: dwmac.h:225
#define PCI_MSIX_VECTOR(n)
MSI-X vector offset.
Definition: pcimsix.h:18
void * table
MSI-X table.
Definition: pcimsix.h:40
unsigned int uint32_t
Definition: stdint.h:12
#define PCI_MSIX_ADDRESS_LO
MSI-X vector address low 32 bits.
Definition: pcimsix.h:21
#define PCI_MSIX_DATA
MSI-X vector data.
Definition: pcimsix.h:27
uint8_t data[48]
Additional event data.
Definition: ena.h:22
#define PCI_MSIX_ADDRESS_HI
MSI-X vector address high 32 bits.
Definition: pcimsix.h:24

References address, assert(), base, count, data, PCI_MSIX_ADDRESS_HI, PCI_MSIX_ADDRESS_LO, PCI_MSIX_DATA, PCI_MSIX_VECTOR, pci_msix::table, vector, and writel().

Referenced by pci_msix_enable().

◆ pci_msix_control()

void pci_msix_control ( struct pci_msix * msix,
unsigned int  vector,
uint32_t  mask 
)

Control MSI-X interrupt vector.

Parameters
msix: MSI-X capability
vector: MSI-X vector
mask: Control mask

Definition at line 259 of file pcimsix.c.

260  {
261  void *base;
262  uint32_t ctrl;
263 
264  /* Mask/unmask interrupt vector */
265  base = ( msix->table + PCI_MSIX_VECTOR ( vector ) );
268  ctrl |= mask;
269  writel ( ctrl, ( base + PCI_MSIX_CONTROL ) );
270 }
uint32_t base
Base.
Definition: librm.h:138
uint32_t vector
MSI-X vector.
Definition: ena.h:20
uint32_t readl(volatile uint32_t *io_addr)
Read 32-bit dword from memory-mapped device.
#define PCI_MSIX_CONTROL
MSI-X vector control.
Definition: pcimsix.h:30
void writel(uint32_t data, volatile uint32_t *io_addr)
Write 32-bit dword to memory-mapped device.
#define PCI_MSIX_VECTOR(n)
MSI-X vector offset.
Definition: pcimsix.h:18
void * table
MSI-X table.
Definition: pcimsix.h:40
unsigned int uint32_t
Definition: stdint.h:12
#define PCI_MSIX_CONTROL_MASK
Vector is masked.
Definition: pcimsix.h:31
uint8_t ctrl
Ring control.
Definition: dwmac.h:18

References base, ctrl, PCI_MSIX_CONTROL, PCI_MSIX_CONTROL_MASK, PCI_MSIX_VECTOR, readl(), pci_msix::table, vector, and writel().

Referenced by pci_msix_mask(), and pci_msix_unmask().

◆ pci_msix_dump()

void pci_msix_dump ( struct pci_msix * msix,
unsigned int  vector 
)

Dump MSI-X interrupt state (for debugging)

Parameters
msix: MSI-X capability
vector: MSI-X vector

Definition at line 278 of file pcimsix.c.

278  {
279  void *base;
280  uint32_t address_hi;
281  uint32_t address_lo;
283  uint32_t data;
284  uint32_t ctrl;
285  uint32_t pba;
286 
287  /* Do nothing in non-debug builds */
288  if ( ! DBG_LOG )
289  return;
290 
291  /* Mask/unmask interrupt vector */
292  base = ( msix->table + PCI_MSIX_VECTOR ( vector ) );
293  address_hi = readl ( base + PCI_MSIX_ADDRESS_HI );
294  address_lo = readl ( base + PCI_MSIX_ADDRESS_LO );
295  data = readl ( base + PCI_MSIX_DATA );
297  pba = readl ( msix->pba );
298  address = ( ( ( ( uint64_t ) address_hi ) << 32 ) | address_lo );
299  DBGC ( msix, "MSI-X %p vector %d %#08x => %#08lx%s%s\n",
300  msix, vector, data, address,
301  ( ( ctrl & PCI_MSIX_CONTROL_MASK ) ? " (masked)" : "" ),
302  ( ( pba & ( 1 << vector ) ) ? " (pending)" : "" ) );
303 }
uint32_t base
Base.
Definition: librm.h:138
uint32_t vector
MSI-X vector.
Definition: ena.h:20
uint64_t address
Base address.
Definition: ena.h:24
uint32_t readl(volatile uint32_t *io_addr)
Read 32-bit dword from memory-mapped device.
#define DBGC(...)
Definition: compiler.h:505
unsigned long long uint64_t
Definition: stdint.h:13
#define PCI_MSIX_CONTROL
MSI-X vector control.
Definition: pcimsix.h:30
#define PCI_MSIX_VECTOR(n)
MSI-X vector offset.
Definition: pcimsix.h:18
void * table
MSI-X table.
Definition: pcimsix.h:40
unsigned int uint32_t
Definition: stdint.h:12
#define PCI_MSIX_ADDRESS_LO
MSI-X vector address low 32 bits.
Definition: pcimsix.h:21
unsigned long physaddr_t
Definition: stdint.h:20
#define PCI_MSIX_DATA
MSI-X vector data.
Definition: pcimsix.h:27
#define PCI_MSIX_CONTROL_MASK
Vector is masked.
Definition: pcimsix.h:31
uint8_t ctrl
Ring control.
Definition: dwmac.h:18
uint8_t data[48]
Additional event data.
Definition: ena.h:22
#define DBG_LOG
Definition: compiler.h:317
#define PCI_MSIX_ADDRESS_HI
MSI-X vector address high 32 bits.
Definition: pcimsix.h:24
void * pba
Pending bit array.
Definition: pcimsix.h:42

References address, base, ctrl, data, DBG_LOG, DBGC, pci_msix::pba, PCI_MSIX_ADDRESS_HI, PCI_MSIX_ADDRESS_LO, PCI_MSIX_CONTROL, PCI_MSIX_CONTROL_MASK, PCI_MSIX_DATA, PCI_MSIX_VECTOR, readl(), pci_msix::table, and vector.