iPXE
pcimsix.c
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2019 Michael Brown <mbrown@fensystems.co.uk>.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License as
6  * published by the Free Software Foundation; either version 2 of the
7  * License, or (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful, but
10  * WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12  * General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17  * 02110-1301, USA.
18  *
19  * You can also choose to distribute this program under the terms of
20  * the Unmodified Binary Distribution Licence (as given in the file
21  * COPYING.UBDL), provided that you have satisfied its requirements.
22  */
23 
24 FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
25 FILE_SECBOOT ( PERMITTED );
26 
27 #include <stdint.h>
28 #include <errno.h>
29 #include <assert.h>
30 #include <ipxe/pci.h>
31 #include <ipxe/pcimsix.h>
32 
33 /** @file
34  *
35  * PCI MSI-X interrupts
36  *
37  * Interrupts as such are not used in iPXE, which operates in polling
38  * mode. However, some network cards (such as the Intel 40GbE and
39  * 100GbE NICs) will defer writing out completions until the point of
40  * asserting an MSI-X interrupt.
41  *
42  * From the point of view of the PCI device, asserting an MSI-X
43  * interrupt is just a 32-bit DMA write of an opaque value to an
44  * opaque target address. The PCI device has no way to know whether
45  * or not the target address corresponds to a real APIC.
46  *
47  * We can therefore trick the PCI device into believing that it is
48  * asserting an MSI-X interrupt, by configuring it to write an opaque
49  * 32-bit value to a dummy target address in host memory. This is
50  * sufficient to trigger the associated write of the completions to
51  * host memory.
52  *
53  * When running in a virtual machine, the hypervisor will intercept
54  * our attempt to configure MSI-X on the PCI device. The physical
55  * hardware will be configured to raise an interrupt under the
56  * hypervisor's control, which will then be reflected back into the
57  * virtual machine. The opaque value that we write will be assumed to
58  * indicate an interrupt vector number (as would normally be the case
59  * when configuring MSI-X), and the opaque address will generally be
60  * ignored. The reflected interrupt will be ignored (since it is not
61  * enabled within the virtual machine), but the device still asserts
62  * an MSI-X interrupt and so still triggers the associated write of
63  * the completions to host memory.
64  *
65  * Note that since the opaque target address will generally be ignored
66  * by the hypervisor, we cannot examine the value present at the dummy
67  * target address to find out whether or not an interrupt has been
68  * raised.
69  */
70 
71 /**
72  * Get MSI-X descriptor name (for debugging)
73  *
74  * @v cfg Configuration space offset
75  * @ret name Descriptor name
76  */
77 static const char * pci_msix_name ( unsigned int cfg ) {
78 
79  switch ( cfg ) {
80  case PCI_MSIX_DESC_TABLE: return "table";
81  case PCI_MSIX_DESC_PBA: return "PBA";
82  default: return "<UNKNOWN>";
83  }
84 }
85 
86 /**
87  * Map MSI-X BAR portion
88  *
89  * @v pci PCI device
90  * @v msix MSI-X capability
91  * @v cfg Configuration space offset
92  * @ret io I/O address
93  */
94 static void * pci_msix_ioremap ( struct pci_device *pci, struct pci_msix *msix,
95  unsigned int cfg ) {
96  uint32_t desc;
97  unsigned int bar;
98  unsigned long start;
99  unsigned long offset;
100  unsigned long base;
101  void *io;
102 
103  /* Read descriptor */
104  pci_read_config_dword ( pci, ( msix->cap + cfg ), &desc );
105 
106  /* Get BAR */
107  bar = PCI_MSIX_DESC_BIR ( desc );
109  start = pci_bar_start ( pci, PCI_BASE_ADDRESS ( bar ) );
110  if ( ! start ) {
111  DBGC ( msix, "MSI-X %p %s could not find BAR%d\n",
112  msix, pci_msix_name ( cfg ), bar );
113  return NULL;
114  }
115  base = ( start + offset );
116  DBGC ( msix, "MSI-X %p %s at %#08lx (BAR%d+%#lx)\n",
117  msix, pci_msix_name ( cfg ), base, bar, offset );
118 
119  /* Map BAR portion */
120  io = pci_ioremap ( pci, ( start + offset ), PCI_MSIX_LEN );
121  if ( ! io ) {
122  DBGC ( msix, "MSI-X %p %s could not map %#08lx\n",
123  msix, pci_msix_name ( cfg ), base );
124  return NULL;
125  }
126 
127  return io;
128 }
129 
130 /**
131  * Enable MSI-X interrupts
132  *
133  * @v pci PCI device
134  * @v msix MSI-X capability
135  * @ret rc Return status code
136  */
137 int pci_msix_enable ( struct pci_device *pci, struct pci_msix *msix ) {
138  uint16_t ctrl;
139  physaddr_t msg;
140  unsigned int i;
141  int rc;
142 
143  /* Locate capability */
144  msix->cap = pci_find_capability ( pci, PCI_CAP_ID_MSIX );
145  if ( ! msix->cap ) {
146  DBGC ( msix, "MSI-X %p found no MSI-X capability in "
147  PCI_FMT "\n", msix, PCI_ARGS ( pci ) );
148  rc = -ENOENT;
149  goto err_cap;
150  }
151 
152  /* Extract interrupt count */
153  pci_read_config_word ( pci, ( msix->cap + PCI_MSIX_CTRL ), &ctrl );
154  msix->count = ( PCI_MSIX_CTRL_SIZE ( ctrl ) + 1 );
155  DBGC ( msix, "MSI-X %p has %d vectors for " PCI_FMT "\n",
156  msix, msix->count, PCI_ARGS ( pci ) );
157 
158  /* Map MSI-X table */
159  msix->table = pci_msix_ioremap ( pci, msix, PCI_MSIX_DESC_TABLE );
160  if ( ! msix->table ) {
161  rc = -ENOENT;
162  goto err_table;
163  }
164 
165  /* Map pending bit array */
166  msix->pba = pci_msix_ioremap ( pci, msix, PCI_MSIX_DESC_PBA );
167  if ( ! msix->pba ) {
168  rc = -ENOENT;
169  goto err_pba;
170  }
171 
172  /* Allocate dummy target */
173  msix->msg = dma_alloc ( &pci->dma, &msix->map, sizeof ( *msix->msg ),
174  sizeof ( *msix->msg ) );
175  if ( ! msix->msg ) {
176  rc = -ENOMEM;
177  goto err_msg;
178  }
179 
180  /* Map all interrupts to dummy target by default */
181  msg = dma ( &msix->map, msix->msg );
182  for ( i = 0 ; i < msix->count ; i++ )
183  pci_msix_map ( msix, i, msg, 0 );
184 
185  /* Enable MSI-X */
188  pci_write_config_word ( pci, ( msix->cap + PCI_MSIX_CTRL ), ctrl );
189 
190  return 0;
191 
192  dma_free ( &msix->map, msix->msg, sizeof ( *msix->msg ) );
193  err_msg:
194  iounmap ( msix->pba );
195  err_pba:
196  iounmap ( msix->table );
197  err_table:
198  err_cap:
199  return rc;
200 }
201 
202 /**
203  * Disable MSI-X interrupts
204  *
205  * @v pci PCI device
206  * @v msix MSI-X capability
207  */
208 void pci_msix_disable ( struct pci_device *pci, struct pci_msix *msix ) {
209  uint16_t ctrl;
210 
211  /* Disable MSI-X */
212  pci_read_config_word ( pci, ( msix->cap + PCI_MSIX_CTRL ), &ctrl );
214  pci_write_config_word ( pci, ( msix->cap + PCI_MSIX_CTRL ), ctrl );
215 
216  /* Free dummy target */
217  dma_free ( &msix->map, msix->msg, sizeof ( *msix->msg ) );
218 
219  /* Unmap pending bit array */
220  iounmap ( msix->pba );
221 
222  /* Unmap MSI-X table */
223  iounmap ( msix->table );
224 }
225 
226 /**
227  * Map MSI-X interrupt vector
228  *
229  * @v msix MSI-X capability
230  * @v vector MSI-X vector
231  * @v address Message address
232  * @v data Message data
233  */
234 void pci_msix_map ( struct pci_msix *msix, unsigned int vector,
236  void *base;
237 
238  /* Sanity check */
239  assert ( vector < msix->count );
240 
241  /* Map interrupt vector */
242  base = ( msix->table + PCI_MSIX_VECTOR ( vector ) );
243  writel ( ( address & 0xffffffffUL ), ( base + PCI_MSIX_ADDRESS_LO ) );
244  if ( sizeof ( address ) > sizeof ( uint32_t ) ) {
245  writel ( ( ( ( uint64_t ) address ) >> 32 ),
246  ( base + PCI_MSIX_ADDRESS_HI ) );
247  } else {
248  writel ( 0, ( base + PCI_MSIX_ADDRESS_HI ) );
249  }
250  writel ( data, ( base + PCI_MSIX_DATA ) );
251 }
252 
253 /**
254  * Control MSI-X interrupt vector
255  *
256  * @v msix MSI-X capability
257  * @v vector MSI-X vector
258  * @v mask Control mask
259  */
260 void pci_msix_control ( struct pci_msix *msix, unsigned int vector,
261  uint32_t mask ) {
262  void *base;
263  uint32_t ctrl;
264 
265  /* Mask/unmask interrupt vector */
266  base = ( msix->table + PCI_MSIX_VECTOR ( vector ) );
269  ctrl |= mask;
270  writel ( ctrl, ( base + PCI_MSIX_CONTROL ) );
271 }
272 
273 /**
274  * Dump MSI-X interrupt state (for debugging)
275  *
276  * @v msix MSI-X capability
277  * @v vector MSI-X vector
278  */
279 void pci_msix_dump ( struct pci_msix *msix, unsigned int vector ) {
280  void *base;
281  uint32_t address_hi;
282  uint32_t address_lo;
284  uint32_t data;
285  uint32_t ctrl;
286  uint32_t pba;
287 
288  /* Do nothing in non-debug builds */
289  if ( ! DBG_LOG )
290  return;
291 
292  /* Mask/unmask interrupt vector */
293  base = ( msix->table + PCI_MSIX_VECTOR ( vector ) );
294  address_hi = readl ( base + PCI_MSIX_ADDRESS_HI );
295  address_lo = readl ( base + PCI_MSIX_ADDRESS_LO );
296  data = readl ( base + PCI_MSIX_DATA );
298  pba = readl ( msix->pba );
299  address = ( ( ( ( uint64_t ) address_hi ) << 32 ) | address_lo );
300  DBGC ( msix, "MSI-X %p vector %d %#08x => %#08lx%s%s\n",
301  msix, vector, data, address,
302  ( ( ctrl & PCI_MSIX_CONTROL_MASK ) ? " (masked)" : "" ),
303  ( ( pba & ( 1 << vector ) ) ? " (pending)" : "" ) );
304 }
uint32_t base
Base.
Definition: librm.h:138
struct arbelprm_rc_send_wqe rc
Definition: arbel.h:14
void pci_msix_disable(struct pci_device *pci, struct pci_msix *msix)
Disable MSI-X interrupts.
Definition: pcimsix.c:208
unsigned short uint16_t
Definition: stdint.h:11
struct dma_device dma
DMA device.
Definition: pci.h:215
void msg(unsigned int row, const char *fmt,...)
Print message centred on specified row.
Definition: message.c:62
int pci_find_capability(struct pci_device *pci, int cap)
Look for a PCI capability.
Definition: pciextra.c:39
Error codes.
uint32_t vector
MSI-X vector.
Definition: ena.h:20
uint64_t address
Base address.
Definition: ena.h:24
int pci_write_config_word(struct pci_device *pci, unsigned int where, uint16_t value)
Write 16-bit word to PCI configuration space.
uint32_t readl(volatile uint32_t *io_addr)
Read 32-bit dword from memory-mapped device.
#define DBGC(...)
Definition: compiler.h:505
#define ENOENT
No such file or directory.
Definition: errno.h:515
unsigned long long uint64_t
Definition: stdint.h:13
PCI MSI-X interrupts.
#define PCI_MSIX_CONTROL
MSI-X vector control.
Definition: pcimsix.h:31
int pci_read_config_word(struct pci_device *pci, unsigned int where, uint16_t *value)
Read 16-bit word from PCI configuration space.
FILE_SECBOOT(PERMITTED)
#define PCI_MSIX_DESC_BIR(x)
BAR index.
Definition: pci.h:122
#define PCI_BASE_ADDRESS(n)
PCI base address registers.
Definition: pci.h:62
uint32_t start
Starting offset.
Definition: netvsc.h:12
struct ena_llq_option desc
Descriptor counts.
Definition: ena.h:20
void dma_free(struct dma_mapping *map, void *addr, size_t len)
Unmap and free DMA-coherent buffer.
#define ENOMEM
Not enough space.
Definition: errno.h:535
Assertions.
assert((readw(&hdr->flags) &(GTF_reading|GTF_writing))==0)
int pci_read_config_dword(struct pci_device *pci, unsigned int where, uint32_t *value)
Read 32-bit dword from PCI configuration space.
void pci_msix_control(struct pci_msix *msix, unsigned int vector, uint32_t mask)
Control MSI-X interrupt vector.
Definition: pcimsix.c:260
PCI MSI-X capability.
Definition: pcimsix.h:35
void writel(uint32_t data, volatile uint32_t *io_addr)
Write 32-bit dword to memory-mapped device.
#define PCI_MSIX_CTRL
MSI-X interrupts.
Definition: pci.h:116
static unsigned int count
Number of entries.
Definition: dwmac.h:225
unsigned long pci_bar_start(struct pci_device *pci, unsigned int reg)
Find the start of a PCI BAR.
Definition: pci.c:97
unsigned int cap
Capability offset.
Definition: pcimsix.h:37
uint32_t * msg
Dummy message target.
Definition: pcimsix.h:45
#define PCI_MSIX_VECTOR(n)
MSI-X vector offset.
Definition: pcimsix.h:19
#define PCI_MSIX_CTRL_SIZE(x)
Table size.
Definition: pci.h:119
#define PCI_MSIX_CTRL_MASK
Mask all interrupts.
Definition: pci.h:118
#define PCI_FMT
PCI device debug message format.
Definition: pci.h:312
PCI bus.
A PCI device.
Definition: pci.h:211
void * table
MSI-X table.
Definition: pcimsix.h:41
unsigned int uint32_t
Definition: stdint.h:12
void * dma_alloc(struct dma_device *dma, struct dma_mapping *map, size_t len, size_t align)
Allocate and map DMA-coherent buffer.
#define PCI_MSIX_LEN
MSI-X BAR mapped length.
Definition: pcimsix.h:16
#define PCI_MSIX_ADDRESS_LO
MSI-X vector address low 32 bits.
Definition: pcimsix.h:22
unsigned long physaddr_t
Definition: stdint.h:20
Definition: sis900.h:23
FILE_LICENCE(GPL2_OR_LATER_OR_UBDL)
static void * pci_msix_ioremap(struct pci_device *pci, struct pci_msix *msix, unsigned int cfg)
Map MSI-X BAR portion.
Definition: pcimsix.c:94
void pci_msix_dump(struct pci_msix *msix, unsigned int vector)
Dump MSI-X interrupt state (for debugging)
Definition: pcimsix.c:279
#define PCI_CAP_ID_MSIX
MSI-X.
Definition: pci.h:99
#define PCI_MSIX_DATA
MSI-X vector data.
Definition: pcimsix.h:28
int pci_msix_enable(struct pci_device *pci, struct pci_msix *msix)
Enable MSI-X interrupts.
Definition: pcimsix.c:137
#define PCI_MSIX_DESC_OFFSET(x)
BAR offset.
Definition: pci.h:123
#define PCI_MSIX_CONTROL_MASK
Vector is masked.
Definition: pcimsix.h:32
#define PCI_ARGS(pci)
PCI device debug message arguments.
Definition: pci.h:315
uint8_t ctrl
Ring control.
Definition: dwmac.h:18
void iounmap(volatile const void *io_addr)
Unmap I/O address.
uint8_t data[48]
Additional event data.
Definition: ena.h:22
#define PCI_MSIX_CTRL_ENABLE
Enable MSI-X.
Definition: pci.h:117
uint16_t offset
Offset to command line.
Definition: bzimage.h:8
void pci_msix_map(struct pci_msix *msix, unsigned int vector, physaddr_t address, uint32_t data)
Map MSI-X interrupt vector.
Definition: pcimsix.c:234
void * pci_ioremap(struct pci_device *pci, unsigned long bus_addr, size_t len)
Map PCI bus address as an I/O address.
#define DBG_LOG
Definition: compiler.h:317
static const char * pci_msix_name(unsigned int cfg)
Get MSI-X descriptor name (for debugging)
Definition: pcimsix.c:77
#define PCI_MSIX_ADDRESS_HI
MSI-X vector address high 32 bits.
Definition: pcimsix.h:25
#define PCI_MSIX_DESC_PBA
Definition: pci.h:121
#define NULL
NULL pointer (VOID *)
Definition: Base.h:322
struct dma_mapping map
Dummy message target mapping.
Definition: pcimsix.h:47
void * pba
Pending bit array.
Definition: pcimsix.h:43
physaddr_t dma(struct dma_mapping *map, void *addr)
Get DMA address from virtual address.
unsigned int count
Number of vectors.
Definition: pcimsix.h:39
#define PCI_MSIX_DESC_TABLE
Definition: pci.h:120