iPXE
pcimsix.c
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2019 Michael Brown <mbrown@fensystems.co.uk>.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License as
6  * published by the Free Software Foundation; either version 2 of the
7  * License, or (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful, but
10  * WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12  * General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17  * 02110-1301, USA.
18  *
19  * You can also choose to distribute this program under the terms of
20  * the Unmodified Binary Distribution Licence (as given in the file
21  * COPYING.UBDL), provided that you have satisfied its requirements.
22  */
23 
24 FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
25 
26 #include <stdint.h>
27 #include <errno.h>
28 #include <assert.h>
29 #include <ipxe/pci.h>
30 #include <ipxe/pcimsix.h>
31 
32 /** @file
33  *
34  * PCI MSI-X interrupts
35  *
36  * Interrupts as such are not used in iPXE, which operates in polling
37  * mode. However, some network cards (such as the Intel 40GbE and
38  * 100GbE NICs) will defer writing out completions until the point of
39  * asserting an MSI-X interrupt.
40  *
41  * From the point of view of the PCI device, asserting an MSI-X
42  * interrupt is just a 32-bit DMA write of an opaque value to an
43  * opaque target address. The PCI device has no way to know whether
44  * or not the target address corresponds to a real APIC.
45  *
46  * We can therefore trick the PCI device into believing that it is
47  * asserting an MSI-X interrupt, by configuring it to write an opaque
48  * 32-bit value to a dummy target address in host memory. This is
49  * sufficient to trigger the associated write of the completions to
50  * host memory.
51  *
52  * When running in a virtual machine, the hypervisor will intercept
53  * our attempt to configure MSI-X on the PCI device. The physical
54  * hardware will be configured to raise an interrupt under the
55  * hypervisor's control, which will then be reflected back into the
56  * virtual machine. The opaque value that we write will be assumed to
57  * indicate an interrupt vector number (as would normally be the case
58  * when configuring MSI-X), and the opaque address will generally be
59  * ignored. The reflected interrupt will be ignored (since it is not
60  * enabled within the virtual machine), but the device still asserts
61  * an MSI-X interrupt and so still triggers the associated write of
62  * the completions to host memory.
63  *
64  * Note that since the opaque target address will generally be ignored
65  * by the hypervisor, we cannot examine the value present at the dummy
66  * target address to find out whether or not an interrupt has been
67  * raised.
68  */
69 
70 /**
71  * Get MSI-X descriptor name (for debugging)
72  *
73  * @v cfg Configuration space offset
74  * @ret name Descriptor name
75  */
76 static const char * pci_msix_name ( unsigned int cfg ) {
77 
78  switch ( cfg ) {
79  case PCI_MSIX_DESC_TABLE: return "table";
80  case PCI_MSIX_DESC_PBA: return "PBA";
81  default: return "<UNKNOWN>";
82  }
83 }
84 
85 /**
86  * Map MSI-X BAR portion
87  *
88  * @v pci PCI device
89  * @v msix MSI-X capability
90  * @v cfg Configuration space offset
91  * @ret io I/O address
92  */
93 static void * pci_msix_ioremap ( struct pci_device *pci, struct pci_msix *msix,
94  unsigned int cfg ) {
95  uint32_t desc;
96  unsigned int bar;
97  unsigned long start;
98  unsigned long offset;
99  unsigned long base;
100  void *io;
101 
102  /* Read descriptor */
103  pci_read_config_dword ( pci, ( msix->cap + cfg ), &desc );
104 
105  /* Get BAR */
106  bar = PCI_MSIX_DESC_BIR ( desc );
108  start = pci_bar_start ( pci, PCI_BASE_ADDRESS ( bar ) );
109  if ( ! start ) {
110  DBGC ( msix, "MSI-X %p %s could not find BAR%d\n",
111  msix, pci_msix_name ( cfg ), bar );
112  return NULL;
113  }
114  base = ( start + offset );
115  DBGC ( msix, "MSI-X %p %s at %#08lx (BAR%d+%#lx)\n",
116  msix, pci_msix_name ( cfg ), base, bar, offset );
117 
118  /* Map BAR portion */
119  io = pci_ioremap ( pci, ( start + offset ), PCI_MSIX_LEN );
120  if ( ! io ) {
121  DBGC ( msix, "MSI-X %p %s could not map %#08lx\n",
122  msix, pci_msix_name ( cfg ), base );
123  return NULL;
124  }
125 
126  return io;
127 }
128 
129 /**
130  * Enable MSI-X interrupts
131  *
132  * @v pci PCI device
133  * @v msix MSI-X capability
134  * @ret rc Return status code
135  */
136 int pci_msix_enable ( struct pci_device *pci, struct pci_msix *msix ) {
137  uint16_t ctrl;
138  physaddr_t msg;
139  unsigned int i;
140  int rc;
141 
142  /* Locate capability */
143  msix->cap = pci_find_capability ( pci, PCI_CAP_ID_MSIX );
144  if ( ! msix->cap ) {
145  DBGC ( msix, "MSI-X %p found no MSI-X capability in "
146  PCI_FMT "\n", msix, PCI_ARGS ( pci ) );
147  rc = -ENOENT;
148  goto err_cap;
149  }
150 
151  /* Extract interrupt count */
152  pci_read_config_word ( pci, ( msix->cap + PCI_MSIX_CTRL ), &ctrl );
153  msix->count = ( PCI_MSIX_CTRL_SIZE ( ctrl ) + 1 );
154  DBGC ( msix, "MSI-X %p has %d vectors for " PCI_FMT "\n",
155  msix, msix->count, PCI_ARGS ( pci ) );
156 
157  /* Map MSI-X table */
158  msix->table = pci_msix_ioremap ( pci, msix, PCI_MSIX_DESC_TABLE );
159  if ( ! msix->table ) {
160  rc = -ENOENT;
161  goto err_table;
162  }
163 
164  /* Map pending bit array */
165  msix->pba = pci_msix_ioremap ( pci, msix, PCI_MSIX_DESC_PBA );
166  if ( ! msix->pba ) {
167  rc = -ENOENT;
168  goto err_pba;
169  }
170 
171  /* Allocate dummy target */
172  msix->msg = dma_alloc ( &pci->dma, &msix->map, sizeof ( *msix->msg ),
173  sizeof ( *msix->msg ) );
174  if ( ! msix->msg ) {
175  rc = -ENOMEM;
176  goto err_msg;
177  }
178 
179  /* Map all interrupts to dummy target by default */
180  msg = dma ( &msix->map, msix->msg );
181  for ( i = 0 ; i < msix->count ; i++ )
182  pci_msix_map ( msix, i, msg, 0 );
183 
184  /* Enable MSI-X */
187  pci_write_config_word ( pci, ( msix->cap + PCI_MSIX_CTRL ), ctrl );
188 
189  return 0;
190 
191  dma_free ( &msix->map, msix->msg, sizeof ( *msix->msg ) );
192  err_msg:
193  iounmap ( msix->pba );
194  err_pba:
195  iounmap ( msix->table );
196  err_table:
197  err_cap:
198  return rc;
199 }
200 
201 /**
202  * Disable MSI-X interrupts
203  *
204  * @v pci PCI device
205  * @v msix MSI-X capability
206  */
207 void pci_msix_disable ( struct pci_device *pci, struct pci_msix *msix ) {
208  uint16_t ctrl;
209 
210  /* Disable MSI-X */
211  pci_read_config_word ( pci, ( msix->cap + PCI_MSIX_CTRL ), &ctrl );
213  pci_write_config_word ( pci, ( msix->cap + PCI_MSIX_CTRL ), ctrl );
214 
215  /* Free dummy target */
216  dma_free ( &msix->map, msix->msg, sizeof ( *msix->msg ) );
217 
218  /* Unmap pending bit array */
219  iounmap ( msix->pba );
220 
221  /* Unmap MSI-X table */
222  iounmap ( msix->table );
223 }
224 
225 /**
226  * Map MSI-X interrupt vector
227  *
228  * @v msix MSI-X capability
229  * @v vector MSI-X vector
230  * @v address Message address
231  * @v data Message data
232  */
233 void pci_msix_map ( struct pci_msix *msix, unsigned int vector,
235  void *base;
236 
237  /* Sanity check */
238  assert ( vector < msix->count );
239 
240  /* Map interrupt vector */
241  base = ( msix->table + PCI_MSIX_VECTOR ( vector ) );
242  writel ( ( address & 0xffffffffUL ), ( base + PCI_MSIX_ADDRESS_LO ) );
243  if ( sizeof ( address ) > sizeof ( uint32_t ) ) {
244  writel ( ( ( ( uint64_t ) address ) >> 32 ),
245  ( base + PCI_MSIX_ADDRESS_HI ) );
246  } else {
247  writel ( 0, ( base + PCI_MSIX_ADDRESS_HI ) );
248  }
249  writel ( data, ( base + PCI_MSIX_DATA ) );
250 }
251 
252 /**
253  * Control MSI-X interrupt vector
254  *
255  * @v msix MSI-X capability
256  * @v vector MSI-X vector
257  * @v mask Control mask
258  */
259 void pci_msix_control ( struct pci_msix *msix, unsigned int vector,
260  uint32_t mask ) {
261  void *base;
262  uint32_t ctrl;
263 
264  /* Mask/unmask interrupt vector */
265  base = ( msix->table + PCI_MSIX_VECTOR ( vector ) );
268  ctrl |= mask;
269  writel ( ctrl, ( base + PCI_MSIX_CONTROL ) );
270 }
271 
272 /**
273  * Dump MSI-X interrupt state (for debugging)
274  *
275  * @v msix MSI-X capability
276  * @v vector MSI-X vector
277  */
278 void pci_msix_dump ( struct pci_msix *msix, unsigned int vector ) {
279  void *base;
280  uint32_t address_hi;
281  uint32_t address_lo;
283  uint32_t data;
284  uint32_t ctrl;
285  uint32_t pba;
286 
287  /* Do nothing in non-debug builds */
288  if ( ! DBG_LOG )
289  return;
290 
291  /* Mask/unmask interrupt vector */
292  base = ( msix->table + PCI_MSIX_VECTOR ( vector ) );
293  address_hi = readl ( base + PCI_MSIX_ADDRESS_HI );
294  address_lo = readl ( base + PCI_MSIX_ADDRESS_LO );
295  data = readl ( base + PCI_MSIX_DATA );
297  pba = readl ( msix->pba );
298  address = ( ( ( ( uint64_t ) address_hi ) << 32 ) | address_lo );
299  DBGC ( msix, "MSI-X %p vector %d %#08x => %#08lx%s%s\n",
300  msix, vector, data, address,
301  ( ( ctrl & PCI_MSIX_CONTROL_MASK ) ? " (masked)" : "" ),
302  ( ( pba & ( 1 << vector ) ) ? " (pending)" : "" ) );
303 }
uint32_t base
Base.
Definition: librm.h:138
struct arbelprm_rc_send_wqe rc
Definition: arbel.h:14
void pci_msix_disable(struct pci_device *pci, struct pci_msix *msix)
Disable MSI-X interrupts.
Definition: pcimsix.c:207
unsigned short uint16_t
Definition: stdint.h:11
struct dma_device dma
DMA device.
Definition: pci.h:214
void msg(unsigned int row, const char *fmt,...)
Print message centred on specified row.
Definition: message.c:61
int pci_find_capability(struct pci_device *pci, int cap)
Look for a PCI capability.
Definition: pciextra.c:38
Error codes.
uint32_t vector
MSI-X vector.
Definition: ena.h:20
uint64_t address
Base address.
Definition: ena.h:24
int pci_write_config_word(struct pci_device *pci, unsigned int where, uint16_t value)
Write 16-bit word to PCI configuration space.
uint32_t readl(volatile uint32_t *io_addr)
Read 32-bit dword from memory-mapped device.
#define DBGC(...)
Definition: compiler.h:505
#define ENOENT
No such file or directory.
Definition: errno.h:514
unsigned long long uint64_t
Definition: stdint.h:13
PCI MSI-X interrupts.
#define PCI_MSIX_CONTROL
MSI-X vector control.
Definition: pcimsix.h:30
int pci_read_config_word(struct pci_device *pci, unsigned int where, uint16_t *value)
Read 16-bit word from PCI configuration space.
#define PCI_MSIX_DESC_BIR(x)
BAR index.
Definition: pci.h:121
#define PCI_BASE_ADDRESS(n)
PCI base address registers.
Definition: pci.h:61
uint32_t start
Starting offset.
Definition: netvsc.h:12
struct ena_llq_option desc
Descriptor counts.
Definition: ena.h:20
void dma_free(struct dma_mapping *map, void *addr, size_t len)
Unmap and free DMA-coherent buffer.
#define ENOMEM
Not enough space.
Definition: errno.h:534
Assertions.
assert((readw(&hdr->flags) &(GTF_reading|GTF_writing))==0)
int pci_read_config_dword(struct pci_device *pci, unsigned int where, uint32_t *value)
Read 32-bit dword from PCI configuration space.
void pci_msix_control(struct pci_msix *msix, unsigned int vector, uint32_t mask)
Control MSI-X interrupt vector.
Definition: pcimsix.c:259
PCI MSI-X capability.
Definition: pcimsix.h:34
void writel(uint32_t data, volatile uint32_t *io_addr)
Write 32-bit dword to memory-mapped device.
#define PCI_MSIX_CTRL
MSI-X interrupts.
Definition: pci.h:115
static unsigned int count
Number of entries.
Definition: dwmac.h:225
unsigned long pci_bar_start(struct pci_device *pci, unsigned int reg)
Find the start of a PCI BAR.
Definition: pci.c:96
unsigned int cap
Capability offset.
Definition: pcimsix.h:36
uint32_t * msg
Dummy message target.
Definition: pcimsix.h:44
#define PCI_MSIX_VECTOR(n)
MSI-X vector offset.
Definition: pcimsix.h:18
#define PCI_MSIX_CTRL_SIZE(x)
Table size.
Definition: pci.h:118
#define PCI_MSIX_CTRL_MASK
Mask all interrupts.
Definition: pci.h:117
#define PCI_FMT
PCI device debug message format.
Definition: pci.h:311
PCI bus.
A PCI device.
Definition: pci.h:210
void * table
MSI-X table.
Definition: pcimsix.h:40
unsigned int uint32_t
Definition: stdint.h:12
void * dma_alloc(struct dma_device *dma, struct dma_mapping *map, size_t len, size_t align)
Allocate and map DMA-coherent buffer.
#define PCI_MSIX_LEN
MSI-X BAR mapped length.
Definition: pcimsix.h:15
#define PCI_MSIX_ADDRESS_LO
MSI-X vector address low 32 bits.
Definition: pcimsix.h:21
unsigned long physaddr_t
Definition: stdint.h:20
Definition: sis900.h:23
FILE_LICENCE(GPL2_OR_LATER_OR_UBDL)
static void * pci_msix_ioremap(struct pci_device *pci, struct pci_msix *msix, unsigned int cfg)
Map MSI-X BAR portion.
Definition: pcimsix.c:93
void pci_msix_dump(struct pci_msix *msix, unsigned int vector)
Dump MSI-X interrupt state (for debugging)
Definition: pcimsix.c:278
#define PCI_CAP_ID_MSIX
MSI-X.
Definition: pci.h:98
#define PCI_MSIX_DATA
MSI-X vector data.
Definition: pcimsix.h:27
int pci_msix_enable(struct pci_device *pci, struct pci_msix *msix)
Enable MSI-X interrupts.
Definition: pcimsix.c:136
#define PCI_MSIX_DESC_OFFSET(x)
BAR offset.
Definition: pci.h:122
#define PCI_MSIX_CONTROL_MASK
Vector is masked.
Definition: pcimsix.h:31
#define PCI_ARGS(pci)
PCI device debug message arguments.
Definition: pci.h:314
uint8_t ctrl
Ring control.
Definition: dwmac.h:18
void iounmap(volatile const void *io_addr)
Unmap I/O address.
uint8_t data[48]
Additional event data.
Definition: ena.h:22
#define PCI_MSIX_CTRL_ENABLE
Enable MSI-X.
Definition: pci.h:116
uint16_t offset
Offset to command line.
Definition: bzimage.h:8
void pci_msix_map(struct pci_msix *msix, unsigned int vector, physaddr_t address, uint32_t data)
Map MSI-X interrupt vector.
Definition: pcimsix.c:233
void * pci_ioremap(struct pci_device *pci, unsigned long bus_addr, size_t len)
Map PCI bus address as an I/O address.
#define DBG_LOG
Definition: compiler.h:317
static const char * pci_msix_name(unsigned int cfg)
Get MSI-X descriptor name (for debugging)
Definition: pcimsix.c:76
#define PCI_MSIX_ADDRESS_HI
MSI-X vector address high 32 bits.
Definition: pcimsix.h:24
#define PCI_MSIX_DESC_PBA
Definition: pci.h:120
#define NULL
NULL pointer (VOID *)
Definition: Base.h:321
struct dma_mapping map
Dummy message target mapping.
Definition: pcimsix.h:46
void * pba
Pending bit array.
Definition: pcimsix.h:42
physaddr_t dma(struct dma_mapping *map, void *addr)
Get DMA address from virtual address.
unsigned int count
Number of vectors.
Definition: pcimsix.h:38
#define PCI_MSIX_DESC_TABLE
Definition: pci.h:119