1 // SPDX-License-Identifier: GPL-2.0
3 * Driver for HiSilicon PCIe tune and trace device
5 * Copyright (c) 2022 HiSilicon Technologies Co., Ltd.
6 * Author: Yicong Yang <yangyicong@hisilicon.com>
9 #include <linux/bitfield.h>
10 #include <linux/bitops.h>
11 #include <linux/cpuhotplug.h>
12 #include <linux/delay.h>
13 #include <linux/dma-mapping.h>
14 #include <linux/interrupt.h>
16 #include <linux/iommu.h>
17 #include <linux/iopoll.h>
18 #include <linux/module.h>
19 #include <linux/sysfs.h>
20 #include <linux/vmalloc.h>
24 /* Dynamic CPU hotplug state used by PTT; assigned from cpuhp_setup_state_multi() in hisi_ptt_init() */
25 static enum cpuhp_state hisi_ptt_pmu_online;
/*
 * Poll the tuning interrupt status register until the in-progress tuning
 * operation completes. Returns true on completion, false on poll timeout.
 */
27 static bool hisi_ptt_wait_tuning_finish(struct hisi_ptt *hisi_ptt)
31 return !readl_poll_timeout(hisi_ptt->iobase + HISI_PTT_TUNING_INT_STAT,
32 val, !(val & HISI_PTT_TUNING_INT_STAT_MASK),
33 HISI_PTT_WAIT_POLL_INTERVAL_US,
34 HISI_PTT_WAIT_TUNE_TIMEOUT_US);
/*
 * sysfs show handler shared by all tuning attributes: program the event
 * code into TUNING_CTRL, trigger a read by writing all 1s to TUNING_DATA,
 * wait for completion, then report the value read back. tune_lock
 * serializes the CTRL/DATA register sequence against concurrent accesses.
 */
37 static ssize_t hisi_ptt_tune_attr_show(struct device *dev,
38 struct device_attribute *attr,
41 struct hisi_ptt *hisi_ptt = to_hisi_ptt(dev_get_drvdata(dev));
42 struct dev_ext_attribute *ext_attr;
43 struct hisi_ptt_tune_desc *desc;
47 ext_attr = container_of(attr, struct dev_ext_attribute, attr);
50 mutex_lock(&hisi_ptt->tune_lock);
52 reg = readl(hisi_ptt->iobase + HISI_PTT_TUNING_CTRL);
53 reg &= ~(HISI_PTT_TUNING_CTRL_CODE | HISI_PTT_TUNING_CTRL_SUB);
54 reg |= FIELD_PREP(HISI_PTT_TUNING_CTRL_CODE | HISI_PTT_TUNING_CTRL_SUB,
56 writel(reg, hisi_ptt->iobase + HISI_PTT_TUNING_CTRL);
58 /* Write all 1s to TUNING_DATA to indicate this is a read (query) operation */
59 writel(~0U, hisi_ptt->iobase + HISI_PTT_TUNING_DATA);
61 if (!hisi_ptt_wait_tuning_finish(hisi_ptt)) {
62 mutex_unlock(&hisi_ptt->tune_lock);
66 reg = readl(hisi_ptt->iobase + HISI_PTT_TUNING_DATA);
67 reg &= HISI_PTT_TUNING_DATA_VAL_MASK;
68 val = FIELD_GET(HISI_PTT_TUNING_DATA_VAL_MASK, reg);
70 mutex_unlock(&hisi_ptt->tune_lock);
71 return sysfs_emit(buf, "%u\n", val);
/*
 * sysfs store handler shared by all tuning attributes: parse the user
 * value as a base-10 u16, program the event code into TUNING_CTRL, write
 * the new value to TUNING_DATA and wait for the hardware to finish.
 * Serialized against the show path by tune_lock.
 */
74 static ssize_t hisi_ptt_tune_attr_store(struct device *dev,
75 struct device_attribute *attr,
76 const char *buf, size_t count)
78 struct hisi_ptt *hisi_ptt = to_hisi_ptt(dev_get_drvdata(dev));
79 struct dev_ext_attribute *ext_attr;
80 struct hisi_ptt_tune_desc *desc;
84 ext_attr = container_of(attr, struct dev_ext_attribute, attr);
87 if (kstrtou16(buf, 10, &val))
90 mutex_lock(&hisi_ptt->tune_lock);
92 reg = readl(hisi_ptt->iobase + HISI_PTT_TUNING_CTRL);
93 reg &= ~(HISI_PTT_TUNING_CTRL_CODE | HISI_PTT_TUNING_CTRL_SUB);
94 reg |= FIELD_PREP(HISI_PTT_TUNING_CTRL_CODE | HISI_PTT_TUNING_CTRL_SUB,
96 writel(reg, hisi_ptt->iobase + HISI_PTT_TUNING_CTRL);
97 writel(FIELD_PREP(HISI_PTT_TUNING_DATA_VAL_MASK, val),
98 hisi_ptt->iobase + HISI_PTT_TUNING_DATA);
100 if (!hisi_ptt_wait_tuning_finish(hisi_ptt)) {
101 mutex_unlock(&hisi_ptt->tune_lock);
105 mutex_unlock(&hisi_ptt->tune_lock);
/*
 * Helper macros declaring one tuning knob: a hisi_ptt_tune_desc holding
 * the event code plus a 0600 dev_ext_attribute wired to the shared
 * show/store handlers above (the COMMON variant fills those in).
 */
109 #define HISI_PTT_TUNE_ATTR(_name, _val, _show, _store) \
110 static struct hisi_ptt_tune_desc _name##_desc = { \
112 .event_code = (_val), \
114 static struct dev_ext_attribute hisi_ptt_##_name##_attr = { \
115 .attr = __ATTR(_name, 0600, _show, _store), \
116 .var = &_name##_desc, \
119 #define HISI_PTT_TUNE_ATTR_COMMON(_name, _val) \
120 HISI_PTT_TUNE_ATTR(_name, _val, \
121 hisi_ptt_tune_attr_show, \
122 hisi_ptt_tune_attr_store)
/*
125 * The value of a tuning event is composed of two parts: the main event code
126 * in bits [15:0] and the subevent code in bits [23:16]. For example, qos_tx_cpl is
127 * a subevent of 'Tx path QoS control' which tunes the weight of Tx
128 * completion TLPs. See hisi_ptt.rst documentation for more information.
 */
130 #define HISI_PTT_TUNE_QOS_TX_CPL (0x4 | (3 << 16))
131 #define HISI_PTT_TUNE_QOS_TX_NP (0x4 | (4 << 16))
132 #define HISI_PTT_TUNE_QOS_TX_P (0x4 | (5 << 16))
133 #define HISI_PTT_TUNE_RX_ALLOC_BUF_LEVEL (0x5 | (6 << 16))
134 #define HISI_PTT_TUNE_TX_ALLOC_BUF_LEVEL (0x5 | (7 << 16))
/* Instantiate the sysfs attributes for each tuning event defined above */
136 HISI_PTT_TUNE_ATTR_COMMON(qos_tx_cpl, HISI_PTT_TUNE_QOS_TX_CPL);
137 HISI_PTT_TUNE_ATTR_COMMON(qos_tx_np, HISI_PTT_TUNE_QOS_TX_NP);
138 HISI_PTT_TUNE_ATTR_COMMON(qos_tx_p, HISI_PTT_TUNE_QOS_TX_P);
139 HISI_PTT_TUNE_ATTR_COMMON(rx_alloc_buf_level, HISI_PTT_TUNE_RX_ALLOC_BUF_LEVEL);
140 HISI_PTT_TUNE_ATTR_COMMON(tx_alloc_buf_level, HISI_PTT_TUNE_TX_ALLOC_BUF_LEVEL);
/* sysfs attribute group collecting the tuning knobs declared above */
142 static struct attribute *hisi_ptt_tune_attrs[] = {
143 &hisi_ptt_qos_tx_cpl_attr.attr.attr,
144 &hisi_ptt_qos_tx_np_attr.attr.attr,
145 &hisi_ptt_qos_tx_p_attr.attr.attr,
146 &hisi_ptt_rx_alloc_buf_level_attr.attr.attr,
147 &hisi_ptt_tx_alloc_buf_level_attr.attr.attr,
151 static struct attribute_group hisi_ptt_tune_group = {
153 .attrs = hisi_ptt_tune_attrs,
/*
 * Translate a device BDF into a trace filter value. For a Root Port
 * filter the value is a single bit derived from the port's devfn.
 * NOTE(review): only the port branch is visible here; presumably the
 * Requester ID path returns the devid itself — confirm against callers
 * which compare val == hisi_ptt_get_filter_val(devid, is_port).
 */
156 static u16 hisi_ptt_get_filter_val(u16 devid, bool is_port)
159 return BIT(HISI_PCIE_CORE_PORT_ID(devid & 0xff));
/*
 * Atomically poll TRACE_STS until the trace engine reports idle.
 * Returns true when idle, false on timeout. Atomic variant is used so
 * this is safe from contexts that cannot sleep (e.g. under pmu_lock).
 */
164 static bool hisi_ptt_wait_trace_hw_idle(struct hisi_ptt *hisi_ptt)
168 return !readl_poll_timeout_atomic(hisi_ptt->iobase + HISI_PTT_TRACE_STS,
169 val, val & HISI_PTT_TRACE_IDLE,
170 HISI_PTT_WAIT_POLL_INTERVAL_US,
171 HISI_PTT_WAIT_TRACE_TIMEOUT_US);
/*
 * Poll TRACE_WR_STS until it reads zero, indicating the trace DMA reset
 * has completed. Timeout is deliberately ignored: this is best-effort
 * and the caller proceeds regardless.
 */
174 static void hisi_ptt_wait_dma_reset_done(struct hisi_ptt *hisi_ptt)
178 readl_poll_timeout_atomic(hisi_ptt->iobase + HISI_PTT_TRACE_WR_STS,
179 val, !val, HISI_PTT_RESET_POLL_INTERVAL_US,
180 HISI_PTT_RESET_TIMEOUT_US);
/* Stop tracing: clear the control register and mark the session stopped. */
183 static void hisi_ptt_trace_end(struct hisi_ptt *hisi_ptt)
185 writel(0, hisi_ptt->iobase + HISI_PTT_TRACE_CTRL);
186 hisi_ptt->trace_ctrl.started = false;
/*
 * Start a trace session: verify the hardware is idle, pulse the DMA
 * reset bit, rewind the buffer index, zero the per-session trace
 * buffers, clear and unmask the trace interrupts, then program the
 * control register from trace_ctrl (type/direction/format/filter) and
 * set the enable bit last.
 */
189 static int hisi_ptt_trace_start(struct hisi_ptt *hisi_ptt)
191 struct hisi_ptt_trace_ctrl *ctrl = &hisi_ptt->trace_ctrl;
195 /* Check device idle before starting the trace */
196 if (!hisi_ptt_wait_trace_hw_idle(hisi_ptt)) {
197 pci_err(hisi_ptt->pdev, "Failed to start trace, the device is still busy\n");
201 ctrl->started = true;
203 /* Pulse the DMA reset bit before starting to trace */
204 val = readl(hisi_ptt->iobase + HISI_PTT_TRACE_CTRL);
205 val |= HISI_PTT_TRACE_CTRL_RST;
206 writel(val, hisi_ptt->iobase + HISI_PTT_TRACE_CTRL);
208 hisi_ptt_wait_dma_reset_done(hisi_ptt);
210 val = readl(hisi_ptt->iobase + HISI_PTT_TRACE_CTRL);
211 val &= ~HISI_PTT_TRACE_CTRL_RST;
212 writel(val, hisi_ptt->iobase + HISI_PTT_TRACE_CTRL);
214 /* Reset the index of current buffer */
215 hisi_ptt->trace_ctrl.buf_index = 0;
217 /* Zero the trace buffers */
218 for (i = 0; i < HISI_PTT_TRACE_BUF_CNT; i++)
219 memset(ctrl->trace_buf[i].addr, 0, HISI_PTT_TRACE_BUF_SIZE);
221 /* Clear any stale interrupt status, then unmask the trace interrupts */
222 writel(HISI_PTT_TRACE_INT_STAT_MASK, hisi_ptt->iobase + HISI_PTT_TRACE_INT_STAT);
223 writel(0, hisi_ptt->iobase + HISI_PTT_TRACE_INT_MASK);
225 /* Set the trace control register */
226 val = FIELD_PREP(HISI_PTT_TRACE_CTRL_TYPE_SEL, ctrl->type);
227 val |= FIELD_PREP(HISI_PTT_TRACE_CTRL_RXTX_SEL, ctrl->direction);
228 val |= FIELD_PREP(HISI_PTT_TRACE_CTRL_DATA_FORMAT, ctrl->format);
229 val |= FIELD_PREP(HISI_PTT_TRACE_CTRL_TARGET_SEL, hisi_ptt->trace_ctrl.filter);
/* FILTER_MODE set means Requester ID filtering; clear means Root Port filtering */
230 if (!hisi_ptt->trace_ctrl.is_port)
231 val |= HISI_PTT_TRACE_CTRL_FILTER_MODE;
233 /* Start the Trace */
234 val |= HISI_PTT_TRACE_CTRL_EN;
235 writel(val, hisi_ptt->iobase + HISI_PTT_TRACE_CTRL);
/*
 * Copy the data of trace buffer @index into the perf AUX buffer and
 * advance/commit the AUX handle. Called from the ISR with @stop == false
 * for each filled hardware buffer, and from the stop path with
 * @stop == true to flush the partially-filled buffer.
 */
240 static int hisi_ptt_update_aux(struct hisi_ptt *hisi_ptt, int index, bool stop)
242 struct hisi_ptt_trace_ctrl *ctrl = &hisi_ptt->trace_ctrl;
243 struct perf_output_handle *handle = &ctrl->handle;
244 struct perf_event *event = handle->event;
245 struct hisi_ptt_pmu_buf *buf;
249 buf = perf_get_aux(handle);
250 if (!buf || !handle->size)
253 addr = ctrl->trace_buf[ctrl->buf_index].addr;
/*
256 * If we're going to stop, read the size of already traced data from
257 * HISI_PTT_TRACE_WR_STS. Otherwise we're coming from the interrupt,
258 * the data size is always HISI_PTT_TRACE_BUF_SIZE.
 */
263 reg = readl(hisi_ptt->iobase + HISI_PTT_TRACE_WR_STS);
264 size = FIELD_GET(HISI_PTT_TRACE_WR_STS_WRITE, reg);
266 size = HISI_PTT_TRACE_BUF_SIZE;
269 memcpy(buf->base + buf->pos, addr, size);
/*
273 * Just commit the traced data if we're going to stop. Otherwise if the
274 * resident AUX buffer cannot contain the data of next trace buffer,
 * commit it and re-begin the handle on a fresh AUX region.
 */
278 perf_aux_output_end(handle, buf->pos);
279 } else if (buf->length - buf->pos < HISI_PTT_TRACE_BUF_SIZE) {
280 perf_aux_output_end(handle, buf->pos);
282 buf = perf_aux_output_begin(handle, event);
286 buf->pos = handle->head % buf->length;
/* Even the fresh region must fit a whole trace buffer, or give up */
287 if (buf->length - buf->pos < HISI_PTT_TRACE_BUF_SIZE) {
288 perf_aux_output_end(handle, 0);
/*
 * Threaded IRQ handler for the trace DMA interrupt. Each status bit
 * corresponds to one filled trace buffer: acknowledge it, push the data
 * to the AUX buffer and advance buf_index to the next hardware buffer.
 */
296 static irqreturn_t hisi_ptt_isr(int irq, void *context)
298 struct hisi_ptt *hisi_ptt = context;
301 status = readl(hisi_ptt->iobase + HISI_PTT_TRACE_INT_STAT);
302 if (!(status & HISI_PTT_TRACE_INT_STAT_MASK))
/* Lowest set bit selects the buffer that just filled */
305 buf_idx = ffs(status) - 1;
307 /* Clear the interrupt status of buffer @buf_idx */
308 writel(status, hisi_ptt->iobase + HISI_PTT_TRACE_INT_STAT);
/*
311 * Update the AUX buffer and cache the current buffer index,
312 * as we need to know this and save the data when the trace
313 * is ended out of the interrupt handler. End the trace
314 * if the updating fails.
 */
316 if (hisi_ptt_update_aux(hisi_ptt, buf_idx, false))
317 hisi_ptt_trace_end(hisi_ptt);
319 hisi_ptt->trace_ctrl.buf_index = (buf_idx + 1) % HISI_PTT_TRACE_BUF_CNT;
/* devm action callback: release the MSI vectors allocated at probe */
324 static void hisi_ptt_irq_free_vectors(void *pdev)
326 pci_free_irq_vectors(pdev);
/*
 * Allocate a single MSI vector and request the trace DMA interrupt as a
 * threaded IRQ (hisi_ptt_isr). Vector release is tied to device lifetime
 * via devm_add_action_or_reset().
 */
329 static int hisi_ptt_register_irq(struct hisi_ptt *hisi_ptt)
331 struct pci_dev *pdev = hisi_ptt->pdev;
334 ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSI);
336 pci_err(pdev, "failed to allocate irq vector, ret = %d\n", ret);
340 ret = devm_add_action_or_reset(&pdev->dev, hisi_ptt_irq_free_vectors, pdev);
344 ret = devm_request_threaded_irq(&pdev->dev,
345 pci_irq_vector(pdev, HISI_PTT_TRACE_DMA_IRQ),
346 NULL, hisi_ptt_isr, 0,
349 pci_err(pdev, "failed to request irq %d, ret = %d\n",
350 pci_irq_vector(pdev, HISI_PTT_TRACE_DMA_IRQ), ret);
/*
 * Remove a filter from its list and drop its bit from the available
 * port mask, then release it. NOTE(review): the port-mask update here
 * is unconditional; presumably only meaningful for port filters —
 * confirm against the elided lines.
 */
357 static void hisi_ptt_del_free_filter(struct hisi_ptt *hisi_ptt,
358 struct hisi_ptt_filter_desc *filter)
361 hisi_ptt->port_mask &= ~hisi_ptt_get_filter_val(filter->devid, true);
363 list_del(&filter->list);
/*
 * Allocate a filter descriptor for device @devid, name it after the
 * device's BDF, and append it to the port or requester filter list.
 * Port filters additionally set their bit in the available port mask.
 * Returns the new filter, or NULL on allocation failure (errors are
 * logged; callers treat filters as best-effort).
 */
368 static struct hisi_ptt_filter_desc *
369 hisi_ptt_alloc_add_filter(struct hisi_ptt *hisi_ptt, u16 devid, bool is_port)
371 struct hisi_ptt_filter_desc *filter;
372 u8 devfn = devid & 0xff;
375 filter_name = kasprintf(GFP_KERNEL, "%04x:%02x:%02x.%d", pci_domain_nr(hisi_ptt->pdev->bus),
376 PCI_BUS_NUM(devid), PCI_SLOT(devfn), PCI_FUNC(devfn));
378 pci_err(hisi_ptt->pdev, "failed to allocate name for filter %04x:%02x:%02x.%d\n",
379 pci_domain_nr(hisi_ptt->pdev->bus), PCI_BUS_NUM(devid),
380 PCI_SLOT(devfn), PCI_FUNC(devfn));
384 filter = kzalloc(sizeof(*filter), GFP_KERNEL);
386 pci_err(hisi_ptt->pdev, "failed to add filter for %s\n",
392 filter->name = filter_name;
393 filter->is_port = is_port;
394 filter->devid = devid;
396 if (filter->is_port) {
397 list_add_tail(&filter->list, &hisi_ptt->port_filters);
399 /* Update the available port mask */
400 hisi_ptt->port_mask |= hisi_ptt_get_filter_val(filter->devid, true);
402 list_add_tail(&filter->list, &hisi_ptt->req_filters);
/*
 * Delayed work draining filter_update_kfifo: add or remove filters for
 * hot-plugged/removed PCI devices queued by the bus notifier. If the
 * filter lock is contended, reschedule instead of blocking the worker.
 */
408 static void hisi_ptt_update_filters(struct work_struct *work)
410 struct delayed_work *delayed_work = to_delayed_work(work);
411 struct hisi_ptt_filter_update_info info;
412 struct hisi_ptt_filter_desc *filter;
413 struct hisi_ptt *hisi_ptt;
415 hisi_ptt = container_of(delayed_work, struct hisi_ptt, work);
417 if (!mutex_trylock(&hisi_ptt->filter_lock)) {
418 schedule_delayed_work(&hisi_ptt->work, HISI_PTT_WORK_DELAY_MS);
422 while (kfifo_get(&hisi_ptt->filter_update_kfifo, &info)) {
/*
425 * Notify the users if failed to add this filter, others
426 * still work and available. See the comments in
427 * hisi_ptt_init_filters().
 */
429 filter = hisi_ptt_alloc_add_filter(hisi_ptt, info.devid, info.is_port);
433 struct hisi_ptt_filter_desc *tmp;
434 struct list_head *target_list;
436 target_list = info.is_port ? &hisi_ptt->port_filters :
437 &hisi_ptt->req_filters;
/* Removal path: find the matching devid on the relevant list and free it */
439 list_for_each_entry_safe(filter, tmp, target_list, list)
440 if (filter->devid == info.devid) {
441 hisi_ptt_del_free_filter(hisi_ptt, filter);
447 mutex_unlock(&hisi_ptt->filter_lock);
/*
451 * A PCI bus notifier is used here for dynamically updating the filter
 * list as devices under the supported Root Ports come and go.
 */
454 static int hisi_ptt_notifier_call(struct notifier_block *nb, unsigned long action,
457 struct hisi_ptt *hisi_ptt = container_of(nb, struct hisi_ptt, hisi_ptt_nb);
458 struct hisi_ptt_filter_update_info info;
459 struct pci_dev *pdev, *root_port;
460 struct device *dev = data;
463 pdev = to_pci_dev(dev);
464 root_port = pcie_find_root_port(pdev);
/* Only care about devices under the Root Ports this PTT instance controls */
468 port_devid = PCI_DEVID(root_port->bus->number, root_port->devfn);
469 if (port_devid < hisi_ptt->lower_bdf ||
470 port_devid > hisi_ptt->upper_bdf)
473 info.is_port = pci_pcie_type(pdev) == PCI_EXP_TYPE_ROOT_PORT;
474 info.devid = PCI_DEVID(pdev->bus->number, pdev->devfn);
477 case BUS_NOTIFY_ADD_DEVICE:
480 case BUS_NOTIFY_DEL_DEVICE:
/*
488 * The FIFO size is 16 which is sufficient for almost all the cases,
489 * since each PCIe core will have at most 8 Root Ports (typically only
490 * 1~4 Root Ports). On failure log the failed filter and let the user
 * know the update was dropped.
 */
493 if (kfifo_in_spinlocked(&hisi_ptt->filter_update_kfifo, &info, 1,
494 &hisi_ptt->filter_update_lock))
495 schedule_delayed_work(&hisi_ptt->work, 0);
497 pci_warn(hisi_ptt->pdev,
498 "filter update fifo overflow for target %s\n",
/*
 * pci_walk_bus() callback run at probe: create a filter for each device
 * already present under the supported Root Ports.
 */
504 static int hisi_ptt_init_filters(struct pci_dev *pdev, void *data)
506 struct pci_dev *root_port = pcie_find_root_port(pdev);
507 struct hisi_ptt_filter_desc *filter;
508 struct hisi_ptt *hisi_ptt = data;
514 port_devid = PCI_DEVID(root_port->bus->number, root_port->devfn);
515 if (port_devid < hisi_ptt->lower_bdf ||
516 port_devid > hisi_ptt->upper_bdf)
/*
520 * We won't fail the probe if filter allocation failed here. The filters
521 * should be partially initialized and users would know which filter fails
522 * through the log. Other functions of the PTT device are still available.
 */
524 filter = hisi_ptt_alloc_add_filter(hisi_ptt, PCI_DEVID(pdev->bus->number, pdev->devfn),
525 pci_pcie_type(pdev) == PCI_EXP_TYPE_ROOT_PORT);
/* devm action callback: free every requester and port filter descriptor */
532 static void hisi_ptt_release_filters(void *data)
534 struct hisi_ptt_filter_desc *filter, *tmp;
535 struct hisi_ptt *hisi_ptt = data;
537 list_for_each_entry_safe(filter, tmp, &hisi_ptt->req_filters, list)
538 hisi_ptt_del_free_filter(hisi_ptt, filter);
540 list_for_each_entry_safe(filter, tmp, &hisi_ptt->port_filters, list)
541 hisi_ptt_del_free_filter(hisi_ptt, filter);
/*
 * Allocate HISI_PTT_TRACE_BUF_CNT coherent DMA trace buffers and program
 * their base addresses and size into the device's trace address
 * registers. All allocations are devm/dmam managed.
 */
544 static int hisi_ptt_config_trace_buf(struct hisi_ptt *hisi_ptt)
546 struct hisi_ptt_trace_ctrl *ctrl = &hisi_ptt->trace_ctrl;
547 struct device *dev = &hisi_ptt->pdev->dev;
550 ctrl->trace_buf = devm_kcalloc(dev, HISI_PTT_TRACE_BUF_CNT,
551 sizeof(*ctrl->trace_buf), GFP_KERNEL);
552 if (!ctrl->trace_buf)
555 for (i = 0; i < HISI_PTT_TRACE_BUF_CNT; ++i) {
556 ctrl->trace_buf[i].addr = dmam_alloc_coherent(dev, HISI_PTT_TRACE_BUF_SIZE,
557 &ctrl->trace_buf[i].dma,
559 if (!ctrl->trace_buf[i].addr)
563 /* Configure the trace DMA buffer */
564 for (i = 0; i < HISI_PTT_TRACE_BUF_CNT; i++) {
565 writel(lower_32_bits(ctrl->trace_buf[i].dma),
566 hisi_ptt->iobase + HISI_PTT_TRACE_ADDR_BASE_LO_0 +
567 i * HISI_PTT_TRACE_ADDR_STRIDE);
568 writel(upper_32_bits(ctrl->trace_buf[i].dma),
569 hisi_ptt->iobase + HISI_PTT_TRACE_ADDR_BASE_HI_0 +
570 i * HISI_PTT_TRACE_ADDR_STRIDE);
572 writel(HISI_PTT_TRACE_BUF_SIZE, hisi_ptt->iobase + HISI_PTT_TRACE_ADDR_SIZE);
/*
 * Probe-time controller setup: initialize the filter-update machinery
 * (work, kfifo, locks, lists), allocate the trace DMA buffers, read the
 * supported Root Port BDF range from the device, and walk the bus to
 * build the initial filter lists.
 */
577 static int hisi_ptt_init_ctrls(struct hisi_ptt *hisi_ptt)
579 struct pci_dev *pdev = hisi_ptt->pdev;
584 INIT_DELAYED_WORK(&hisi_ptt->work, hisi_ptt_update_filters);
585 INIT_KFIFO(hisi_ptt->filter_update_kfifo);
586 spin_lock_init(&hisi_ptt->filter_update_lock);
588 INIT_LIST_HEAD(&hisi_ptt->port_filters);
589 INIT_LIST_HEAD(&hisi_ptt->req_filters);
590 mutex_init(&hisi_ptt->filter_lock);
592 ret = hisi_ptt_config_trace_buf(hisi_ptt);
/*
597 * The device range register provides the information about the root
598 * ports which the RCiEP can control and trace. The RCiEP and the root
599 * ports which it supports are on the same PCIe core, with same domain
600 * number but maybe different bus number. The device range register
601 * will tell us which root ports we can support, Bit[31:16] indicates
602 * the upper BDF numbers of the root port, while Bit[15:0] indicates
 * the lower BDF numbers.
 */
605 reg = readl(hisi_ptt->iobase + HISI_PTT_DEVICE_RANGE);
606 hisi_ptt->upper_bdf = FIELD_GET(HISI_PTT_DEVICE_RANGE_UPPER, reg);
607 hisi_ptt->lower_bdf = FIELD_GET(HISI_PTT_DEVICE_RANGE_LOWER, reg);
609 bus = pci_find_bus(pci_domain_nr(pdev->bus), PCI_BUS_NUM(hisi_ptt->upper_bdf));
611 pci_walk_bus(bus, hisi_ptt_init_filters, hisi_ptt);
613 ret = devm_add_action_or_reset(&pdev->dev, hisi_ptt_release_filters, hisi_ptt);
/* -1 means no CPU has claimed the trace interrupt/context yet */
617 hisi_ptt->trace_ctrl.on_cpu = -1;
/*
 * "cpumask" sysfs attribute: report the CPUs on the PTT device's NUMA
 * node, i.e. the CPUs that may open events on this PMU.
 */
621 static ssize_t cpumask_show(struct device *dev, struct device_attribute *attr,
624 struct hisi_ptt *hisi_ptt = to_hisi_ptt(dev_get_drvdata(dev));
625 const cpumask_t *cpumask = cpumask_of_node(dev_to_node(&hisi_ptt->pdev->dev));
627 return cpumap_print_to_pagebuf(true, buf, cpumask);
629 static DEVICE_ATTR_RO(cpumask);
631 static struct attribute *hisi_ptt_cpumask_attrs[] = {
632 &dev_attr_cpumask.attr,
636 static const struct attribute_group hisi_ptt_cpumask_attr_group = {
637 .attrs = hisi_ptt_cpumask_attrs,
/*
641 * Bit 19 indicates the filter type, 1 for Root Port filter and 0 for Requester
642 * filter. Bit[15:0] indicates the filter value, for Root Port filter it's
643 * a bit mask of desired ports and for Requester filter it's the Requester ID
644 * of the desired PCIe function. Bit[18:16] is reserved for extension.
646 * See hisi_ptt.rst documentation for detailed information.
 */
648 PMU_FORMAT_ATTR(filter, "config:0-19");
649 PMU_FORMAT_ATTR(direction, "config:20-23");
650 PMU_FORMAT_ATTR(type, "config:24-31");
651 PMU_FORMAT_ATTR(format, "config:32-35");
653 static struct attribute *hisi_ptt_pmu_format_attrs[] = {
654 &format_attr_filter.attr,
655 &format_attr_direction.attr,
656 &format_attr_type.attr,
657 &format_attr_format.attr,
661 static struct attribute_group hisi_ptt_pmu_format_group = {
663 .attrs = hisi_ptt_pmu_format_attrs,
/* All sysfs groups exposed by the PMU: cpumask, event format, tuning knobs */
666 static const struct attribute_group *hisi_ptt_pmu_groups[] = {
667 &hisi_ptt_cpumask_attr_group,
668 &hisi_ptt_pmu_format_group,
669 &hisi_ptt_tune_group,
/*
 * Validate the user-supplied direction field of event->attr.config
 * against the table of supported values.
 */
673 static int hisi_ptt_trace_valid_direction(u32 val)
/*
676 * The direction values have different effects according to the data
677 * format (specified in the parentheses). TLP set A/B means different
678 * set of TLP types. See hisi_ptt.rst documentation for more details.
 */
680 static const u32 hisi_ptt_trace_available_direction[] = {
681 0, /* inbound(4DW) or reserved(8DW) */
682 1, /* outbound(4DW) */
683 2, /* {in, out}bound(4DW) or inbound(8DW), TLP set A */
684 3, /* {in, out}bound(4DW) or inbound(8DW), TLP set B */
688 for (i = 0; i < ARRAY_SIZE(hisi_ptt_trace_available_direction); i++) {
689 if (val == hisi_ptt_trace_available_direction[i])
/*
 * Validate the type field. Unlike direction, multiple types may be
 * OR'ed together, so validation clears known bits and rejects leftovers.
 */
696 static int hisi_ptt_trace_valid_type(u32 val)
698 /* Different types can be set simultaneously */
699 static const u32 hisi_ptt_trace_available_type[] = {
700 1, /* posted_request */
701 2, /* non-posted_request */
/*
710 * Walk the available list and clear the valid bits of
711 * the config. If there is any resident bit after the
712 * walk then the config is invalid.
 */
714 for (i = 0; i < ARRAY_SIZE(hisi_ptt_trace_available_type); i++)
715 val &= ~hisi_ptt_trace_available_type[i];
/*
 * Validate the data format field against the supported formats table.
 * NOTE(review): "availble" is a typo in the identifier; harmless since
 * the array is file-local.
 */
723 static int hisi_ptt_trace_valid_format(u32 val)
725 static const u32 hisi_ptt_trace_availble_format[] = {
731 for (i = 0; i < ARRAY_SIZE(hisi_ptt_trace_availble_format); i++) {
732 if (val == hisi_ptt_trace_availble_format[i])
/*
 * Validate the filter field of event->attr.config and record whether a
 * port or requester filter was requested in trace_ctrl.is_port.
 */
739 static int hisi_ptt_trace_valid_filter(struct hisi_ptt *hisi_ptt, u64 config)
741 unsigned long val, port_mask = hisi_ptt->port_mask;
742 struct hisi_ptt_filter_desc *filter;
745 hisi_ptt->trace_ctrl.is_port = FIELD_GET(HISI_PTT_PMU_FILTER_IS_PORT, config);
746 val = FIELD_GET(HISI_PTT_PMU_FILTER_VAL_MASK, config);
/*
749 * Port filters are defined as bit mask. For port filters, check
750 * the bits in the @val are within the range of hisi_ptt->port_mask
751 * and whether it's empty or not, otherwise user has specified
752 * some unsupported root ports.
754 * For Requester ID filters, walk the available filter list to see
755 * whether we have one matched.
 */
757 mutex_lock(&hisi_ptt->filter_lock);
758 if (!hisi_ptt->trace_ctrl.is_port) {
759 list_for_each_entry(filter, &hisi_ptt->req_filters, list) {
760 if (val == hisi_ptt_get_filter_val(filter->devid, filter->is_port))
763 } else if (bitmap_subset(&val, &port_mask, BITS_PER_LONG)) {
769 mutex_unlock(&hisi_ptt->filter_lock);
/*
 * Unpack the (already validated) event config into trace_ctrl fields:
 * filter value, direction, type and format.
 */
773 static void hisi_ptt_pmu_init_configs(struct hisi_ptt *hisi_ptt, struct perf_event *event)
775 struct hisi_ptt_trace_ctrl *ctrl = &hisi_ptt->trace_ctrl;
778 val = FIELD_GET(HISI_PTT_PMU_FILTER_VAL_MASK, event->attr.config);
779 hisi_ptt->trace_ctrl.filter = val;
781 val = FIELD_GET(HISI_PTT_PMU_DIRECTION_MASK, event->attr.config);
782 ctrl->direction = val;
784 val = FIELD_GET(HISI_PTT_PMU_TYPE_MASK, event->attr.config);
787 val = FIELD_GET(HISI_PTT_PMU_FORMAT_MASK, event->attr.config);
/*
 * perf event_init callback: reject per-task events and events of other
 * PMU types, then validate each config field (filter, direction, type,
 * format) in turn.
 */
791 static int hisi_ptt_pmu_event_init(struct perf_event *event)
793 struct hisi_ptt *hisi_ptt = to_hisi_ptt(event->pmu);
797 if (event->cpu < 0) {
798 dev_dbg(event->pmu->dev, "Per-task mode not supported\n");
802 if (event->attr.type != hisi_ptt->hisi_ptt_pmu.type)
805 ret = hisi_ptt_trace_valid_filter(hisi_ptt, event->attr.config);
809 val = FIELD_GET(HISI_PTT_PMU_DIRECTION_MASK, event->attr.config);
810 ret = hisi_ptt_trace_valid_direction(val);
814 val = FIELD_GET(HISI_PTT_PMU_TYPE_MASK, event->attr.config);
815 ret = hisi_ptt_trace_valid_type(val);
819 val = FIELD_GET(HISI_PTT_PMU_FORMAT_MASK, event->attr.config);
820 return hisi_ptt_trace_valid_format(val);
/*
 * perf setup_aux callback: vmap() the AUX pages into one contiguous
 * kernel mapping so the ISR can memcpy trace data into it. Overwrite
 * mode is rejected, and the AUX buffer must be at least as large as the
 * device's total trace buffer size.
 */
823 static void *hisi_ptt_pmu_setup_aux(struct perf_event *event, void **pages,
824 int nr_pages, bool overwrite)
826 struct hisi_ptt_pmu_buf *buf;
827 struct page **pagelist;
831 dev_warn(event->pmu->dev, "Overwrite mode is not supported\n");
835 /* If the pages size is less than the device buffers, we cannot start trace */
836 if (nr_pages < HISI_PTT_TRACE_TOTAL_BUF_SIZE / PAGE_SIZE)
839 buf = kzalloc(sizeof(*buf), GFP_KERNEL);
843 pagelist = kcalloc(nr_pages, sizeof(*pagelist), GFP_KERNEL);
847 for (i = 0; i < nr_pages; i++)
848 pagelist[i] = virt_to_page(pages[i]);
850 buf->base = vmap(pagelist, nr_pages, VM_MAP, PAGE_KERNEL);
856 buf->nr_pages = nr_pages;
857 buf->length = nr_pages * PAGE_SIZE;
/* perf free_aux callback: tear down the mapping created by setup_aux */
867 static void hisi_ptt_pmu_free_aux(void *aux)
869 struct hisi_ptt_pmu_buf *buf = aux;
/*
 * perf start callback: under pmu_lock (only one trace session may run),
 * bind the trace interrupt to the starting CPU, begin AUX output and
 * kick off the hardware trace. On any failure the event is marked
 * PERF_HES_STOPPED.
 */
875 static void hisi_ptt_pmu_start(struct perf_event *event, int flags)
877 struct hisi_ptt *hisi_ptt = to_hisi_ptt(event->pmu);
878 struct perf_output_handle *handle = &hisi_ptt->trace_ctrl.handle;
879 struct hw_perf_event *hwc = &event->hw;
880 struct device *dev = event->pmu->dev;
881 struct hisi_ptt_pmu_buf *buf;
882 int cpu = event->cpu;
887 /* Serialize the perf process if user specified several CPUs */
888 spin_lock(&hisi_ptt->pmu_lock);
889 if (hisi_ptt->trace_ctrl.started) {
890 dev_dbg(dev, "trace has already started\n");
/*
895 * Handle the interrupt on the same cpu which starts the trace to avoid
896 * context mismatch. Otherwise we'll trigger the WARN from the perf
897 * core in event_function_local(). If CPU passed is offline we'll fail
898 * here, just log it since we can do nothing here.
 */
900 ret = irq_set_affinity(pci_irq_vector(hisi_ptt->pdev, HISI_PTT_TRACE_DMA_IRQ),
903 dev_warn(dev, "failed to set the affinity of trace interrupt\n");
905 hisi_ptt->trace_ctrl.on_cpu = cpu;
907 buf = perf_aux_output_begin(handle, event);
909 dev_dbg(dev, "aux output begin failed\n");
913 buf->pos = handle->head % buf->length;
915 hisi_ptt_pmu_init_configs(hisi_ptt, event);
917 ret = hisi_ptt_trace_start(hisi_ptt);
919 dev_dbg(dev, "trace start failed, ret = %d\n", ret);
920 perf_aux_output_end(handle, 0);
924 spin_unlock(&hisi_ptt->pmu_lock);
/* Error path: mark the event stopped before releasing the lock */
927 event->hw.state |= PERF_HES_STOPPED;
928 spin_unlock(&hisi_ptt->pmu_lock);
/*
 * perf stop callback: if a session is running, stop the hardware, wait
 * for it to drain to idle, and flush the final partial buffer to the
 * AUX area before marking the event stopped and up to date.
 */
931 static void hisi_ptt_pmu_stop(struct perf_event *event, int flags)
933 struct hisi_ptt *hisi_ptt = to_hisi_ptt(event->pmu);
934 struct hw_perf_event *hwc = &event->hw;
936 if (hwc->state & PERF_HES_STOPPED)
939 spin_lock(&hisi_ptt->pmu_lock);
940 if (hisi_ptt->trace_ctrl.started) {
941 hisi_ptt_trace_end(hisi_ptt);
943 if (!hisi_ptt_wait_trace_hw_idle(hisi_ptt))
944 dev_warn(event->pmu->dev, "Device is still busy\n");
946 hisi_ptt_update_aux(hisi_ptt, hisi_ptt->trace_ctrl.buf_index, true);
948 spin_unlock(&hisi_ptt->pmu_lock);
950 hwc->state |= PERF_HES_STOPPED;
951 perf_event_update_userpage(event);
952 hwc->state |= PERF_HES_UPTODATE;
/*
 * perf add callback: restrict events to CPUs on the device's NUMA node,
 * then start immediately if PERF_EF_START was requested.
 */
955 static int hisi_ptt_pmu_add(struct perf_event *event, int flags)
957 struct hisi_ptt *hisi_ptt = to_hisi_ptt(event->pmu);
958 struct hw_perf_event *hwc = &event->hw;
959 int cpu = event->cpu;
961 /* Only allow the cpus on the device's node to add the event */
962 if (!cpumask_test_cpu(cpu, cpumask_of_node(dev_to_node(&hisi_ptt->pdev->dev))))
965 hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
967 if (flags & PERF_EF_START) {
968 hisi_ptt_pmu_start(event, PERF_EF_RELOAD);
969 if (hwc->state & PERF_HES_STOPPED)
/* perf del callback: stopping with PERF_EF_UPDATE flushes remaining data */
976 static void hisi_ptt_pmu_del(struct perf_event *event, int flags)
978 hisi_ptt_pmu_stop(event, PERF_EF_UPDATE);
/* devm action callback: drop this instance from the cpuhp state machine */
981 static void hisi_ptt_remove_cpuhp_instance(void *hotplug_node)
983 cpuhp_state_remove_instance_nocalls(hisi_ptt_pmu_online, hotplug_node);
/* devm action callback: unregister the perf PMU */
986 static void hisi_ptt_unregister_pmu(void *pmu)
988 perf_pmu_unregister(pmu);
/*
 * Register the PTT PMU with perf: hook into the CPU hotplug state for
 * context migration, fill in the pmu ops, derive the PMU name from the
 * SICL/core IDs read from the LOCATION register, and arrange devm
 * cleanup for both the cpuhp instance and the PMU registration.
 */
991 static int hisi_ptt_register_pmu(struct hisi_ptt *hisi_ptt)
993 u16 core_id, sicl_id;
998 ret = cpuhp_state_add_instance_nocalls(hisi_ptt_pmu_online,
999 &hisi_ptt->hotplug_node);
1003 ret = devm_add_action_or_reset(&hisi_ptt->pdev->dev,
1004 hisi_ptt_remove_cpuhp_instance,
1005 &hisi_ptt->hotplug_node);
1009 mutex_init(&hisi_ptt->tune_lock);
1010 spin_lock_init(&hisi_ptt->pmu_lock);
1012 hisi_ptt->hisi_ptt_pmu = (struct pmu) {
1013 .module = THIS_MODULE,
1014 .capabilities = PERF_PMU_CAP_EXCLUSIVE | PERF_PMU_CAP_ITRACE,
1015 .task_ctx_nr = perf_sw_context,
1016 .attr_groups = hisi_ptt_pmu_groups,
1017 .event_init = hisi_ptt_pmu_event_init,
1018 .setup_aux = hisi_ptt_pmu_setup_aux,
1019 .free_aux = hisi_ptt_pmu_free_aux,
1020 .start = hisi_ptt_pmu_start,
1021 .stop = hisi_ptt_pmu_stop,
1022 .add = hisi_ptt_pmu_add,
1023 .del = hisi_ptt_pmu_del,
/* PMU name encodes the SICL and PCIe core the device lives on */
1026 reg = readl(hisi_ptt->iobase + HISI_PTT_LOCATION);
1027 core_id = FIELD_GET(HISI_PTT_CORE_ID, reg);
1028 sicl_id = FIELD_GET(HISI_PTT_SICL_ID, reg);
1030 pmu_name = devm_kasprintf(&hisi_ptt->pdev->dev, GFP_KERNEL, "hisi_ptt%u_%u",
1035 ret = perf_pmu_register(&hisi_ptt->hisi_ptt_pmu, pmu_name, -1);
1039 return devm_add_action_or_reset(&hisi_ptt->pdev->dev,
1040 hisi_ptt_unregister_pmu,
1041 &hisi_ptt->hisi_ptt_pmu);
/*
 * devm action callback: unhook the bus notifier, then flush any filter
 * update work it may have queued.
 */
1044 static void hisi_ptt_unregister_filter_update_notifier(void *data)
1046 struct hisi_ptt *hisi_ptt = data;
1048 bus_unregister_notifier(&pci_bus_type, &hisi_ptt->hisi_ptt_nb);
1050 /* Cancel any work that has been queued */
1051 cancel_delayed_work_sync(&hisi_ptt->work);
1054 /* Register the bus notifier for dynamically updating the filter list */
1055 static int hisi_ptt_register_filter_update_notifier(struct hisi_ptt *hisi_ptt)
1059 hisi_ptt->hisi_ptt_nb.notifier_call = hisi_ptt_notifier_call;
1060 ret = bus_register_notifier(&pci_bus_type, &hisi_ptt->hisi_ptt_nb);
1064 return devm_add_action_or_reset(&hisi_ptt->pdev->dev,
1065 hisi_ptt_unregister_filter_update_notifier,
/*
1070 * The DMA of PTT trace can only use direct mappings due to some
1071 * hardware restriction. Check whether there is no IOMMU or the
1072 * policy of the IOMMU domain is passthrough, otherwise the trace
 * cannot work.
1075 * The PTT device is supposed to be behind an ARM SMMUv3, which
1076 * should have passthrough'ed the device by a quirk.
 */
1078 static int hisi_ptt_check_iommu_mapping(struct pci_dev *pdev)
1080 struct iommu_domain *iommu_domain;
1082 iommu_domain = iommu_get_domain_for_dev(&pdev->dev);
1083 if (!iommu_domain || iommu_domain->type == IOMMU_DOMAIN_IDENTITY)
/*
 * PCI probe: verify direct DMA mapping, enable and map the device
 * (BAR 2), set a 64-bit coherent DMA mask, then bring up the IRQ,
 * controller state, filter-update notifier (non-fatal on failure) and
 * finally the perf PMU. All resources are devm-managed.
 */
1089 static int hisi_ptt_probe(struct pci_dev *pdev,
1090 const struct pci_device_id *id)
1092 struct hisi_ptt *hisi_ptt;
1095 ret = hisi_ptt_check_iommu_mapping(pdev);
1097 pci_err(pdev, "requires direct DMA mappings\n");
1101 hisi_ptt = devm_kzalloc(&pdev->dev, sizeof(*hisi_ptt), GFP_KERNEL);
1105 hisi_ptt->pdev = pdev;
1106 pci_set_drvdata(pdev, hisi_ptt);
1108 ret = pcim_enable_device(pdev);
1110 pci_err(pdev, "failed to enable device, ret = %d\n", ret);
1114 ret = pcim_iomap_regions(pdev, BIT(2), DRV_NAME);
1116 pci_err(pdev, "failed to remap io memory, ret = %d\n", ret);
1120 hisi_ptt->iobase = pcim_iomap_table(pdev)[2];
1122 ret = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
1124 pci_err(pdev, "failed to set 64 bit dma mask, ret = %d\n", ret);
1128 pci_set_master(pdev);
1130 ret = hisi_ptt_register_irq(hisi_ptt);
1134 ret = hisi_ptt_init_ctrls(hisi_ptt);
1136 pci_err(pdev, "failed to init controls, ret = %d\n", ret);
/* Notifier failure is non-fatal: static filters still work */
1140 ret = hisi_ptt_register_filter_update_notifier(hisi_ptt);
1142 pci_warn(pdev, "failed to register filter update notifier, ret = %d", ret);
1144 ret = hisi_ptt_register_pmu(hisi_ptt);
1146 pci_err(pdev, "failed to register PMU device, ret = %d", ret);
/* Match the HiSilicon PTT RCiEP (vendor Huawei, device 0xa12e) */
1153 static const struct pci_device_id hisi_ptt_id_tbl[] = {
1154 { PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, 0xa12e) },
1157 MODULE_DEVICE_TABLE(pci, hisi_ptt_id_tbl);
1159 static struct pci_driver hisi_ptt_driver = {
1161 .id_table = hisi_ptt_id_tbl,
1162 .probe = hisi_ptt_probe,
/*
 * CPU hotplug teardown callback: if the CPU going offline owns an
 * active trace session, migrate the perf context and the trace
 * interrupt affinity to another CPU on the device's node.
 */
1165 static int hisi_ptt_cpu_teardown(unsigned int cpu, struct hlist_node *node)
1167 struct hisi_ptt *hisi_ptt;
1171 hisi_ptt = hlist_entry_safe(node, struct hisi_ptt, hotplug_node);
1172 src = hisi_ptt->trace_ctrl.on_cpu;
1173 dev = hisi_ptt->hisi_ptt_pmu.dev;
1175 if (!hisi_ptt->trace_ctrl.started || src != cpu)
1178 target = cpumask_any_but(cpumask_of_node(dev_to_node(&hisi_ptt->pdev->dev)), cpu);
1179 if (target >= nr_cpu_ids) {
1180 dev_err(dev, "no available cpu for perf context migration\n");
1184 perf_pmu_migrate_context(&hisi_ptt->hisi_ptt_pmu, src, target);
/*
1187 * Also make sure the interrupt is bound to the migrated CPU as well. Warn
1188 * the user on failure here.
 */
1190 if (irq_set_affinity(pci_irq_vector(hisi_ptt->pdev, HISI_PTT_TRACE_DMA_IRQ),
1191 cpumask_of(target)))
1192 dev_warn(dev, "failed to set the affinity of trace interrupt\n");
1194 hisi_ptt->trace_ctrl.on_cpu = target;
/*
 * Module init: set up a dynamic CPU hotplug state (teardown-only, used
 * for perf context migration) before registering the PCI driver; undo
 * the cpuhp state if driver registration fails.
 */
1198 static int __init hisi_ptt_init(void)
1202 ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, DRV_NAME, NULL,
1203 hisi_ptt_cpu_teardown);
1206 hisi_ptt_pmu_online = ret;
1208 ret = pci_register_driver(&hisi_ptt_driver);
1210 cpuhp_remove_multi_state(hisi_ptt_pmu_online);
1214 module_init(hisi_ptt_init);
/* Module exit: unregister the driver, then remove the cpuhp state */
1216 static void __exit hisi_ptt_exit(void)
1218 pci_unregister_driver(&hisi_ptt_driver);
1219 cpuhp_remove_multi_state(hisi_ptt_pmu_online);
1221 module_exit(hisi_ptt_exit);
1223 MODULE_LICENSE("GPL");
1224 MODULE_AUTHOR("Yicong Yang <yangyicong@hisilicon.com>");
1225 MODULE_DESCRIPTION("Driver for HiSilicon PCIe tune and trace device");