dev: Let the pixel pump bypass the DMA FIFO in non-caching mode.

When in non-caching mode, performance metrics are not meaningful, and
we're just interested in functional level behavior. Going through the
DMA FIFO in the HDLCD controller is very inefficient, and prevents
reading a batch of pixels from memory all in one go.

Change-Id: I3fb6d4d06730b5a94b5399f01aa02186baa5c9b3
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/38721
Reviewed-by: Andreas Sandberg <andreas.sandberg@arm.com>
Maintainer: Andreas Sandberg <andreas.sandberg@arm.com>
Tested-by: kokoro <noreply+kokoro@google.com>
This commit is contained in:
Gabe Black
2020-12-24 07:53:57 -08:00
parent 5a124c2d86
commit 9c7cc711bc
4 changed files with 71 additions and 12 deletions

View File

@@ -37,6 +37,7 @@
#include "dev/arm/hdlcd.hh"
#include "base/compiler.hh"
#include "base/output.hh"
#include "base/trace.hh"
#include "base/vnc/vncinput.hh"
@@ -505,6 +506,25 @@ HDLcd::pxlNext(Pixel &p)
}
}
size_t
HDLcd::lineNext(std::vector<Pixel>::iterator pixel_it, size_t line_length)
{
const size_t byte_count = line_length * conv.length;
lineBuffer.resize(byte_count);
dmaRead(bypassLineAddress, byte_count, nullptr, lineBuffer.data());
bypassLineAddress += fb_line_pitch;
uint8_t *bufPtr = lineBuffer.data();
for (size_t i = 0; i < line_length; i++) {
*pixel_it++ = conv.toPixel(bufPtr);
bufPtr += conv.length;
}
return line_length;
}
void
HDLcd::pxlVSyncBegin()
{
@@ -516,7 +536,11 @@ void
HDLcd::pxlVSyncEnd()
{
DPRINTF(HDLcd, "End of VSYNC, starting DMA engine\n");
dmaEngine->startFrame(fb_base);
if (sys->bypassCaches()) {
bypassLineAddress = fb_base;
} else {
dmaEngine->startFrame(fb_base);
}
}
void

View File

@@ -75,6 +75,7 @@
#include <fstream>
#include <memory>
#include <vector>
#include "base/framebuffer.hh"
#include "base/imgwriter.hh"
@@ -252,6 +253,8 @@ class HDLcd: public AmbaDmaDevice
ColorSelectReg blue_select = 0; /**< Blue color select register */
/** @} */
std::vector<uint8_t> lineBuffer;
uint32_t readReg(Addr offset);
void writeReg(Addr offset, uint32_t value);
@@ -267,6 +270,7 @@ class HDLcd: public AmbaDmaDevice
public: // Pixel pump callbacks
bool pxlNext(Pixel &p);
size_t lineNext(std::vector<Pixel>::iterator pixel_it, size_t line_length);
void pxlVSyncBegin();
void pxlVSyncEnd();
void pxlUnderrun();
@@ -326,12 +330,19 @@ class HDLcd: public AmbaDmaDevice
{
public:
PixelPump(HDLcd &p, ClockDomain &pxl_clk, unsigned pixel_chunk)
: BasePixelPump(p, pxl_clk, pixel_chunk), parent(p) {}
: BasePixelPump(p, pxl_clk, pixel_chunk), parent(p)
{}
void dumpSettings();
protected:
bool nextPixel(Pixel &p) override { return parent.pxlNext(p); }
size_t
nextLine(std::vector<Pixel>::iterator pixel_it,
size_t line_length) override
{
return parent.lineNext(pixel_it, line_length);
}
void onVSyncBegin() override { return parent.pxlVSyncBegin(); }
void onVSyncEnd() override { return parent.pxlVSyncEnd(); }
@@ -348,6 +359,8 @@ class HDLcd: public AmbaDmaDevice
HDLcd &parent;
};
Addr bypassLineAddress = 0;
/** Handler for fast frame refresh in KVM-mode */
void virtRefresh();
EventFunctionWrapper virtRefreshEvent;

View File

@@ -37,6 +37,8 @@
#include "dev/pixelpump.hh"
#include "base/logging.hh"
const DisplayTimings DisplayTimings::vga(
640, 480,
48, 96, 16,
@@ -281,16 +283,12 @@ BasePixelPump::renderFrame()
void
BasePixelPump::renderLine()
{
const unsigned pos_y(posY());
const unsigned pos_y = posY();
const size_t _width = fb.width();
Pixel pixel(0, 0, 0);
for (_posX = 0; _posX < _timings.width; ++_posX) {
if (!nextPixel(pixel)) {
panic("Unexpected underrun in BasePixelPump (%u, %u)\n",
_posX, pos_y);
}
fb.pixel(_posX, pos_y) = pixel;
}
auto pixel_it = fb.pixels.begin() + _width * pos_y;
panic_if(nextLine(pixel_it, _width) != _width,
"Unexpected underrun in BasePixelPump (%u, %u)", _width, pos_y);
}

View File

@@ -38,6 +38,8 @@
#ifndef __DEV_PIXELPUMP_HH__
#define __DEV_PIXELPUMP_HH__
#include <vector>
#include "base/framebuffer.hh"
#include "sim/clocked_object.hh"
@@ -171,7 +173,7 @@ class BasePixelPump
/** Update frame size using display timing */
void updateTimings(const DisplayTimings &timings);
/** Render an entire frame in KVM execution mode */
/** Render an entire frame in non-caching mode */
void renderFrame();
/** Starting pushing pixels in timing mode */
@@ -219,6 +221,28 @@ class BasePixelPump
*/
virtual bool nextPixel(Pixel &p) = 0;
/**
* Get the next line of pixels directly from memory. This is for use from
* the renderFrame which is called in non-caching mode.
*
* The default implementation falls back to calling nextPixel over and
* over, but a more efficient implementation could retrieve the entire line
* of pixels all at once using fewer access to memory which bypass any
* intermediate structures like an incoming FIFO.
*
* @param ps A vector iterator to store retrieved pixels into.
* @param line_length The number of pixels being requested.
* @return The number of pixels actually retrieved.
*/
virtual size_t
nextLine(std::vector<Pixel>::iterator ps, size_t line_length)
{
size_t count = 0;
while (count < line_length && nextPixel(*ps++))
count++;
return count;
}
/** First pixel clock of the first VSync line. */
virtual void onVSyncBegin() {};