metal: Add support for YUV/NV12 texture formats.

This commit is contained in:
Alex Szpakowski 2018-01-06 18:54:12 -04:00
parent 9a8683b275
commit 740a90af37
4 changed files with 4007 additions and 1859 deletions

View file

@ -90,8 +90,15 @@ SDL_RenderDriver METAL_RenderDriver = {
{
"metal",
(SDL_RENDERER_ACCELERATED | SDL_RENDERER_PRESENTVSYNC | SDL_RENDERER_TARGETTEXTURE),
2,
{SDL_PIXELFORMAT_ARGB8888, SDL_PIXELFORMAT_ABGR8888},
6,
{
SDL_PIXELFORMAT_ARGB8888,
SDL_PIXELFORMAT_ABGR8888,
SDL_PIXELFORMAT_YV12,
SDL_PIXELFORMAT_IYUV,
SDL_PIXELFORMAT_NV12,
SDL_PIXELFORMAT_NV21
},
// !!! FIXME: how do you query Metal for this?
// (the weakest GPU supported by Metal on iOS has 4k texture max, and
@ -116,7 +123,10 @@ SDL_RenderDriver METAL_RenderDriver = {
static const size_t CONSTANTS_OFFSET_IDENTITY = 0;
static const size_t CONSTANTS_OFFSET_HALF_PIXEL_TRANSFORM = ALIGN_CONSTANTS(CONSTANTS_OFFSET_IDENTITY + sizeof(float) * 16);
static const size_t CONSTANTS_OFFSET_CLEAR_VERTS = ALIGN_CONSTANTS(CONSTANTS_OFFSET_HALF_PIXEL_TRANSFORM + sizeof(float) * 16);
static const size_t CONSTANTS_OFFSET_DECODE_JPEG = ALIGN_CONSTANTS(CONSTANTS_OFFSET_HALF_PIXEL_TRANSFORM + sizeof(float) * 16);
static const size_t CONSTANTS_OFFSET_DECODE_BT601 = ALIGN_CONSTANTS(CONSTANTS_OFFSET_DECODE_JPEG + sizeof(float) * 4 * 4);
static const size_t CONSTANTS_OFFSET_DECODE_BT709 = ALIGN_CONSTANTS(CONSTANTS_OFFSET_DECODE_BT601 + sizeof(float) * 4 * 4);
static const size_t CONSTANTS_OFFSET_CLEAR_VERTS = ALIGN_CONSTANTS(CONSTANTS_OFFSET_DECODE_BT709 + sizeof(float) * 4 * 4);
static const size_t CONSTANTS_LENGTH = CONSTANTS_OFFSET_CLEAR_VERTS + sizeof(float) * 6;
typedef enum SDL_MetalVertexFunction
@ -127,8 +137,12 @@ typedef enum SDL_MetalVertexFunction
typedef enum SDL_MetalFragmentFunction
{
SDL_METAL_FRAGMENT_SOLID,
SDL_METAL_FRAGMENT_SOLID = 0,
SDL_METAL_FRAGMENT_COPY,
SDL_METAL_FRAGMENT_YUV,
SDL_METAL_FRAGMENT_NV12,
SDL_METAL_FRAGMENT_NV21,
SDL_METAL_FRAGMENT_COUNT,
} SDL_MetalFragmentFunction;
typedef struct METAL_PipelineState
@ -146,6 +160,15 @@ typedef struct METAL_PipelineCache
const char *label;
} METAL_PipelineCache;
/* Each shader combination used by drawing functions has a separate pipeline
 * cache. This is more efficient than iterating over a global cache to find
 * the pipeline based on the specified shader combination, since we know what
 * the shader combination is inside each drawing function's code.
 *
 * The table is allocated by MakeShaderPipelines and released by
 * DestroyShaderPipelines. */
typedef struct METAL_ShaderPipelines
{
/* One cache per fragment function; indexed with an SDL_MetalFragmentFunction
 * value, so the array is sized by SDL_METAL_FRAGMENT_COUNT. */
METAL_PipelineCache caches[SDL_METAL_FRAGMENT_COUNT];
} METAL_ShaderPipelines;
@interface METAL_RenderData : NSObject
@property (nonatomic, retain) id<MTLDevice> mtldevice;
@property (nonatomic, retain) id<MTLCommandQueue> mtlcmdqueue;
@ -153,13 +176,12 @@ typedef struct METAL_PipelineCache
@property (nonatomic, retain) id<MTLRenderCommandEncoder> mtlcmdencoder;
@property (nonatomic, retain) id<MTLLibrary> mtllibrary;
@property (nonatomic, retain) id<CAMetalDrawable> mtlbackbuffer;
@property (nonatomic, assign) METAL_PipelineCache *mtlpipelineprims;
@property (nonatomic, assign) METAL_PipelineCache *mtlpipelinecopy;
@property (nonatomic, retain) id<MTLSamplerState> mtlsamplernearest;
@property (nonatomic, retain) id<MTLSamplerState> mtlsamplerlinear;
@property (nonatomic, retain) id<MTLBuffer> mtlbufconstants;
@property (nonatomic, retain) CAMetalLayer *mtllayer;
@property (nonatomic, retain) MTLRenderPassDescriptor *mtlpassdesc;
@property (nonatomic, assign) METAL_ShaderPipelines *pipelines;
@end
@implementation METAL_RenderData
@ -184,7 +206,12 @@ typedef struct METAL_PipelineCache
/* Per-texture state the Metal renderer stores in SDL_Texture::driverdata. */
@interface METAL_TextureData : NSObject
/* Primary texture, bound at fragment texture index 0. For the YUV/NV12 family
 * it is a single-channel (R8) texture that receives the Y plane. */
@property (nonatomic, retain) id<MTLTexture> mtltexture;
/* Half-resolution chroma texture, bound at fragment texture index 1: a
 * two-slice R8 array for IYUV/YV12, an RG8 texture for NV12/NV21, and nil
 * for the RGB formats. */
@property (nonatomic, retain) id<MTLTexture> mtltexture_uv;
/* Sampler state bound at fragment sampler index 0 when the texture is drawn. */
@property (nonatomic, retain) id<MTLSamplerState> mtlsampler;
/* Fragment shader used to draw this texture; selects the pipeline cache. */
@property (nonatomic, assign) SDL_MetalFragmentFunction fragmentFunction;
/* YES for the three-plane formats (IYUV, YV12). */
@property (nonatomic, assign) BOOL yuv;
/* YES for the two-plane formats (NV12, NV21). */
@property (nonatomic, assign) BOOL nv12;
/* Byte offset of the YUV decode matrix inside the renderer's constants
 * buffer; only assigned when yuv or nv12 is set. */
@property (nonatomic, assign) size_t conversionBufferOffset;
@end
@implementation METAL_TextureData
@ -192,6 +219,7 @@ typedef struct METAL_PipelineCache
/* Manual reference counting teardown: drops this object's references to both
 * textures and the sampler, then chains to the superclass. Releasing a nil
 * ivar (e.g. _mtltexture_uv on RGB textures) is a no-op. */
- (void)dealloc
{
[_mtltexture release];
[_mtltexture_uv release];
[_mtlsampler release];
[super dealloc];
}
@ -265,6 +293,9 @@ GetFragmentFunctionName(SDL_MetalFragmentFunction function)
switch (function) {
case SDL_METAL_FRAGMENT_SOLID: return @"SDL_Solid_fragment";
case SDL_METAL_FRAGMENT_COPY: return @"SDL_Copy_fragment";
case SDL_METAL_FRAGMENT_YUV: return @"SDL_YUV_fragment";
case SDL_METAL_FRAGMENT_NV12: return @"SDL_NV12_fragment";
case SDL_METAL_FRAGMENT_NV21: return @"SDL_NV21_fragment";
default: return nil;
}
}
@ -329,16 +360,9 @@ MakePipelineState(METAL_RenderData *data, METAL_PipelineCache *cache,
}
}
static METAL_PipelineCache *
MakePipelineCache(METAL_RenderData *data, const char *label, SDL_MetalVertexFunction vertfn, SDL_MetalFragmentFunction fragfn)
static void
MakePipelineCache(METAL_RenderData *data, METAL_PipelineCache *cache, const char *label, SDL_MetalVertexFunction vertfn, SDL_MetalFragmentFunction fragfn)
{
METAL_PipelineCache *cache = SDL_malloc(sizeof(METAL_PipelineCache));
if (!cache) {
SDL_OutOfMemory();
return NULL;
}
SDL_zerop(cache);
cache->vertexFunction = vertfn;
@ -347,12 +371,10 @@ MakePipelineCache(METAL_RenderData *data, const char *label, SDL_MetalVertexFunc
/* Create pipeline states for the default blend modes. Custom blend modes
* will be added to the cache on-demand. */
MakePipelineState(data, cache, @"(blend=none)", SDL_BLENDMODE_NONE);
MakePipelineState(data, cache, @"(blend=blend)", SDL_BLENDMODE_BLEND);
MakePipelineState(data, cache, @"(blend=add)", SDL_BLENDMODE_ADD);
MakePipelineState(data, cache, @"(blend=mod)", SDL_BLENDMODE_MOD);
return cache;
MakePipelineState(data, cache, @" (blend=none)", SDL_BLENDMODE_NONE);
MakePipelineState(data, cache, @" (blend=blend)", SDL_BLENDMODE_BLEND);
MakePipelineState(data, cache, @" (blend=add)", SDL_BLENDMODE_ADD);
MakePipelineState(data, cache, @" (blend=mod)", SDL_BLENDMODE_MOD);
}
static void
@ -364,20 +386,51 @@ DestroyPipelineCache(METAL_PipelineCache *cache)
}
SDL_free(cache->states);
SDL_free(cache);
}
}
/* Allocates the renderer's zero-initialised pipeline table and populates one
 * pipeline cache per fragment shader. Every textured shader is paired with
 * the copy vertex function; only the solid shader uses the solid vertex
 * function.
 *
 * Returns the new table, or NULL (after reporting SDL_OutOfMemory) when the
 * allocation fails. The caller releases it with DestroyShaderPipelines. */
static METAL_ShaderPipelines *
MakeShaderPipelines(METAL_RenderData *data)
{
    /* Creation order matches the enum order of SDL_MetalFragmentFunction. */
    static const struct {
        SDL_MetalFragmentFunction fragfn;
        SDL_MetalVertexFunction vertfn;
        const char *label;
    } setups[] = {
        { SDL_METAL_FRAGMENT_SOLID, SDL_METAL_VERTEX_SOLID, "SDL primitives pipeline" },
        { SDL_METAL_FRAGMENT_COPY,  SDL_METAL_VERTEX_COPY,  "SDL copy pipeline" },
        { SDL_METAL_FRAGMENT_YUV,   SDL_METAL_VERTEX_COPY,  "SDL YUV pipeline" },
        { SDL_METAL_FRAGMENT_NV12,  SDL_METAL_VERTEX_COPY,  "SDL NV12 pipeline" },
        { SDL_METAL_FRAGMENT_NV21,  SDL_METAL_VERTEX_COPY,  "SDL NV21 pipeline" },
    };

    METAL_ShaderPipelines *result = SDL_calloc(1, sizeof(METAL_ShaderPipelines));
    if (result == NULL) {
        SDL_OutOfMemory();
        return NULL;
    }

    for (size_t i = 0; i < sizeof(setups) / sizeof(setups[0]); i++) {
        MakePipelineCache(data, &result->caches[setups[i].fragfn], setups[i].label, setups[i].vertfn, setups[i].fragfn);
    }

    return result;
}
/* Tears down every per-shader pipeline cache in the table and then frees the
 * table itself. Passing NULL is allowed and does nothing. */
static void
DestroyShaderPipelines(METAL_ShaderPipelines *pipelines)
{
    if (pipelines == NULL) {
        return;
    }

    /* Walk the embedded caches front to back, same order as they were made. */
    METAL_PipelineCache *cache = pipelines->caches;
    METAL_PipelineCache *const end = cache + SDL_METAL_FRAGMENT_COUNT;
    while (cache != end) {
        DestroyPipelineCache(cache);
        cache++;
    }

    SDL_free(pipelines);
}
/* Returns the render pipeline state for a fragment function / blend mode pair.
 *
 * The cache belonging to `fragfn` is scanned linearly for an entry whose
 * blendMode equals `blendmode`. On a miss, MakePipelineState is asked to build
 * a state labelled as a custom blend mode and its result is returned. */
static inline id<MTLRenderPipelineState>
ChoosePipelineState(METAL_RenderData *data, METAL_PipelineCache *cache, const SDL_BlendMode blendmode)
ChoosePipelineState(METAL_RenderData *data, METAL_ShaderPipelines *pipelines, SDL_MetalFragmentFunction fragfn, SDL_BlendMode blendmode)
{
/* fragfn indexes straight into the per-shader cache array (no bounds check). */
METAL_PipelineCache *cache = &pipelines->caches[fragfn];
for (int i = 0; i < cache->count; i++) {
if (cache->states[i].blendMode == blendmode) {
/* The stored pointer is only bridged here; ownership is not transferred. */
return (__bridge id<MTLRenderPipelineState>)cache->states[i].pipe;
}
}
return MakePipelineState(data, cache, [NSString stringWithFormat:@"(blend=custom 0x%x)", blendmode], blendmode);
return MakePipelineState(data, cache, [NSString stringWithFormat:@" (blend=custom 0x%x)", blendmode], blendmode);
}
static SDL_Renderer *
@ -455,8 +508,7 @@ METAL_CreateRenderer(SDL_Window * window, Uint32 flags)
#endif
data.mtllibrary.label = @"SDL Metal renderer shader library";
data.mtlpipelineprims = MakePipelineCache(data, "SDL primitives pipeline ", SDL_METAL_VERTEX_SOLID, SDL_METAL_FRAGMENT_SOLID);
data.mtlpipelinecopy = MakePipelineCache(data, "SDL texture pipeline ", SDL_METAL_VERTEX_COPY, SDL_METAL_FRAGMENT_COPY);
data.pipelines = MakeShaderPipelines(data);
MTLSamplerDescriptor *samplerdesc = [[MTLSamplerDescriptor alloc] init];
@ -485,6 +537,28 @@ METAL_CreateRenderer(SDL_Window * window, Uint32 flags)
0.5f, 0.5f, 0.0f, 1.0f,
};
/* Metal pads float3s to 16 bytes. */
float decodetransformJPEG[4*4] = {
0.0, -0.501960814, -0.501960814, 0.0, /* offset */
1.0000, 0.0000, 1.4020, 0.0, /* Rcoeff */
1.0000, -0.3441, -0.7141, 0.0, /* Gcoeff */
1.0000, 1.7720, 0.0000, 0.0, /* Bcoeff */
};
float decodetransformBT601[4*4] = {
-0.0627451017, -0.501960814, -0.501960814, 0.0, /* offset */
1.1644, 0.0000, 1.5960, 0.0, /* Rcoeff */
1.1644, -0.3918, -0.8130, 0.0, /* Gcoeff */
1.1644, 2.0172, 0.0000, 0.0, /* Bcoeff */
};
/* BT.709 limited-range ("video range") YCbCr -> RGB decode matrix.
 * Luma is offset by -16/255 and scaled by 255/219 (1.1644); the chroma
 * coefficients come from Kr = 0.2126, Kb = 0.0722 scaled by 255/224.
 * These must differ from the JPEG (full-range BT.601) matrix, otherwise HD
 * video decodes with the wrong colours and raised blacks.
 * Each row is padded to 4 floats because Metal pads float3s to 16 bytes. */
float decodetransformBT709[4*4] = {
    -0.0627451017, -0.501960814, -0.501960814, 0.0, /* offset */
    1.1644,  0.0000,  1.7927, 0.0,                  /* Rcoeff */
    1.1644, -0.2132, -0.5329, 0.0,                  /* Gcoeff */
    1.1644,  2.1124,  0.0000, 0.0,                  /* Bcoeff */
};
float clearverts[6] = {0.0f, 0.0f, 0.0f, 2.0f, 2.0f, 0.0f};
id<MTLBuffer> mtlbufconstantstaging = [data.mtldevice newBufferWithLength:CONSTANTS_LENGTH options:MTLResourceStorageModeShared];
@ -497,6 +571,9 @@ METAL_CreateRenderer(SDL_Window * window, Uint32 flags)
char *constantdata = [mtlbufconstantstaging contents];
SDL_memcpy(constantdata + CONSTANTS_OFFSET_IDENTITY, identitytransform, sizeof(identitytransform));
SDL_memcpy(constantdata + CONSTANTS_OFFSET_HALF_PIXEL_TRANSFORM, halfpixeltransform, sizeof(halfpixeltransform));
SDL_memcpy(constantdata + CONSTANTS_OFFSET_DECODE_JPEG, decodetransformJPEG, sizeof(decodetransformJPEG));
SDL_memcpy(constantdata + CONSTANTS_OFFSET_DECODE_BT601, decodetransformBT601, sizeof(decodetransformBT601));
SDL_memcpy(constantdata + CONSTANTS_OFFSET_DECODE_BT709, decodetransformBT709, sizeof(decodetransformBT709));
SDL_memcpy(constantdata + CONSTANTS_OFFSET_CLEAR_VERTS, clearverts, sizeof(clearverts));
id<MTLCommandBuffer> cmdbuffer = [data.mtlcmdqueue commandBuffer];
@ -660,15 +737,26 @@ static int
METAL_CreateTexture(SDL_Renderer * renderer, SDL_Texture * texture)
{ @autoreleasepool {
METAL_RenderData *data = (__bridge METAL_RenderData *) renderer->driverdata;
MTLPixelFormat mtlpixfmt;
MTLPixelFormat pixfmt;
switch (texture->format) {
case SDL_PIXELFORMAT_ABGR8888: mtlpixfmt = MTLPixelFormatRGBA8Unorm; break;
case SDL_PIXELFORMAT_ARGB8888: mtlpixfmt = MTLPixelFormatBGRA8Unorm; break;
default: return SDL_SetError("Texture format %s not supported by Metal", SDL_GetPixelFormatName(texture->format));
case SDL_PIXELFORMAT_ABGR8888:
pixfmt = MTLPixelFormatRGBA8Unorm;
break;
case SDL_PIXELFORMAT_ARGB8888:
pixfmt = MTLPixelFormatBGRA8Unorm;
break;
case SDL_PIXELFORMAT_IYUV:
case SDL_PIXELFORMAT_YV12:
case SDL_PIXELFORMAT_NV12:
case SDL_PIXELFORMAT_NV21:
pixfmt = MTLPixelFormatR8Unorm;
break;
default:
return SDL_SetError("Texture format %s not supported by Metal", SDL_GetPixelFormatName(texture->format));
}
MTLTextureDescriptor *mtltexdesc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:mtlpixfmt
MTLTextureDescriptor *mtltexdesc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:pixfmt
width:(NSUInteger)texture->w height:(NSUInteger)texture->h mipmapped:NO];
/* Not available in iOS 8. */
@ -679,14 +767,31 @@ METAL_CreateTexture(SDL_Renderer * renderer, SDL_Texture * texture)
mtltexdesc.usage = MTLTextureUsageShaderRead;
}
}
//mtltexdesc.resourceOptions = MTLResourceCPUCacheModeDefaultCache | MTLResourceStorageModeManaged;
//mtltexdesc.storageMode = MTLStorageModeManaged;
id<MTLTexture> mtltexture = [data.mtldevice newTextureWithDescriptor:mtltexdesc];
if (mtltexture == nil) {
return SDL_SetError("Texture allocation failed");
}
/* Planar and semi-planar formats keep their chroma in a second texture at
 * half resolution (rounded up): a two-slice R8 array for IYUV/YV12 (one slice
 * per chroma plane), or a single RG8 texture for NV12/NV21 (interleaved
 * chroma). RGB formats leave mtltexture_uv nil. */
id<MTLTexture> mtltexture_uv = nil;
BOOL yuv = (texture->format == SDL_PIXELFORMAT_IYUV) || (texture->format == SDL_PIXELFORMAT_YV12);
BOOL nv12 = (texture->format == SDL_PIXELFORMAT_NV12) || (texture->format == SDL_PIXELFORMAT_NV21);
if (yuv) {
    mtltexdesc.pixelFormat = MTLPixelFormatR8Unorm;
    mtltexdesc.width = (texture->w + 1) / 2;
    mtltexdesc.height = (texture->h + 1) / 2;
    mtltexdesc.textureType = MTLTextureType2DArray;
    mtltexdesc.arrayLength = 2;
    mtltexture_uv = [data.mtldevice newTextureWithDescriptor:mtltexdesc];
} else if (nv12) {
    mtltexdesc.pixelFormat = MTLPixelFormatRG8Unorm;
    mtltexdesc.width = (texture->w + 1) / 2;
    mtltexdesc.height = (texture->h + 1) / 2;
    mtltexture_uv = [data.mtldevice newTextureWithDescriptor:mtltexdesc];
}

/* The chroma allocation can fail independently of the luma one. Report it
 * instead of returning success with a texture the YUV shaders cannot sample,
 * and give back the luma texture we already own under manual refcounting. */
if ((yuv || nv12) && mtltexture_uv == nil) {
#if !__has_feature(objc_arc)
    [mtltexture release];
#endif
    return SDL_SetError("Texture allocation failed");
}
METAL_TextureData *texturedata = [[METAL_TextureData alloc] init];
const char *hint = SDL_GetHint(SDL_HINT_RENDER_SCALE_QUALITY);
if (!hint || *hint == '0' || SDL_strcasecmp(hint, "nearest") == 0) {
@ -695,12 +800,39 @@ METAL_CreateTexture(SDL_Renderer * renderer, SDL_Texture * texture)
texturedata.mtlsampler = data.mtlsamplerlinear;
}
texturedata.mtltexture = mtltexture;
texturedata.mtltexture_uv = mtltexture_uv;
texturedata.yuv = yuv;
texturedata.nv12 = nv12;
if (yuv) {
texturedata.fragmentFunction = SDL_METAL_FRAGMENT_YUV;
} else if (texture->format == SDL_PIXELFORMAT_NV12) {
texturedata.fragmentFunction = SDL_METAL_FRAGMENT_NV12;
} else if (texture->format == SDL_PIXELFORMAT_NV21) {
texturedata.fragmentFunction = SDL_METAL_FRAGMENT_NV21;
} else {
texturedata.fragmentFunction = SDL_METAL_FRAGMENT_COPY;
}
if (yuv || nv12) {
size_t offset = 0;
SDL_YUV_CONVERSION_MODE mode = SDL_GetYUVConversionModeForResolution(texture->w, texture->h);
switch (mode) {
case SDL_YUV_CONVERSION_JPEG: offset = CONSTANTS_OFFSET_DECODE_JPEG; break;
case SDL_YUV_CONVERSION_BT601: offset = CONSTANTS_OFFSET_DECODE_BT601; break;
case SDL_YUV_CONVERSION_BT709: offset = CONSTANTS_OFFSET_DECODE_BT709; break;
default: offset = 0; break;
}
texturedata.conversionBufferOffset = offset;
}
texture->driverdata = (void*)CFBridgingRetain(texturedata);
#if !__has_feature(objc_arc)
[texturedata release];
[mtltexture release];
[mtltexture_uv release];
#endif
return 0;
@ -710,12 +842,52 @@ static int
METAL_UpdateTexture(SDL_Renderer * renderer, SDL_Texture * texture,
const SDL_Rect * rect, const void *pixels, int pitch)
{ @autoreleasepool {
METAL_TextureData *texturedata = (__bridge METAL_TextureData *)texture->driverdata;
// !!! FIXME: this is a synchronous call; it doesn't return until data is uploaded in some form.
// !!! FIXME: Maybe move this off to a thread that marks the texture as uploaded and only stall the main thread if we try to
// !!! FIXME: use this texture before the marking is done? Is it worth it? Or will we basically always be uploading a bunch of
// !!! FIXME: stuff way ahead of time and/or using it immediately after upload?
id<MTLTexture> mtltexture = ((__bridge METAL_TextureData *)texture->driverdata).mtltexture;
[mtltexture replaceRegion:MTLRegionMake2D(rect->x, rect->y, rect->w, rect->h) mipmapLevel:0 withBytes:pixels bytesPerRow:pitch];
[texturedata.mtltexture replaceRegion:MTLRegionMake2D(rect->x, rect->y, rect->w, rect->h)
mipmapLevel:0
withBytes:pixels
bytesPerRow:pitch];
if (texturedata.yuv) {
int Uslice = texture->format == SDL_PIXELFORMAT_YV12 ? 1 : 0;
int Vslice = texture->format == SDL_PIXELFORMAT_YV12 ? 0 : 1;
/* Skip to the correct offset into the next texture */
pixels = (const void*)((const Uint8*)pixels + rect->h * pitch);
[texturedata.mtltexture_uv replaceRegion:MTLRegionMake2D(rect->x / 2, rect->y / 2, (rect->w + 1) / 2, (rect->h + 1) / 2)
mipmapLevel:0
slice:Uslice
withBytes:pixels
bytesPerRow:(pitch + 1) / 2
bytesPerImage:0];
/* Skip to the correct offset into the next texture */
pixels = (const void*)((const Uint8*)pixels + ((rect->h + 1) / 2) * ((pitch + 1)/2));
[texturedata.mtltexture_uv replaceRegion:MTLRegionMake2D(rect->x / 2, rect->y / 2, (rect->w + 1) / 2, (rect->h + 1) / 2)
mipmapLevel:0
slice:Vslice
withBytes:pixels
bytesPerRow:(pitch + 1) / 2
bytesPerImage:0];
}
if (texturedata.nv12) {
/* Skip to the correct offset into the next texture */
pixels = (const void*)((const Uint8*)pixels + rect->h * pitch);
[texturedata.mtltexture_uv replaceRegion:MTLRegionMake2D(rect->x / 2, rect->y / 2, (rect->w + 1) / 2, (rect->h + 1) / 2)
mipmapLevel:0
slice:0
withBytes:pixels
bytesPerRow:2 * ((pitch + 1) / 2)
bytesPerImage:0];
}
return 0;
}}
@ -725,9 +897,37 @@ METAL_UpdateTextureYUV(SDL_Renderer * renderer, SDL_Texture * texture,
const Uint8 *Yplane, int Ypitch,
const Uint8 *Uplane, int Upitch,
const Uint8 *Vplane, int Vpitch)
{
return SDL_Unsupported(); // !!! FIXME
}
{ @autoreleasepool {
METAL_TextureData *texturedata = (__bridge METAL_TextureData *)texture->driverdata;
int Uslice = texture->format == SDL_PIXELFORMAT_YV12 ? 1 : 0;
int Vslice = texture->format == SDL_PIXELFORMAT_YV12 ? 0 : 1;
/* Bail out if we're supposed to update an empty rectangle */
if (rect->w <= 0 || rect->h <= 0) {
return 0;
}
[texturedata.mtltexture replaceRegion:MTLRegionMake2D(rect->x, rect->y, rect->w, rect->h)
mipmapLevel:0
withBytes:Yplane
bytesPerRow:Ypitch];
[texturedata.mtltexture_uv replaceRegion:MTLRegionMake2D(rect->x / 2, rect->y / 2, (rect->w + 1) / 2, (rect->h + 1) / 2)
mipmapLevel:0
slice:Uslice
withBytes:Uplane
bytesPerRow:Upitch
bytesPerImage:0];
[texturedata.mtltexture_uv replaceRegion:MTLRegionMake2D(rect->x / 2, rect->y / 2, (rect->w + 1) / 2, (rect->h + 1) / 2)
mipmapLevel:0
slice:Vslice
withBytes:Vplane
bytesPerRow:Vpitch
bytesPerImage:0];
return 0;
}}
static int
METAL_LockTexture(SDL_Renderer * renderer, SDL_Texture * texture,
@ -864,7 +1064,7 @@ METAL_RenderClear(SDL_Renderer * renderer)
// Slow path for clearing: draw a filled fullscreen triangle.
METAL_SetOrthographicProjection(renderer, 1, 1);
[data.mtlcmdencoder setViewport:viewport];
[data.mtlcmdencoder setRenderPipelineState:ChoosePipelineState(data, data.mtlpipelineprims, SDL_BLENDMODE_NONE)];
[data.mtlcmdencoder setRenderPipelineState:ChoosePipelineState(data, data.pipelines, SDL_METAL_FRAGMENT_SOLID, SDL_BLENDMODE_NONE)];
[data.mtlcmdencoder setVertexBuffer:data.mtlbufconstants offset:CONSTANTS_OFFSET_CLEAR_VERTS atIndex:0];
[data.mtlcmdencoder setVertexBuffer:data.mtlbufconstants offset:CONSTANTS_OFFSET_IDENTITY atIndex:3];
[data.mtlcmdencoder setFragmentBytes:color length:sizeof(color) atIndex:0];
@ -903,7 +1103,7 @@ DrawVerts(SDL_Renderer * renderer, const SDL_FPoint * points, int count,
// !!! FIXME: render color should live in a dedicated uniform buffer.
const float color[4] = { ((float)renderer->r) / 255.0f, ((float)renderer->g) / 255.0f, ((float)renderer->b) / 255.0f, ((float)renderer->a) / 255.0f };
[data.mtlcmdencoder setRenderPipelineState:ChoosePipelineState(data, data.mtlpipelineprims, renderer->blendMode)];
[data.mtlcmdencoder setRenderPipelineState:ChoosePipelineState(data, data.pipelines, SDL_METAL_FRAGMENT_SOLID, renderer->blendMode)];
[data.mtlcmdencoder setFragmentBytes:color length:sizeof(color) atIndex:0];
[data.mtlcmdencoder setVertexBytes:points length:vertlen atIndex:0];
@ -934,7 +1134,7 @@ METAL_RenderFillRects(SDL_Renderer * renderer, const SDL_FRect * rects, int coun
// !!! FIXME: render color should live in a dedicated uniform buffer.
const float color[4] = { ((float)renderer->r) / 255.0f, ((float)renderer->g) / 255.0f, ((float)renderer->b) / 255.0f, ((float)renderer->a) / 255.0f };
[data.mtlcmdencoder setRenderPipelineState:ChoosePipelineState(data, data.mtlpipelineprims, renderer->blendMode)];
[data.mtlcmdencoder setRenderPipelineState:ChoosePipelineState(data, data.pipelines, SDL_METAL_FRAGMENT_SOLID, renderer->blendMode)];
[data.mtlcmdencoder setFragmentBytes:color length:sizeof(color) atIndex:0];
[data.mtlcmdencoder setVertexBuffer:data.mtlbufconstants offset:CONSTANTS_OFFSET_IDENTITY atIndex:3];
@ -955,6 +1155,29 @@ METAL_RenderFillRects(SDL_Renderer * renderer, const SDL_FRect * rects, int coun
return 0;
}}
/* Binds the fragment-stage state needed to draw `texture`: pipeline state,
 * modulation colour, sampler and the texture itself. For YUV/NV12 textures
 * the chroma texture and the YUV decode constants are bound as well.
 *
 * data        - renderer state; its current command encoder receives the bindings.
 * texture     - SDL texture supplying blend mode and colour/alpha modulation.
 * texturedata - Metal-side data for that texture. */
static void
METAL_SetupRenderCopy(METAL_RenderData *data, SDL_Texture *texture, METAL_TextureData *texturedata)
{
    /* Opaque white unless the texture has colour/alpha modulation enabled. */
    const BOOL modulated = (texture->modMode != 0);
    const float color[4] = {
        modulated ? ((float)texture->r) / 255.0f : 1.0f,
        modulated ? ((float)texture->g) / 255.0f : 1.0f,
        modulated ? ((float)texture->b) / 255.0f : 1.0f,
        modulated ? ((float)texture->a) / 255.0f : 1.0f,
    };

    id<MTLRenderCommandEncoder> encoder = data.mtlcmdencoder;
    id<MTLRenderPipelineState> pipeline = ChoosePipelineState(data, data.pipelines, texturedata.fragmentFunction, texture->blendMode);

    [encoder setRenderPipelineState:pipeline];
    [encoder setFragmentBytes:color length:sizeof(color) atIndex:0];
    [encoder setFragmentSamplerState:texturedata.mtlsampler atIndex:0];
    [encoder setFragmentTexture:texturedata.mtltexture atIndex:0];

    /* RGB textures need nothing beyond slot 0. */
    if (!texturedata.yuv && !texturedata.nv12) {
        return;
    }

    /* Chroma planes go in texture slot 1, the decode matrix in buffer slot 1. */
    [encoder setFragmentTexture:texturedata.mtltexture_uv atIndex:1];
    [encoder setFragmentBuffer:data.mtlbufconstants offset:texturedata.conversionBufferOffset atIndex:1];
}
static int
METAL_RenderCopy(SDL_Renderer * renderer, SDL_Texture * texture,
const SDL_Rect * srcrect, const SDL_FRect * dstrect)
@ -965,6 +1188,8 @@ METAL_RenderCopy(SDL_Renderer * renderer, SDL_Texture * texture,
const float texw = (float) texturedata.mtltexture.width;
const float texh = (float) texturedata.mtltexture.height;
METAL_SetupRenderCopy(data, texture, texturedata);
const float xy[] = {
dstrect->x, dstrect->y + dstrect->h,
dstrect->x, dstrect->y,
@ -979,21 +1204,9 @@ METAL_RenderCopy(SDL_Renderer * renderer, SDL_Texture * texture,
normtex(srcrect->x + srcrect->w, texw), normtex(srcrect->y, texh)
};
float color[4] = { 1.0f, 1.0f, 1.0f, 1.0f };
if (texture->modMode) {
color[0] = ((float)texture->r) / 255.0f;
color[1] = ((float)texture->g) / 255.0f;
color[2] = ((float)texture->b) / 255.0f;
color[3] = ((float)texture->a) / 255.0f;
}
[data.mtlcmdencoder setRenderPipelineState:ChoosePipelineState(data, data.mtlpipelinecopy, texture->blendMode)];
[data.mtlcmdencoder setVertexBytes:xy length:sizeof(xy) atIndex:0];
[data.mtlcmdencoder setVertexBytes:uv length:sizeof(uv) atIndex:1];
[data.mtlcmdencoder setVertexBuffer:data.mtlbufconstants offset:CONSTANTS_OFFSET_IDENTITY atIndex:3];
[data.mtlcmdencoder setFragmentBytes:color length:sizeof(color) atIndex:0];
[data.mtlcmdencoder setFragmentTexture:texturedata.mtltexture atIndex:0];
[data.mtlcmdencoder setFragmentSamplerState:texturedata.mtlsampler atIndex:0];
[data.mtlcmdencoder drawPrimitives:MTLPrimitiveTypeTriangleStrip vertexStart:0 vertexCount:4];
return 0;
@ -1012,6 +1225,8 @@ METAL_RenderCopyEx(SDL_Renderer * renderer, SDL_Texture * texture,
float transform[16];
float minu, maxu, minv, maxv;
METAL_SetupRenderCopy(data, texture, texturedata);
minu = normtex(srcrect->x, texw);
maxu = normtex(srcrect->x + srcrect->w, texw);
minv = normtex(srcrect->y, texh);
@ -1062,21 +1277,9 @@ METAL_RenderCopyEx(SDL_Renderer * renderer, SDL_Texture * texture,
transform[13] = dstrect->y + center->y;
}
float color[4] = { 1.0f, 1.0f, 1.0f, 1.0f };
if (texture->modMode) {
color[0] = ((float)texture->r) / 255.0f;
color[1] = ((float)texture->g) / 255.0f;
color[2] = ((float)texture->b) / 255.0f;
color[3] = ((float)texture->a) / 255.0f;
}
[data.mtlcmdencoder setRenderPipelineState:ChoosePipelineState(data, data.mtlpipelinecopy, texture->blendMode)];
[data.mtlcmdencoder setVertexBytes:xy length:sizeof(xy) atIndex:0];
[data.mtlcmdencoder setVertexBytes:uv length:sizeof(uv) atIndex:1];
[data.mtlcmdencoder setVertexBytes:transform length:sizeof(transform) atIndex:3];
[data.mtlcmdencoder setFragmentBytes:color length:sizeof(color) atIndex:0];
[data.mtlcmdencoder setFragmentTexture:texturedata.mtltexture atIndex:0];
[data.mtlcmdencoder setFragmentSamplerState:texturedata.mtlsampler atIndex:0];
[data.mtlcmdencoder drawPrimitives:MTLPrimitiveTypeTriangleStrip vertexStart:0 vertexCount:4];
return 0;
@ -1144,8 +1347,7 @@ METAL_DestroyRenderer(SDL_Renderer * renderer)
[data.mtlcmdencoder endEncoding];
}
DestroyPipelineCache(data.mtlpipelineprims);
DestroyPipelineCache(data.mtlpipelinecopy);
DestroyShaderPipelines(data.pipelines);
}
SDL_free(renderer);

View file

@ -16,7 +16,7 @@ vertex SolidVertexOutput SDL_Solid_vertex(const device float2 *position [[buffer
{
SolidVertexOutput v;
v.position = (projection * transform) * float4(position[vid], 0.0f, 1.0f);
v.pointSize = 0.5f;
v.pointSize = 1.0f;
return v;
}
@ -50,3 +50,60 @@ fragment float4 SDL_Copy_fragment(CopyVertexOutput vert [[stage_in]],
{
return tex.sample(s, vert.texcoord) * col;
}
// YUV -> RGB decode parameters read by the YUV/NV12/NV21 fragment shaders
// from fragment buffer 1. Each float3 member occupies 16 bytes, so the host
// side supplies this as four rows of four floats (last float is padding).
struct YUVDecode
{
float3 offset; // added to the sampled (Y, U, V) triple before the dot products
float3 Rcoeff; // dotted with the offset YUV to give red
float3 Gcoeff; // dotted with the offset YUV to give green
float3 Bcoeff; // dotted with the offset YUV to give blue
};
// Fragment shader for three-plane YUV textures. Luma comes from texY; the two
// chroma planes are slices 0 and 1 of the texUV array texture. The sampled
// triple is offset, converted to RGB with the decode rows, given alpha 1 and
// multiplied by the modulation colour.
fragment float4 SDL_YUV_fragment(CopyVertexOutput vert [[stage_in]],
                                 constant float4 &col [[buffer(0)]],
                                 constant YUVDecode &decode [[buffer(1)]],
                                 texture2d<float> texY [[texture(0)]],
                                 texture2d_array<float> texUV [[texture(1)]],
                                 sampler s [[sampler(0)]])
{
    const float2 coord = vert.texcoord;

    const float luma = texY.sample(s, coord).r;
    const float chroma0 = texUV.sample(s, coord, 0).r;
    const float chroma1 = texUV.sample(s, coord, 1).r;

    const float3 shifted = float3(luma, chroma0, chroma1) + decode.offset;
    const float3 rgb = float3(dot(shifted, decode.Rcoeff),
                              dot(shifted, decode.Gcoeff),
                              dot(shifted, decode.Bcoeff));

    return col * float4(rgb, 1.0);
}
// Fragment shader for NV12 textures. Luma comes from texY; the interleaved
// chroma pair is read from the red and green channels of texUV, in that
// order. The triple is offset, converted to RGB with the decode rows, given
// alpha 1 and multiplied by the modulation colour.
fragment float4 SDL_NV12_fragment(CopyVertexOutput vert [[stage_in]],
                                  constant float4 &col [[buffer(0)]],
                                  constant YUVDecode &decode [[buffer(1)]],
                                  texture2d<float> texY [[texture(0)]],
                                  texture2d<float> texUV [[texture(1)]],
                                  sampler s [[sampler(0)]])
{
    const float2 coord = vert.texcoord;

    const float luma = texY.sample(s, coord).r;
    const float2 chroma = texUV.sample(s, coord).rg;

    const float3 shifted = float3(luma, chroma) + decode.offset;
    const float3 rgb = float3(dot(shifted, decode.Rcoeff),
                              dot(shifted, decode.Gcoeff),
                              dot(shifted, decode.Bcoeff));

    return col * float4(rgb, 1.0);
}
// Fragment shader for NV21 textures. Identical to the NV12 shader except that
// the chroma pair is read swapped (green channel first, then red), since NV21
// stores its interleaved chroma in the opposite order.
fragment float4 SDL_NV21_fragment(CopyVertexOutput vert [[stage_in]],
                                  constant float4 &col [[buffer(0)]],
                                  constant YUVDecode &decode [[buffer(1)]],
                                  texture2d<float> texY [[texture(0)]],
                                  texture2d<float> texUV [[texture(1)]],
                                  sampler s [[sampler(0)]])
{
    const float2 coord = vert.texcoord;

    const float luma = texY.sample(s, coord).r;
    const float2 chroma = texUV.sample(s, coord).gr;

    const float3 shifted = float3(luma, chroma) + decode.offset;
    const float3 rgb = float3(dot(shifted, decode.Rcoeff),
                              dot(shifted, decode.Gcoeff),
                              dot(shifted, decode.Bcoeff));

    return col * float4(rgb, 1.0);
}

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff