metal: Add support for YUV/NV12 texture formats.

This commit is contained in:
Alex Szpakowski 2018-01-06 18:54:12 -04:00
parent 9a8683b275
commit 740a90af37
4 changed files with 4007 additions and 1859 deletions

View file

@ -90,8 +90,15 @@ SDL_RenderDriver METAL_RenderDriver = {
{ {
"metal", "metal",
(SDL_RENDERER_ACCELERATED | SDL_RENDERER_PRESENTVSYNC | SDL_RENDERER_TARGETTEXTURE), (SDL_RENDERER_ACCELERATED | SDL_RENDERER_PRESENTVSYNC | SDL_RENDERER_TARGETTEXTURE),
2, 6,
{SDL_PIXELFORMAT_ARGB8888, SDL_PIXELFORMAT_ABGR8888}, {
SDL_PIXELFORMAT_ARGB8888,
SDL_PIXELFORMAT_ABGR8888,
SDL_PIXELFORMAT_YV12,
SDL_PIXELFORMAT_IYUV,
SDL_PIXELFORMAT_NV12,
SDL_PIXELFORMAT_NV21
},
// !!! FIXME: how do you query Metal for this? // !!! FIXME: how do you query Metal for this?
// (the weakest GPU supported by Metal on iOS has 4k texture max, and // (the weakest GPU supported by Metal on iOS has 4k texture max, and
@ -116,7 +123,10 @@ SDL_RenderDriver METAL_RenderDriver = {
static const size_t CONSTANTS_OFFSET_IDENTITY = 0; static const size_t CONSTANTS_OFFSET_IDENTITY = 0;
static const size_t CONSTANTS_OFFSET_HALF_PIXEL_TRANSFORM = ALIGN_CONSTANTS(CONSTANTS_OFFSET_IDENTITY + sizeof(float) * 16); static const size_t CONSTANTS_OFFSET_HALF_PIXEL_TRANSFORM = ALIGN_CONSTANTS(CONSTANTS_OFFSET_IDENTITY + sizeof(float) * 16);
static const size_t CONSTANTS_OFFSET_CLEAR_VERTS = ALIGN_CONSTANTS(CONSTANTS_OFFSET_HALF_PIXEL_TRANSFORM + sizeof(float) * 16); static const size_t CONSTANTS_OFFSET_DECODE_JPEG = ALIGN_CONSTANTS(CONSTANTS_OFFSET_HALF_PIXEL_TRANSFORM + sizeof(float) * 16);
static const size_t CONSTANTS_OFFSET_DECODE_BT601 = ALIGN_CONSTANTS(CONSTANTS_OFFSET_DECODE_JPEG + sizeof(float) * 4 * 4);
static const size_t CONSTANTS_OFFSET_DECODE_BT709 = ALIGN_CONSTANTS(CONSTANTS_OFFSET_DECODE_BT601 + sizeof(float) * 4 * 4);
static const size_t CONSTANTS_OFFSET_CLEAR_VERTS = ALIGN_CONSTANTS(CONSTANTS_OFFSET_DECODE_BT709 + sizeof(float) * 4 * 4);
static const size_t CONSTANTS_LENGTH = CONSTANTS_OFFSET_CLEAR_VERTS + sizeof(float) * 6; static const size_t CONSTANTS_LENGTH = CONSTANTS_OFFSET_CLEAR_VERTS + sizeof(float) * 6;
typedef enum SDL_MetalVertexFunction typedef enum SDL_MetalVertexFunction
@ -127,8 +137,12 @@ typedef enum SDL_MetalVertexFunction
typedef enum SDL_MetalFragmentFunction typedef enum SDL_MetalFragmentFunction
{ {
SDL_METAL_FRAGMENT_SOLID, SDL_METAL_FRAGMENT_SOLID = 0,
SDL_METAL_FRAGMENT_COPY, SDL_METAL_FRAGMENT_COPY,
SDL_METAL_FRAGMENT_YUV,
SDL_METAL_FRAGMENT_NV12,
SDL_METAL_FRAGMENT_NV21,
SDL_METAL_FRAGMENT_COUNT,
} SDL_MetalFragmentFunction; } SDL_MetalFragmentFunction;
typedef struct METAL_PipelineState typedef struct METAL_PipelineState
@ -146,6 +160,15 @@ typedef struct METAL_PipelineCache
const char *label; const char *label;
} METAL_PipelineCache; } METAL_PipelineCache;
/* Each shader combination used by drawing functions has a separate pipeline
* cache. This is more efficient than iterating over a global cache to find
* the pipeline based on the specified shader combination, since we know what
* the shader combination is inside each drawing function's code. */
typedef struct METAL_ShaderPipelines
{
/* One cache per fragment function, indexed by the SDL_MetalFragmentFunction
 * value (SDL_METAL_FRAGMENT_COUNT is the enum's final entry and so sizes
 * the array). */
METAL_PipelineCache caches[SDL_METAL_FRAGMENT_COUNT];
} METAL_ShaderPipelines;
@interface METAL_RenderData : NSObject @interface METAL_RenderData : NSObject
@property (nonatomic, retain) id<MTLDevice> mtldevice; @property (nonatomic, retain) id<MTLDevice> mtldevice;
@property (nonatomic, retain) id<MTLCommandQueue> mtlcmdqueue; @property (nonatomic, retain) id<MTLCommandQueue> mtlcmdqueue;
@ -153,13 +176,12 @@ typedef struct METAL_PipelineCache
@property (nonatomic, retain) id<MTLRenderCommandEncoder> mtlcmdencoder; @property (nonatomic, retain) id<MTLRenderCommandEncoder> mtlcmdencoder;
@property (nonatomic, retain) id<MTLLibrary> mtllibrary; @property (nonatomic, retain) id<MTLLibrary> mtllibrary;
@property (nonatomic, retain) id<CAMetalDrawable> mtlbackbuffer; @property (nonatomic, retain) id<CAMetalDrawable> mtlbackbuffer;
@property (nonatomic, assign) METAL_PipelineCache *mtlpipelineprims;
@property (nonatomic, assign) METAL_PipelineCache *mtlpipelinecopy;
@property (nonatomic, retain) id<MTLSamplerState> mtlsamplernearest; @property (nonatomic, retain) id<MTLSamplerState> mtlsamplernearest;
@property (nonatomic, retain) id<MTLSamplerState> mtlsamplerlinear; @property (nonatomic, retain) id<MTLSamplerState> mtlsamplerlinear;
@property (nonatomic, retain) id<MTLBuffer> mtlbufconstants; @property (nonatomic, retain) id<MTLBuffer> mtlbufconstants;
@property (nonatomic, retain) CAMetalLayer *mtllayer; @property (nonatomic, retain) CAMetalLayer *mtllayer;
@property (nonatomic, retain) MTLRenderPassDescriptor *mtlpassdesc; @property (nonatomic, retain) MTLRenderPassDescriptor *mtlpassdesc;
@property (nonatomic, assign) METAL_ShaderPipelines *pipelines;
@end @end
@implementation METAL_RenderData @implementation METAL_RenderData
@ -184,7 +206,12 @@ typedef struct METAL_PipelineCache
@interface METAL_TextureData : NSObject @interface METAL_TextureData : NSObject
@property (nonatomic, retain) id<MTLTexture> mtltexture; @property (nonatomic, retain) id<MTLTexture> mtltexture;
@property (nonatomic, retain) id<MTLTexture> mtltexture_uv;
@property (nonatomic, retain) id<MTLSamplerState> mtlsampler; @property (nonatomic, retain) id<MTLSamplerState> mtlsampler;
@property (nonatomic, assign) SDL_MetalFragmentFunction fragmentFunction;
@property (nonatomic, assign) BOOL yuv;
@property (nonatomic, assign) BOOL nv12;
@property (nonatomic, assign) size_t conversionBufferOffset;
@end @end
@implementation METAL_TextureData @implementation METAL_TextureData
@ -192,6 +219,7 @@ typedef struct METAL_PipelineCache
- (void)dealloc - (void)dealloc
{ {
[_mtltexture release]; [_mtltexture release];
[_mtltexture_uv release];
[_mtlsampler release]; [_mtlsampler release];
[super dealloc]; [super dealloc];
} }
@ -265,6 +293,9 @@ GetFragmentFunctionName(SDL_MetalFragmentFunction function)
switch (function) { switch (function) {
case SDL_METAL_FRAGMENT_SOLID: return @"SDL_Solid_fragment"; case SDL_METAL_FRAGMENT_SOLID: return @"SDL_Solid_fragment";
case SDL_METAL_FRAGMENT_COPY: return @"SDL_Copy_fragment"; case SDL_METAL_FRAGMENT_COPY: return @"SDL_Copy_fragment";
case SDL_METAL_FRAGMENT_YUV: return @"SDL_YUV_fragment";
case SDL_METAL_FRAGMENT_NV12: return @"SDL_NV12_fragment";
case SDL_METAL_FRAGMENT_NV21: return @"SDL_NV21_fragment";
default: return nil; default: return nil;
} }
} }
@ -329,16 +360,9 @@ MakePipelineState(METAL_RenderData *data, METAL_PipelineCache *cache,
} }
} }
static METAL_PipelineCache * static void
MakePipelineCache(METAL_RenderData *data, const char *label, SDL_MetalVertexFunction vertfn, SDL_MetalFragmentFunction fragfn) MakePipelineCache(METAL_RenderData *data, METAL_PipelineCache *cache, const char *label, SDL_MetalVertexFunction vertfn, SDL_MetalFragmentFunction fragfn)
{ {
METAL_PipelineCache *cache = SDL_malloc(sizeof(METAL_PipelineCache));
if (!cache) {
SDL_OutOfMemory();
return NULL;
}
SDL_zerop(cache); SDL_zerop(cache);
cache->vertexFunction = vertfn; cache->vertexFunction = vertfn;
@ -347,12 +371,10 @@ MakePipelineCache(METAL_RenderData *data, const char *label, SDL_MetalVertexFunc
/* Create pipeline states for the default blend modes. Custom blend modes /* Create pipeline states for the default blend modes. Custom blend modes
* will be added to the cache on-demand. */ * will be added to the cache on-demand. */
MakePipelineState(data, cache, @"(blend=none)", SDL_BLENDMODE_NONE); MakePipelineState(data, cache, @" (blend=none)", SDL_BLENDMODE_NONE);
MakePipelineState(data, cache, @"(blend=blend)", SDL_BLENDMODE_BLEND); MakePipelineState(data, cache, @" (blend=blend)", SDL_BLENDMODE_BLEND);
MakePipelineState(data, cache, @"(blend=add)", SDL_BLENDMODE_ADD); MakePipelineState(data, cache, @" (blend=add)", SDL_BLENDMODE_ADD);
MakePipelineState(data, cache, @"(blend=mod)", SDL_BLENDMODE_MOD); MakePipelineState(data, cache, @" (blend=mod)", SDL_BLENDMODE_MOD);
return cache;
} }
static void static void
@ -364,20 +386,51 @@ DestroyPipelineCache(METAL_PipelineCache *cache)
} }
SDL_free(cache->states); SDL_free(cache->states);
SDL_free(cache); }
}
/* Allocates the per-renderer set of pipeline caches and fills in one cache
 * for every fragment function. Returns NULL (after reporting out-of-memory)
 * when the allocation fails; the result is released with
 * DestroyShaderPipelines(). */
static METAL_ShaderPipelines *
MakeShaderPipelines(METAL_RenderData *data)
{
    METAL_ShaderPipelines *result = SDL_calloc(1, sizeof(*result));
    METAL_PipelineCache *slots;

    if (result == NULL) {
        SDL_OutOfMemory();
        return NULL;
    }

    /* Each cache lives in the slot named by its own fragment function, so a
     * drawing function can reach it by enum value alone. */
    slots = result->caches;

    /* Untextured primitives use the solid vertex shader... */
    MakePipelineCache(data, slots + SDL_METAL_FRAGMENT_SOLID, "SDL primitives pipeline", SDL_METAL_VERTEX_SOLID, SDL_METAL_FRAGMENT_SOLID);

    /* ...while every textured variant shares the copy vertex shader. */
    MakePipelineCache(data, slots + SDL_METAL_FRAGMENT_COPY, "SDL copy pipeline", SDL_METAL_VERTEX_COPY, SDL_METAL_FRAGMENT_COPY);
    MakePipelineCache(data, slots + SDL_METAL_FRAGMENT_YUV, "SDL YUV pipeline", SDL_METAL_VERTEX_COPY, SDL_METAL_FRAGMENT_YUV);
    MakePipelineCache(data, slots + SDL_METAL_FRAGMENT_NV12, "SDL NV12 pipeline", SDL_METAL_VERTEX_COPY, SDL_METAL_FRAGMENT_NV12);
    MakePipelineCache(data, slots + SDL_METAL_FRAGMENT_NV21, "SDL NV21 pipeline", SDL_METAL_VERTEX_COPY, SDL_METAL_FRAGMENT_NV21);

    return result;
}
static void
DestroyShaderPipelines(METAL_ShaderPipelines *pipelines)
{
if (pipelines != NULL) {
for (int i = 0; i < SDL_METAL_FRAGMENT_COUNT; i++) {
DestroyPipelineCache(&pipelines->caches[i]);
}
SDL_free(pipelines);
} }
} }
static inline id<MTLRenderPipelineState> static inline id<MTLRenderPipelineState>
ChoosePipelineState(METAL_RenderData *data, METAL_PipelineCache *cache, const SDL_BlendMode blendmode) ChoosePipelineState(METAL_RenderData *data, METAL_ShaderPipelines *pipelines, SDL_MetalFragmentFunction fragfn, SDL_BlendMode blendmode)
{ {
METAL_PipelineCache *cache = &pipelines->caches[fragfn];
for (int i = 0; i < cache->count; i++) { for (int i = 0; i < cache->count; i++) {
if (cache->states[i].blendMode == blendmode) { if (cache->states[i].blendMode == blendmode) {
return (__bridge id<MTLRenderPipelineState>)cache->states[i].pipe; return (__bridge id<MTLRenderPipelineState>)cache->states[i].pipe;
} }
} }
return MakePipelineState(data, cache, [NSString stringWithFormat:@"(blend=custom 0x%x)", blendmode], blendmode); return MakePipelineState(data, cache, [NSString stringWithFormat:@" (blend=custom 0x%x)", blendmode], blendmode);
} }
static SDL_Renderer * static SDL_Renderer *
@ -455,8 +508,7 @@ METAL_CreateRenderer(SDL_Window * window, Uint32 flags)
#endif #endif
data.mtllibrary.label = @"SDL Metal renderer shader library"; data.mtllibrary.label = @"SDL Metal renderer shader library";
data.mtlpipelineprims = MakePipelineCache(data, "SDL primitives pipeline ", SDL_METAL_VERTEX_SOLID, SDL_METAL_FRAGMENT_SOLID); data.pipelines = MakeShaderPipelines(data);
data.mtlpipelinecopy = MakePipelineCache(data, "SDL texture pipeline ", SDL_METAL_VERTEX_COPY, SDL_METAL_FRAGMENT_COPY);
MTLSamplerDescriptor *samplerdesc = [[MTLSamplerDescriptor alloc] init]; MTLSamplerDescriptor *samplerdesc = [[MTLSamplerDescriptor alloc] init];
@ -485,6 +537,28 @@ METAL_CreateRenderer(SDL_Window * window, Uint32 flags)
0.5f, 0.5f, 0.0f, 1.0f, 0.5f, 0.5f, 0.0f, 1.0f,
}; };
/* Metal pads float3s to 16 bytes. */
float decodetransformJPEG[4*4] = {
0.0, -0.501960814, -0.501960814, 0.0, /* offset */
1.0000, 0.0000, 1.4020, 0.0, /* Rcoeff */
1.0000, -0.3441, -0.7141, 0.0, /* Gcoeff */
1.0000, 1.7720, 0.0000, 0.0, /* Bcoeff */
};
float decodetransformBT601[4*4] = {
-0.0627451017, -0.501960814, -0.501960814, 0.0, /* offset */
1.1644, 0.0000, 1.5960, 0.0, /* Rcoeff */
1.1644, -0.3918, -0.8130, 0.0, /* Gcoeff */
1.1644, 2.0172, 0.0000, 0.0, /* Bcoeff */
};
/* BT.709, limited ("video") range: luma is shifted by 16/255 and every
 * component is rescaled to full range (255/219 = 1.1644 for luma), with the
 * chroma weights derived from Kr = 0.2126 and Kb = 0.0722. Laid out like the
 * other decode tables: one 4-float row each for offset, R, G and B, the last
 * float of every row being padding. */
float decodetransformBT709[4*4] = {
    -0.0627451017, -0.501960814, -0.501960814, 0.0, /* offset */
    1.1644,  0.0000,  1.7927, 0.0,                  /* Rcoeff */
    1.1644, -0.2132, -0.5329, 0.0,                  /* Gcoeff */
    1.1644,  2.1124,  0.0000, 0.0,                  /* Bcoeff */
};
float clearverts[6] = {0.0f, 0.0f, 0.0f, 2.0f, 2.0f, 0.0f}; float clearverts[6] = {0.0f, 0.0f, 0.0f, 2.0f, 2.0f, 0.0f};
id<MTLBuffer> mtlbufconstantstaging = [data.mtldevice newBufferWithLength:CONSTANTS_LENGTH options:MTLResourceStorageModeShared]; id<MTLBuffer> mtlbufconstantstaging = [data.mtldevice newBufferWithLength:CONSTANTS_LENGTH options:MTLResourceStorageModeShared];
@ -497,6 +571,9 @@ METAL_CreateRenderer(SDL_Window * window, Uint32 flags)
char *constantdata = [mtlbufconstantstaging contents]; char *constantdata = [mtlbufconstantstaging contents];
SDL_memcpy(constantdata + CONSTANTS_OFFSET_IDENTITY, identitytransform, sizeof(identitytransform)); SDL_memcpy(constantdata + CONSTANTS_OFFSET_IDENTITY, identitytransform, sizeof(identitytransform));
SDL_memcpy(constantdata + CONSTANTS_OFFSET_HALF_PIXEL_TRANSFORM, halfpixeltransform, sizeof(halfpixeltransform)); SDL_memcpy(constantdata + CONSTANTS_OFFSET_HALF_PIXEL_TRANSFORM, halfpixeltransform, sizeof(halfpixeltransform));
SDL_memcpy(constantdata + CONSTANTS_OFFSET_DECODE_JPEG, decodetransformJPEG, sizeof(decodetransformJPEG));
SDL_memcpy(constantdata + CONSTANTS_OFFSET_DECODE_BT601, decodetransformBT601, sizeof(decodetransformBT601));
SDL_memcpy(constantdata + CONSTANTS_OFFSET_DECODE_BT709, decodetransformBT709, sizeof(decodetransformBT709));
SDL_memcpy(constantdata + CONSTANTS_OFFSET_CLEAR_VERTS, clearverts, sizeof(clearverts)); SDL_memcpy(constantdata + CONSTANTS_OFFSET_CLEAR_VERTS, clearverts, sizeof(clearverts));
id<MTLCommandBuffer> cmdbuffer = [data.mtlcmdqueue commandBuffer]; id<MTLCommandBuffer> cmdbuffer = [data.mtlcmdqueue commandBuffer];
@ -660,15 +737,26 @@ static int
METAL_CreateTexture(SDL_Renderer * renderer, SDL_Texture * texture) METAL_CreateTexture(SDL_Renderer * renderer, SDL_Texture * texture)
{ @autoreleasepool { { @autoreleasepool {
METAL_RenderData *data = (__bridge METAL_RenderData *) renderer->driverdata; METAL_RenderData *data = (__bridge METAL_RenderData *) renderer->driverdata;
MTLPixelFormat mtlpixfmt; MTLPixelFormat pixfmt;
switch (texture->format) { switch (texture->format) {
case SDL_PIXELFORMAT_ABGR8888: mtlpixfmt = MTLPixelFormatRGBA8Unorm; break; case SDL_PIXELFORMAT_ABGR8888:
case SDL_PIXELFORMAT_ARGB8888: mtlpixfmt = MTLPixelFormatBGRA8Unorm; break; pixfmt = MTLPixelFormatRGBA8Unorm;
default: return SDL_SetError("Texture format %s not supported by Metal", SDL_GetPixelFormatName(texture->format)); break;
case SDL_PIXELFORMAT_ARGB8888:
pixfmt = MTLPixelFormatBGRA8Unorm;
break;
case SDL_PIXELFORMAT_IYUV:
case SDL_PIXELFORMAT_YV12:
case SDL_PIXELFORMAT_NV12:
case SDL_PIXELFORMAT_NV21:
pixfmt = MTLPixelFormatR8Unorm;
break;
default:
return SDL_SetError("Texture format %s not supported by Metal", SDL_GetPixelFormatName(texture->format));
} }
MTLTextureDescriptor *mtltexdesc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:mtlpixfmt MTLTextureDescriptor *mtltexdesc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:pixfmt
width:(NSUInteger)texture->w height:(NSUInteger)texture->h mipmapped:NO]; width:(NSUInteger)texture->w height:(NSUInteger)texture->h mipmapped:NO];
/* Not available in iOS 8. */ /* Not available in iOS 8. */
@ -679,14 +767,31 @@ METAL_CreateTexture(SDL_Renderer * renderer, SDL_Texture * texture)
mtltexdesc.usage = MTLTextureUsageShaderRead; mtltexdesc.usage = MTLTextureUsageShaderRead;
} }
} }
//mtltexdesc.resourceOptions = MTLResourceCPUCacheModeDefaultCache | MTLResourceStorageModeManaged;
//mtltexdesc.storageMode = MTLStorageModeManaged;
id<MTLTexture> mtltexture = [data.mtldevice newTextureWithDescriptor:mtltexdesc]; id<MTLTexture> mtltexture = [data.mtldevice newTextureWithDescriptor:mtltexdesc];
if (mtltexture == nil) { if (mtltexture == nil) {
return SDL_SetError("Texture allocation failed"); return SDL_SetError("Texture allocation failed");
} }
/* Planar and semi-planar formats keep their chroma in a second,
 * half-resolution texture (dimensions rounded up for odd sizes). */
id<MTLTexture> mtltexture_uv = nil;

BOOL yuv = (texture->format == SDL_PIXELFORMAT_IYUV) || (texture->format == SDL_PIXELFORMAT_YV12);
BOOL nv12 = (texture->format == SDL_PIXELFORMAT_NV12) || (texture->format == SDL_PIXELFORMAT_NV21);

if (yuv) {
    /* Separate U and V planes: one single-channel array slice each. */
    mtltexdesc.pixelFormat = MTLPixelFormatR8Unorm;
    mtltexdesc.width = (texture->w + 1) / 2;
    mtltexdesc.height = (texture->h + 1) / 2;
    mtltexdesc.textureType = MTLTextureType2DArray;
    mtltexdesc.arrayLength = 2;
    mtltexture_uv = [data.mtldevice newTextureWithDescriptor:mtltexdesc];
} else if (nv12) {
    /* Interleaved chroma: one two-channel texture. */
    mtltexdesc.pixelFormat = MTLPixelFormatRG8Unorm;
    mtltexdesc.width = (texture->w + 1) / 2;
    mtltexdesc.height = (texture->h + 1) / 2;
    mtltexture_uv = [data.mtldevice newTextureWithDescriptor:mtltexdesc];
}

/* A failed chroma allocation must fail texture creation as a whole:
 * otherwise every later upload to the nil texture is silently dropped and
 * the shader samples an unbound texture. */
if ((yuv || nv12) && mtltexture_uv == nil) {
#if !__has_feature(objc_arc)
    [mtltexture release];
#endif
    return SDL_SetError("Texture allocation failed");
}
METAL_TextureData *texturedata = [[METAL_TextureData alloc] init]; METAL_TextureData *texturedata = [[METAL_TextureData alloc] init];
const char *hint = SDL_GetHint(SDL_HINT_RENDER_SCALE_QUALITY); const char *hint = SDL_GetHint(SDL_HINT_RENDER_SCALE_QUALITY);
if (!hint || *hint == '0' || SDL_strcasecmp(hint, "nearest") == 0) { if (!hint || *hint == '0' || SDL_strcasecmp(hint, "nearest") == 0) {
@ -695,12 +800,39 @@ METAL_CreateTexture(SDL_Renderer * renderer, SDL_Texture * texture)
texturedata.mtlsampler = data.mtlsamplerlinear; texturedata.mtlsampler = data.mtlsamplerlinear;
} }
texturedata.mtltexture = mtltexture; texturedata.mtltexture = mtltexture;
texturedata.mtltexture_uv = mtltexture_uv;
texturedata.yuv = yuv;
texturedata.nv12 = nv12;
if (yuv) {
texturedata.fragmentFunction = SDL_METAL_FRAGMENT_YUV;
} else if (texture->format == SDL_PIXELFORMAT_NV12) {
texturedata.fragmentFunction = SDL_METAL_FRAGMENT_NV12;
} else if (texture->format == SDL_PIXELFORMAT_NV21) {
texturedata.fragmentFunction = SDL_METAL_FRAGMENT_NV21;
} else {
texturedata.fragmentFunction = SDL_METAL_FRAGMENT_COPY;
}
if (yuv || nv12) {
size_t offset = 0;
SDL_YUV_CONVERSION_MODE mode = SDL_GetYUVConversionModeForResolution(texture->w, texture->h);
switch (mode) {
case SDL_YUV_CONVERSION_JPEG: offset = CONSTANTS_OFFSET_DECODE_JPEG; break;
case SDL_YUV_CONVERSION_BT601: offset = CONSTANTS_OFFSET_DECODE_BT601; break;
case SDL_YUV_CONVERSION_BT709: offset = CONSTANTS_OFFSET_DECODE_BT709; break;
default: offset = 0; break;
}
texturedata.conversionBufferOffset = offset;
}
texture->driverdata = (void*)CFBridgingRetain(texturedata); texture->driverdata = (void*)CFBridgingRetain(texturedata);
#if !__has_feature(objc_arc) #if !__has_feature(objc_arc)
[texturedata release]; [texturedata release];
[mtltexture release]; [mtltexture release];
[mtltexture_uv release];
#endif #endif
return 0; return 0;
@ -710,12 +842,52 @@ static int
METAL_UpdateTexture(SDL_Renderer * renderer, SDL_Texture * texture, METAL_UpdateTexture(SDL_Renderer * renderer, SDL_Texture * texture,
const SDL_Rect * rect, const void *pixels, int pitch) const SDL_Rect * rect, const void *pixels, int pitch)
{ @autoreleasepool { { @autoreleasepool {
METAL_TextureData *texturedata = (__bridge METAL_TextureData *)texture->driverdata;
// !!! FIXME: this is a synchronous call; it doesn't return until data is uploaded in some form. // !!! FIXME: this is a synchronous call; it doesn't return until data is uploaded in some form.
// !!! FIXME: Maybe move this off to a thread that marks the texture as uploaded and only stall the main thread if we try to // !!! FIXME: Maybe move this off to a thread that marks the texture as uploaded and only stall the main thread if we try to
// !!! FIXME: use this texture before the marking is done? Is it worth it? Or will we basically always be uploading a bunch of // !!! FIXME: use this texture before the marking is done? Is it worth it? Or will we basically always be uploading a bunch of
// !!! FIXME: stuff way ahead of time and/or using it immediately after upload? // !!! FIXME: stuff way ahead of time and/or using it immediately after upload?
id<MTLTexture> mtltexture = ((__bridge METAL_TextureData *)texture->driverdata).mtltexture;
[mtltexture replaceRegion:MTLRegionMake2D(rect->x, rect->y, rect->w, rect->h) mipmapLevel:0 withBytes:pixels bytesPerRow:pitch]; [texturedata.mtltexture replaceRegion:MTLRegionMake2D(rect->x, rect->y, rect->w, rect->h)
mipmapLevel:0
withBytes:pixels
bytesPerRow:pitch];
if (texturedata.yuv) {
int Uslice = texture->format == SDL_PIXELFORMAT_YV12 ? 1 : 0;
int Vslice = texture->format == SDL_PIXELFORMAT_YV12 ? 0 : 1;
/* Skip to the correct offset into the next texture */
pixels = (const void*)((const Uint8*)pixels + rect->h * pitch);
[texturedata.mtltexture_uv replaceRegion:MTLRegionMake2D(rect->x / 2, rect->y / 2, (rect->w + 1) / 2, (rect->h + 1) / 2)
mipmapLevel:0
slice:Uslice
withBytes:pixels
bytesPerRow:(pitch + 1) / 2
bytesPerImage:0];
/* Skip to the correct offset into the next texture */
pixels = (const void*)((const Uint8*)pixels + ((rect->h + 1) / 2) * ((pitch + 1)/2));
[texturedata.mtltexture_uv replaceRegion:MTLRegionMake2D(rect->x / 2, rect->y / 2, (rect->w + 1) / 2, (rect->h + 1) / 2)
mipmapLevel:0
slice:Vslice
withBytes:pixels
bytesPerRow:(pitch + 1) / 2
bytesPerImage:0];
}
if (texturedata.nv12) {
/* Skip to the correct offset into the next texture */
pixels = (const void*)((const Uint8*)pixels + rect->h * pitch);
[texturedata.mtltexture_uv replaceRegion:MTLRegionMake2D(rect->x / 2, rect->y / 2, (rect->w + 1) / 2, (rect->h + 1) / 2)
mipmapLevel:0
slice:0
withBytes:pixels
bytesPerRow:2 * ((pitch + 1) / 2)
bytesPerImage:0];
}
return 0; return 0;
}} }}
@ -725,9 +897,37 @@ METAL_UpdateTextureYUV(SDL_Renderer * renderer, SDL_Texture * texture,
const Uint8 *Yplane, int Ypitch, const Uint8 *Yplane, int Ypitch,
const Uint8 *Uplane, int Upitch, const Uint8 *Uplane, int Upitch,
const Uint8 *Vplane, int Vpitch) const Uint8 *Vplane, int Vpitch)
{ { @autoreleasepool {
return SDL_Unsupported(); // !!! FIXME METAL_TextureData *texturedata = (__bridge METAL_TextureData *)texture->driverdata;
} int Uslice = texture->format == SDL_PIXELFORMAT_YV12 ? 1 : 0;
int Vslice = texture->format == SDL_PIXELFORMAT_YV12 ? 0 : 1;
/* Bail out if we're supposed to update an empty rectangle */
if (rect->w <= 0 || rect->h <= 0) {
return 0;
}
[texturedata.mtltexture replaceRegion:MTLRegionMake2D(rect->x, rect->y, rect->w, rect->h)
mipmapLevel:0
withBytes:Yplane
bytesPerRow:Ypitch];
[texturedata.mtltexture_uv replaceRegion:MTLRegionMake2D(rect->x / 2, rect->y / 2, (rect->w + 1) / 2, (rect->h + 1) / 2)
mipmapLevel:0
slice:Uslice
withBytes:Uplane
bytesPerRow:Upitch
bytesPerImage:0];
[texturedata.mtltexture_uv replaceRegion:MTLRegionMake2D(rect->x / 2, rect->y / 2, (rect->w + 1) / 2, (rect->h + 1) / 2)
mipmapLevel:0
slice:Vslice
withBytes:Vplane
bytesPerRow:Vpitch
bytesPerImage:0];
return 0;
}}
static int static int
METAL_LockTexture(SDL_Renderer * renderer, SDL_Texture * texture, METAL_LockTexture(SDL_Renderer * renderer, SDL_Texture * texture,
@ -864,7 +1064,7 @@ METAL_RenderClear(SDL_Renderer * renderer)
// Slow path for clearing: draw a filled fullscreen triangle. // Slow path for clearing: draw a filled fullscreen triangle.
METAL_SetOrthographicProjection(renderer, 1, 1); METAL_SetOrthographicProjection(renderer, 1, 1);
[data.mtlcmdencoder setViewport:viewport]; [data.mtlcmdencoder setViewport:viewport];
[data.mtlcmdencoder setRenderPipelineState:ChoosePipelineState(data, data.mtlpipelineprims, SDL_BLENDMODE_NONE)]; [data.mtlcmdencoder setRenderPipelineState:ChoosePipelineState(data, data.pipelines, SDL_METAL_FRAGMENT_SOLID, SDL_BLENDMODE_NONE)];
[data.mtlcmdencoder setVertexBuffer:data.mtlbufconstants offset:CONSTANTS_OFFSET_CLEAR_VERTS atIndex:0]; [data.mtlcmdencoder setVertexBuffer:data.mtlbufconstants offset:CONSTANTS_OFFSET_CLEAR_VERTS atIndex:0];
[data.mtlcmdencoder setVertexBuffer:data.mtlbufconstants offset:CONSTANTS_OFFSET_IDENTITY atIndex:3]; [data.mtlcmdencoder setVertexBuffer:data.mtlbufconstants offset:CONSTANTS_OFFSET_IDENTITY atIndex:3];
[data.mtlcmdencoder setFragmentBytes:color length:sizeof(color) atIndex:0]; [data.mtlcmdencoder setFragmentBytes:color length:sizeof(color) atIndex:0];
@ -903,7 +1103,7 @@ DrawVerts(SDL_Renderer * renderer, const SDL_FPoint * points, int count,
// !!! FIXME: render color should live in a dedicated uniform buffer. // !!! FIXME: render color should live in a dedicated uniform buffer.
const float color[4] = { ((float)renderer->r) / 255.0f, ((float)renderer->g) / 255.0f, ((float)renderer->b) / 255.0f, ((float)renderer->a) / 255.0f }; const float color[4] = { ((float)renderer->r) / 255.0f, ((float)renderer->g) / 255.0f, ((float)renderer->b) / 255.0f, ((float)renderer->a) / 255.0f };
[data.mtlcmdencoder setRenderPipelineState:ChoosePipelineState(data, data.mtlpipelineprims, renderer->blendMode)]; [data.mtlcmdencoder setRenderPipelineState:ChoosePipelineState(data, data.pipelines, SDL_METAL_FRAGMENT_SOLID, renderer->blendMode)];
[data.mtlcmdencoder setFragmentBytes:color length:sizeof(color) atIndex:0]; [data.mtlcmdencoder setFragmentBytes:color length:sizeof(color) atIndex:0];
[data.mtlcmdencoder setVertexBytes:points length:vertlen atIndex:0]; [data.mtlcmdencoder setVertexBytes:points length:vertlen atIndex:0];
@ -934,7 +1134,7 @@ METAL_RenderFillRects(SDL_Renderer * renderer, const SDL_FRect * rects, int coun
// !!! FIXME: render color should live in a dedicated uniform buffer. // !!! FIXME: render color should live in a dedicated uniform buffer.
const float color[4] = { ((float)renderer->r) / 255.0f, ((float)renderer->g) / 255.0f, ((float)renderer->b) / 255.0f, ((float)renderer->a) / 255.0f }; const float color[4] = { ((float)renderer->r) / 255.0f, ((float)renderer->g) / 255.0f, ((float)renderer->b) / 255.0f, ((float)renderer->a) / 255.0f };
[data.mtlcmdencoder setRenderPipelineState:ChoosePipelineState(data, data.mtlpipelineprims, renderer->blendMode)]; [data.mtlcmdencoder setRenderPipelineState:ChoosePipelineState(data, data.pipelines, SDL_METAL_FRAGMENT_SOLID, renderer->blendMode)];
[data.mtlcmdencoder setFragmentBytes:color length:sizeof(color) atIndex:0]; [data.mtlcmdencoder setFragmentBytes:color length:sizeof(color) atIndex:0];
[data.mtlcmdencoder setVertexBuffer:data.mtlbufconstants offset:CONSTANTS_OFFSET_IDENTITY atIndex:3]; [data.mtlcmdencoder setVertexBuffer:data.mtlbufconstants offset:CONSTANTS_OFFSET_IDENTITY atIndex:3];
@ -955,6 +1155,29 @@ METAL_RenderFillRects(SDL_Renderer * renderer, const SDL_FRect * rects, int coun
return 0; return 0;
}} }}
/* Binds the fragment-stage state shared by the textured draw paths: the
 * pipeline matching the texture's fragment function and blend mode, the
 * modulation color, the sampler and the texture itself. Formats with a
 * separate chroma texture additionally get that texture and the YUV decode
 * constants bound at index 1. */
static void
METAL_SetupRenderCopy(METAL_RenderData *data, SDL_Texture *texture, METAL_TextureData *texturedata)
{
    id<MTLRenderCommandEncoder> encoder = data.mtlcmdencoder;
    const BOOL modulated = (texture->modMode != 0);
    float tint[4];

    /* Opaque white leaves the sampled color untouched when no modulation
     * is requested. */
    tint[0] = modulated ? ((float)texture->r) / 255.0f : 1.0f;
    tint[1] = modulated ? ((float)texture->g) / 255.0f : 1.0f;
    tint[2] = modulated ? ((float)texture->b) / 255.0f : 1.0f;
    tint[3] = modulated ? ((float)texture->a) / 255.0f : 1.0f;

    id<MTLRenderPipelineState> pipeline = ChoosePipelineState(data, data.pipelines, texturedata.fragmentFunction, texture->blendMode);

    [encoder setRenderPipelineState:pipeline];
    [encoder setFragmentBytes:tint length:sizeof(tint) atIndex:0];
    [encoder setFragmentSamplerState:texturedata.mtlsampler atIndex:0];
    [encoder setFragmentTexture:texturedata.mtltexture atIndex:0];

    if (!texturedata.yuv && !texturedata.nv12) {
        return;
    }

    /* Chroma plane(s) plus the decode constants chosen at texture creation. */
    [encoder setFragmentTexture:texturedata.mtltexture_uv atIndex:1];
    [encoder setFragmentBuffer:data.mtlbufconstants offset:texturedata.conversionBufferOffset atIndex:1];
}
static int static int
METAL_RenderCopy(SDL_Renderer * renderer, SDL_Texture * texture, METAL_RenderCopy(SDL_Renderer * renderer, SDL_Texture * texture,
const SDL_Rect * srcrect, const SDL_FRect * dstrect) const SDL_Rect * srcrect, const SDL_FRect * dstrect)
@ -965,6 +1188,8 @@ METAL_RenderCopy(SDL_Renderer * renderer, SDL_Texture * texture,
const float texw = (float) texturedata.mtltexture.width; const float texw = (float) texturedata.mtltexture.width;
const float texh = (float) texturedata.mtltexture.height; const float texh = (float) texturedata.mtltexture.height;
METAL_SetupRenderCopy(data, texture, texturedata);
const float xy[] = { const float xy[] = {
dstrect->x, dstrect->y + dstrect->h, dstrect->x, dstrect->y + dstrect->h,
dstrect->x, dstrect->y, dstrect->x, dstrect->y,
@ -979,21 +1204,9 @@ METAL_RenderCopy(SDL_Renderer * renderer, SDL_Texture * texture,
normtex(srcrect->x + srcrect->w, texw), normtex(srcrect->y, texh) normtex(srcrect->x + srcrect->w, texw), normtex(srcrect->y, texh)
}; };
float color[4] = { 1.0f, 1.0f, 1.0f, 1.0f };
if (texture->modMode) {
color[0] = ((float)texture->r) / 255.0f;
color[1] = ((float)texture->g) / 255.0f;
color[2] = ((float)texture->b) / 255.0f;
color[3] = ((float)texture->a) / 255.0f;
}
[data.mtlcmdencoder setRenderPipelineState:ChoosePipelineState(data, data.mtlpipelinecopy, texture->blendMode)];
[data.mtlcmdencoder setVertexBytes:xy length:sizeof(xy) atIndex:0]; [data.mtlcmdencoder setVertexBytes:xy length:sizeof(xy) atIndex:0];
[data.mtlcmdencoder setVertexBytes:uv length:sizeof(uv) atIndex:1]; [data.mtlcmdencoder setVertexBytes:uv length:sizeof(uv) atIndex:1];
[data.mtlcmdencoder setVertexBuffer:data.mtlbufconstants offset:CONSTANTS_OFFSET_IDENTITY atIndex:3]; [data.mtlcmdencoder setVertexBuffer:data.mtlbufconstants offset:CONSTANTS_OFFSET_IDENTITY atIndex:3];
[data.mtlcmdencoder setFragmentBytes:color length:sizeof(color) atIndex:0];
[data.mtlcmdencoder setFragmentTexture:texturedata.mtltexture atIndex:0];
[data.mtlcmdencoder setFragmentSamplerState:texturedata.mtlsampler atIndex:0];
[data.mtlcmdencoder drawPrimitives:MTLPrimitiveTypeTriangleStrip vertexStart:0 vertexCount:4]; [data.mtlcmdencoder drawPrimitives:MTLPrimitiveTypeTriangleStrip vertexStart:0 vertexCount:4];
return 0; return 0;
@ -1012,6 +1225,8 @@ METAL_RenderCopyEx(SDL_Renderer * renderer, SDL_Texture * texture,
float transform[16]; float transform[16];
float minu, maxu, minv, maxv; float minu, maxu, minv, maxv;
METAL_SetupRenderCopy(data, texture, texturedata);
minu = normtex(srcrect->x, texw); minu = normtex(srcrect->x, texw);
maxu = normtex(srcrect->x + srcrect->w, texw); maxu = normtex(srcrect->x + srcrect->w, texw);
minv = normtex(srcrect->y, texh); minv = normtex(srcrect->y, texh);
@ -1062,21 +1277,9 @@ METAL_RenderCopyEx(SDL_Renderer * renderer, SDL_Texture * texture,
transform[13] = dstrect->y + center->y; transform[13] = dstrect->y + center->y;
} }
float color[4] = { 1.0f, 1.0f, 1.0f, 1.0f };
if (texture->modMode) {
color[0] = ((float)texture->r) / 255.0f;
color[1] = ((float)texture->g) / 255.0f;
color[2] = ((float)texture->b) / 255.0f;
color[3] = ((float)texture->a) / 255.0f;
}
[data.mtlcmdencoder setRenderPipelineState:ChoosePipelineState(data, data.mtlpipelinecopy, texture->blendMode)];
[data.mtlcmdencoder setVertexBytes:xy length:sizeof(xy) atIndex:0]; [data.mtlcmdencoder setVertexBytes:xy length:sizeof(xy) atIndex:0];
[data.mtlcmdencoder setVertexBytes:uv length:sizeof(uv) atIndex:1]; [data.mtlcmdencoder setVertexBytes:uv length:sizeof(uv) atIndex:1];
[data.mtlcmdencoder setVertexBytes:transform length:sizeof(transform) atIndex:3]; [data.mtlcmdencoder setVertexBytes:transform length:sizeof(transform) atIndex:3];
[data.mtlcmdencoder setFragmentBytes:color length:sizeof(color) atIndex:0];
[data.mtlcmdencoder setFragmentTexture:texturedata.mtltexture atIndex:0];
[data.mtlcmdencoder setFragmentSamplerState:texturedata.mtlsampler atIndex:0];
[data.mtlcmdencoder drawPrimitives:MTLPrimitiveTypeTriangleStrip vertexStart:0 vertexCount:4]; [data.mtlcmdencoder drawPrimitives:MTLPrimitiveTypeTriangleStrip vertexStart:0 vertexCount:4];
return 0; return 0;
@ -1144,8 +1347,7 @@ METAL_DestroyRenderer(SDL_Renderer * renderer)
[data.mtlcmdencoder endEncoding]; [data.mtlcmdencoder endEncoding];
} }
DestroyPipelineCache(data.mtlpipelineprims); DestroyShaderPipelines(data.pipelines);
DestroyPipelineCache(data.mtlpipelinecopy);
} }
SDL_free(renderer); SDL_free(renderer);

View file

@ -16,7 +16,7 @@ vertex SolidVertexOutput SDL_Solid_vertex(const device float2 *position [[buffer
{ {
SolidVertexOutput v; SolidVertexOutput v;
v.position = (projection * transform) * float4(position[vid], 0.0f, 1.0f); v.position = (projection * transform) * float4(position[vid], 0.0f, 1.0f);
v.pointSize = 0.5f; v.pointSize = 1.0f;
return v; return v;
} }
@ -50,3 +50,60 @@ fragment float4 SDL_Copy_fragment(CopyVertexOutput vert [[stage_in]],
{ {
return tex.sample(s, vert.texcoord) * col; return tex.sample(s, vert.texcoord) * col;
} }
/* Decode constants bound at buffer(1) of the YUV/NV12/NV21 fragment
 * functions. Each function adds `offset` to the sampled (Y, U, V) value and
 * then takes its dot product with one coefficient vector per output
 * channel. */
struct YUVDecode
{
float3 offset; /* added to the sampled YUV value before the dot products */
float3 Rcoeff; /* dot(yuv, Rcoeff) gives the red channel */
float3 Gcoeff; /* dot(yuv, Gcoeff) gives the green channel */
float3 Bcoeff; /* dot(yuv, Bcoeff) gives the blue channel */
};
/* Fragment function for three-plane YUV textures: luma comes from texY and
 * the two chroma planes from slices 0 and 1 of the texUV array. The decoded
 * color is opaque and is multiplied by the modulation color `col`. */
fragment float4 SDL_YUV_fragment(CopyVertexOutput vert [[stage_in]],
                                 constant float4 &col [[buffer(0)]],
                                 constant YUVDecode &decode [[buffer(1)]],
                                 texture2d<float> texY [[texture(0)]],
                                 texture2d_array<float> texUV [[texture(1)]],
                                 sampler s [[sampler(0)]])
{
    const float2 coord = vert.texcoord;

    const float luma = texY.sample(s, coord).r;
    const float plane0 = texUV.sample(s, coord, 0).r;
    const float plane1 = texUV.sample(s, coord, 1).r;

    /* Re-center the samples, then apply one coefficient vector per channel. */
    const float3 shifted = float3(luma, plane0, plane1) + decode.offset;

    return col * float4(dot(shifted, decode.Rcoeff), dot(shifted, decode.Gcoeff), dot(shifted, decode.Bcoeff), 1.0);
}
/* Fragment function for NV12 textures: luma comes from texY and the two
 * chroma values from the red and green channels of texUV, in that order.
 * The decoded color is opaque and is multiplied by the modulation color
 * `col`. */
fragment float4 SDL_NV12_fragment(CopyVertexOutput vert [[stage_in]],
                                  constant float4 &col [[buffer(0)]],
                                  constant YUVDecode &decode [[buffer(1)]],
                                  texture2d<float> texY [[texture(0)]],
                                  texture2d<float> texUV [[texture(1)]],
                                  sampler s [[sampler(0)]])
{
    const float2 coord = vert.texcoord;

    const float luma = texY.sample(s, coord).r;
    const float2 chroma = texUV.sample(s, coord).rg;

    /* Re-center the samples, then apply one coefficient vector per channel. */
    const float3 shifted = float3(luma, chroma) + decode.offset;

    return col * float4(dot(shifted, decode.Rcoeff), dot(shifted, decode.Gcoeff), dot(shifted, decode.Bcoeff), 1.0);
}
/* Fragment function for NV21 textures: identical to the NV12 path except
 * that the two chroma values are taken from texUV in swapped (green, red)
 * channel order. The decoded color is opaque and is multiplied by the
 * modulation color `col`. */
fragment float4 SDL_NV21_fragment(CopyVertexOutput vert [[stage_in]],
                                  constant float4 &col [[buffer(0)]],
                                  constant YUVDecode &decode [[buffer(1)]],
                                  texture2d<float> texY [[texture(0)]],
                                  texture2d<float> texUV [[texture(1)]],
                                  sampler s [[sampler(0)]])
{
    const float2 coord = vert.texcoord;

    const float luma = texY.sample(s, coord).r;
    const float2 chroma = texUV.sample(s, coord).gr;

    /* Re-center the samples, then apply one coefficient vector per channel. */
    const float3 shifted = float3(luma, chroma) + decode.offset;

    return col * float4(dot(shifted, decode.Rcoeff), dot(shifted, decode.Gcoeff), dot(shifted, decode.Bcoeff), 1.0);
}

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff