diff --git a/src/audio_core/hle/adts.h b/src/audio_core/hle/adts.h
index 11c736986..9aba09fc3 100644
--- a/src/audio_core/hle/adts.h
+++ b/src/audio_core/hle/adts.h
@@ -20,4 +20,4 @@ ADTSData ParseADTS(const char* buffer);
 
 // last two bytes of MF AAC decoder user data
 // see https://docs.microsoft.com/en-us/windows/desktop/medfound/aac-decoder#example-media-types
-u16 MFGetAACTag(const ADTSData input);
+u16 MFGetAACTag(const ADTSData& input);
diff --git a/src/audio_core/hle/adts_reader.cpp b/src/audio_core/hle/adts_reader.cpp
index 93118f887..e70c32a3b 100644
--- a/src/audio_core/hle/adts_reader.cpp
+++ b/src/audio_core/hle/adts_reader.cpp
@@ -50,7 +50,7 @@ ADTSData ParseADTS(const char* buffer) {
 // Frame length flag (1 bit)
 // Depends on core coder (1 bit)
 // Extension flag (1 bit)
-u16 MFGetAACTag(const ADTSData input) {
+u16 MFGetAACTag(const ADTSData& input) {
     u16 tag = 0;
 
     tag |= input.profile << 11;
diff --git a/src/audio_core/hle/wmf_decoder.cpp b/src/audio_core/hle/wmf_decoder.cpp
index 3b4087bf7..e78f24ca1 100644
--- a/src/audio_core/hle/wmf_decoder.cpp
+++ b/src/audio_core/hle/wmf_decoder.cpp
@@ -16,14 +16,12 @@ public:
 private:
     std::optional<BinaryResponse> Initalize(const BinaryRequest& request);
 
-    void Clear();
-
     std::optional<BinaryResponse> Decode(const BinaryRequest& request);
 
     MFOutputState DecodingLoop(ADTSData adts_header, std::array<std::vector<u8>, 2>& out_streams);
 
-    bool initalized = false;
-    bool selected = false;
+    bool transform_initialized = false;
+    bool format_selected = false;
 
     Memory::MemorySystem& memory;
 
@@ -33,10 +31,51 @@ private:
 };
 
 WMFDecoder::Impl::Impl(Memory::MemorySystem& memory) : memory(memory) {
-    MFCoInit();
+    // S_FALSE will be returned when COM has already been initialized
+    HRESULT hr = CoInitialize(nullptr);
+    if (hr != S_OK && hr != S_FALSE) {
+        ReportError("Failed to start COM components", hr);
+    }
+
+    // lite startup is faster and includes everything we need
+    hr = MFStartup(MF_VERSION, MFSTARTUP_LITE);
+    if (hr != S_OK) {
+        // MF cannot be initialized in test mode or safe mode; leave the decoder uninitialized
+        ReportError("Failed to initialize Media Foundation", hr);
+        return;
+    }
+
+    LOG_INFO(Audio_DSP, "Media Foundation activated");
+
+    // initialize transform
+    transform = MFDecoderInit();
+    if (transform == nullptr) {
+        LOG_CRITICAL(Audio_DSP, "Can't initialize decoder");
+        return;
+    }
+
+    hr = transform->GetStreamIDs(1, &in_stream_id, 1, &out_stream_id);
+    if (hr == E_NOTIMPL) {
+        // if not implemented, it means this MFT does not assign stream ID for you
+        in_stream_id = 0;
+        out_stream_id = 0;
+    } else if (FAILED(hr)) {
+        ReportError("Decoder failed to initialize the stream ID", hr);
+        return;
+    }
+    transform_initialized = true;
 }
 
-WMFDecoder::Impl::~Impl() = default;
+WMFDecoder::Impl::~Impl() {
+    if (transform_initialized) {
+        MFFlush(transform.get());
+    }
+    // always delete the transform object (even a partially initialized one)
+    // before shutting down MF, otherwise access violation will occur
+    transform.reset();
+    MFShutdown();
+    CoUninitialize();
+}
 
 std::optional<BinaryResponse> WMFDecoder::Impl::ProcessRequest(const BinaryRequest& request) {
     if (request.codec != DecoderCodec::AAC) {
@@ -65,43 +104,14 @@ std::optional<BinaryResponse> WMFDecoder::Impl::ProcessRequest(const BinaryReque
 }
 
 std::optional<BinaryResponse> WMFDecoder::Impl::Initalize(const BinaryRequest& request) {
-    if (initalized) {
-        Clear();
-    }
-
     BinaryResponse response;
     std::memcpy(&response, &request, sizeof(response));
     response.unknown1 = 0x0;
-    transform = MFDecoderInit();
 
-    if (transform == nullptr) {
-        LOG_CRITICAL(Audio_DSP, "Can't init decoder");
-        return response;
-    }
-
-    HRESULT hr = transform->GetStreamIDs(1, &in_stream_id, 1, &out_stream_id);
-    if (hr == E_NOTIMPL) {
-        // if not implemented, it means this MFT does not assign stream ID for you
-        in_stream_id = 0;
-        out_stream_id = 0;
-    } else if (FAILED(hr)) {
-        ReportError("Decoder failed to initialize the stream ID", hr);
-        return response;
-    }
-
-    initalized = true;
+    format_selected = false; // select the format again when the application initializes the DSP
     return response;
 }
 
-void WMFDecoder::Impl::Clear() {
-    if (initalized) {
-        MFFlush(transform.get());
-        MFDeInit(transform.get());
-    }
-    initalized = false;
-    selected = false;
-}
-
 MFOutputState WMFDecoder::Impl::DecodingLoop(ADTSData adts_header,
                                              std::array<std::vector<u8>, 2>& out_streams) {
     MFOutputState output_status = MFOutputState::OK;
@@ -117,7 +127,7 @@ MFOutputState WMFDecoder::Impl::DecodingLoop(ADTSData adts_header,
 
             // the following was taken from ffmpeg version of the decoder
             f32 val_f32;
-            for (size_t i = 0; i < output_buffer->size();) {
+            for (std::size_t i = 0; i < output_buffer->size();) {
                 for (std::size_t channel = 0; channel < adts_header.channels; channel++) {
                     val_f32 = output_buffer->at(i);
                     s16 val = static_cast<s16>(0x7FFF * val_f32);
@@ -135,8 +145,8 @@ MFOutputState WMFDecoder::Impl::DecodingLoop(ADTSData adts_header,
 
         // for status = 2, reset MF
         if (output_status == MFOutputState::NeedReconfig) {
-            Clear();
-            return MFOutputState::FatalError;
+            format_selected = false;
+            return MFOutputState::NeedReconfig;
         }
 
         // for status = 3, try again with new buffer
@@ -161,8 +171,8 @@ std::optional<BinaryResponse> WMFDecoder::Impl::Decode(const BinaryRequest& requ
     response.num_channels = 2;
     response.num_samples = 1024;
 
-    if (!initalized) {
-        LOG_DEBUG(Audio_DSP, "Decoder not initalized");
+    if (!transform_initialized) {
+        LOG_DEBUG(Audio_DSP, "Decoder not initialized");
         // This is a hack to continue games when decoder failed to initialize
         return response;
     }
@@ -177,6 +187,7 @@ std::optional<BinaryResponse> WMFDecoder::Impl::Decode(const BinaryRequest& requ
     std::array<std::vector<u8>, 2> out_streams;
     unique_mfptr<IMFSample> sample;
     MFInputState input_status = MFInputState::OK;
+    MFOutputState output_status = MFOutputState::OK;
     std::optional<ADTSMeta> adts_meta = DetectMediaType((char*)data, request.size);
 
     if (!adts_meta) {
@@ -186,7 +197,7 @@ std::optional<BinaryResponse> WMFDecoder::Impl::Decode(const BinaryRequest& requ
 
     response.num_channels = adts_meta->ADTSHeader.channels;
 
-    if (!selected) {
+    if (!format_selected) {
         LOG_DEBUG(Audio_DSP, "New ADTS stream: channels = {}, sample rate = {}",
                   adts_meta->ADTSHeader.channels, adts_meta->ADTSHeader.samplerate);
         SelectInputMediaType(transform.get(), in_stream_id, adts_meta->ADTSHeader,
@@ -196,7 +207,7 @@ std::optional<BinaryResponse> WMFDecoder::Impl::Decode(const BinaryRequest& requ
         // cache the result from detect_mediatype and call select_*_mediatype only once
         // This could increase performance very slightly
         transform->ProcessMessage(MFT_MESSAGE_NOTIFY_BEGIN_STREAMING, 0);
-        selected = true;
+        format_selected = true;
     }
 
     sample = CreateSample((void*)data, request.size, 1, 0);
@@ -204,8 +215,9 @@ std::optional<BinaryResponse> WMFDecoder::Impl::Decode(const BinaryRequest& requ
 
     while (true) {
         input_status = SendSample(transform.get(), in_stream_id, sample.get());
+        output_status = DecodingLoop(adts_meta->ADTSHeader, out_streams);
 
-        if (DecodingLoop(adts_meta->ADTSHeader, out_streams) == MFOutputState::FatalError) {
+        if (output_status == MFOutputState::FatalError) {
             // if the decode issues are caused by MFT not accepting new samples, try again
             // NOTICE: you are required to check the output even if you already knew/guessed
             // MFT didn't accept the input sample
@@ -216,6 +228,11 @@ std::optional<BinaryResponse> WMFDecoder::Impl::Decode(const BinaryRequest& requ
 
             LOG_ERROR(Audio_DSP, "Errors occurred when receiving output");
             return response;
+        } else if (output_status == MFOutputState::NeedReconfig) {
+            // flush the transform
+            MFFlush(transform.get());
+            // decode again
+            return this->Decode(request);
         }
 
         break; // jump out of the loop if at least we don't have obvious issues
diff --git a/src/audio_core/hle/wmf_decoder_utils.cpp b/src/audio_core/hle/wmf_decoder_utils.cpp
index c0c9c5744..21dd8a950 100644
--- a/src/audio_core/hle/wmf_decoder_utils.cpp
+++ b/src/audio_core/hle/wmf_decoder_utils.cpp
@@ -2,6 +2,7 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 #include "common/logging/log.h"
+#include "common/string_util.h"
 #include "wmf_decoder_utils.h"
 
 // utility functions
@@ -9,42 +10,21 @@ void ReportError(std::string msg, HRESULT hr) {
     if (SUCCEEDED(hr)) {
         return;
     }
-    LPSTR err;
-    FormatMessage(FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_ALLOCATE_BUFFER |
-                      FORMAT_MESSAGE_IGNORE_INSERTS,
-                  nullptr, hr,
-                  // hardcode to use en_US because if any user had problems with this
-                  // we can help them w/o translating anything
-                  // default is to use the language currently active on the operating system
-                  MAKELANGID(LANG_ENGLISH, SUBLANG_ENGLISH_US), (LPSTR)&err, 0, nullptr);
+    LPWSTR err = nullptr; // stays null when FormatMessageW cannot produce a message
+    FormatMessageW(FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_ALLOCATE_BUFFER |
+                       FORMAT_MESSAGE_IGNORE_INSERTS,
+                   nullptr, hr,
+                   // hardcode to use en_US because if any user had problems with this
+                   // we can help them w/o translating anything
+                   // default is to use the language currently active on the operating system
+                   MAKELANGID(LANG_ENGLISH, SUBLANG_ENGLISH_US), (LPWSTR)&err, 0, nullptr);
     if (err != nullptr) {
-        LOG_CRITICAL(Audio_DSP, "{}: {}", msg, err);
+        LOG_CRITICAL(Audio_DSP, "{}: {}", msg, Common::UTF16ToUTF8(err));
+        LocalFree(err);
     }
     LOG_CRITICAL(Audio_DSP, "{}: {:08x}", msg, hr);
 }
 
-bool MFCoInit() {
-    HRESULT hr = S_OK;
-    hr = CoInitialize(NULL);
-    // S_FALSE will be returned when COM has already been initialized
-    if (hr != S_OK && hr != S_FALSE) {
-        ReportError("Failed to start COM components", hr);
-        return false;
-    }
-
-    // lite startup is faster and all what we need is included
-    hr = MFStartup(MF_VERSION, MFSTARTUP_LITE);
-    if (hr != S_OK) {
-        // Do you know you can't initialize MF in test mode or safe mode?
-        ReportError("Failed to initialize Media Foundation", hr);
-        return false;
-    }
-
-    LOG_INFO(Audio_DSP, "Media Foundation activated");
-
-    return true;
-}
-
 unique_mfptr<IMFTransform> MFDecoderInit(GUID audio_format) {
     HRESULT hr = S_OK;
     MFT_REGISTER_TYPE_INFO reg = {0};
@@ -72,6 +52,8 @@ unique_mfptr<IMFTransform> MFDecoderInit(GUID audio_format) {
         if (FAILED(hr))
             transform = nullptr;
         activate[n]->Release();
+        if (SUCCEEDED(hr))
+            break;
     }
     if (transform == nullptr) {
         ReportError("Failed to initialize MFT", hr);
@@ -79,15 +61,11 @@ unique_mfptr<IMFTransform> MFDecoderInit(GUID audio_format) {
         return nullptr;
     }
     CoTaskMemFree(activate);
-    return std::move(transform);
+    return transform;
 }
 
-void MFDeInit(IMFTransform* transform) {
-    MFShutdownObject(transform);
-    CoUninitialize();
-}
-
-unique_mfptr<IMFSample> CreateSample(void* data, DWORD len, DWORD alignment, LONGLONG duration) {
+unique_mfptr<IMFSample> CreateSample(const void* data, DWORD len, DWORD alignment,
+                                     LONGLONG duration) {
     HRESULT hr = S_OK;
     unique_mfptr<IMFMediaBuffer> buf;
     unique_mfptr<IMFSample> sample;
@@ -126,11 +104,11 @@ unique_mfptr<IMFSample> CreateSample(void* data, DWORD len, DWORD alignment, LON
         ReportError("Unable to set sample duration, but continuing anyway", hr);
     }
 
-    return std::move(sample);
+    return sample;
 }
 
 bool SelectInputMediaType(IMFTransform* transform, int in_stream_id, const ADTSData& adts,
-                          UINT8* user_data, UINT32 user_data_len, GUID audio_format) {
+                          const UINT8* user_data, UINT32 user_data_len, GUID audio_format) {
     HRESULT hr = S_OK;
     unique_mfptr<IMFMediaType> t;
 
@@ -209,7 +187,7 @@ bool SelectOutputMediaType(IMFTransform* transform, int out_stream_id, GUID audi
     return false;
 }
 
-std::optional<ADTSMeta> DetectMediaType(char* buffer, size_t len) {
+std::optional<ADTSMeta> DetectMediaType(char* buffer, std::size_t len) {
     if (len < 7) {
         return std::nullopt;
     }
diff --git a/src/audio_core/hle/wmf_decoder_utils.h b/src/audio_core/hle/wmf_decoder_utils.h
index a5e5d0154..26e1217a2 100644
--- a/src/audio_core/hle/wmf_decoder_utils.h
+++ b/src/audio_core/hle/wmf_decoder_utils.h
@@ -29,6 +29,14 @@ struct MFRelease {
     };
 };
 
+template <>
+struct MFRelease<IMFTransform> { // deleter picked up by unique_mfptr<IMFTransform>
+    void operator()(IMFTransform* pointer) const {
+        MFShutdownObject(pointer); // shut the transform down first, then drop our reference
+        pointer->Release();
+    };
+};
+
 // wrapper facilities for dealing with pointers
 template <typename T>
 using unique_mfptr = std::unique_ptr<T, MFRelease<T>>;
@@ -65,15 +73,13 @@ struct ADTSMeta {
 };
 
 // exported functions
-bool MFCoInit();
 unique_mfptr<IMFTransform> MFDecoderInit(GUID audio_format = MFAudioFormat_AAC);
-void MFDeInit(IMFTransform* transform);
-unique_mfptr<IMFSample> CreateSample(void* data, DWORD len, DWORD alignment = 1,
+unique_mfptr<IMFSample> CreateSample(const void* data, DWORD len, DWORD alignment = 1,
                                      LONGLONG duration = 0);
 bool SelectInputMediaType(IMFTransform* transform, int in_stream_id, const ADTSData& adts,
-                          UINT8* user_data, UINT32 user_data_len,
+                          const UINT8* user_data, UINT32 user_data_len,
                           GUID audio_format = MFAudioFormat_AAC);
-std::optional<ADTSMeta> DetectMediaType(char* buffer, size_t len);
+std::optional<ADTSMeta> DetectMediaType(char* buffer, std::size_t len);
 bool SelectOutputMediaType(IMFTransform* transform, int out_stream_id,
                            GUID audio_format = MFAudioFormat_PCM);
 void MFFlush(IMFTransform* transform);