diff fast_dsp/SPC_DSP.cpp @ 0:e38dacceb958

initial import
author Robert McIntyre <rlm@mit.edu>
date Fri, 21 Oct 2011 05:53:11 -0700
parents
children
line wrap: on
line diff
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/fast_dsp/SPC_DSP.cpp	Fri Oct 21 05:53:11 2011 -0700
     1.3 @@ -0,0 +1,703 @@
     1.4 +// snes_spc 0.9.0. http://www.slack.net/~ant/
     1.5 +
     1.6 +#include "SPC_DSP.h"
     1.7 +
     1.8 +#include "blargg_endian.h"
     1.9 +#include <string.h>
    1.10 +
    1.11 +/* Copyright (C) 2007 Shay Green. This module is free software; you
    1.12 +can redistribute it and/or modify it under the terms of the GNU Lesser
    1.13 +General Public License as published by the Free Software Foundation; either
    1.14 +version 2.1 of the License, or (at your option) any later version. This
    1.15 +module is distributed in the hope that it will be useful, but WITHOUT ANY
    1.16 +WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
    1.17 +FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
    1.18 +details. You should have received a copy of the GNU Lesser General Public
    1.19 +License along with this module; if not, write to the Free Software Foundation,
    1.20 +Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */
    1.21 +
    1.22 +#include "blargg_source.h"
    1.23 +
    1.24 +#ifdef BLARGG_ENABLE_OPTIMIZER
    1.25 +	#include BLARGG_ENABLE_OPTIMIZER
    1.26 +#endif
    1.27 +
    1.28 +#if INT_MAX < 0x7FFFFFFF
    1.29 +	#error "Requires that int type have at least 32 bits"
    1.30 +#endif
    1.31 +
    1.32 +
// TODO: add to blargg_endian.h
// Thin wrappers over GET_LE16/SET_LE16, which are not defined in this file
// (presumably they come from blargg_endian.h and access 16-bit little-endian
// values -- TODO confirm). NOTE(review): the "A" suffix presumably marks accesses
// whose address is known to be aligned -- confirm against blargg_endian.h.

// Reads a 16-bit value at addr and reinterprets it as signed
#define GET_LE16SA( addr )      ((BOOST::int16_t) GET_LE16( addr ))
// Reads a 16-bit value at addr
#define GET_LE16A( addr )       GET_LE16( addr )
// Writes the 16-bit value data at addr
#define SET_LE16A( addr, data ) SET_LE16( addr, data )
    1.37 +
// Register image that reset() passes to load(). One byte per DSP register; each
// row below holds 0x10 consecutive registers.
static BOOST::uint8_t const initial_regs [SPC_DSP::register_count] =
{
	0x45,0x8B,0x5A,0x9A,0xE4,0x82,0x1B,0x78,0x00,0x00,0xAA,0x96,0x89,0x0E,0xE0,0x80,
	0x2A,0x49,0x3D,0xBA,0x14,0xA0,0xAC,0xC5,0x00,0x00,0x51,0xBB,0x9C,0x4E,0x7B,0xFF,
	0xF4,0xFD,0x57,0x32,0x37,0xD9,0x42,0x22,0x00,0x00,0x5B,0x3C,0x9F,0x1B,0x87,0x9A,
	0x6F,0x27,0xAF,0x7B,0xE5,0x68,0x0A,0xD9,0x00,0x00,0x9A,0xC5,0x9C,0x4E,0x7B,0xFF,
	0xEA,0x21,0x78,0x4F,0xDD,0xED,0x24,0x14,0x00,0x00,0x77,0xB1,0xD1,0x36,0xC1,0x67,
	0x52,0x57,0x46,0x3D,0x59,0xF4,0x87,0xA4,0x00,0x00,0x7E,0x44,0x9C,0x4E,0x7B,0xFF,
	0x75,0xF5,0x06,0x97,0x10,0xC3,0x24,0xBB,0x00,0x00,0x7B,0x7A,0xE0,0x60,0x12,0x0F,
	0xF7,0x74,0x1C,0xE5,0x39,0x3D,0x73,0xC1,0x00,0x00,0x7A,0xB3,0xFF,0x4E,0x7B,0xFF
};
    1.49 +
    1.50 +// if ( io < -32768 ) io = -32768;
    1.51 +// if ( io >  32767 ) io =  32767;
    1.52 +#define CLAMP16( io )\
    1.53 +{\
    1.54 +	if ( (int16_t) io != io )\
    1.55 +		io = (io >> 31) ^ 0x7FFF;\
    1.56 +}
    1.57 +
    1.58 +// Access global DSP register
    1.59 +#define REG(n)      m.regs [r_##n]
    1.60 +
    1.61 +// Access voice DSP register
    1.62 +#define VREG(r,n)   r [v_##n]
    1.63 +
    1.64 +#define WRITE_SAMPLES( l, r, out ) \
    1.65 +{\
    1.66 +	out [0] = l;\
    1.67 +	out [1] = r;\
    1.68 +	out += 2;\
    1.69 +	if ( out >= m.out_end )\
    1.70 +	{\
    1.71 +		check( out == m.out_end );\
    1.72 +		check( m.out_end != &m.extra [extra_size] || \
    1.73 +			(m.extra <= m.out_begin && m.extra < &m.extra [extra_size]) );\
    1.74 +		out       = m.extra;\
    1.75 +		m.out_end = &m.extra [extra_size];\
    1.76 +	}\
    1.77 +}\
    1.78 +
    1.79 +void SPC_DSP::set_output( sample_t* out, int size )
    1.80 +{
    1.81 +	require( (size & 1) == 0 ); // must be even
    1.82 +	if ( !out )
    1.83 +	{
    1.84 +		out  = m.extra;
    1.85 +		size = extra_size;
    1.86 +	}
    1.87 +	m.out_begin = out;
    1.88 +	m.out       = out;
    1.89 +	m.out_end   = out + size;
    1.90 +}
    1.91 +
    1.92 +// Volume registers and efb are signed! Easy to forget int8_t cast.
    1.93 +// Prefixes are to avoid accidental use of locals with same names.
    1.94 +
// Interleaved gauss table (to improve cache coherency)
// interleved_gauss [i] = gauss [(i & 1) * 256 + 255 - (i >> 1 & 0xFF)]
// run() indexes it with an even offset in 0..0x1FE taken from a voice's
// interp_pos: entries [offset] and [offset + 1] weight the first two input samples
// and entries [511 - offset] and [510 - offset] weight the last two.
static short const interleved_gauss [512] =
{
 370,1305, 366,1305, 362,1304, 358,1304, 354,1304, 351,1304, 347,1304, 343,1303,
 339,1303, 336,1303, 332,1302, 328,1302, 325,1301, 321,1300, 318,1300, 314,1299,
 311,1298, 307,1297, 304,1297, 300,1296, 297,1295, 293,1294, 290,1293, 286,1292,
 283,1291, 280,1290, 276,1288, 273,1287, 270,1286, 267,1284, 263,1283, 260,1282,
 257,1280, 254,1279, 251,1277, 248,1275, 245,1274, 242,1272, 239,1270, 236,1269,
 233,1267, 230,1265, 227,1263, 224,1261, 221,1259, 218,1257, 215,1255, 212,1253,
 210,1251, 207,1248, 204,1246, 201,1244, 199,1241, 196,1239, 193,1237, 191,1234,
 188,1232, 186,1229, 183,1227, 180,1224, 178,1221, 175,1219, 173,1216, 171,1213,
 168,1210, 166,1207, 163,1205, 161,1202, 159,1199, 156,1196, 154,1193, 152,1190,
 150,1186, 147,1183, 145,1180, 143,1177, 141,1174, 139,1170, 137,1167, 134,1164,
 132,1160, 130,1157, 128,1153, 126,1150, 124,1146, 122,1143, 120,1139, 118,1136,
 117,1132, 115,1128, 113,1125, 111,1121, 109,1117, 107,1113, 106,1109, 104,1106,
 102,1102, 100,1098,  99,1094,  97,1090,  95,1086,  94,1082,  92,1078,  90,1074,
  89,1070,  87,1066,  86,1061,  84,1057,  83,1053,  81,1049,  80,1045,  78,1040,
  77,1036,  76,1032,  74,1027,  73,1023,  71,1019,  70,1014,  69,1010,  67,1005,
  66,1001,  65, 997,  64, 992,  62, 988,  61, 983,  60, 978,  59, 974,  58, 969,
  56, 965,  55, 960,  54, 955,  53, 951,  52, 946,  51, 941,  50, 937,  49, 932,
  48, 927,  47, 923,  46, 918,  45, 913,  44, 908,  43, 904,  42, 899,  41, 894,
  40, 889,  39, 884,  38, 880,  37, 875,  36, 870,  36, 865,  35, 860,  34, 855,
  33, 851,  32, 846,  32, 841,  31, 836,  30, 831,  29, 826,  29, 821,  28, 816,
  27, 811,  27, 806,  26, 802,  25, 797,  24, 792,  24, 787,  23, 782,  23, 777,
  22, 772,  21, 767,  21, 762,  20, 757,  20, 752,  19, 747,  19, 742,  18, 737,
  17, 732,  17, 728,  16, 723,  16, 718,  15, 713,  15, 708,  15, 703,  14, 698,
  14, 693,  13, 688,  13, 683,  12, 678,  12, 674,  11, 669,  11, 664,  11, 659,
  10, 654,  10, 649,  10, 644,   9, 640,   9, 635,   9, 630,   8, 625,   8, 620,
   8, 615,   7, 611,   7, 606,   7, 601,   6, 596,   6, 592,   6, 587,   6, 582,
   5, 577,   5, 573,   5, 568,   5, 563,   4, 559,   4, 554,   4, 550,   4, 545,
   4, 540,   3, 536,   3, 531,   3, 527,   3, 522,   3, 517,   2, 513,   2, 508,
   2, 504,   2, 499,   2, 495,   2, 491,   2, 486,   1, 482,   1, 477,   1, 473,
   1, 469,   1, 464,   1, 460,   1, 456,   1, 451,   1, 447,   1, 443,   1, 439,
   0, 434,   0, 430,   0, 426,   0, 422,   0, 418,   0, 414,   0, 410,   0, 405,
   0, 401,   0, 397,   0, 393,   0, 389,   0, 385,   0, 381,   0, 378,   0, 374,
};
   1.132 +
   1.133 +
   1.134 +//// Counters
   1.135 +
   1.136 +#define RATE( rate, div )\
   1.137 +	(rate >= div ? rate / div * 8 - 1 : rate - 1)
   1.138 +
   1.139 +static unsigned const counter_mask [32] =
   1.140 +{
   1.141 +	RATE(   2,2), RATE(2048,4), RATE(1536,3),
   1.142 +	RATE(1280,5), RATE(1024,4), RATE( 768,3),
   1.143 +	RATE( 640,5), RATE( 512,4), RATE( 384,3),
   1.144 +	RATE( 320,5), RATE( 256,4), RATE( 192,3),
   1.145 +	RATE( 160,5), RATE( 128,4), RATE(  96,3),
   1.146 +	RATE(  80,5), RATE(  64,4), RATE(  48,3),
   1.147 +	RATE(  40,5), RATE(  32,4), RATE(  24,3),
   1.148 +	RATE(  20,5), RATE(  16,4), RATE(  12,3),
   1.149 +	RATE(  10,5), RATE(   8,4), RATE(   6,3),
   1.150 +	RATE(   5,5), RATE(   4,4), RATE(   3,3),
   1.151 +	              RATE(   2,4),
   1.152 +	              RATE(   1,4)
   1.153 +};
   1.154 +#undef RATE
   1.155 +
   1.156 +inline void SPC_DSP::init_counter()
   1.157 +{
   1.158 +	// counters start out with this synchronization
   1.159 +	m.counters [0] =     1;
   1.160 +	m.counters [1] =     0;
   1.161 +	m.counters [2] = -0x20u;
   1.162 +	m.counters [3] =  0x0B;
   1.163 +	
   1.164 +	int n = 2;
   1.165 +	for ( int i = 1; i < 32; i++ )
   1.166 +	{
   1.167 +		m.counter_select [i] = &m.counters [n];
   1.168 +		if ( !--n )
   1.169 +			n = 3;
   1.170 +	}
   1.171 +	m.counter_select [ 0] = &m.counters [0];
   1.172 +	m.counter_select [30] = &m.counters [2];
   1.173 +}
   1.174 +
   1.175 +inline void SPC_DSP::run_counter( int i )
   1.176 +{
   1.177 +	int n = m.counters [i];
   1.178 +	if ( !(n-- & 7) )
   1.179 +		n -= 6 - i;
   1.180 +	m.counters [i] = n;
   1.181 +}
   1.182 +
   1.183 +#define READ_COUNTER( rate )\
   1.184 +	(*m.counter_select [rate] & counter_mask [rate])
   1.185 +
   1.186 +
   1.187 +//// Emulation
   1.188 +
// Advances emulation by clock_count clocks. Clocks are accumulated in m.phase and
// one stereo sample is generated per 32 clocks; leftover clocks stay in m.phase
// for the next call. For each sample: KON/KOFF are latched (every other sample),
// the rate counters and noise generator are stepped, all voices are processed
// (pitch, key-on delay, gaussian interpolation, envelope, BRR decoding), the echo
// buffer/FIR filter is run, and the final mix is written via WRITE_SAMPLES.
void SPC_DSP::run( int clock_count )
{
	int new_phase = m.phase + clock_count;
	int count = new_phase >> 5; // number of whole samples to generate
	m.phase = new_phase & 31;
	if ( !count )
		return;
	
	// These are read once per call, not once per sample
	uint8_t* const ram = m.ram;
	uint8_t const* const dir = &ram [REG(dir) * 0x100];
	// Voices needing the exact (slow) path: noise voices and any voice whose output
	// feeds the pitch modulation of the next voice (hence pmon shifted down by one)
	int const slow_gaussian = (REG(pmon) >> 1) | REG(non);
	int const noise_rate = REG(flg) & 0x1F;
	
	// Global volume
	int mvoll = (int8_t) REG(mvoll);
	int mvolr = (int8_t) REG(mvolr);
	if ( mvoll * mvolr < m.surround_threshold )
		mvoll = -mvoll; // eliminate surround
	
	do
	{
		// KON/KOFF reading
		if ( (m.every_other_sample ^= 1) != 0 )
		{
			m.new_kon &= ~m.kon;
			m.kon    = m.new_kon;
			m.t_koff = REG(koff); 
		}
		
		run_counter( 1 );
		run_counter( 2 );
		run_counter( 3 );
		
		// Noise
		// Shift register step: XOR of bits 0 and 1 is fed into bit 14 while the
		// rest shifts right by one
		if ( !READ_COUNTER( noise_rate ) )
		{
			int feedback = (m.noise << 13) ^ (m.noise << 14);
			m.noise = (feedback & 0x4000) ^ (m.noise >> 1);
		}
		
		// Voices
		int pmon_input = 0;
		int main_out_l = 0;
		int main_out_r = 0;
		int echo_out_l = 0;
		int echo_out_r = 0;
		voice_t* v = m.voices;
		uint8_t* v_regs = m.regs;
		int vbit = 1; // this voice's bit in the per-voice bitmask registers
		do
		{
			// Reads 16-bit entry i of the current source's 4-byte directory record
			// (0 is used on key-on, 1 when a block with the end bit finishes)
			#define SAMPLE_PTR(i) GET_LE16A( &dir [VREG(v_regs,srcn) * 4 + i * 2] )
			
			int brr_header = ram [v->brr_addr];
			int kon_delay = v->kon_delay;
			
			// Pitch
			int pitch = GET_LE16A( &VREG(v_regs,pitchl) ) & 0x3FFF;
			if ( REG(pmon) & vbit )
				pitch += ((pmon_input >> 5) * pitch) >> 10;
			
			// KON phases
			if ( --kon_delay >= 0 )
			{
				v->kon_delay = kon_delay;
				
				// Get ready to start BRR decoding on next sample
				if ( kon_delay == 4 )
				{
					v->brr_addr   = SAMPLE_PTR( 0 );
					v->brr_offset = 1;
					v->buf_pos    = v->buf;
					brr_header    = 0; // header is ignored on this sample
				}
				
				// Envelope is never run during KON
				v->env        = 0;
				v->hidden_env = 0;
				
				// Disable BRR decoding until last three samples
				v->interp_pos = (kon_delay & 3 ? 0x4000 : 0);
				
				// Pitch is never added during KON
				pitch = 0;
			}
			
			int env = v->env;
			
			// Gaussian interpolation
			{
				int output = 0;
				VREG(v_regs,envx) = (uint8_t) (env >> 4);
				if ( env )
				{
					// Make pointers into gaussian based on fractional position between samples
					int offset = (unsigned) v->interp_pos >> 3 & 0x1FE;
					short const* fwd = interleved_gauss       + offset;
					short const* rev = interleved_gauss + 510 - offset; // mirror left half of gaussian
					
					int const* in = &v->buf_pos [(unsigned) v->interp_pos >> 12];
					
					if ( !(slow_gaussian & vbit) ) // 99%
					{
						// Faster approximation when exact sample value isn't necessary for pitch mod
						output = (fwd [0] * in [0] +
						          fwd [1] * in [1] +
						          rev [1] * in [2] +
						          rev [0] * in [3]) >> 11;
						output = (output * env) >> 11;
					}
					else
					{
						// Noise output is used unless this voice has noise disabled,
						// in which case the exact interpolation below replaces it
						output = (int16_t) (m.noise * 2);
						if ( !(REG(non) & vbit) )
						{
							output  = (fwd [0] * in [0]) >> 11;
							output += (fwd [1] * in [1]) >> 11;
							output += (rev [1] * in [2]) >> 11;
							output = (int16_t) output;
							output += (rev [0] * in [3]) >> 11;
							
							CLAMP16( output );
							output &= ~1;
						}
						output = (output * env) >> 11 & ~1;
					}
					
					// Output
					int l = output * v->volume [0];
					int r = output * v->volume [1];
					
					main_out_l += l;
					main_out_r += r;
					
					if ( REG(eon) & vbit )
					{
						echo_out_l += l;
						echo_out_r += r;
					}
				}
				
				// This voice's output modulates the next voice's pitch if enabled in pmon
				pmon_input = output;
				VREG(v_regs,outx) = (uint8_t) (output >> 8);
			}
			
			// Soft reset or end of sample
			if ( REG(flg) & 0x80 || (brr_header & 3) == 1 )
			{
				v->env_mode = env_release;
				env         = 0;
			}
			
			if ( m.every_other_sample )
			{
				// KOFF
				if ( m.t_koff & vbit )
					v->env_mode = env_release;
				
				// KON
				if ( m.kon & vbit )
				{
					v->kon_delay = 5;
					v->env_mode  = env_attack;
					REG(endx) &= ~vbit;
				}
			}
			
			// Envelope
			if ( !v->kon_delay )
			{
				if ( v->env_mode == env_release ) // 97%
				{
					env -= 0x8;
					v->env = env;
					if ( env <= 0 )
					{
						v->env = 0;
						goto skip_brr; // no BRR decoding for you!
					}
				}
				else // 3%
				{
					int rate;
					int const adsr0 = VREG(v_regs,adsr0);
					int env_data = VREG(v_regs,adsr1);
					if ( adsr0 >= 0x80 ) // 97% ADSR
					{
						if ( v->env_mode > env_decay ) // 89%
						{
							env--;
							env -= env >> 8;
							rate = env_data & 0x1F;
							
							// optimized handling
							v->hidden_env = env;
							if ( READ_COUNTER( rate ) )
								goto exit_env;
							v->env = env;
							goto exit_env;
						}
						else if ( v->env_mode == env_decay )
						{
							env--;
							env -= env >> 8;
							rate = (adsr0 >> 3 & 0x0E) + 0x10;
						}
						else // env_attack
						{
							rate = (adsr0 & 0x0F) * 2 + 1;
							env += rate < 31 ? 0x20 : 0x400;
						}
					}
					else // GAIN
					{
						int mode;
						env_data = VREG(v_regs,gain);
						mode = env_data >> 5;
						if ( mode < 4 ) // direct
						{
							env = env_data * 0x10;
							rate = 31;
						}
						else
						{
							rate = env_data & 0x1F;
							if ( mode == 4 ) // 4: linear decrease
							{
								env -= 0x20;
							}
							else if ( mode < 6 ) // 5: exponential decrease
							{
								env--;
								env -= env >> 8;
							}
							else // 6,7: linear increase
							{
								env += 0x20;
								if ( mode > 6 && (unsigned) v->hidden_env >= 0x600 )
									env += 0x8 - 0x20; // 7: two-slope linear increase
							}
						}
					}
					
					// Sustain level
					if ( (env >> 8) == (env_data >> 5) && v->env_mode == env_decay )
						v->env_mode = env_sustain;
					
					v->hidden_env = env;
					
					// unsigned cast because linear decrease going negative also triggers this
					if ( (unsigned) env > 0x7FF )
					{
						env = (env < 0 ? 0 : 0x7FF);
						if ( v->env_mode == env_attack )
							v->env_mode = env_decay;
					}
					
					if ( !READ_COUNTER( rate ) )
						v->env = env; // nothing else is controlled by the counter
				}
			}
		exit_env:
			
			{
				// Apply pitch
				int old_pos = v->interp_pos;
				int interp_pos = (old_pos & 0x3FFF) + pitch;
				if ( interp_pos > 0x7FFF )
					interp_pos = 0x7FFF;
				v->interp_pos = interp_pos;
				
				// BRR decode if necessary
				// (position had reached 0x4000, i.e. four samples were consumed)
				if ( old_pos >= 0x4000 )
				{
					// Arrange the four input nybbles in 0xABCD order for easy decoding
					int nybbles = ram [(v->brr_addr + v->brr_offset) & 0xFFFF] * 0x100 +
							ram [(v->brr_addr + v->brr_offset + 1) & 0xFFFF];
					
					// Advance read position
					int const brr_block_size = 9;
					int brr_offset = v->brr_offset;
					if ( (brr_offset += 2) >= brr_block_size )
					{
						// Next BRR block
						int brr_addr = (v->brr_addr + brr_block_size) & 0xFFFF;
						assert( brr_offset == brr_block_size );
						if ( brr_header & 1 )
						{
							// End bit set: continue from directory entry 1 and flag ENDX
							brr_addr = SAMPLE_PTR( 1 );
							if ( !v->kon_delay )
								REG(endx) |= vbit;
						}
						v->brr_addr = brr_addr;
						brr_offset  = 1;
					}
					v->brr_offset = brr_offset;
					
					// Decode
					
					// 0: >>1  1: <<0  2: <<1 ... 12: <<11  13-15: >>4 <<11
					// First 16 entries are right shifts, last 16 the matching left shifts
					static unsigned char const shifts [16 * 2] = {
						13,12,12,12,12,12,12,12,12,12,12, 12, 12, 16, 16, 16,
						 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 11, 11, 11
					};
					int const scale = brr_header >> 4;
					int const right_shift = shifts [scale];
					int const left_shift  = shifts [scale + 16];
					
					// Write to next four samples in circular buffer
					int* pos = v->buf_pos;
					int* end;
					
					// Decode four samples
					for ( end = pos + 4; pos < end; pos++, nybbles <<= 4 )
					{
						// Extract upper nybble and scale appropriately
						int s = ((int16_t) nybbles >> right_shift) << left_shift;
						
						// Apply IIR filter (8 is the most commonly used)
						int const filter = brr_header & 0x0C;
						// Previous two samples, read from the mirrored second copy
						int const p1 = pos [brr_buf_size - 1];
						int const p2 = pos [brr_buf_size - 2] >> 1;
						if ( filter >= 8 )
						{
							s += p1;
							s -= p2;
							if ( filter == 8 ) // s += p1 * 0.953125 - p2 * 0.46875
							{
								s += p2 >> 4;
								s += (p1 * -3) >> 6;
							}
							else // s += p1 * 0.8984375 - p2 * 0.40625
							{
								s += (p1 * -13) >> 7;
								s += (p2 * 3) >> 4;
							}
						}
						else if ( filter ) // s += p1 * 0.46875
						{
							s += p1 >> 1;
							s += (-p1) >> 5;
						}
						
						// Adjust and write sample
						CLAMP16( s );
						s = (int16_t) (s * 2);
						pos [brr_buf_size] = pos [0] = s; // second copy simplifies wrap-around
					}
					
					if ( pos >= &v->buf [brr_buf_size] )
						pos = v->buf;
					v->buf_pos = pos;
				}
			}
skip_brr:
			// Next voice
			vbit <<= 1;
			v_regs += 0x10;
			v++;
		}
		while ( vbit < 0x100 );
		
		// Echo position
		// The echo length is only re-read from EDL when the offset wraps to zero
		int echo_offset = m.echo_offset;
		uint8_t* const echo_ptr = &ram [(REG(esa) * 0x100 + echo_offset) & 0xFFFF];
		if ( !echo_offset )
			m.echo_length = (REG(edl) & 0x0F) * 0x800;
		echo_offset += 4;
		if ( echo_offset >= m.echo_length )
			echo_offset = 0;
		m.echo_offset = echo_offset;
		
		// FIR
		int echo_in_l = GET_LE16SA( echo_ptr + 0 );
		int echo_in_r = GET_LE16SA( echo_ptr + 2 );
		
		int (*echo_hist_pos) [2] = m.echo_hist_pos;
		if ( ++echo_hist_pos >= &m.echo_hist [echo_hist_size] )
			echo_hist_pos = m.echo_hist;
		m.echo_hist_pos = echo_hist_pos;
		
		// Newest sample is stored twice, 8 entries apart, so the taps below can be
		// read without wrapping
		echo_hist_pos [0] [0] = echo_hist_pos [8] [0] = echo_in_l;
		echo_hist_pos [0] [1] = echo_hist_pos [8] [1] = echo_in_r;
		
		// Tap i multiplies by the signed FIR coefficient in register fir + i * 0x10
		#define CALC_FIR_( i, in )  ((in) * (int8_t) REG(fir + i * 0x10))
		echo_in_l = CALC_FIR_( 7, echo_in_l );
		echo_in_r = CALC_FIR_( 7, echo_in_r );
		
		#define CALC_FIR( i, ch )   CALC_FIR_( i, echo_hist_pos [i + 1] [ch] )
		#define DO_FIR( i )\
			echo_in_l += CALC_FIR( i, 0 );\
			echo_in_r += CALC_FIR( i, 1 );
		DO_FIR( 0 );
		DO_FIR( 1 );
		DO_FIR( 2 );
		#if defined (__MWERKS__) && __MWERKS__ < 0x3200
			__eieio(); // keeps compiler from stupidly "caching" things in memory
		#endif
		DO_FIR( 3 );
		DO_FIR( 4 );
		DO_FIR( 5 );
		DO_FIR( 6 );
		
		// Echo out
		// Skipped (echo buffer left unmodified) while FLG bit 0x20 is set
		if ( !(REG(flg) & 0x20) )
		{
			int l = (echo_out_l >> 7) + ((echo_in_l * (int8_t) REG(efb)) >> 14);
			int r = (echo_out_r >> 7) + ((echo_in_r * (int8_t) REG(efb)) >> 14);
			
			// just to help pass more validation tests
			#if SPC_MORE_ACCURACY
				l &= ~1;
				r &= ~1;
			#endif
			
			CLAMP16( l );
			CLAMP16( r );
			
			SET_LE16A( echo_ptr + 0, l );
			SET_LE16A( echo_ptr + 2, r );
		}
		
		// Sound out
		int l = (main_out_l * mvoll + echo_in_l * (int8_t) REG(evoll)) >> 14;
		int r = (main_out_r * mvolr + echo_in_r * (int8_t) REG(evolr)) >> 14;
		
		CLAMP16( l );
		CLAMP16( r );
		
		// FLG bit 0x40 silences the output; a sample is still written
		if ( (REG(flg) & 0x40) )
		{
			l = 0;
			r = 0;
		}
		
		sample_t* out = m.out;
		WRITE_SAMPLES( l, r, out );
		m.out = out;
	}
	while ( --count );
}
   1.630 +
   1.631 +
   1.632 +//// Setup
   1.633 +
   1.634 +void SPC_DSP::mute_voices( int mask )
   1.635 +{
   1.636 +	m.mute_mask = mask;
   1.637 +	for ( int i = 0; i < voice_count; i++ )
   1.638 +	{
   1.639 +		m.voices [i].enabled = (mask >> i & 1) - 1;
   1.640 +		update_voice_vol( i * 0x10 );
   1.641 +	}
   1.642 +}
   1.643 +
   1.644 +void SPC_DSP::init( void* ram_64k )
   1.645 +{
   1.646 +	m.ram = (uint8_t*) ram_64k;
   1.647 +	mute_voices( 0 );
   1.648 +	disable_surround( false );
   1.649 +	set_output( 0, 0 );
   1.650 +	reset();
   1.651 +	
   1.652 +	#ifndef NDEBUG
   1.653 +		// be sure this sign-extends
   1.654 +		assert( (int16_t) 0x8000 == -0x8000 );
   1.655 +		
   1.656 +		// be sure right shift preserves sign
   1.657 +		assert( (-1 >> 1) == -1 );
   1.658 +		
   1.659 +		// check clamp macro
   1.660 +		int i;
   1.661 +		i = +0x8000; CLAMP16( i ); assert( i == +0x7FFF );
   1.662 +		i = -0x8001; CLAMP16( i ); assert( i == -0x8000 );
   1.663 +		
   1.664 +		blargg_verify_byte_order();
   1.665 +	#endif
   1.666 +}
   1.667 +
   1.668 +void SPC_DSP::soft_reset_common()
   1.669 +{
   1.670 +	require( m.ram ); // init() must have been called already
   1.671 +	
   1.672 +	m.noise              = 0x4000;
   1.673 +	m.echo_hist_pos      = m.echo_hist;
   1.674 +	m.every_other_sample = 1;
   1.675 +	m.echo_offset        = 0;
   1.676 +	m.phase              = 0;
   1.677 +	
   1.678 +	init_counter();
   1.679 +}
   1.680 +
   1.681 +void SPC_DSP::soft_reset()
   1.682 +{
   1.683 +	REG(flg) = 0xE0;
   1.684 +	soft_reset_common();
   1.685 +}
   1.686 +
   1.687 +void SPC_DSP::load( uint8_t const regs [register_count] )
   1.688 +{
   1.689 +	memcpy( m.regs, regs, sizeof m.regs );
   1.690 +	memset( &m.regs [register_count], 0, offsetof (state_t,ram) - register_count );
   1.691 +	
   1.692 +	// Internal state
   1.693 +	int i;
   1.694 +	for ( i = voice_count; --i >= 0; )
   1.695 +	{
   1.696 +		voice_t& v = m.voices [i];
   1.697 +		v.brr_offset = 1;
   1.698 +		v.buf_pos    = v.buf;
   1.699 +	}
   1.700 +	m.new_kon = REG(kon);
   1.701 +	
   1.702 +	mute_voices( m.mute_mask );
   1.703 +	soft_reset_common();
   1.704 +}
   1.705 +
   1.706 +void SPC_DSP::reset() { load( initial_regs ); }