#include <stdio.h>
#include <string.h>
#include <stdlib.h>

#include "Constants.h"
#include "Timing.h"
#include "Cpu.h"
#include "Memory.h"
#include "Io.h"
#include "Log.h"
#include "DataBus.h"

#include "CpuUtilities.h"
#include "CpuNoPrefix.h"
#include "CpuEDPrefix.h"
#include "CpuCBPrefix.h"
#include "CpuDDAndFDPrefix.h"
#include "CpuDDCBAndFDCBPrefix.h"

// ================= STATE VARIABLES =================
// these must be preserved when saving and loading state

// non-zero while an interrupt request is pending
// set by cpu_request_interrupt(), cleared by _cpu_handle_interrupt()
// NOTE(review): presumably volatile because threads other than the video thread may raise
//     the request; volatile alone provides no atomicity or ordering guarantees - confirm
static volatile Uint8 _interruptRequested;

// threads other than the video thread (aka the thread which runs the CPU) must not change these
// note that loading/saving of state occurs only on the video thread for synchronization purposes
static Uint8 _isHalted;                     // non-zero while the CPU is halted
static enum interruptMode _interruptMode;   // selects the behaviour of _cpu_handle_interrupt()
static struct regs* _regs = NULL;           // allocated by cpu_start(), released by cpu_destroy()
// =============== END STATE VARIABLES ===============

// used for decode-only runs, like when running like a disassembler
// _savedPc holds the PC captured by cpu_begin_decode_only_run(), restored by cpu_end_decode_only_run()
static Uint16 _savedPc;
// bit flags; CPU_RUN_MODE_FLAG__EXECUTE is set by cpu_start() and cleared during decode-only runs
static Uint8 _runMode;

// set to 1 at the end of cpu_start(), and back to 0 by cpu_destroy()
static Uint8 _cpuStarted = 0;
// the following three are configuration values, copied from the arguments of cpu_start()
static Uint8 _logEveryInstruction = 0;
static Uint8 _generateMnemonics = 0;
static Uint8 _useDataBusIM2HandlerLSB = 0;

static Uint64 _totalTStates = 0;	// total elapsed tstates
static Uint64 _totalExecutedInstructions = 0;   // incremented once per instruction whose outcome is Executed

// progress of the cpu_run_many() call currently in flight:
// tstates consumed so far, and the number of tstates that call was asked to run for
static Uint64 _executedTStates;
static Uint64 _executionTStateTarget;

// scratch buffer used to format log lines
#define _DEBUG_BUFFER_SIZE 256
static char _debugBuffer[_DEBUG_BUFFER_SIZE];

// optional; when not NULL it is invoked after every instruction whose outcome is Executed
func_cpu_instruction_executed_callback* _cpuInstructionExecutedCallback = NULL;

// CANNOT be NULL
// cpu_start() and cpu_destroy() always leave a valid function here (a no-op one when the
// caller provides none); there is no initializer, so it IS NULL until one of them has run
func_cpu_on_tstates_elapsed_callback* _cpuTstatesElapsedCallback;

// placeholder mnemonic used while disassembled names are not being generated
static const char* __unresolvedInstructionMnemonic = "not resolved";
static const char* _emptyString = "";

// prevents consecutive duplicated log entries of unrecognized instructions
// holds the start PC of the most recently logged unrecognized instruction, or -1 when there is none
static Sint64 _lastUnrecognizedInstructionPC = -1;

// removes a huge number of mallocs, by taking advantage of the assumption that
// CPU execution is single-threaded
// this single instance is overwritten by every fetch
static struct instruction _cpu_current_instruction;

// Default "tstates elapsed" handler, which deliberately does nothing.
// It is installed whenever no real handler is available, so that the callback pointer
// can be invoked without a NULL check.
void _cpu_NOOP_on_tstates_elapsed_callback(Uint64 tstates) {
    // intentionally ignored
    (void)tstates;
}

struct regNames _regNames = { .A = "A", .F = "F", .B = "B", .C = "C", .D = "D", .E = "E", .H = "H", .L = "L", .AF = "AF", .BC = "BC", .DE = "DE", .HL = "HL", .IXL = "IXL", .IXH = "IXH",
    .IX = "IX", .IYL = "IYL", .IYH = "IYH", .IY = "IY", .R = "R", .I = "I", .IR = "IR", .SP = "SP", .PC = "PC", .IFF = "IFF", .A_alt = "A'", .B_alt = "B'", .F_alt = "F'", .C_alt = "C'", .D_alt = "D'",
    .E_alt = "E'", .H_alt = "H'", .L_alt = "L'" };

struct regNames _regNamesReference = { .A = "(A)", .F = "(F)", .B = "(B)", .C = "(C)", .D = "(D)", .E = "(E)", .H = "(H)", .L = "(L)", .AF = "(AF)", .BC = "(BC)", .DE = "(DE)", .HL = "(HL)", .IXL = "(IXL)", .IXH = "(IXH)",
    .IX = "(IX)", .IYL = "(IYL)", .IYH = "(IYH)", .IY = "(IY)", .R = "(R)", .I = "(I)", .IR = "(IR)", .SP = "(SP)", .PC = "(PC)", .IFF = "(IFF)", .A_alt = "(A')", .B_alt = "(B')", .F_alt = "(F')", .C_alt = "(C')", .D_alt = "(D')",
    .E_alt = "(E')", .H_alt = "(H')", .L_alt = "(L')" };

// Allocates a register file and puts it in its initial state.
//
// The 16-bit register pairs (AF, BC, DE, HL, IR, IX, IY and the alternate pairs) are not
// separate storage: each one is a pointer which aliases the pair's low-byte register.
// NOTE(review): this relies on a little endian host, and presumably on struct regs declaring
//     each high-byte register immediately after its low-byte register - confirm against
//     the struct regs declaration
//
// returns: the new register file, or NULL when the allocation fails
//          the caller owns the memory and releases it with free()
struct regs* _cpu_make_regs() {
    // zero-filled allocation, so that every register which is not explicitly initialized
    // below (BC, DE, HL, IX, IY, ...) starts from a known value rather than from whatever
    // happened to be on the heap
    struct regs* value = (struct regs*)calloc(1, sizeof(struct regs));
    if (value == NULL) {
        return NULL;
    }

    // x86 is little endian, so LSB must come first
    value->AF = (Uint16*)&value->F;
    value->BC = (Uint16*)&value->C;
    value->DE = (Uint16*)&value->E;
    value->HL = (Uint16*)&value->L;
    value->IR = (Uint16*)&value->R;
    value->IX = (Uint16*)&value->IXL;
    value->IY = (Uint16*)&value->IYL;

    value->AF_alt = (Uint16*)&value->F_alt;
    value->BC_alt = (Uint16*)&value->C_alt;
    value->DE_alt = (Uint16*)&value->E_alt;
    value->HL_alt = (Uint16*)&value->L_alt;

    // initial values of the main registers
    *value->AF = 0xFFFF;
    value->PC = 0;
    value->SP = 0xFFFF;
    value->IFF = 0;
    value->I = 0;
    value->R = 0;

    // initial values of the alternate register set
    *value->AF_alt = 0xFEDC;
    *value->BC_alt = 0xBA98;
    *value->DE_alt = 0x7654;
    *value->HL_alt = 0x3210;

    return value;
}

// Returns a human readable, statically allocated name for an instruction prefix.
// Values which are not a known prefix type yield "Unknown".
// The returned string must not be modified or freed.
const char* cpu_prefix_to_string(enum prefixType prefix) {
    const char* label = "Unknown";

    switch (prefix) {
    case NoPrefix:
        label = "None";
        break;
    case CB:
        label = "CB";
        break;
    case ED:
        label = "ED";
        break;
    case DD:
        label = "DD";
        break;
    case FD:
        label = "FD";
        break;
    case DDCB:
        label = "DDCB";
        break;
    case FDCB:
        label = "FDCB";
        break;
    default:
        break;
    }

    return label;
}

// (Re)starts the CPU: anything left over from a previous start is released first, then
// a fresh register file is allocated and the CPU is put in its initial state.
//
// logEveryInstruction            non-zero to log every instruction
// useDataBusIM2HandlerLSB        non-zero to take the low byte of the interrupt mode 2
//                                vector address from the data bus, rather than using 0xff
// onInstructionExecutedCallback  invoked after each executed instruction; may be NULL
// onTstatesElapsedCallback       invoked as tstates elapse; when NULL, a no-op is used instead
// generateMnemonics              non-zero to build disassembled instruction names
//
// NOTE(review): the result of _cpu_make_regs() is not checked, so an allocation failure
//     leaves the register file NULL while the CPU is still marked as started
void cpu_start(
    Uint8 logEveryInstruction, 
    Uint8 useDataBusIM2HandlerLSB, 
    func_cpu_instruction_executed_callback onInstructionExecutedCallback, 
    func_cpu_on_tstates_elapsed_callback onTstatesElapsedCallback, 
    Uint8 generateMnemonics
) {
    // start from a clean slate
    cpu_destroy();

    // configuration
    _logEveryInstruction = logEveryInstruction;
    _generateMnemonics = generateMnemonics;
    _useDataBusIM2HandlerLSB = useDataBusIM2HandlerLSB;

    // callbacks; the tstates callback must always be callable
    _cpuInstructionExecutedCallback = onInstructionExecutedCallback;
    if (onTstatesElapsedCallback != NULL) {
        _cpuTstatesElapsedCallback = onTstatesElapsedCallback;
    }
    else {
        _cpuTstatesElapsedCallback = _cpu_NOOP_on_tstates_elapsed_callback;
    }

    // until the first fetch, the current instruction has the placeholder name,
    // which is static and therefore must never be freed
    _cpu_current_instruction.dissassembledName = (char*)__unresolvedInstructionMnemonic;
    _cpu_current_instruction.disassembledNameReadonly = 1;

    // CPU state
    _regs = _cpu_make_regs();
    _runMode |= CPU_RUN_MODE_FLAG__EXECUTE;
    _interruptMode = Mode0;
    _interruptRequested = 0;
    _isHalted = 0;
    _totalTStates = 0;

    _cpuStarted = 1;
}

// Releases the register file, detaches the callbacks and marks the CPU as not started.
// Calling this when the CPU was never started, or more than once, is harmless.
void cpu_destroy() {
    // free(NULL) does nothing, so there is no need to check first
    free(_regs);
    _regs = NULL;

    _cpuStarted = 0;

    // the tstates callback must remain callable, so it falls back to the no-op
    _cpuTstatesElapsedCallback = _cpu_NOOP_on_tstates_elapsed_callback;
    _cpuInstructionExecutedCallback = NULL;
}

// Flags an interrupt request as pending.
// The request is picked up by the interrupt handling done at the start of cpu_run_many().
void cpu_request_interrupt() {
    _interruptRequested = 1;
}

Uint8 cpu_is_interrupt_requested() {
    return _interruptRequested;
}

// Selects the interrupt mode which is used from the next handled interrupt onwards.
void cpu_set_interrupt_mode(enum interruptMode mode) {
    _interruptMode = mode;
}

enum interruptMode cpu_get_interrupt_mode() {
    return _interruptMode;
}

// Returns the CPU's register file.
// The result is NULL before cpu_start() and after cpu_destroy().
// The register file remains owned by this module; callers must not free it.
inline struct regs* cpu_regs() {
    return _regs;
}

inline void _cpu_refresh_R_register() {
    // this is an area that had a long-standing bug until version 24
    // R register incrementing affected bit 7, which should not happen, because
    //     the Z80 CPU increments only bits 0-6
    // 
    // before the fix, the following types of issues were present
    // 
    // 1. if R is incremented often, per spec, after each M1 cycle:
    //    - Ping Pong's in-game graphics are corrupted, and gameplay timing is off
    // 2. if R is incremented rarely, (e.g. once every 256 frames):
    //    - Robin of the Wood's soldiers can disappear or be corrupted; Robin's hits make no sounds; menu sounds are bad
    //    - Sanxion's text fades in strips, and not randomly per-pixel
    // 3. if R is never incremented:
    //    - Defender of the Crown no longer animates soldiers during battle
    //
    // in version 24 the above were all fixed
    //
    // interestingly, many games rely on the R register for timing and randomness, affecting graphics,
    //     sounds, and even gameplay

    // non-prefixed instructions have one M1 cycle
    cpu_increment_R();
    if (_cpu_current_instruction.prefix.type != NoPrefix) {
        // prefixed instructions have at least two M1 cycles
        // however, even in the case of three M1 cycles (e.g. two-byte prefix such as DDCB), 
        //     R is incremented only twice
        cpu_increment_R();
    }
}

// fetches the next instruction into _cpu_current_instruction, filling in:
//     startPc, prefix.type, opcodeValue, tstatesElapsed, outcome (always DecodedOnly)
//     and, for the DDCB and FDCB prefixes only, displacementValue
//
// moves PC to immediately AFTER the opcode of the next instruction
// (either before data, or before the instruction after next)
//
// nothing is executed here; tstatesElapsed holds only the tstates counted by this function
//
// NOTE(review): this loop only ends once a byte which is not a DD or FD prefix is reached,
//     so memory consisting solely of such prefixes would keep it spinning - confirm that this
//     is not a practical concern
inline void _cpu_fetch_instruction() {
    enum prefixType prefix = NoPrefix;
    Uint16 tstates = 0;
    _cpu_current_instruction.startPc = _regs->PC;

    while (1) {
        if (prefix == NoPrefix) {
            // no prefix has been decoded so far, so the next byte may be the start of one
            Uint8 first = memory_read8(_regs->PC++);
            tstates += 4;	// M1 cycle

            // see if there's a prefix
            if (first == 0xDD) {
                Uint8 second = memory_read8(_regs->PC++);

                if (second == 0xCB) {
                    tstates += 4;	// M1 cycle
                    // DDCB prefix
                    // 
                    // CB and the opcode form instructions similar to the plain CB-prefixed ones.
                    // However, these operate on (IX + d) and, if the instruction isn't BIT, also copy the
                    // result to the register they would have initially acted upon, unless it was (HL).
                    //
                    // PC is left on the displacement byte
                    prefix = DDCB;
                    continue;
                }
                else if (second == 0xDD || second == 0xED || second == 0xFD) {
                    // a new prefix follows, so this one is a NOOP (though it still takes some tstates)
                    // (the 4 tstates counted above for this ignored prefix are kept)

                    // NOTE: we leave PC on this second opcode
                    // so the next iteration decodes it as if it were the first byte
                    _regs->PC--;
                    prefix = NoPrefix;
                    continue;
                }
                else {
                    // DD prefix
                    // 
                    // If the next opcode makes use of HL, H, L, but not (HL), any occurrence of these will be
                    // replaced by IX, IXH, IXL respectively. An exception to this is EX DE, HL which is unaffected.
                    // If the next opcode makes use of (HL), it will be replaced by (IX + d), where d is a signed
                    // 8-bit displacement immediately following the opcode (any immediate data, i.e. n, will
                    // follow the displacement byte), and any other instances of H and L will be unaffected.
                    // Therefore, an instruction like LD IXH, (IX + d) does not exist, but LD H, (IX + d) does.
                    // All other instructions are unaffected.
                    prefix = DD;
                    // NOTE: we leave PC on this second opcode
                    _regs->PC--;
                    continue;
                }
            }
            else if (first == 0xFD) {
                Uint8 second = memory_read8(_regs->PC++);

                if (second == 0xCB) {
                    // FDCB prefix
                    //
                    // like DDCB prefix, except they use IY and not IX
                    //
                    // PC is left on the displacement byte
                    tstates += 4;	// M1 cycle
                    prefix = FDCB;
                    continue;
                }
                else if (second == 0xDD || second == 0xED || second == 0xFD) {
                    // a new prefix follows, so this one is a NOOP (though it still takes some tstates)
                    // (the 4 tstates counted above for this ignored prefix are kept)

                    // NOTE: we leave PC on this second opcode
                    // so the next iteration decodes it as if it were the first byte
                    _regs->PC--;
                    prefix = NoPrefix;
                    continue;
                }
                else {
                    // FD prefix
                    // The FD prefix acts exactly like the DD prefix, but the IY register is used instead of IX.
                    // NOTE: we leave PC on this second opcode
                    _regs->PC--;
                    prefix = FD;
                    continue;
                }
            }
            else if (first == 0xED) {
                // ED prefix
                // PC is already on the opcode byte
                prefix = ED;
                continue;
            }
            else if (first == 0xCB) {
                // CB prefix
                // PC is already on the opcode byte
                prefix = CB;
                continue;
            }

            // this is not a prefix
            // undo the read; the byte and its 4 tstates are counted further below, as the opcode
            _regs->PC--;		// first byte was not the start of a prefix, so don't count it yet
            tstates -= 4;
            // flow into below, allowing the instruction opcode to be non-prefixed
        }

        // at this point we may or may not have a prefix
        // but we KNOW that the byte at PC is either:
        //     - a mandatory displacement byte, when prefix is DDCB or FDCB, or
        //     - an instruction opcode
        if (prefix == DDCB || prefix == FDCB) {
            // PC is on a displacement byte

            // unless we enter this branch, displacement value will be in an undefined state
            // this is acceptable because only those CPU opcodes which WILL cause this branch
            // to be entered (e.g. with DDCB or FDCB prefixes) care about displacement value
            _cpu_current_instruction.displacementValue = memory_read8(_regs->PC++);
            
            tstates += 4;	// M1 cycle
            // PC is now on the opcode byte
        }

        // here, the byte at PC is an instruction opcode
        Uint8 first = memory_read8(_regs->PC);
        tstates += 4;	// M1 cycle

        // publish the result of the fetch
        _cpu_current_instruction.opcodeValue = first;
        _cpu_current_instruction.tstatesElapsed = tstates;
        _cpu_current_instruction.prefix.type = prefix;
        _cpu_current_instruction.outcome = DecodedOnly;

        // move to immediately after instruction opcode
        _regs->PC++;

#ifndef _____DISABLE_DEBUG_FUNCTIONALITY
        if (timing_is_faster() || (!_logEveryInstruction && !_generateMnemonics)) {
            // we are NOT building disassembled names for instructions when either
            //     - during fast mode (such as loading tapes), or
            //     - we haven't been told to log every instruction (either by command line argument, or debugging switch)
            //       nor to generate mnemonics
            // setting the disassembled name as readonly here causes no further string building 
            // for the purposes of setting a disassembled name
            // (it also keeps cpu_destroy_instruction() from freeing the static placeholder)
            _cpu_current_instruction.disassembledNameReadonly = 1;
            _cpu_current_instruction.dissassembledName = (char*)__unresolvedInstructionMnemonic;
        }
        else {
            // we are building disassembled names normally
            // the name starts out as NULL
            _cpu_current_instruction.disassembledNameReadonly = 0;
            _cpu_current_instruction.dissassembledName = NULL;
        }
#endif

        return;
    }
}

// entry point for running the next CPU instruction
inline void cpu_run_next_instruction() {
    if (_isHalted) {
        _cpu_current_instruction.outcome = SkippedDueToHalt;
#ifndef _____DISABLE_DEBUG_FUNCTIONALITY
        _cpu_current_instruction.dissassembledName = (char*)_emptyString;
        _cpu_current_instruction.disassembledNameReadonly = 1;
#endif
    }

    _cpu_fetch_instruction();
    
    data_bus_write8(_cpu_current_instruction.opcodeValue);
    switch (_cpu_current_instruction.prefix.type) {
    case NoPrefix:
        cpu_run_no_prefix(&_cpu_current_instruction);
        break;
    case ED:
        cpu_run_ED_prefix(&_cpu_current_instruction);
        break;
    case CB:
        cpu_run_CB_prefix(&_cpu_current_instruction);
        break;
    case DD:
        cpu_run_DD_and_FD_prefix(&_cpu_current_instruction);
        break;
    case FD:
        cpu_run_DD_and_FD_prefix(&_cpu_current_instruction);
        break;
    case DDCB:
        cpu_run_DDCB_and_FDCB_prefix(&_cpu_current_instruction);
        break;
    case FDCB:
        cpu_run_DDCB_and_FDCB_prefix(&_cpu_current_instruction);
        break;
    default:
        _cpu_current_instruction.outcome = UnknownInstruction;
        break;
    }

    if (_cpu_current_instruction.outcome == Executed) {
        if (_cpuInstructionExecutedCallback != NULL) {
            _cpuInstructionExecutedCallback(&_cpu_current_instruction, _totalTStates);
        }
        
        _cpuTstatesElapsedCallback(_totalTStates);

        _totalExecutedInstructions++;

        _cpu_refresh_R_register();

    } else if (_cpu_current_instruction.outcome == UnknownInstruction) {
        if (_lastUnrecognizedInstructionPC == -1 || _lastUnrecognizedInstructionPC != _cpu_current_instruction.startPc) {
            // this is either the first unrecognized instruction we're seeing,
            // or the immediately previous unrecognized instruction was not this one
            // ... so we log it
            _lastUnrecognizedInstructionPC = _cpu_current_instruction.startPc;

            sprintf_s(_debugBuffer, _DEBUG_BUFFER_SIZE - 10, "%04x: Unrecognized instruction with prefix: %s   opcode: %02x", 
                _cpu_current_instruction.startPc,
                _cpu_current_instruction.prefix.humanReadable,
                _cpu_current_instruction.opcodeValue);
            log_write(_debugBuffer);

            char* message = (char*)cpu_make_regs_dump_string__static_mem_ptr();
            log_write(message);
        }
    }

#ifndef _____DISABLE_DEBUG_FUNCTIONALITY
    if (_logEveryInstruction && !timing_is_faster()) {
        sprintf_s(_debugBuffer, _DEBUG_BUFFER_SIZE - 10, "%04x\t%s\n", _cpu_current_instruction.startPc, _cpu_current_instruction.dissassembledName);
        log_write(_debugBuffer);

        char* message = (char*)cpu_make_regs_dump_string__static_mem_ptr();
        log_write(message);
    }
#endif
}

void cpu_begin_decode_only_run() {
    _savedPc = _regs->PC;
    _runMode &= ~CPU_RUN_MODE_FLAG__EXECUTE;
}

void cpu_end_decode_only_run() {
    _regs->PC = _savedPc;
    _runMode |= CPU_RUN_MODE_FLAG__EXECUTE;
}

// Frees the disassembled name of the current instruction, when there is one to free.
// A name which is marked readonly is left alone; within this file, readonly names are
// always static strings.
inline void cpu_destroy_instruction() {
    if (_cpu_current_instruction.disassembledNameReadonly) {
        // never free a readonly name
        return;
    }
    if (_cpu_current_instruction.dissassembledName == NULL) {
        // no name was ever built
        return;
    }

    free(_cpu_current_instruction.dissassembledName);
    _cpu_current_instruction.dissassembledName = NULL;
}

void _cpu_handle_interrupt() {
    Uint16 handlerPtrPtr, handlerPtr;

    if (!_interruptRequested) {
        return;
    }
    if (!cpu_regs()->IFF) {
        // ignore interrupt requests while interrupts are disabled
        _interruptRequested = 0;
        return;
    }

    // an interrupt has occurred
    switch (_interruptMode) {
    case Mode0:
        // unsupported... ZX Spectrum data bus is unreliable when the CPU handles an interrupt
        break;
    case Mode1:
        cpu_push_PC();
        cpu_regs()->PC = 0x38;
        // per z80 manual, clear IFF1 and IFF2 when the CPU handles a maskable interrupt
        cpu_regs()->IFF = 0;
        break;
    case Mode2:
        handlerPtrPtr = ((Uint16)cpu_regs()->I) << 8;
        if (_useDataBusIM2HandlerLSB) {
            handlerPtrPtr |= data_bus_read8();
        } else {
            handlerPtrPtr |= 0xff;
        }
        handlerPtr = memory_read16(handlerPtrPtr);

        cpu_push_PC();
        cpu_regs()->PC = handlerPtr;
        // per z80 manual, clear IFF1 and IFF2 when the CPU handles a maskable interrupt
        cpu_regs()->IFF = 0;
        break;
    }

    _interruptRequested = 0;
    _isHalted = 0;
}

// this is used by string instructions to know when they can exit
//
// currentTStatesConsumedByInstruction  tstates the calling instruction has used so far
//
// returns 1 while the tstates executed by the current cpu_run_many() call, plus those of
// the calling instruction, are still below the target of that call
// returns 0 once the string operation has consumed all of the remaining tstate target,
// meaning that it cannot continue
Uint8 cpu_can_continue_string_operation(Uint32 currentTStatesConsumedByInstruction) {
    const Uint64 projectedTStates = _executedTStates + currentTStatesConsumedByInstruction;

    return (projectedTStates < _executionTStateTarget) ? 1 : 0;
}

// Uses up whatever is left of the current tstate target while the CPU is halted.
// No instruction runs, but the tstates elapsed callback is still invoked as the
// tstates go by.
void _cpu_elapse_halted_tstates() {
    Uint64 remainingTstates;
    Uint64 stepsOfFour;
    Uint64 leftover;
    Uint64 step;

    if (_executedTStates >= _executionTStateTarget) {
        // target already reached
        return;
    }

    remainingTstates = _executionTStateTarget - _executedTStates;
    _executedTStates += remainingTstates;

    stepsOfFour = remainingTstates / 4;
    leftover = remainingTstates % 4;

    // PERFORMANCE: invoking this callback is expensive, but needed - so that
    //              the sound module reads speaker samples at a steady rate
    //              even when the CPU is really halted
    //
    //              since the minimum amount of tstates a Z80 instruction takes is 4,
    //              tstates are elapsed in steps of four rather than one by one, which
    //              reduces the amount of callback invocations
    for (step = 0; step < stepsOfFour; step++) {
        // simulate the tstates as having elapsed naturally
        _totalTStates += 4;
        _cpuTstatesElapsedCallback(_totalTStates);
    }

    // handle the remainder, which is always less than four tstates
    if (leftover != 0) {
        _totalTStates += leftover;
        _cpuTstatesElapsedCallback(_totalTStates);
    }
}

// Runs the CPU for (at least) the specified number of tstates.
//
// A pending interrupt is serviced first. Instructions are then run until the number of
// executed tstates reaches or passes the target; since instructions are never cut short,
// the target can be overshot. If the CPU is, or becomes, halted, the remainder of the target
// is elapsed without running any instructions.
//
// Only instructions whose outcome is Executed count towards the target and towards the
// total elapsed tstates.
//
// tstateTarget  number of tstates to run for
// returns       number of tstates which have actually elapsed during this call
Uint64 cpu_run_many(Uint64 tstateTarget) {
    _executedTStates = 0;
    _executionTStateTarget = tstateTarget;

    // this can take the CPU out of the halted state
    _cpu_handle_interrupt();

    // HISTORY: a check identical to the "if (_isHalted)" at the top of the loop below used to
    // be performed right here as well. It is likely useless, given that the loop starts with
    // that exact check. There is a vague recollection of it having been added because something
    // was breaking, but that was probably a long time ago, when this function was more
    // complicated. It is mentioned here just in case restoring it fixes some weird issue in
    // the future.

    // run instructions until we reach or pass the target
    while (_executedTStates < _executionTStateTarget) {
        if (_isHalted) {
            // we are either halted from the start, or have become halted while burning
            // through our tstate allocation
            // we must elapse tstates as if that real time had actually passed
            _cpu_elapse_halted_tstates();
            return _executedTStates;
        }

        cpu_run_next_instruction();
        if (_cpu_current_instruction.outcome == Executed) {
            // instruction has executed successfully

            _executedTStates += _cpu_current_instruction.tstatesElapsed;
            _totalTStates += _cpu_current_instruction.tstatesElapsed;
        }

#ifndef _____DISABLE_DEBUG_FUNCTIONALITY
        // no cleanup is necessary when debug functionality is disabled
        // because no disassembled names are ever malloc'd
        //
        // cpu_destroy_instruction() takes no parameters; it always operates on the
        // current instruction
        cpu_destroy_instruction();
#endif
        
    }

    return _executedTStates;
}

Uint8 cpu_mode() {
    return _runMode;
}

// Puts the CPU in the halted state.
// Within this file, only the handling of an interrupt takes the CPU back out of it.
void cpu_halt() {
    _isHalted = 1;
}

Uint8 cpu_is_halted() {
    return _isHalted;
}

// Restores those parts of the CPU state which are private to this module, after a
// saved state has been loaded.
//
// isHalted              non-zero when the CPU was halted at the time of the save
// isInterruptRequested  non-zero when an interrupt request was pending at the time of the save
void cpu_prepare_after_state_load(Uint8 isHalted, Uint8 isInterruptRequested) {
    _interruptRequested = isInterruptRequested;
    _isHalted = isHalted;
}
