I use the flash ROM chip select for each byte, though I'm not sure that would be a problem. Are you providing a 25-ms delay after the sector erase and the 20-µs delay after writing each byte?
Code:
/******************************************************************************
* flash() -- program 64K RAM image onto 64K 'A' or 'B' half of the 128K
* Flash ROM. Takes no arguments and returns nothing.
*
* Works on the global 'addr', starting at 0x0000. Each pass of the loop:
*   1. If 'addr' is on a 4K boundary ((addr & 0x0FFF) == 0), issues the
*      six-write sector erase sequence (last write is 0x30 to 'addr'),
*      waits 25 ms, then emits '.' through putSer().
*   2. Issues the three-write byte-program 'unlock' sequence, writes the
*      value returned by rdMEM(ram) to 'addr' via wrROM(), and waits 20 us.
* The loop ends when 'addr' is 0 at the bottom of a pass.
*
* NOTE(review): 'addr' is never incremented inside this function, so the
* roll-over to 0 presumably relies on wrROM() or rdMEM() advancing it --
* confirm against those helpers. If neither does, 'addr' is still 0 at the
* loop test and only one byte is programmed.
* NOTE(review): nothing here selects the 'A' or 'B' half; presumably that
* is set up before flash() is called -- confirm against the caller.
* NOTE(review): the 0x5555/0x2AAA command addresses and the 25 ms / 20 us
* waits look like a JEDEC-style software command set; verify the delays
* against the datasheet of the actual Flash part.
*/
void flash() // ****************************************
{ addr = 0x0000; // start address $0000 ('addr' is not local to flash) *
do // one pass per byte programmed *
{ if((addr & 0x0FFF) == 0) // low 12 bits clear: at a 4K sector boundary *
{ wrROM(0x5555,0xAA); // erase sector sequence, write 1 of 6 *
wrROM(0x2AAA,0x55); // " write 2 of 6 *
wrROM(0x5555,0x80); // " write 3 of 6 *
wrROM(0x5555,0xAA); // " write 4 of 6 *
wrROM(0x2AAA,0x55); // " write 5 of 6 *
wrROM(addr,0x30); // " write 6 of 6: 0x30 goes to the sector's own address *
_delay_ms(25); // required delay after the erase, before programming *
putSer('.'); // indicate progress: one '.' per 4K sector *
} // *
wrROM(0x5555,0xAA); // write byte 'unlock' sequence, write 1 of 3 *
wrROM(0x2AAA,0x55); // " write 2 of 3 *
wrROM(0x5555,0xA0); // " write 3 of 3 *
wrROM(addr,rdMEM(ram)); // write byte: value from rdMEM(ram) goes to 'addr' *
_delay_us(20); // required write delay before the next command *
} while(addr); // until roll-over to 0 (full 64K image); see NOTE(review) above *
} // ****************************************