Mandelbrot Generator
Posté le 15/06/2019 10:38
Hello allô
Default 1x zoom takes 7sec
Max zoom takes around 5-10min
It has a max zoom of 2^50: over one Quadrillion!
Going over 2^48 can be rather buggy
This is because the coordinates are stored in 8-byte double variables, which have limited precision
Attached file is both SH4 and SH3 compatible:
MANDEL.G1A
This needs the 'MonochromeLib' library; the required code is now included with it
Controls
[-] Zoom out
[+] Zoom in
[F1] Hide/show HUD which contains Cords, Zoom level and Max Iterations. (Heads Up Display)
[F2] Changes colours of camera rectangle: Black, White & Inverted
[AC] Resets screen back to default state
[EXE] Draw set
[EXIT] Stop drawing the Mandelbrot (If it's taking too long)
[MENU] Return to the menu screen
[REPLAY] Move camera rectangle around (Arrow Keys: [LEFT], [RIGHT], [UP], [DOWN])
How can I optimize this code to run faster or zoom in further?
#include "fxlib.h"
#include "stdio.h"
#define TRUE 1
#define FALSE 0
// Calls the machine-code stub stored in sc0135 as a function returning char* (used as the VRAM address).
#define ML_vram_adress (*(sc_cpv)sc0135)
// Drawing modes understood by the ML_* drawing functions.
typedef enum { ML_TRANSPARENT = -1, ML_WHITE, ML_BLACK, ML_XOR, ML_CHECKER } ML_Color;
typedef char* (*sc_cpv)(void);
// Raw instruction words executed through ML_vram_adress.
// NOTE(review): presumably a syscall trampoline for syscall number 0x0135 - confirm.
const unsigned int sc0135[] = { 0xD201D002, 0x422B0009, 0x80010070, 0x0135 };
unsigned int key; //pause until key press
int kcode1, kcode2; //row & col keycode for Bkey_GetKeyWait()
// Dummy output slot for the last argument of Bkey_GetKeyWait().
// It must be 2 bytes wide: the later version of this program passes a `short`
// for the same argument, and a 1-byte `char` lets the call write past the variable.
short unused;
unsigned short dispX, dispY; //cords on display when drawing mandelbrot
// Zeroes the whole 1024-byte VRAM buffer (64 rows of 16 bytes).
// Bytes are cleared one at a time up to the first 4-byte boundary, the bulk is
// cleared as 32-bit words, and any leftover bytes are cleared one at a time.
void ML_clear_vram() {
    int i, head, words, tail, *pointer_long, vram;
    char *pointer_byte;

    vram = (int)ML_vram_adress();
    // Bytes needed to reach 4-byte alignment (0 when already aligned).
    head = (4 - (vram & 3)) & 3;
    // The word count is derived from what is left after the head. The previous
    // fixed count of 255 words left the last 4 bytes uncleared for an aligned buffer.
    words = (1024 - head) >> 2;
    tail = (1024 - head) & 3;

    pointer_byte = (char *)vram;
    for (i = 0; i < head; i++)
        pointer_byte[i] = 0;

    pointer_long = (int *)(vram + head);
    for (i = 0; i < words; i++)
        pointer_long[i] = 0;

    pointer_byte += head + (words << 2);
    for (i = 0; i < tail; i++)
        pointer_byte[i] = 0;
}
// Copies the whole VRAM buffer (64 rows of 16 bytes) to the LCD controller.
// The controller is driven through two memory-mapped byte registers: a register
// selector at 0xB4000000 and a data register at 0xB4010000.
void ML_display_vram() {
    // volatile: every store is a hardware access and must not be merged,
    // reordered or dropped by the compiler.
    volatile char *LCD_register_selector = (volatile char *)0xB4000000;
    volatile char *LCD_data_register = (volatile char *)0xB4010000;
    char *vram = ML_vram_adress();
    int row, col;

    for (row = 0; row < 64; row++) {
        // NOTE(review): register 4 presumably takes the row address (row | 192)
        // and then the column start (0); register 7 is the pixel data port - confirm.
        *LCD_register_selector = 4;
        *LCD_data_register = row | 192;
        *LCD_register_selector = 4;
        *LCD_data_register = 0;
        *LCD_register_selector = 7;
        for (col = 0; col < 16; col++)
            *LCD_data_register = *vram++;
    }
}
// Sends a single 16-byte VRAM row to the LCD; faster than ML_display_vram(),
// which transfers all 64 rows.
// row: screen row, 0..63. Out-of-range rows are ignored so the function never
// reads past the end of VRAM.
void ML_display_vram_row(int row) {
    // volatile: every store is a hardware access and must not be optimized away.
    volatile char *LCD_register_selector = (volatile char *)0xB4000000;
    volatile char *LCD_data_register = (volatile char *)0xB4010000;
    char *vram;
    int col;

    if (row & ~63)
        return;
    vram = (row << 4) + ML_vram_adress();
    *LCD_register_selector = 4;
    *LCD_data_register = row | 192;
    *LCD_register_selector = 4;
    *LCD_data_register = 0;
    *LCD_register_selector = 7;
    for (col = 0; col < 16; col++)
        *LCD_data_register = *vram++;
}
// Draws a horizontal line on row y from column x1 to column x2 (inclusive, either order) into VRAM.
// A line entirely outside the 128x64 area is ignored; a partially visible one is clipped.
// VRAM is addressed as 16 bytes per row, with bit 7 of each byte being the leftmost pixel.
// color selects the operation: ML_BLACK sets bits, ML_WHITE clears them, ML_XOR inverts them,
// ML_CHECKER writes an alternating pattern; any other value (e.g. ML_TRANSPARENT) draws nothing.
void ML_horizontal_line(int y, int x1, int x2, ML_Color color) {
int i;
char checker;
char* vram = ML_vram_adress();
// Reject rows outside 0..63 and lines lying completely left or right of the screen.
if (y & ~63 || (x1 < 0 && x2 < 0) || (x1 > 127 && x2 > 127))
return;
// Order the end points so that x1 <= x2.
if (x1 > x2) {
i = x1;
x1 = x2;
x2 = i;
}
// Clip to the visible columns.
if (x1 < 0)
x1 = 0;
if (x2 > 127)
x2 = 127;
// In every mode: when the line spans several bytes, the first and last bytes are
// patched with partial masks and the bytes in between are written whole; otherwise
// a single mask covering bits x1..x2 of one byte is applied.
switch (color) {
case ML_BLACK:
if (x1 >> 3 != x2 >> 3) {
vram[(y << 4) + (x1 >> 3)] |= 255 >> (x1 & 7);
vram[(y << 4) + (x2 >> 3)] |= 255 << 7 - (x2 & 7);
for (i = (x1 >> 3) + 1; i < x2 >> 3; i++)
vram[(y << 4) + i] = 255;
} else
vram[(y << 4) + (x1 >> 3)] |= (255 >> (x1 % 8 + 7 - x2 % 8)) << (7 - (x2 & 7));
break;
case ML_WHITE:
// The AND masks keep the bits left of x1 and right of x2 and clear the rest.
if (x1 >> 3 != x2 >> 3) {
vram[(y << 4) + (x1 >> 3)] &= 255 << 8 - (x1 & 7);
vram[(y << 4) + (x2 >> 3)] &= 255 >> 1 + (x2 & 7);
for (i = (x1 >> 3) + 1; i < x2 >> 3; i++)
vram[(y << 4) + i] = 0;
} else
vram[(y << 4) + (x1 >> 3)] &= (255 << 8 - (x1 & 7)) | (255 >> 1 + (x2 & 7));
break;
case ML_XOR:
if (x1 >> 3 != x2 >> 3) {
vram[(y << 4) + (x1 >> 3)] ^= 255 >> (x1 & 7);
vram[(y << 4) + (x2 >> 3)] ^= 255 << 7 - (x2 & 7);
for (i = (x1 >> 3) + 1; i < (x2 >> 3); i++)
vram[(y << 4) + i] ^= 255;
} else
vram[(y << 4) + (x1 >> 3)] ^= (255 >> ((x1 & 7) + 7 - (x2 & 7))) << (7 - (x2 & 7));
break;
case ML_CHECKER:
// Pattern byte: 01010101 on odd rows, 10101010 on even rows.
checker = (y & 1 ? 85 : 170);
// The covered bits are first cleared (as in ML_WHITE), then the pattern is OR-ed in.
if (x1 >> 3 != x2 >> 3) {
vram[(y << 4) + (x1 >> 3)] &= 255 << 8 - (x1 & 7);
vram[(y << 4) + (x2 >> 3)] &= 255 >> 1 + (x2 & 7);
vram[(y << 4) + (x1 >> 3)] |= checker & 255 >> (x1 & 7);
vram[(y << 4) + (x2 >> 3)] |= checker & 255 << 7 - (x2 & 7);
for (i = (x1 >> 3) + 1; i < x2 >> 3; i++)
vram[(y << 4) + i] = checker;
} else {
vram[(y << 4) + (x1 >> 3)] &= (255 << 8 - (x1 & 7)) | (255 >> 1 + (x2 & 7));
vram[(y << 4) + (x1 >> 3)] |= checker & (255 >> (x1 % 8 + 7 - x2 % 8)) << (7 - (x2 & 7));
}
break;
}
}
// Draws a vertical line in column x from row y1 to row y2 (inclusive, either order) into VRAM.
// Lines fully outside the 128x64 area are ignored; partially visible ones are clipped.
// color: ML_BLACK sets, ML_WHITE clears, ML_XOR inverts, ML_CHECKER alternates pixels;
// any other value draws nothing.
void ML_vertical_line(int x, int y1, int y2, ML_Color color) {
    char *vram = ML_vram_adress();
    char bit, skip;
    int pos, last;

    if ((x & ~127) != 0)
        return;
    if (y1 < 0 && y2 < 0)
        return;
    if (y1 > 63 && y2 > 63)
        return;

    // Order the end points, then clip them to the visible rows.
    if (y2 < y1) {
        int swap = y2;
        y2 = y1;
        y1 = swap;
    }
    if (y1 < 0)
        y1 = 0;
    if (y2 > 63)
        y2 = 63;

    // Byte offsets of the first and last affected bytes; consecutive rows are 16 bytes apart.
    pos = (x >> 3) + (y1 << 4);
    last = (x >> 3) + (y2 << 4);
    // Bit 7 is the leftmost pixel of a byte.
    bit = 128 >> (x & 7);

    if (color == ML_BLACK) {
        while (pos <= last) {
            vram[pos] |= bit;
            pos += 16;
        }
    } else if (color == ML_WHITE) {
        bit = ~bit;
        while (pos <= last) {
            vram[pos] &= bit;
            pos += 16;
        }
    } else if (color == ML_XOR) {
        while (pos <= last) {
            vram[pos] ^= bit;
            pos += 16;
        }
    } else if (color == ML_CHECKER) {
        // Pixels where x and y have different parity are cleared, the others are set.
        skip = (y1 ^ x) & 1;
        while (pos <= last) {
            if (skip)
                vram[pos] &= ~bit;
            else
                vram[pos] |= bit;
            skip = !skip;
            pos += 16;
        }
    }
}
// Plots one pixel at (x, y) in VRAM; coordinates outside 0..127 / 0..63 are ignored.
// color: ML_BLACK sets the bit, ML_WHITE clears it, ML_XOR inverts it, ML_CHECKER
// clears it when x and y have different parity and sets it otherwise;
// any other value leaves VRAM untouched.
void ML_pixel(int x, int y, ML_Color color) {
    char *cell = ML_vram_adress();
    int mask;

    if ((x & ~127) || (y & ~63))
        return;

    // 16 bytes per row, bit 7 is the leftmost pixel of a byte.
    cell += (y << 4) + (x >> 3);
    mask = 128 >> (x & 7);

    if (color == ML_CHECKER)
        color = ((x ^ y) & 1) ? ML_WHITE : ML_BLACK;

    if (color == ML_BLACK)
        *cell |= mask;
    else if (color == ML_WHITE)
        *cell &= ~mask;
    else if (color == ML_XOR)
        *cell ^= mask;
}
// Returns n / x^p, computed by repeated division (p > 0) or repeated
// multiplication (p < 0); p == 0 returns n unchanged.
// Used instead of integer shifts for scale factors beyond the 32-bit int range.
double divByPow(double n, double x, int p) {
    while (p < 0) {
        n *= x;
        p++;
    }
    while (p > 0) {
        n /= x;
        p--;
    }
    return n;
}
// Timer callback: aborts the drawing loops when [EXIT] or [MENU] is pressed.
// It does so by pushing the global loop counters dispX/dispY to their end values.
void stop(void) {
    if (!Bkey_GetKeyWait(&kcode1, &kcode2, 1, 0, 1, &unused))
        return;
    if (kcode1 != 4)
        return;
    if (kcode2 != 8 && kcode2 != 9)
        return;
    dispX = 128; //Very hacky stop function
    dispY = 64;
}
// Add-in entry point: renders the Mandelbrot set for the current view, then lets the
// user move/zoom a selection rectangle over the saved picture until [EXE] starts a new render.
// Never returns in practice (outer loop is infinite).
int AddIn_main(int isAppli, unsigned short OptionNum) { //Main function
    unsigned int graphZoom = 1; //zoom level for graph
    char screenZoom; //zoom level on screen (rectangle)
    int screenX1, screenX2; //corner X cords for drawing rectangle to screen
    int screenY1, screenY2; //corner Y cords for drawing rectangle to screen
    // Text buffer for the HUD lines. It was one byte long, so every sprintf()
    // below wrote past it; 32 bytes holds the longest HUD line with margin.
    unsigned char string[32];
    char HUD = TRUE; //Heads Up Display: Cords, Zoom level & Max iteration: toggle with [F1]
    char colour = ML_XOR; //Colour of rectangle: Black, White or Inverted
    int screenX, screenY; //offset cords on screen from 0,0 for rectangle
    double graphX = 0, graphY = 0; //cords on graph - where to center mandelbrot
    double graphMove; //amount graphX & Y changes by when moving rectangle around
    int screenMove; //amount screenX & Y changes by when moving rectangle around with arrow keys
    // Accumulates 8 pixels before they are stored as one VRAM byte. Unsigned so
    // that the repeated left shift never shifts a negative value.
    unsigned short tempPixel = 0;
    register double zr, zi; //zr is real, zi imaginary
    register double zr2, zi2; //zr2 = zr^2, zi2 = zi^2
    register double x1 = -2.0; //bounding box cords on graph
    register double x2 = 2.0; //bounding box cords on graph
    register double y1 = -1.0; //bounding box cords on graph
    register double y2 = 1.0; //bounding box cords on graph
    register double x, y; //pixel cords on graph tested if in set
    register double xIsz, yIsz; //amount x/y increases by when ploting graph
    register unsigned short iMax = 32; //max iterations
    register unsigned short i; //iterations
    while (TRUE) {
        register char* vram = ML_vram_adress();
        SetTimer(1, 200, stop); //stop() polls [EXIT]/[MENU] while drawing
        ML_clear_vram();
        ML_display_vram();
        xIsz = (x2 - x1) / 128;
        yIsz = (y2 - y1) / 64;
        y = y1;
        for (dispY = 0; dispY < 64; dispY++) {
            x = x1;
            y += yIsz;
            for (dispX = 0; dispX < 128; dispX++) {
                zr = x;
                zi = y;
                for (i = 0; i < iMax; i++) {
                    zr2 = zr * zr;
                    zi2 = zi * zi;
                    if (zr2 + zi2 > 4)
                        break;
                    zi = zr * zi;
                    zi += zi + y;
                    zr = zr2 - zi2 + x;
                }
                //a pixel is black when the point never escaped within iMax iterations
                tempPixel = (tempPixel << 1) | (i == iMax);
                if ((dispX & 7) == 7)
                    *vram++ = tempPixel;
                x += xIsz;
            }
            // stop() may have set dispY to 64 to abort; row 64 does not exist.
            if (dispY < 64)
                ML_display_vram_row(dispY);
        }
        SaveDisp(1);
        KillTimer(1);
        screenX = 0;
        screenY = 0;
        screenZoom = 1;
        Bkey_GetKeyWait(&kcode1, &kcode2, 2, 1, 1, &unused);
        do {
            GetKey(&key);
            screenMove = screenZoom > 4 ? 1 : divByPow(16, 2, screenZoom);
            graphMove = screenZoom > 4 ? divByPow(1, 2, graphZoom - (double)screenZoom) : divByPow(16, 2, graphZoom);
            switch (key) {
            case KEY_CHAR_PLUS:
                if (graphZoom < 51) {
                    graphZoom++;
                    screenZoom++;
                }
                break;
            case KEY_CHAR_MINUS:
                if (graphZoom) {
                    graphZoom--;
                    screenZoom--;
                }
                break;
            case KEY_CTRL_UP:
                screenY -= screenMove;
                graphY -= graphMove;
                break;
            case KEY_CTRL_DOWN:
                screenY += screenMove;
                graphY += graphMove;
                break;
            case KEY_CTRL_LEFT:
                screenX -= screenMove;
                graphX -= graphMove;
                break;
            case KEY_CTRL_RIGHT:
                screenX += screenMove;
                graphX += graphMove;
                break;
            case KEY_CTRL_F1:
                HUD = !HUD;
                break;
            case KEY_CTRL_F2:
                if (colour)
                    colour--;
                else
                    colour = ML_XOR;
                break;
            case KEY_CTRL_F3:
                //Gray scale, by refreshing screen multiple times per sec at different max iterations (iMax)
                break;
            case KEY_CTRL_AC:
                graphZoom = 1;
                graphX = 0;
                graphY = 0;
                screenZoom = 1;
                screenX = 0;
                screenY = 0;
                key = KEY_CTRL_EXE; //redraw immediately with the default view
                break;
            }
            RestoreDisp(1);
            iMax = 8 * (graphZoom + 3);
            if (screenZoom < 8) {
                screenX1 = 65 - divByPow(128, 2, screenZoom) + screenX;
                screenX2 = 62 + divByPow(128, 2, screenZoom) + screenX;
                screenY1 = 32 - (screenZoom > 6 ? 1 : divByPow(64, 2, screenZoom)) + screenY;
                screenY2 = 31 + (screenZoom > 6 ? 0 : divByPow(64, 2, screenZoom)) + screenY;
                ML_horizontal_line(screenY1, screenX1, screenX2, colour);
                ML_horizontal_line(screenY2, screenX1, screenX2, colour);
                ML_vertical_line(screenX1 - 1, screenY1, screenY2, colour);
                ML_vertical_line(screenX2 + 1, screenY1, screenY2, colour);
            } else
                ML_pixel(screenX + 64, screenY + 31, colour);
            x1 = divByPow(-4, 2, graphZoom) + (0.03125 * graphX);
            x2 = divByPow(4, 2, graphZoom) + (0.03125 * graphX);
            y1 = divByPow(-2, 2, graphZoom) + (0.03125 * graphY);
            y2 = divByPow(2, 2, graphZoom) + (0.03125 * graphY);
            if (HUD == TRUE) {
                sprintf((char *)string, "X1:%f", x1);
                PrintMini(0, 0, string, 0);
                sprintf((char *)string, "Y1:%f", y1);
                PrintMini(0, 6, string, 0);
                sprintf((char *)string, "X2:%f", x2);
                PrintMini(81, 53, string, 0);
                sprintf((char *)string, "Y2:%f", y2);
                PrintMini(81, 59, string, 0);
                sprintf((char *)string, "MaxI:%u", (unsigned int)iMax);
                PrintMini(0, 53, string, 0);
                if (graphZoom > 32)
                    sprintf((char *)string, "Zoom:2^%ux", graphZoom - 1);
                else
                    // Converted to unsigned: at graphZoom == 32 the factor is 2^31,
                    // which does not fit in a signed int.
                    sprintf((char *)string, "Zoom:%ux", (unsigned int)divByPow(1, 2, -graphZoom + 1));
                PrintMini(0, 59, string, 0);
            }
            ML_display_vram();
        } while (key != KEY_CTRL_EXE);
    }
    return 0;
}
#pragma section _BR_Size
unsigned long BR_Size;
#pragma section
#pragma section _TOP
// Start-up hook placed in the _TOP section: forwards both launch arguments to
// INIT_ADDIN_APPLICATION and returns its result.
int InitializeSystem(int isAppli, unsigned short OptionNum) {
    int status = INIT_ADDIN_APPLICATION(isAppli, OptionNum);
    return status;
}
#pragma section
Fichier joint
Citer : Posté le 21/01/2020 18:39 | #
You should use SetTimer() to start the timer then KillTimer() to stop it. You can find both of them in the fxlib documentation or here in French.
Citer : Posté le 22/01/2020 08:43 | # | Fichier joint
I already knew how to use SetTimer(), just I didn't know how to send a signal to the ASM code (I figured it out by using pointers)
You can change the Max Iterations via [F3] now
I've added greyscale! (push [F4])
But it doesn't quite work as well as I hoped it would
There's a lot more flicker than there is grey
Also, there are heaps of v-sync lines
Is there any way to control the refresh and latency times of the screen itself?
#include "stdio.h"
#define TRUE 1
#define FALSE 0
// Calls the machine-code stub stored in sc0135 as a function returning char* (used as the VRAM address).
#define ML_vram_adress (*(sc_cpv)sc0135)
// Number of entries in the per-pixel buffer below.
// NOTE(review): a full 128x64 screen needs 8192 entries; 8180 presumably keeps the
// globals under a size limit of the build tools - confirm. Code indexing this buffer
// must therefore stay below 8180.
#define MATLENGTH 8180
// One byte per pixel; thresholded against an iteration level when displayed.
unsigned char matrix[MATLENGTH] = { 0 };
// Set to TRUE by the stop() timer callback to abort a render in progress.
unsigned int STOP = FALSE;
// Drawing modes understood by the ML_* drawing functions.
typedef enum { ML_TRANSPARENT = -1, ML_WHITE, ML_BLACK, ML_XOR, ML_CHECKER } ML_Color;
typedef char* (*sc_cpv)(void);
// Raw instruction words executed through ML_vram_adress.
// NOTE(review): presumably a syscall trampoline for syscall number 0x0135 - confirm.
const unsigned int sc0135[] = { 0xD201D002, 0x422B0009, 0x80010070, 0x0135 };
// Zeroes the whole 1024-byte VRAM buffer (64 rows of 16 bytes).
// Bytes are cleared one at a time up to the first 4-byte boundary, the bulk is
// cleared as 32-bit words, and any leftover bytes are cleared one at a time.
void ML_clear_vram() {
    int i, head, words, tail, *pointer_long, vram;
    char *pointer_byte;

    vram = (int)ML_vram_adress();
    // Bytes needed to reach 4-byte alignment (0 when already aligned).
    head = (4 - (vram & 3)) & 3;
    // The word count is derived from what is left after the head. The previous
    // fixed count of 255 words left the last 4 bytes uncleared for an aligned buffer.
    words = (1024 - head) >> 2;
    tail = (1024 - head) & 3;

    pointer_byte = (char *)vram;
    for (i = 0; i < head; i++)
        pointer_byte[i] = 0;

    pointer_long = (int *)(vram + head);
    for (i = 0; i < words; i++)
        pointer_long[i] = 0;

    pointer_byte += head + (words << 2);
    for (i = 0; i < tail; i++)
        pointer_byte[i] = 0;
}
// Copies the whole VRAM buffer (64 rows of 16 bytes) to the LCD controller.
// The controller is driven through two memory-mapped byte registers: a register
// selector at 0xB4000000 and a data register at 0xB4010000.
void ML_display_vram() {
    // volatile: every store is a hardware access and must not be merged,
    // reordered or dropped by the compiler.
    volatile char *LCD_register_selector = (volatile char *)0xB4000000;
    volatile char *LCD_data_register = (volatile char *)0xB4010000;
    char *vram = ML_vram_adress();
    int row, col;

    for (row = 0; row < 64; row++) {
        // NOTE(review): register 4 presumably takes the row address (row | 192)
        // and then the column start (0); register 7 is the pixel data port - confirm.
        *LCD_register_selector = 4;
        *LCD_data_register = row | 192;
        *LCD_register_selector = 4;
        *LCD_data_register = 0;
        *LCD_register_selector = 7;
        for (col = 0; col < 16; col++)
            *LCD_data_register = *vram++;
    }
}
// Thresholds the per-pixel buffer into VRAM: a pixel becomes 1 when its matrix
// value is not greater than iMax. Eight consecutive pixels are packed into one
// byte (first pixel in the most significant bit) and stored sequentially.
void arryVRAM(unsigned char matrix[MATLENGTH], unsigned int iMax) {
    char *out = ML_vram_adress();
    unsigned int bits = 0;
    unsigned int pixel = 0;

    while (pixel < MATLENGTH) {
        bits <<= 1;
        if (matrix[pixel] <= iMax)
            bits |= 1;
        // Every 8th pixel completes a byte; only its low 8 bits are stored.
        if ((pixel & 7) == 7)
            *out++ = bits;
        pixel++;
    }
}
// Thresholds the per-pixel buffer and streams the result straight to the LCD,
// bypassing VRAM: a pixel is sent as 1 when its matrix value is not greater than iMax.
// Only rows 0..62 are sent: the buffer holds MATLENGTH (8180) entries, so reading
// a full row 63 would run past its end.
void ML_display_matrix(unsigned char matrix[MATLENGTH], unsigned int iMax) {
    // volatile: every store is a hardware access and must not be optimized away.
    volatile char *LCD_register_selector = (volatile char *)0xB4000000;
    volatile char *LCD_data_register = (volatile char *)0xB4010000;
    unsigned int row, col;
    unsigned int byte = 0;

    for (row = 0; row < 63; row++) {
        *LCD_register_selector = 4;
        *LCD_data_register = row | 192;
        *LCD_register_selector = 4;
        *LCD_data_register = 0;
        *LCD_register_selector = 7;
        for (col = 0; col < 128; col++) {
            byte = (byte << 1) | !(matrix[(row << 7) + col] > iMax);
            // Every 8th pixel completes a byte; only its low 8 bits are written.
            if ((col & 7) == 7)
                *LCD_data_register = byte;
        }
    }
}
// Plots one pixel at (x, y) in VRAM; coordinates outside 0..127 / 0..63 are ignored.
// color: ML_BLACK sets the bit, ML_WHITE clears it, ML_XOR inverts it, ML_CHECKER
// clears it when x and y have different parity and sets it otherwise;
// any other value leaves VRAM untouched.
void ML_pixel(int x, int y, ML_Color color) {
    char *cell = ML_vram_adress();
    int mask;

    if ((x & ~127) || (y & ~63))
        return;

    // 16 bytes per row, bit 7 is the leftmost pixel of a byte.
    cell += (y << 4) + (x >> 3);
    mask = 128 >> (x & 7);

    if (color == ML_CHECKER)
        color = ((x ^ y) & 1) ? ML_WHITE : ML_BLACK;

    if (color == ML_BLACK)
        *cell |= mask;
    else if (color == ML_WHITE)
        *cell &= ~mask;
    else if (color == ML_XOR)
        *cell ^= mask;
}
// Draws a vertical line in column x from row y1 to row y2 (inclusive, either order) into VRAM.
// Lines fully outside the 128x64 area are ignored; partially visible ones are clipped.
// color: ML_BLACK sets, ML_WHITE clears, ML_XOR inverts, ML_CHECKER alternates pixels;
// any other value draws nothing.
void ML_vertical_line(int x, int y1, int y2, ML_Color color) {
    char *vram = ML_vram_adress();
    char bit, skip;
    int pos, last;

    if ((x & ~127) != 0)
        return;
    if (y1 < 0 && y2 < 0)
        return;
    if (y1 > 63 && y2 > 63)
        return;

    // Order the end points, then clip them to the visible rows.
    if (y2 < y1) {
        int swap = y2;
        y2 = y1;
        y1 = swap;
    }
    if (y1 < 0)
        y1 = 0;
    if (y2 > 63)
        y2 = 63;

    // Byte offsets of the first and last affected bytes; consecutive rows are 16 bytes apart.
    pos = (x >> 3) + (y1 << 4);
    last = (x >> 3) + (y2 << 4);
    // Bit 7 is the leftmost pixel of a byte.
    bit = 128 >> (x & 7);

    if (color == ML_BLACK) {
        while (pos <= last) {
            vram[pos] |= bit;
            pos += 16;
        }
    } else if (color == ML_WHITE) {
        bit = ~bit;
        while (pos <= last) {
            vram[pos] &= bit;
            pos += 16;
        }
    } else if (color == ML_XOR) {
        while (pos <= last) {
            vram[pos] ^= bit;
            pos += 16;
        }
    } else if (color == ML_CHECKER) {
        // Pixels where x and y have different parity are cleared, the others are set.
        skip = (y1 ^ x) & 1;
        while (pos <= last) {
            if (skip)
                vram[pos] &= ~bit;
            else
                vram[pos] |= bit;
            skip = !skip;
            pos += 16;
        }
    }
}
// Draws a horizontal line on row y from column x1 to column x2 (inclusive, either order) into VRAM.
// A line entirely outside the 128x64 area is ignored; a partially visible one is clipped.
// VRAM is addressed as 16 bytes per row, with bit 7 of each byte being the leftmost pixel.
// color selects the operation: ML_BLACK sets bits, ML_WHITE clears them, ML_XOR inverts them,
// ML_CHECKER writes an alternating pattern; any other value (e.g. ML_TRANSPARENT) draws nothing.
void ML_horizontal_line(int y, int x1, int x2, ML_Color color) {
int i;
char checker;
char* vram = ML_vram_adress();
// Reject rows outside 0..63 and lines lying completely left or right of the screen.
if (y & ~63 || (x1 < 0 && x2 < 0) || (x1 > 127 && x2 > 127))
return;
// Order the end points so that x1 <= x2.
if (x1 > x2) {
i = x1;
x1 = x2;
x2 = i;
}
// Clip to the visible columns.
if (x1 < 0)
x1 = 0;
if (x2 > 127)
x2 = 127;
// In every mode: when the line spans several bytes, the first and last bytes are
// patched with partial masks and the bytes in between are written whole; otherwise
// a single mask covering bits x1..x2 of one byte is applied.
switch (color) {
case ML_BLACK:
if (x1 >> 3 != x2 >> 3) {
vram[(y << 4) + (x1 >> 3)] |= 255 >> (x1 & 7);
vram[(y << 4) + (x2 >> 3)] |= 255 << 7 - (x2 & 7);
for (i = (x1 >> 3) + 1; i < x2 >> 3; i++)
vram[(y << 4) + i] = 255;
} else
vram[(y << 4) + (x1 >> 3)] |= (255 >> (x1 % 8 + 7 - x2 % 8)) << (7 - (x2 & 7));
break;
case ML_WHITE:
// The AND masks keep the bits left of x1 and right of x2 and clear the rest.
if (x1 >> 3 != x2 >> 3) {
vram[(y << 4) + (x1 >> 3)] &= 255 << 8 - (x1 & 7);
vram[(y << 4) + (x2 >> 3)] &= 255 >> 1 + (x2 & 7);
for (i = (x1 >> 3) + 1; i < x2 >> 3; i++)
vram[(y << 4) + i] = 0;
} else
vram[(y << 4) + (x1 >> 3)] &= (255 << 8 - (x1 & 7)) | (255 >> 1 + (x2 & 7));
break;
case ML_XOR:
if (x1 >> 3 != x2 >> 3) {
vram[(y << 4) + (x1 >> 3)] ^= 255 >> (x1 & 7);
vram[(y << 4) + (x2 >> 3)] ^= 255 << 7 - (x2 & 7);
for (i = (x1 >> 3) + 1; i < (x2 >> 3); i++)
vram[(y << 4) + i] ^= 255;
} else
vram[(y << 4) + (x1 >> 3)] ^= (255 >> ((x1 & 7) + 7 - (x2 & 7))) << (7 - (x2 & 7));
break;
case ML_CHECKER:
// Pattern byte: 01010101 on odd rows, 10101010 on even rows.
checker = (y & 1 ? 85 : 170);
// The covered bits are first cleared (as in ML_WHITE), then the pattern is OR-ed in.
if (x1 >> 3 != x2 >> 3) {
vram[(y << 4) + (x1 >> 3)] &= 255 << 8 - (x1 & 7);
vram[(y << 4) + (x2 >> 3)] &= 255 >> 1 + (x2 & 7);
vram[(y << 4) + (x1 >> 3)] |= checker & 255 >> (x1 & 7);
vram[(y << 4) + (x2 >> 3)] |= checker & 255 << 7 - (x2 & 7);
for (i = (x1 >> 3) + 1; i < x2 >> 3; i++)
vram[(y << 4) + i] = checker;
} else {
vram[(y << 4) + (x1 >> 3)] &= (255 << 8 - (x1 & 7)) | (255 >> 1 + (x2 & 7));
vram[(y << 4) + (x1 >> 3)] |= checker & (255 >> (x1 % 8 + 7 - x2 % 8)) << (7 - (x2 & 7));
}
break;
}
}
// Draws a rectangle outline: horizontal edges on rows y1 and y2 spanning x1..x2,
// and vertical edges one column outside that span (x1 - 1 and x2 + 1), so the
// corners are never drawn twice (which matters for ML_XOR).
void ML_rectangle(int x1, int y1, int x2, int y2, ML_Color color) {
    int leftEdge = x1 - 1;
    int rightEdge = x2 + 1;

    ML_horizontal_line(y1, x1, x2, color);
    ML_horizontal_line(y2, x1, x2, color);
    ML_vertical_line(leftEdge, y1, y2, color);
    ML_vertical_line(rightEdge, y1, y2, color);
}
// Appends a signed fixed-point number, split over two 32-bit words (high:low),
// to the NUL-terminated text already in `string`, as "<sign><integer>.<fraction>".
// fixedPoint: number of sign+integer bits in `high` (callers in this file pass 8).
// Up to 27 fractional digits are produced, so the appended text can be more than
// 30 characters long; the caller must provide a large enough buffer.
// Returns the number of fractional digits written (previously the function fell
// off the end without returning a value).
char sprintFrac(unsigned char* string, unsigned int fixedPoint, int high, int low) {
    unsigned int tempHigh = high, tempLow = low;
    unsigned int magnitude, mask;
    unsigned char fraction[32];
    char *tail = (char *)string;
    int i = 0;

    fixedPoint = 36 - fixedPoint;
    mask = (1u << fixedPoint) - 1;
    abs64(&tempHigh, &tempLow);
    // Re-split the fractional bits so that each word carries `fixedPoint` of them,
    // leaving headroom in both words for the multiplications by 10 below.
    tempHigh <<= 4;
    tempHigh += tempLow >> fixedPoint;
    tempLow &= mask;
    tempHigh &= mask;
    // Each pass multiplies the fraction by 10; the carry out of the top is the next digit.
    do {
        tempHigh *= 10;
        tempLow *= 10;
        tempHigh += tempLow >> fixedPoint;
        fraction[i++] = '0' + (tempHigh >> fixedPoint);
        tempHigh &= mask;
        tempLow &= mask;
    } while ((tempHigh || tempLow) && i < 27);
    fraction[i] = '\0';

    // Append after the existing text. The old code passed `string` to sprintf as
    // both destination and "%s" argument, which is undefined for overlapping buffers.
    while (*tail)
        tail++;
    // Negate in unsigned arithmetic so the most negative value does not overflow.
    magnitude = high < 0 ? 0u - (unsigned int)high : (unsigned int)high;
    sprintf(tail, "%s%u.%s", high < 0 ? "-" : "+", magnitude >> (fixedPoint - 4), fraction);
    return (char)i;
}
// Shifts the 64-bit value held in *high:*low in place.
// shift > 0 shifts left, shift < 0 shifts right (logical), shift == 0 leaves the
// value unchanged, and |shift| >= 64 clears it.
// Right shifts now use the >> operator explicitly: the previous code used << with a
// negative count, which is undefined in C and only behaved as a right shift on
// hardware whose shift instruction treats negative counts that way.
// Assumes unsigned int is 32 bits wide.
void shift64(unsigned int* high, unsigned int* low, int shift) {
    if (shift >= 64 || shift <= -64) {
        *high = 0;
        *low = 0;
    } else if (shift >= 32) {
        // Everything that survives comes from the low word.
        *high = *low << (shift - 32);
        *low = 0;
    } else if (shift <= -32) {
        // Everything that survives comes from the high word.
        *low = *high >> (-shift - 32);
        *high = 0;
    } else if (shift > 0) {
        *high = (*high << shift) | (*low >> (32 - shift));
        *low <<= shift;
    } else if (shift < 0) {
        *low = (*low >> -shift) | (*high << (32 + shift));
        *high >>= -shift;
    }
}
// Timer callback: raises the global STOP flag and cancels timer 1 when the
// key reported by Bkey_GetKeyWait() is [EXIT] or [MENU].
void stop(void) {
    int codeA = 0, codeB = 0; //key codes filled in by Bkey_GetKeyWait()
    short ignored; //required by the call, value not used

    Bkey_GetKeyWait(&codeA, &codeB, KEYWAIT_HALTOFF_TIMEROFF, 0, !FALSE, &ignored);
    if (codeA != 4)
        return;
    if (codeB != 8 && codeB != 9)
        return;
    STOP = TRUE;
    KillTimer(1);
}
// Renders the set for the view given by the 64-bit fixed-point coordinates
// X0:X1 and Y0:Y1 at the given zoom level, then saves and displays the picture.
// iMax: [1] and [2] are the A and B of "MaxI = A * zoom + B"; [3] receives the result.
// When the resulting iteration limit is 0 nothing is rendered and VRAM is cleared.
void drawMandelbrot(int X0, int X1, int Y0, int Y1, int zoom, unsigned int iMax[]) {
    register char* vram = ML_vram_adress();
    int kcode1 = 0, kcode2 = 0; //row & col keycode for Bkey_GetKeyWait()
    // Same type as in stop(): the call stores a 2-byte value here, so the old
    // 1-byte `char` could be overrun.
    short unused;
    // Unsigned to match shift64()'s parameters and to hold 0x80000000 exactly.
    unsigned int offsetHigh = 0x80000000;
    unsigned int offsetLow = 0x00000000;

    // Offsets scaled by the zoom level are added to both coordinates before drawing.
    shift64(&offsetHigh, &offsetLow, -zoom - 5);
    sum64(&X0, &X1, offsetHigh, offsetLow);
    shift64(&offsetHigh, &offsetLow, -1);
    sum64(&Y0, &Y1, offsetHigh, offsetLow);

    iMax[3] = iMax[1] * zoom + iMax[2];
    if (iMax[3]) {
        STOP = FALSE;
        RestoreDisp(1);
        SetTimer(1, 200, stop); //stop() raises STOP on [EXIT]/[MENU]
        // NOTE(review): each 32-bit half is negated separately here; confirm that
        // drawMandel expects that rather than a full 64-bit negation.
        drawMandel(zoom, vram, -Y0, -Y1, -X0, -X1, iMax[3], &STOP, matrix);
        KillTimer(1);
        if (STOP == TRUE) {
            // Keep reading keys until the reported key is no longer [EXIT]/[MENU].
            do
                Bkey_GetKeyWait(&kcode1, &kcode2, KEYWAIT_HALTON_TIMERON, 1, !FALSE, &unused);
            while (kcode1 == 4 && (kcode2 == 8 || kcode2 == 9));
        }
    } else
        ML_clear_vram();
    SaveDisp(1);
    ML_display_vram();
}
// Restores every setting to its start-up value and redraws the default view:
// HUD on, inverted rectangle, MaxI = 8 * zoom + 24, zoom 1, origin-centred.
void reset(unsigned char* HUD, unsigned int iMax[], char* colour, int* screenZoom, unsigned int* graphZoom, int* screenX, int* screenY, int* graphHighX, int* graphHighY, int* graphLowX, int* graphLowY) {
    // Display options.
    *HUD = TRUE;
    *colour = ML_XOR;

    // Iteration settings: [0] selected field, [1] A, [2] B, [3] current limit.
    iMax[0] = 1;
    iMax[1] = 8;
    iMax[2] = 24;
    iMax[3] = 32;

    // Camera rectangle position on screen.
    *screenY = 0;
    *screenX = 0;

    // Zoom levels.
    *graphZoom = 1;
    *screenZoom = 1;

    // View position on the graph.
    *graphLowX = 0;
    *graphHighX = 0;
    *graphLowY = 0;
    *graphHighY = 0;

    ML_clear_vram();
    drawMandelbrot(*graphHighX, *graphLowX, *graphHighY, *graphLowY, *graphZoom, iMax);
}
// Add-in entry point: draws the default view, then handles keys until [EXIT]:
// zooming/moving the camera rectangle, HUD and colour toggles, the MaxI editor
// ([F3]), the grey-scale preview ([F4]), reset ([AC]) and redraw ([EXE]).
int AddIn_main(int isAppli, unsigned short OptionNum) { //Main function
    unsigned int key; //Get key being pressed
    // HUD text buffer. "X:" followed by sprintFrac() output can reach 35 bytes
    // (sign, 3 integer digits, '.', 27 fraction digits, NUL), which overran the
    // previous 32-byte buffer.
    unsigned char string[64];
    unsigned char HUD; //Heads Up Display: Cords, Zoom level & Max iteration: toggle with [F1]
    char colour; //Colour of camera rectangle: Black, White, Inverted, Transparent and Checkered: toggle with [F2]
    unsigned int iMax[4]; //max iterations: change with [F3]
    int screenZoom; //zoom level on screen (camera rectangle): change with [+] & [-]
    unsigned int graphZoom; //zoom level for graph
    int screenX, screenY; //cords on screen for camera rectangle: move with [REPLAY]
    int graphHighX, graphLowX; //X cord on screen for graph rectangle
    int graphHighY, graphLowY; //Y cord on screen for graph rectangle
    unsigned int graphMoveHigh, graphMoveLow; //amount graphX & Y changes by when moving camera rectangle around
    unsigned int screenMove; //amount screenX & Y changes by when moving camera rectangle around
    int i;
    int kcode1, kcode2; //row & col keycode for Bkey_GetKeyWait() (int, as in stop())
    short unused; //unused
    //64bit Fixed Point number format - spilt between two 32bit variables
    //1:7:56
    //Sign:int:Frac
    //±:0000000:00000000000000000000000000000000000000000000000000000000
    //±0000000.000000000000000000000000,00000000000000000000000000000000
    //high = ±0000000.000000000000000000000000
    //low = 00000000000000000000000000000000
    ML_display_vram();
    reset(&HUD, iMax, &colour, &screenZoom, &graphZoom, &screenX, &screenY, &graphHighX, &graphHighY, &graphLowX, &graphLowY);
    do {
        // NOTE(review): screenZoom can become negative after zooming out, which makes
        // the right shifts by screenZoom below undefined in C - confirm intended behaviour.
        screenMove = screenZoom > 4 ? 1 : 16 >> screenZoom;
        graphMoveHigh = 0x80000000;
        graphMoveLow = 0x00000000;
        shift64(&graphMoveHigh, &graphMoveLow, -(int)(screenZoom > 4 ? graphZoom - screenZoom + 4 : graphZoom) - 8);
        // Never let the step underflow to zero, or the view could not be moved.
        if (!graphMoveHigh && !graphMoveLow)
            graphMoveLow = 0x00000001;
        GetKey(&key);
        switch (key) {
        case KEY_CHAR_PLUS:
            if (graphZoom < 51) {
                graphZoom++;
                screenZoom++;
            }
            break;
        case KEY_CHAR_MINUS:
            if (graphZoom) {
                graphZoom--;
                screenZoom--;
            }
            break;
        case KEY_CTRL_UP:
            screenY -= screenMove;
            sum64(&graphHighY, &graphLowY, graphMoveHigh, graphMoveLow);
            break;
        case KEY_CTRL_DOWN:
            screenY += screenMove;
            sub64(&graphHighY, &graphLowY, graphMoveHigh, graphMoveLow);
            break;
        case KEY_CTRL_LEFT:
            screenX -= screenMove;
            sum64(&graphHighX, &graphLowX, graphMoveHigh, graphMoveLow);
            break;
        case KEY_CTRL_RIGHT:
            screenX += screenMove;
            sub64(&graphHighX, &graphLowX, graphMoveHigh, graphMoveLow);
            break;
        case KEY_CTRL_F1:
            HUD = !HUD;
            break;
        case KEY_CTRL_F2:
            if (colour > ML_TRANSPARENT)
                colour--;
            else
                colour = ML_CHECKER;
            break;
        case KEY_CTRL_F3:
            // MaxI editor: iMax[0] selects the field being edited (1 = A, 2 = B).
            SaveDisp(2);
            do {
                PrintXY(6, 15, (const unsigned char *)"MaxI = A * Zoom + B", 0);
                sprintf((char *)string, "A:%u", iMax[1]);
                PrintXY(48, 24, string, !(iMax[0] - 1));
                sprintf((char *)string, "B:%u", iMax[2]);
                PrintXY(48, 33, string, iMax[0] - 1);
                sprintf((char *)string, "MaxI:%u", iMax[1] * graphZoom + iMax[2]);
                PrintXY(30, 42, string, 0);
                GetKey(&key);
                switch (key) {
                case KEY_CTRL_UP:
                    iMax[0] = 1;
                    break;
                case KEY_CTRL_DOWN:
                    iMax[0] = 2;
                    break;
                case KEY_CTRL_LEFT:
                case KEY_CHAR_MINUS:
                    if (iMax[iMax[0]])
                        iMax[iMax[0]]--;
                    break;
                case KEY_CTRL_RIGHT:
                case KEY_CHAR_PLUS:
                    iMax[iMax[0]]++;
                    break;
                case KEY_CTRL_AC:
                    iMax[iMax[0]] = 0;
                    break;
                case KEY_CTRL_DEL:
                    iMax[iMax[0]] /= 10;
                    break;
                default:
                    if (key >= '0' && key <= '9')
                        iMax[iMax[0]] = iMax[iMax[0]] * 10 + key - '0';
                    break;
                }
                if (iMax[iMax[0]] > 32768)
                    iMax[iMax[0]] = 32768;
                RestoreDisp(2);
            } while (key != KEY_CTRL_EXIT && key != KEY_CTRL_EXE && key != KEY_CTRL_F3);
            key = 0; //do not let the editor's exit key end or redraw the main loop
            break;
        case KEY_CTRL_F4:
            //Gray scale, by refreshing screen multiple times per sec at different max iterations (iMax)
            RestoreDisp(1);
            ML_display_vram();
            Sleep(1200);
            do {
                for (i = iMax[3] - 1; i > 0; i -= 5) {
                    ML_display_matrix(matrix, i);
                }
            } while (!Bkey_GetKeyWait(&kcode1, &kcode2, KEYWAIT_HALTOFF_TIMEROFF, 0, !FALSE, &unused));
            Sleep(1200);
            break;
        case KEY_CTRL_AC:
            reset(&HUD, iMax, &colour, &screenZoom, &graphZoom, &screenX, &screenY, &graphHighX, &graphHighY, &graphLowX, &graphLowY);
            break;
        case KEY_CTRL_EXE:
            drawMandelbrot(graphHighX, graphLowX, graphHighY, graphLowY, graphZoom, iMax);
            screenX = 0;
            screenY = 0;
            screenZoom = 1;
            break;
        }
        if (key != KEY_CTRL_AC && key != KEY_CTRL_EXE) {
            RestoreDisp(1);
            if (screenZoom > -26) {
                if (screenZoom < 8)
                    ML_rectangle(65 - (128 >> screenZoom) + screenX,
                                 32 - (screenZoom > 6 ? 1 : (64 >> screenZoom)) + screenY,
                                 62 + (128 >> screenZoom) + screenX,
                                 31 + (screenZoom > 6 ? 0 : (64 >> screenZoom)) + screenY,
                                 colour);
                else
                    ML_pixel(screenX + 64, screenY + 31, colour);
            }
            if (HUD == TRUE) {
                // X is negated for display only and negated back afterwards.
                neg64(&graphHighX, &graphLowX);
                sprintf((char *)string, "X:");
                sprintFrac(string, 8, graphHighX, graphLowX);
                PrintMini(0, 0, string, 0);
                neg64(&graphHighX, &graphLowX);
                sprintf((char *)string, "Y:");
                sprintFrac(string, 8, graphHighY, graphLowY);
                PrintMini(0, 6, string, 0);
                sprintf((char *)string, "MaxI:%u", iMax[1] * graphZoom + iMax[2]);
                PrintMini(0, 53, string, 0);
                if (graphZoom > 32)
                    sprintf((char *)string, "Zoom:2^%ux", graphZoom - 1);
                else if (graphZoom > 0)
                    // 1u: at graphZoom == 32 the factor is 2^31, which overflows a signed int.
                    sprintf((char *)string, "Zoom:%ux", 1u << (graphZoom - 1));
                else
                    sprintf((char *)string, "Zoom:0.5x");
                PrintMini(0, 59, string, 0);
            }
        }
    } while (key != KEY_CTRL_EXIT);
    return 1;
}
#pragma section _BR_Size
unsigned long BR_Size;
#pragma section
#pragma section _TOP
// Start-up hook placed in the _TOP section: forwards both launch arguments to
// INIT_ADDIN_APPLICATION and returns its result.
int InitializeSystem(int isAppli, unsigned short OptionNum) {
    int status = INIT_ADDIN_APPLICATION(isAppli, OptionNum);
    return status;
}
#pragma section
MANDEL.G1A
Citer : Posté le 22/01/2020 08:58 | #
There's a lot more flicker than there is grey
Also, there are heaps of v-sync lines
Welcome to the bane of gray rendering. Every single application that uses gray has this problem, except if the timers are set to refresh the screen at precise intervals to avoid this. Gathering timer delays that render well on different screens is a common task for gray engine developers.
Although, to be straight, there is no chance that you will be able to render good-looking gray by means of C code counting iterations or SetTimer(). These are way too rough. The only Casio gray engines in existence to the best of my knowledge (Kucalc's original and gint's) all use hardware timers and you can't do that with fxlib (Kucalc's engine broke half of fxlib as a side effect and gint replaces the kernel).
(Edit : I now remember there was one specific engine in Aragorn's Zelda game, with messages starting here. He used setFPS() (RTC 64-bit counter) with mixed results. I mentioned at the time that the method was already rough.)
So... I don't have much advice for you here; checker could be an option, but actual gray I don't know how to do with your tool stack.
Citer : Posté le 23/01/2020 20:33 | #
What is the refresh rate of the LCD screen?
Is it locked with clock cycles of the CPU?
Is it the lower or upper bits?
Would I have any use to get the other 32bits?
With SetTimer() (and Sleep()), the input time is measured in milliseconds, how does that work?
Cause 1 millisecond is 1/1000 of a second, while ticks are counted in 1/128 of a second
Citer : Posté le 23/01/2020 21:03 | #
Oops, sorry: it is 64 Hz, not 64-bit. The RTC is very imprecise.
The refresh rate of the LCD screen is "too much". You can refresh it 500 times per second if you want. What the exact V-sync delay would be and how to achieve it, is a question I think no one has answered before. But I don't think it would beat what we do by experimentation where we fine-tune manually.
With SetTimer() the delay is actually rounded to the closest multiple of 25 ms. Hence the resolution of the timer is no more than 25 ms. The RTC has a resolution of about 16 ms. And the hardware timer which is not available in fxlib (because it's being used by the system) has a resolution of about 250 ns (yes nanoseconds!). You probably understand why I never bothered trying gray with fxlib...
Citer : Posté le 23/01/2020 21:54 | #
Do you mean 128-Hz? because of 1/128 sec.
RTC isn't working on my calc
If I run (*SysCall)(0, 0, 0, 0, 0x3b); I get a System ERROR
TARGET=E6FF2136
PC =08101F58
But on the SDK it works fine
And my old snake game that used it now crashes when run
// Raw instruction words that are executed as a function through the SysCall pointer below.
// NOTE(review): a reply in this thread states that this C-style syscall invocation
// does not work on SH4 calculators - confirm before relying on it.
static int SysCallCode[] = { 0xD201422B, 0x60F20000, 0x80010070 };
// Function pointer aimed at the data array above; FNo is the syscall number.
static int (*SysCall)(int R4, int R5, int R6, int R7, int FNo) = (void*)&SysCallCode;
// Invokes syscall 0x3b with all four register arguments set to 0 and returns its result.
// NOTE(review): presumably the RTC tick counter, going by the function name - confirm.
int RTC_getTicks(void) {
return (*SysCall)(0, 0, 0, 0, 0x3b);
}
Any way to access the hardware timer via ASM?
Citer : Posté le 23/01/2020 23:15 | #
Do you mean 128-Hz? because of 1/128 sec.
Yes, whichever. 64 Hz and 128 Hz is not too different (and I usually mix up the two).
You are using the old C-style syscall invocation, which doesn't work on SH4 calcs. I suggest using assembler, at least with assembler you know what you are doing.
Yes, and even from C. But you'll confuse/break the OS and you won't be able to catch the interrupt anyway so...
Citer : Posté le 25/01/2020 11:07 | #
I tried making a 128x64 array of 8-bit unsigned chars
8192 8bit bytes
16384 bits
But I’m getting a size limit error
The size of B and R section should be 0x2000 bytes or less
Citer : Posté le 25/01/2020 11:48 | #
The .data and .bss sections (B and R sections in SDK terms) are loaded in an 8 kB area on SH3 calculators, hence the buffer that you are trying to define, plus whichever global state you may have, slightly exceeds this size.
On SH4 calculators this limit was raised to 32 kB (static RAM), but the SDK does not know about this. You can, if you want, set your array to some pointer between 0x08100000 and this plus 32 kB if you are sure that no other global variable gets linked there (which you can check with the linker output).
There is no way to bypass this check under the SDK.
Citer : Posté le 27/01/2020 06:05 | # | Fichier joint
I have 4 gray levels
White, LightGray, DarkGray and Black
There's 2 buffers, Light and Dark
I've used SetTimer() to call a function that draws the buffers to the screen
It draws the LightBuffer to the screen
waits X instructions
Draws DarkGray buffer to the screen
then waits for SetTimer() to run the function again
In the video, the max iteration level was 128
the 8 numbers to the left of the screen, in order from top to bottom
0 -SetTimer() delay - 0ms (does SetTimer() default to 25ms then? or instantly recalls the function again after its finished?)
1 - unused
50000 - delay() - runs 50000 * 3 = 150000 instructions to create some delay between drawing light gray and dark gray (50k just happened to be the perfect number)
3 - unused
105 - inverse iteration level for light gray (128 - 105 = 23 iterations)
78 - inverse iteration level for dark gray (128 - 78 = 50 iterations)
0 - inverse iteration level for black (128 - 0 = 128 iterations)
0 - unused
LightGrayBuffer is computed via pixels that hit 23+ iterations minus 50+ iteration pixels plus 128 iteration pixels
DarkGrayBuffer is 50+ iteration pixels
Citer : Posté le 27/01/2020 08:11 | #
This is not bad, I think this is even pretty good for a SetTimer()-based effect. Your efforts are paying off
Obviously it waits otherwise you'd never gain back control of your calculator.
Citer : Posté le 28/01/2020 11:18 | #
Cause I’m using SetTimer and GetKey
How can I detect if the user pressed [MENU] and stop the Timer?
I still want to be able to change the display contrast
And something that doesn’t display the vram to screen automatically
would be nice
Citer : Posté le 28/01/2020 13:14 | #
So GetKey() is clearly not what you need. You can try GetKeyWait() (the syscall ! The fxlib wrapper does not set the key correctly). If not, then you'll have to code yourself and you won't be able to easily adjust the contrast because it can be set but not read.
Citer : Posté le 01/02/2020 08:28 | #
How can I optimize this further?
Is there a way to write words or longwords to the DisplayDriver?
Even if it can only be done a few times and use bytes for the start/end
; Sends 64 rows of 16 bytes from the buffer pointed to by r4 to the LCD data register.
; NOTE(review): r4 presumably holds the first C argument (the buffer pointer) - confirm.
; r3 counts rows down from 64; the row byte written is ~r3, i.e. 192 + row index.
; Always returns 0 in r0.
mov.l #H'B4000000,r1 ;LCD_register_selector = 0xB4000000
mov.l #H'B4010000,r2 ;LCD_data_register = 0xB4010000
mov #64, r3 ;row = 64
mov #0, r5 ;zero = 0
mov #4, r6 ;four = 4
mov #7, r7 ;seven = 7
writeRow:
mov.b r6, @r1 ;*LCD_register_selector = four
dt r3 ;T = (--row == 0)  (decrement first, then test; T is consumed by bf/s below)
not r3, r0 ;rowInvert = ~row
mov.b r0, @r2 ;*LCD_data_register = rowInvert
mov.b r6, @r1 ;*LCD_register_selector = four
mov.b r5, @r2 ;*LCD_data_register = zero
mov.b r7, @r1 ;*LCD_register_selector = seven
mov.b @r4+, r0 ;byte = *buffer++
mov.b r0, @r2 ;*LCD_data_register = byte
mov.b @r4+, r0 ;byte = *buffer++
mov.b r0, @r2 ;*LCD_data_register = byte
mov.b @r4+, r0 ;byte = *buffer++
mov.b r0, @r2 ;*LCD_data_register = byte
mov.b @r4+, r0 ;byte = *buffer++
mov.b r0, @r2 ;*LCD_data_register = byte
mov.b @r4+, r0 ;byte = *buffer++
mov.b r0, @r2 ;*LCD_data_register = byte
mov.b @r4+, r0 ;byte = *buffer++
mov.b r0, @r2 ;*LCD_data_register = byte
mov.b @r4+, r0 ;byte = *buffer++
mov.b r0, @r2 ;*LCD_data_register = byte
mov.b @r4+, r0 ;byte = *buffer++
mov.b r0, @r2 ;*LCD_data_register = byte
mov.b @r4+, r0 ;byte = *buffer++
mov.b r0, @r2 ;*LCD_data_register = byte
mov.b @r4+, r0 ;byte = *buffer++
mov.b r0, @r2 ;*LCD_data_register = byte
mov.b @r4+, r0 ;byte = *buffer++
mov.b r0, @r2 ;*LCD_data_register = byte
mov.b @r4+, r0 ;byte = *buffer++
mov.b r0, @r2 ;*LCD_data_register = byte
mov.b @r4+, r0 ;byte = *buffer++
mov.b r0, @r2 ;*LCD_data_register = byte
mov.b @r4+, r0 ;byte = *buffer++
mov.b r0, @r2 ;*LCD_data_register = byte
mov.b @r4+, r0 ;byte = *buffer++
mov.b r0, @r2 ;*LCD_data_register = byte
mov.b @r4+, r0 ;byte = *buffer++ (16th and last byte of the row)
bf/s writeRow ;if(T == 0) goto writeRow
mov.b r0, @r2 ;*LCD_data_register = byte (delay slot: runs before the branch takes effect)
rts ;return
mov #0, r0 ;return value = 0 (delay slot of rts)
I found the syscalls to enable/disable the menu key in key functions e.g. GetKey(&key)
; Two syscall stubs: each loads the address stored at SYSCALL into r1, the syscall
; number into r0, and jumps there (the nop fills the jump's delay slot).
; NOTE(review): the first stub has no label in this paste; per the surrounding text it is
; presumably the "enable" counterpart (syscall H'477) of the labelled "disable" stub - confirm.
mov.l SYSCALL,r1
mov.l #H'477,r0
jmp @r1
nop
_DisableGetkeyToMainFunctionReturn:
mov.l SYSCALL,r1
mov.l #H'478,r0
jmp @r1
nop
If I were to use Bkey_GetKeyWait() and a small key code converter function
unsigned int key;
// Lookup table translating a key-matrix position into a KEY_* key code.
// It is read as KeyCodes[kcode1 - 2][kcode2 - 2], so the 6 rows correspond to
// kcode1 = 2..7 and the 9 columns to kcode2 = 2..10.
// KEY_CTRL_NOP entries presumably mark matrix positions with no key - confirm.
unsigned short KeyCodes[6][9] = {
KEY_CTRL_NOP, KEY_CTRL_NOP, KEY_CTRL_NOP, KEY_CTRL_NOP, KEY_CHAR_STORE, KEY_CHAR_TAN, KEY_CTRL_RIGHT, KEY_CTRL_UP, KEY_CTRL_F6,
KEY_CTRL_EXE, KEY_CHAR_MINUS, KEY_CHAR_DIV, KEY_CTRL_NOP, KEY_CHAR_COMMA, KEY_CHAR_COS, KEY_CTRL_DOWN, KEY_CTRL_LEFT, KEY_CTRL_F5,
KEY_CHAR_PMINUS, KEY_CHAR_PLUS, KEY_CHAR_MULT, KEY_CTRL_DEL, KEY_CHAR_RPAR, KEY_CHAR_SIN, KEY_CTRL_EXIT, KEY_CTRL_MENU, KEY_CTRL_F4,
KEY_CHAR_EXP, KEY_CHAR_3, KEY_CHAR_6, KEY_CHAR_9, KEY_CHAR_LPAR, KEY_CHAR_LN, KEY_CHAR_POW, KEY_CTRL_VARS, KEY_CTRL_F3,
KEY_CHAR_DP, KEY_CHAR_2, KEY_CHAR_5, KEY_CHAR_8, KEY_CTRL_FD, KEY_CHAR_LOG, KEY_CHAR_SQUARE, KEY_CTRL_OPTN, KEY_CTRL_F2,
KEY_CHAR_0, KEY_CHAR_1, KEY_CHAR_4, KEY_CHAR_7, KEY_CHAR_FRAC, KEY_CTRL_XTT, KEY_CTRL_ALPHA, KEY_CTRL_SHIFT, KEY_CTRL_F1
};
if (kcode1 < 2 || kcode2 < 2 || kcode1 > 7 || kcode2 > 10)
return kcode1 == 1 && kcode2 == 1 ? KEY_CTRL_AC : KEY_CTRL_NOP;
return KeyCodes[kcode1 - 2][kcode2 - 2];
}
How much of this fx legacy: keyboard applies to my calc fx-9750GII SH4 OS 02.04.0201? (and the SDK)
Citer : Posté le 01/02/2020 10:19 | #
No because the bus is 8-bit. But:
• First note that it takes 2.5 ms to send the data. How many refreshes per second do you do?
• You can overclock to speed up the bus transfer.
• You can try to use the DMA but I'm not sure whether it's possible to do it fully in the background (research needed).
As far as I know there is no way to prevent this, you really have to use GetKeyWait().
You can use the inject key code syscall; I personally used it successfully to insert MENU. I don't know whether GetKeyWait() will handle SHIFT-combinations; if you try it, please let me know.
Basically all of it. I wouldn't play with the interrupt handlers (which is too low-level) and the Fkey/PRGM stuff (which is irrelevant), but you're mainly safe there.
Citer : Posté le 01/02/2020 10:58 | #
2.5ms to send only 8bits? (2.5 Milliseconds)
Do you mean 2.5μs? (2.5 Microseconds)
I'm currently doing 1000ms / 25ms = 40fps. SetTimer(ID_1, 25ms, DrawGrayFunction);
you could say 80fps if you count writing the dark and light buffers separately
Got [MENU] working with the key injection and GetKeyWait()
// Emulate a [MENU] press by injecting its matrix code and letting GetKeyWait() consume it.
if (Camera.GrayScale == TRUE) {
Gray_Stop(); // presumably stops the grayscale engine before the OS takes over - confirm
Keyboard_ClrBuffer(); // presumably discards pending key events so the injected code is read next - confirm
matrixCode = 0x308; // matrix code injected to trigger [MENU] (per the surrounding post)
Keyboard_PutMatrixCode(&matrixCode);
GetKeyWait(KEYWAIT_HALTOFF_TIMEROFF, 0, !TRUE, &key); // third argument is 0 (!TRUE); NOTE(review): assumed to leave MENU handling enabled - confirm
}
break;
Now to get Contrast and PowerOff working
Then I can work on some GUI stuff and optimizing the asm code to beyond readable
Citer : Posté le 01/02/2020 11:12 | #
No, 2.5ms to send the full VRAM.
I suspect this is 40 FPS, right? You can't go above that with SetTimer().
So 40 FPS with 2.5ms per operation is 10% of the application time. This should give you an idea how important it is to optimize it.
Good job, keep it up!
Citer : Posté le 01/02/2020 11:46 | #
What code is that time based off?
ML_display_vram()? writing pure hardcoded 0's?
Citer : Posté le 01/02/2020 11:49 | #
Timing gint's dupdate() with a hardware timer. You can be sure that this is accurate; here are a few reasons:
• The timer's resolution is below 1 µs.
• This time is very very long for the amount of writes performed. In comparison, clearing the VRAM takes a few µs. Hence, most of this is I/O-bound.
• The code for gint's dupdate() is really the same as ML_display_vram().
Citer : Posté le 02/02/2020 03:27 | #
/*
 * Writes `contrast` to the LCD controller: selects controller register 6
 * through the register-selector port, then writes the value to the data port.
 *
 * Both ports are memory-mapped I/O, so they are accessed through
 * volatile-qualified pointers; without `volatile` the compiler is free to
 * reorder or drop the two stores because nothing in the program reads them.
 */
void ML_set_contrast(unsigned char contrast)
{
    volatile unsigned char *LCD_register_selector = (volatile unsigned char *)0xB4000000;
    volatile unsigned char *LCD_data_register = (volatile unsigned char *)0xB4010000;

    *LCD_register_selector = 6;
    *LCD_data_register = contrast;
}
#endif
#ifdef ML_GET_CONTRAST
/*
 * Selects LCD controller register 6 through the register-selector port and
 * returns the byte then read from the data port.
 *
 * Both ports are memory-mapped I/O, so they are accessed through
 * volatile-qualified pointers to keep the select-then-read sequence intact,
 * and as unsigned bytes so the value is returned without a signed detour.
 *
 * NOTE(review): this read is reported to always yield 0 on both hardware and
 * the emulator; the controller may not support reading this register back -
 * confirm against the LCD controller documentation.
 */
unsigned char ML_get_contrast()
{
    volatile unsigned char *LCD_register_selector = (volatile unsigned char *)0xB4000000;
    volatile unsigned char *LCD_data_register = (volatile unsigned char *)0xB4010000;

    *LCD_register_selector = 6;
    return *LCD_data_register;
}
#endif
Calling ML_get_contrast() always returns 0 (on both my calc and the emulator)
Ajouté le 10/02/2020 à 10:10 :
I'm able to type shll8 r1, r5 without any errors
It just ignores the , r5 => shll8 r1
Without using mov.w or mov.l
Is it possible to type mov #255, r0 without it becoming r0 = 0xFFFFFFFF (-1)?
Citer : Posté le 10/02/2020 10:15 | #
Truly interesting. The assembler is on crack I guess. xD
Is it possible to type mov #255, r0 without it becoming r0 = 0xFFFFFFFF (-1)?
The range of a mov #immediate, rn is only -128...127. If you want another value you either need to load it using mov.w/l or use a trick.
In your specific case you can do this after mov #-1, r0 (zero-extending the low byte of 0xFFFFFFFF gives 255), which avoids the memory reference:
extu.b r0, r0
Note that when you do mov.b or mov.w from memory, only the bottom bits are loaded, and so it is usual to follow mov.b with ext[us].b and mov.w with ext[us].w to make sure the top bits have the correct value. Forgetting to do this results in insidious bugs...