/* NVClock 0.8 - Linux overclocker for NVIDIA cards * * site: http://nvclock.sourceforge.net * * Copyright(C) 2001-2008 Roderick Colenbrander * * Thanks to Erik Waling for doing Smartdimmer coding/testing. (his code isn't the one in NVClock) * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA */ /* This source file uses some clock calculation code from nvidia's xfree86 driver. To keep Nvidia happy I have added their copyright. The way they interpret it (see linux kernel riva_hw.h) is that you need to add the disclaimer and copyright and when that's done you can basicly do what you want. */ /***************************************************************************\ |* *| |* Copyright 1993-2003 NVIDIA, Corporation. All rights reserved. *| |* *| |* NOTICE TO USER: The source code is copyrighted under U.S. and *| |* international laws. Users and possessors of this source code are *| |* hereby granted a nonexclusive, royalty-free copyright license to *| |* use this code in individual and commercial software. *| |* *| |* Any use of this source code must include, in the user documenta- *| |* tion and internal comments to the code, notices to the end user *| |* as follows: *| |* *| |* Copyright 1993-2003 NVIDIA, Corporation. All rights reserved. *| |* *| |* NVIDIA, CORPORATION MAKES NO REPRESENTATION ABOUT THE SUITABILITY *| |* OF THIS SOURCE CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" *| |* WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND. NVIDIA, CORPOR- *| |* ATION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOURCE CODE, *| |* INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY, NONINFRINGE- *| |* MENT, AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT SHALL *| |* NVIDIA, CORPORATION BE LIABLE FOR ANY SPECIAL, INDIRECT, INCI- *| |* DENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RE- *| |* SULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION *| |* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF *| |* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOURCE CODE. *| |* *| |* U.S. Government End Users. This source code is a "commercial *| |* item," as that term is defined at 48 C.F.R. 2.101 (OCT 1995), *| |* consisting of "commercial computer software" and "commercial *| |* computer software documentation," as such terms are used in *| |* 48 C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Govern- *| |* ment only as a commercial end item. Consistent with 48 C.F.R. *| |* 12.212 and 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), *| |* all U.S. Government End Users acquire the source code with only *| |* those rights set forth herein. *| |* *| \***************************************************************************/ #include #include #include "backend.h" /* / The original NV40 gpu was used as the base for 6800LE/6800NU/6800GT/6800Ultra / GPUs. The difference between all these models lie in the amount of enabled / pixel/vertex pipelines and clockspeeds. For instance the 6800LE ships with 8 / pixel pipelines while the 6800GT ships with 16 of those. Right after production / all GPUs are tested, if all pipelines work and they run at high clocks they / are called Ultra or if pipes are broken they are called 6800NU(12p) or 6800LE(8p). / Further in some cases 'good' GPUs can be rebranded too if there's a shortage of / 6800NU/6800LE GPUs. The key to this rebranding is register 0x1540 which contains / the enabled pixel/vertex pipelines. Depending on the GPU architecture a bit can / correspond to a single vertex shader or to a block containing two or four / pixel pipelines. / / We now define some words coming from Rivatuner as people are familiar with those. / A 'good' GPU for which pipelines are disabled just to get enough lowend models / is said to contain 'Software masked units'. In this case the videobios initializes / 0x1540 with a value that locks out some units. / GPUs which didn't pass the hardware quality testing contain 'Hardware masked units'. / In this case the bios initializes 0x1540 with a value that enables all pipelines. / A certain (read-only) register (0xc010) contains a mask of units to disable by default. / The bios loads this value into another register (0xc020) at startup. The value from / 0xc020 is then used by the drivers to disable units in 0x1540. For example by clearing this / register before the drivers are loaded, you can prevent masks from being disabled. / 1540 units_cfg (rw) second byte contains vertex configuration and first byte pipeline / c010 default_mask (r) pixel pipelines start at +22, while vertex start at +16 (is this correct for all cards?) / c020 active_mask (rw) / c024/c028/c02c are being set to 0, why? (they correspond to c014/c018/c01c) / / Below are supported pipeline configurations on various types of cards. Not sure / if everything is fully correct though: / - NV40 0x3f0f 6800 cards / - NV41 0x1f07 6800 pci-e (is this correct?) / - NV43 0x0703 6200/6600 cards / - NV44 0x0703 6200(Go)/Turbocache cards / - NV46 0x0703 7300 (is this correct?) / - NV47/NV49 0xff3f 7800/7900 cards / - NV4B 0x1f0f 7600 */ static int nv40_get_default_mask(char *pmask, char *vmask) { int mask; switch(nv_card->arch) { case NV40: mask = 0x3f0f; break; case NV41: mask = 0x1f07; break; case NV43: case NV44: case NV46: case C51: mask = 0x703; break; case NV47: case NV49: mask = 0xff3f; break; case NV4B: mask = 0x1f0b; break; } if(pmask) *pmask = mask & 0xff; if(vmask) *vmask = (mask >> 8) & 0xff; return mask; } /* Try to locate hardware maskes units. On success we return 1 and pmask/vmask / contain the masked units. When no hw masked units are found we return 0. */ static int nv40_get_hw_masked_units(char *pmask, char *vmask) { unsigned int mask = nv_card->PMC[0xc010/4]; /* Or should we show the currently locked pipes? */ unsigned int masked_units; /* On 6200 cards for some reason 0xc010 can be empty while there are locked pipes, 0xc020 is then used instead. / For now we use 0xc020 if that's the case. Note that during unlocking 0xc020 is cleared and that we then lose this locking info. */ if(mask == 0) mask = nv_card->PMC[0xc020/4]; /* For now we show units that are hw masked by default, not the currently masked units; the cli code wants to have this info / Unfortunately bios dumping isn't possible on various mobile 6200go cards, so as a fallback use the currently masked pipes / in favor of a segfault ;) */ /* What to do with NV47 which has 8 vertex units? */ masked_units = (((mask & 0x3f0000) >> 8) | (mask >> 22)) & nv40_get_default_mask(0, 0); if(masked_units != 0) { *pmask = masked_units & 0xff; /* pixel */ *vmask = (masked_units >> 8) & 0xff; /* vertex */ return 1; } return 0; } /* Try to locate software maskes units. On success we return 1 and pmask/vmask / contain the masked units. When no sw masked units are found we return 0. */ static int nv40_get_sw_masked_units(char *pmask, char *vmask) { unsigned int mask = nv40_get_default_mask(0, 0); unsigned int pipe_cfg; pipe_cfg = nv_card->PMC[0x1540/4] & nv40_get_default_mask(0, 0); if(!pipe_cfg) return 0; /* Check if the card contains sw masked units by comparing / the default pipe_cfg register value with the most optimal / register value for the type of card. If they differ we have / sw masked units. The check below first does a AND-mask to filter / out bits which aren't needed. */ if((pipe_cfg & 0xffff) != mask) { /* First note the bits that are different / E.g. pipe_cfg = 0x701, while mask = 0x703 / 0x701 ^ 0x703 = 0x002 */ pipe_cfg = (pipe_cfg ^ mask) & mask; *pmask = pipe_cfg & 0xff; *vmask = (pipe_cfg >> 8) & 0xff; return 1; } return 0; } /* Receive the number of enabled pixel pipelines and also / store a mask with active pipelines. Further store the total / number of pixel units per pipeline in total. */ static int nv40_get_pixel_pipelines(char *mask, int *total) { unsigned char pipe_cfg = nv_card->PMC[0x1540/4] & 0xff; int i, pipelines=0; /* The number of enabled pixel pipelines is stored in the first 4 (or more?) bits. / In case of 6800 hardware a single bit corresponds to 4 pipelines and on NV44/NV46 / hardware a bit corresponds to 2 pipelines */ for(i=0; i<8; i++) if((pipe_cfg >> i) & 0x1) pipelines++; *mask = pipe_cfg; /* NV44/NV46 use 2 pixel units per pipeline */ if(nv_card->arch & (NV44 | NV46)) *total = 2; else *total = 4; return pipelines; } /* Receive the number of enabled vertex pipelines and also / store a mask with active pipelines. */ static int nv40_get_vertex_pipelines(char *mask) { unsigned char pipe_cfg = (nv_card->PMC[0x1540/4] >> 8) & 0xff; int i, pipelines=0; /* The number of enabled vertex pipelines is stored in the second byte. / A a single bit corresponds to 1 vertex pipeline. */ for(i=0; i<8; i++) if((pipe_cfg >> i) & 0x1) pipelines++; *mask = pipe_cfg; return pipelines; } static void nv40_set_pixel_pipelines(unsigned char mask) { int pipe_cfg = nv_card->PMC[0x1540/4]; /* Why do 0xc024/0xc028/0xc02c need to be reset? What do they contain? */ nv_card->PMC[0xc020/4] = nv_card->PMC[0xc024/4] = nv_card->PMC[0xc028/4] = nv_card->PMC[0xc02c/4] = 0; nv_card->PMC[0x1540/4] = ~(~pipe_cfg | 0xff) | mask; } static void nv40_set_vertex_pipelines(unsigned char mask) { int pipe_cfg = nv_card->PMC[0x1540/4]; /* Why do 0xc024/0xc028/0xc02c need to be reset? What do they contain? */ nv_card->PMC[0xc020/4] = nv_card->PMC[0xc024/4] = nv_card->PMC[0xc028/4] = nv_card->PMC[0xc02c/4] = 0; nv_card->PMC[0x1540/4] = ~(~pipe_cfg | 0xff00) | (mask<<8); } /* Fanspeed code for Geforce6800 hardware */ static float nv40_get_fanspeed() { /* Bit 30-16 of register 0x10f0 control the voltage for the pwm signal generator / which is connected to the fan. By changing the value in the register the duty cycle can be controlled / so that the fan turns slower or faster. Bit 14-0 of 0x10f0 contain the pwm division / ratio which decides the smallest fanspeed adjustment step. / The value stored in the registers needs to be inverted, so a value of 10% means 90% and so on. */ int pwm_divider = nv_card->PMC[0x10f0/4] & 0x7fff; float fanspeed = (float)(pwm_divider - ((nv_card->PMC[0x10f0/4] >> 16) & 0x7fff)) * 100.0/(float)pwm_divider; return fanspeed; } static void nv40_set_fanspeed(float speed) { int value; int pwm_divider = nv_card->PMC[0x10f0/4] & 0x7fff; /* For safety reasons we should never disable the fan by not putting it below 10%; further negative values don't exist ;) */ if(speed < 10 || speed > 100) return; value = 0x80000000 + ((((int)(100 - speed) * pwm_divider/100) & 0x7fff)<<16) + pwm_divider; nv_card->PMC[0x10f0/4] = value; } /* Fanspeed code for Geforce6600 hardware (does this work for 6200 cards too??)*/ static float nv43_get_fanspeed() { /* The first 12 or more bits of register 0x15f4 control the voltage for the pwm signal generator in case / of Geforce 6200/6600(GT)/7600/7800GS hardware. By changing the value in the register the duty cycle of the pwm signal / can be controlled so that the fan turns slower or faster. The first part of register 0x15f8 contains the pwm division ratio. / The value stored in the registers needs to be inverted, so a value of 10% means 90% and so on. (pwm_divider means off, 0 means on) */ int pwm_divider = nv_card->PMC[0x15f8/4] & 0x3fff; float fanspeed = (pwm_divider - (nv_card->PMC[0x15f4/4] & 0x3fff)) * 100.0/(float)pwm_divider; return fanspeed; } static void nv43_set_fanspeed(float speed) { int value; int pwm_divider = nv_card->PMC[0x15f8/4] & 0x3fff; /* For safety reasons we should never disable the fan by not putting it below 10%; further negative values don't exist ;) */ if(speed < 10 || speed > 100) return; value = 0x80000000 + (int)((100 - speed) * pwm_divider/100); nv_card->PMC[0x15f4/4] = value; } /* There's an internal temperature sensor on NV43 hardware and higher / Note that the sensor variable which is passed to this function is bogus / it is only there to share nv_card->get_gpu_temp between I2C and low-level. */ static int nv43_get_gpu_temp(void *sensor) { int temp; int correction=0; float offset; float slope; /* For now duplicate the temperature offset code here. It is needed for Mobile users in most cases the bios can't be read on those GPUs. */ if(!nv_card->bios) { switch(nv_card->arch) { case NV43: offset = 32060.0/1000.0; slope = 792.0/1000.0; break; case NV44: case NV47: offset = 27839.0/1000.0; slope = 700.0/1000.0; break; case NV46: /* are these really the default ones? they come from a 7300GS bios */ offset = -24775.0/100.0; slope = 467.0/10000.0; break; case NV49: /* are these really the default ones? they come from a 7900GT/GTX bioses */ offset = -25051.0/100.0; slope = 458.0/10000.0; break; case NV4B: /* are these really the default ones? they come from a 7600GT bios */ offset = -24088.0/100.0; slope = 442.0/10000.0; break; } } else { /* The output value of the sensor needs to be 'calibrated' in order to get the correct temperature. These / values are stored in the video bios and are different for each type of gpu. The value needs to be multiplied / with a certain 'slope' and further the sensor has an offset and another correction constant. */ offset = (float)nv_card->bios->sensor_cfg.diode_offset_mult / (float)nv_card->bios->sensor_cfg.diode_offset_div; slope = (float)nv_card->bios->sensor_cfg.slope_mult / (float)nv_card->bios->sensor_cfg.slope_div; correction = nv_card->bios->sensor_cfg.temp_correction; } /* Assume that the sensor is disabled when the temperature part (without offset) is 0 */ if((nv_card->PMC[0x15b4/4] & 0xfff) == 0) { /* Initialize the sensor, for now program a threshold value of 120C. */ int max_temp = (int)(((float)120 - offset - correction) / slope); /* 7300/7600/7900 cards need bit31 to be set while older cards need a different bit */ if(nv_card->arch & (NV46 | NV49 | NV4B)) nv_card->PMC[0x15b0/4] = 0x80000000 | max_temp; else nv_card->PMC[0x15b0/4] = 0x10000000 | max_temp; usleep(500); } /* In case of Geforce 7300/7600/7900 cards more than one byte is used for the temperature */ if(nv_card->arch & (NV46 | NV49 | NV4B)) temp = nv_card->PMC[0x15b4/4] & 0x1fff; else temp = nv_card->PMC[0x15b4/4] & 0xff; return (int)(temp * slope + offset) + correction; } /* Get current backpanel brightness level on laptops */ static int nv44_mobile_get_smartdimmer() { /* Convert level to a value between 1 and 100 */ return 5*(((nv_card->PMC[0x15f0/4] >> 16) & 0x1f) - 1); } /* Adjust backpanel brightness on laptops */ static void nv44_mobile_set_smartdimmer(int level) { if(level < 15 || level > 100) return; /* Convert the level to correct Smartdimmer values; on Windows a value between 4 and 21 works fine although 0-31 should work. / The code below creates a value between 4 and 21; */ level = level/5 + 1; /* Modify the smartdimmer part but keep the rest of the register the same */ nv_card->PMC[0x15f0/4] = (level << 16) | (nv_card->PMC[0x15f0/4] & 0xffe0ffff); } static int CalcSpeed_nv40(int base_freq, int m1, int m2, int n1, int n2, int p) { return (int)((float)(n1*n2)/(m1*m2) * base_freq) >> p; } float GetClock_nv40(int base_freq, unsigned int pll, unsigned int pll2) { int m1, m2, n1, n2, p; /* mpll at 0x4020 and 0x4024; nvpll at 0x4000 and 0x4004 */ p = (pll >> 16) & 0x03; m1 = pll2 & 0xFF; n1 = (pll2 >> 8) & 0xFF; /* Bit 8 of the first pll register can be used to disable the second set of dividers/multipliers. */ if(pll & 0x100) { m2 = 1; n2 = 1; } /* NV46/NV49/NV4B cards seem to use a different calculation; I'm not sure how it works yet, so for now check the architecture. Further it looks like bit 15 can be used to identify it but I'm not sure yet. */ else if(pll & 0x1000) { m2 = 1; n2 = 1; } else { m2 = (pll2 >> 16) & 0xFF; n2 = (pll2 >> 24) & 0xFF; } if(nv_card->debug) printf("m1=%d m2=%d n1=%d n2=%d p=%d\n", m1, m2, n1, n2, p); return (float)CalcSpeed_nv40(base_freq, m1, m2, n1, n2, p)/1000; } const unsigned int pll_entries=2; /* TODO: add proper architecture specific defaults */ const struct pll pll_lst[2] = { {0x0000, 7, 0, {3000, 25000, 100000, 405000, 1, 255, 1, 255}, {35000, 100000, 400000, 1000000, 1, 31, 1, 31}}, {0x4020, 7, 0, {3000, 25000, 100000, 405000, 1, 255, 1, 255}, {35000, 100000, 600000, 1400000, 1, 31, 1, 31}}, }; static const struct pll* GetPllLimits(unsigned int reg) { int i; if(nv_card->bios && nv_card->bios->pll_entries) { for(i=0; ibios->pll_entries; i++) { if(nv_card->bios->pll_lst[i].reg == reg) return &nv_card->bios->pll_lst[i]; } /* Return the default limits */ return &nv_card->bios->pll_lst[0]; } else { for(i=0; iVCO1.minInputFreq; minVCOFreq = pll_limits->VCO1.minFreq; maxVCOFreq = pll_limits->VCO1.maxFreq; minM = pll_limits->VCO1.minM; maxM = pll_limits->VCO1.maxM; minN = pll_limits->VCO1.minN; maxN = pll_limits->VCO1.maxN; maxP = 6; /* The optimal frequency for the PLL to work at is somewhere in the center of its range. / Choose a post divider in such a way to achieve this. / The G8x nv driver does something similar but they they derive a minP and maxP. That / doesn't seem required as you get so many matching clocks that you don't enter a second / iteration for P. (The minP / maxP values in the nv driver only differ at most 1, so it is for / some rare corner cases. */ for(P=0, VCOFreq=maxVCOFreq/2; clockIn<=VCOFreq && P <= maxP; P++) { VCOFreq /= 2; } /* Calculate the m and n values. There are a lot of values which give the same speed; / We choose the speed for which the difference with the request speed is as small as possible. */ for(M=minM; M<=maxM; M++) { /* The VCO has a minimum input frequency */ if((refClk/M) < minVCOInputFreq) break; for(N=minN; N<=maxN; N++) { /* Calculate the frequency generated by VCO1 */ clock = (int)(refClk * N / (float)M); /* Verify if the clock lies within the output limits of VCO1 */ if(clock < minVCOFreq) continue; else if (clock > maxVCOFreq) /* It is no use to continue as the clock will only become higher */ break; clock >>= P; delta = abs((int)(clockIn - clock)); /* When the difference is 0 or less than .5% accept the speed */ if(((delta == 0) || ((float)delta/(float)clockIn <= 0.005))) { *bestM = M; *bestN = N; *bestP = P; return; } /* When the new difference is smaller than the old one, use this one */ if(delta < bestDelta) { bestDelta = delta; *bestM = M; *bestN = N; *bestP = P; } } } } static void ClockSelectDoubleVCO_nv40(int clockIn, const struct pll *pll_limits, int *bestM, int *bestM2, int *bestN, int *bestN2, int *bestP) { int clock1, clock2, M, M2, N, N2, P; int delta, bestDelta, minM, minM2, maxM, maxM2, minN, minN2, maxN, maxN2, maxP; int minVCOInputFreq, minVCO2InputFreq, maxVCO2InputFreq, minVCOFreq, minVCO2Freq, maxVCOFreq, maxVCO2Freq; int maxClock, VCO2Freq; int refClk = 27000; bestDelta = clockIn; *bestM=*bestM2=*bestN=*bestN2=*bestP=0; minVCOInputFreq = pll_limits->VCO1.minInputFreq; minVCOFreq = pll_limits->VCO1.minFreq; maxVCOFreq = pll_limits->VCO1.maxFreq; minM = pll_limits->VCO1.minM; maxM = pll_limits->VCO1.maxM; minN = pll_limits->VCO1.minN; maxN = pll_limits->VCO1.maxN; minVCO2InputFreq = pll_limits->VCO2.minInputFreq; maxVCO2InputFreq = pll_limits->VCO2.maxInputFreq; minVCO2Freq = pll_limits->VCO2.minFreq; maxVCO2Freq = pll_limits->VCO2.maxFreq; minM2 = pll_limits->VCO2.minM; maxM2 = pll_limits->VCO2.maxM; minN2 = pll_limits->VCO2.minN; maxN2 = pll_limits->VCO2.maxN; maxP = 6; /* This should be somewhere in the bios too */ maxClock = maxVCO2Freq; /* If the requested clock is behind the bios limits, try it anyway */ if(clockIn > maxVCO2Freq) maxClock = clockIn + clockIn/200; /* Add a .5% margin */ /* The optimal frequency for the PLL to work at is somewhere in the center of its range. / Choose a post divider in such a way to achieve this. / The G8x nv driver does something similar but they they derive a minP and maxP. That / doesn't seem required as you get so many matching clocks that you don't enter a second / iteration for P. (The minP / maxP values in the nv driver only differ at most 1, so it is for / some rare corner cases. */ for(P=0, VCO2Freq=maxClock/2; clockIn<=VCO2Freq && P <= maxP; P++) { VCO2Freq /= 2; } /* The PLLs on Geforce6/7 hardware can operate in a single stage made with only 1 VCO / and a cascade mode of two VCOs. This second mode is in general used for relatively high / frequencies. The loop below calculates the divider and multiplier ratios for the cascade / mode. The code takes into account limits defined in the video bios. */ for(M=minM; M<=maxM; M++) { /* The VCO has a minimum input frequency */ if((refClk/M) < minVCOInputFreq) break; for(N=minN; N<=maxN; N++) { /* Calculate the frequency generated by VCO1 */ clock1 = (int)(refClk * N / (float)M); /* Verify if the clock lies within the output limits of VCO1 */ if( (clock1 < minVCOFreq) ) continue; else if(clock1 > maxVCOFreq) /* For future N, the clock will only increase so stop; xorg nv continues but that is useless */ break; for(M2=minM2; M2<=maxM2; M2++) { /* The clock fed to the second VCO needs to lie within a certain input range */ if(clock1 / M2 < minVCO2InputFreq) break; else if(clock1 / M2 > maxVCO2InputFreq) continue; N2 = (int)((float)((clockIn << P) * M * M2) / (float)(refClk * N)+.5); if( (N2 < minN2) || (N2 > maxN2) ) continue; /* The clock before being fed to the post-divider needs to lie within a certain range. / Further there are some limits on N2/M2. */ clock2 = (int)((float)(N*N2)/(M*M2) * refClk); if( (clock2 < minVCO2Freq) || (clock2 > maxClock))// || ((N2 / M2) < 4) || ((N2 / M2) > 10) ) continue; /* The post-divider delays the 'high' clock to create a low clock if requested. / This post-divider exists because the VCOs can only generate frequencies within / a limited frequency range. This range has been tuned to lie around half of its max / input frequency. It tries to calculate all clocks (including lower ones) around this / 'center' frequency. */ clock2 >>= P; delta = abs((int)(clockIn - clock2)); /* When the difference is 0 or less than .5% accept the speed */ if(((delta == 0) || ((float)delta/(float)clockIn <= 0.005))) { *bestM = M; *bestM2 = M2; *bestN = N; *bestN2 = N2; *bestP = P; return; } /* When the new difference is smaller than the old one, use this one */ if(delta < bestDelta) { bestDelta = delta; *bestM = M; *bestM2 = M2; *bestN = N; *bestN2 = N2; *bestP = P; } } } } } static void ClockSelect_nv40(int clockIn, unsigned int reg, unsigned int *pllOut, unsigned int *pllBOut) { int PLL=0, PLL2=0; int bestM=0, bestM2=1, bestN=0, bestN2=1, bestP=0; const struct pll *pll_limits = GetPllLimits(reg); int pllIn = nv_card->PMC[reg/4]; printf("Warning using experimental NV4x lowlevel clock adjustment, if you encounter strange issues, issue a bugreport.\n"); /* Use a single VCO if the clock isn't high enough to require a second. / This is what apparently the Nvidia drivers are doing. For instance on my 7600GS / the max frequency for the first VCO in case of the GPU clock is 300MHz. Indeed below / 300MHz it moves to a single VCO. / We could likely also check if the limits of the second divider and multiplier are 1. */ if(clockIn < pll_limits->VCO1.maxFreq) { ClockSelectSingleVCO_nv40(clockIn, pll_limits, &bestM, &bestN, &bestP); /* Bit31 enables the first VCO */ PLL = 0x80000000; /* Select only a single VCO; bit12 is apparently used when there is a single VCO (G7x) / while bit8 is used when there are two VCOs, so it is apparently some disable bit. / / On some cards at least on G7x ones, a single VCO is used for the memory / In this case the min and max reference dividers (M) are equal. */ if(pll_limits->VCO2.minM == pll_limits->VCO2.maxM) PLL |= 0x1000; else PLL |= 0x100; /* Set the reference divider (M) and the feedback divider (N) */ PLL2 = (bestN<<8) | bestM; } else { ClockSelectDoubleVCO_nv40(clockIn, pll_limits, &bestM, &bestM2, &bestN, &bestN2, &bestP); /* Bit31 enables the first VCO, bit30 the second */ PLL = 0xc0000000; /* Set the reference dividers (M, M2) and the feedback dividers (N, N2) */ PLL2 = (bestN2<<24) | (bestM2<<16) | (bestN<<8) | bestM; } /* Set the post divider */ PLL |= (bestP<<16); if(reg == 0x4020) /* MPLL */ { unsigned int default_pll = nv_card->bios ? nv_card->bios->mpll : 0; /* This data comes from the init script tables of bios of the respective cards */ if(!default_pll) { switch(nv_card->arch) { case NV40: case NV41: /* I'm not sure if this is correct */ case NV43: case NV44: default_pll = 0x2000001c; break; case NV46: default_pll = 0x20000000; break; case NV47: default_pll = 0x2400001c; break; case NV49: case NV4B: default_pll = 0x24800000; break; default: printf("Unknown default pll for %#x\n", nv_card->arch); } } /* According to a vbtracetool log, this is done. Why a second post divider for the MPLL? */ PLL |= (bestP + pll_limits->var1e) << 20; /* Or with the 'empty' PLL but make sure that it doesn't adjust the post dividers or the enable bits */ PLL |= (default_pll & 0x3f88ffff); } else /* NVPLL */ { unsigned int default_pll = nv_card->bios ? nv_card->bios->nvpll : 0; if(!default_pll) { switch(nv_card->arch) { case NV40: case NV41: /* I'm not sure if this is correct */ case NV43: default_pll = 0x0000001c; break; case NV44: default_pll = 0xc000001c; break; case NV46: default_pll = 0xc0000000; break; case NV47: default_pll = 0x0001001f; break; case NV49: case NV4B: default_pll = 0x00010000; break; case C51: default: printf("Unknown default pll for %#x\n", nv_card->arch); default_pll = 0xc0000000; /* this should only get reached for C51 */ } } /* Or with the 'empty' PLL but make sure that it doesn't adjust the post dividers or the enable bits */ PLL |= (default_pll & 0x3f88ffff); } /* TODO: at some stage we should perhaps also play nice with 0xc040. / Before programming a PLL, we should disable it in there. Then / we should put the PLL in program mode (?) by setting bit28. After that / we should program the PLLs (first the dividers/multipliers and then the config), / then re-enable the PLL in 0xc040 and then unset bit28 of the pll config register. / At least this is what happens in a vbtracetool log. */ *pllOut = PLL; *pllBOut = PLL2; if(nv_card->debug) { printf("register=%#x, clockIn=%d, calculated=%d\n", reg, clockIn, CalcSpeed_nv40(27000, bestM, bestM2, bestN, bestN2, bestP)); printf("PLL=%#08x, PLL2=%08x\n", *pllOut, *pllBOut); } } static float nv40_get_gpu_speed() { int pll = nv_card->PMC[0x4000/4]; int pll2 = nv_card->PMC[0x4004/4]; if(nv_card->debug == 1) { printf("NVPLL_COEFF=%08x\n", pll); printf("NVPLL2_COEFF=%08x\n", pll2); } return (float)GetClock_nv40(nv_card->base_freq, pll, pll2); } static void nv40_set_gpu_speed(unsigned int nvclk) { unsigned int PLL=0, PLL2=0; nvclk *= 1000; ClockSelect_nv40(nvclk, 0x4000, &PLL, &PLL2); /* When no speed is found, don't change the PLL */ /* The algorithm doesn't allow too low speeds */ if(PLL) { if(nv_card->debug) { printf("NVPLL_COEFF: %08x\n", PLL); printf("NVPLL2_COEFF: %08x\n", PLL2); } nv_card->PMC[0x4000/4] = PLL; nv_card->PMC[0x4004/4] = PLL2; } } static float nv40_get_memory_speed() { int pll = nv_card->PMC[0x4020/4]; int pll2 = nv_card->PMC[0x4024/4]; if(nv_card->debug == 1) { printf("MPLL_COEFF=%08x\n", pll); printf("MPLL2_COEFF=%08x\n", pll2); } return (float)GetClock_nv40(nv_card->base_freq, pll, pll2); } static void nv40_set_memory_speed(unsigned int memclk) { unsigned int PLL=0, PLL2=0; memclk *= 1000; ClockSelect_nv40(memclk, 0x4020, &PLL, &PLL2); /* When no speed is found, don't change the PLL */ /* The algorithm doesn't allow too low speeds */ if(PLL) { if(nv_card->debug) { printf("MPLL_COEFF: %08x\n", PLL); printf("MPLL2_COEFF: %08x\n", PLL2); } /* It seems that different NV4X GPUs contain multiple memory clocks. / A 6800 card has 4 of them, a 6600GT 2 of them and a NV44 (6200) 1. / Very likely this is related to the width of the memory bus, which / is 256bit on the 6800, 128bit on the 6600GT (NV43) and 64bit on the NV44. / / The code below handles the setting of the extra clockspeeds. */ switch(nv_card->arch) { case NV40: case NV41: case NV47: nv_card->PMC[0x402c/4] = PLL; nv_card->PMC[0x4030/4] = PLL2; nv_card->PMC[0x4044/4] = PLL; nv_card->PMC[0x4048/4] = PLL2; case NV43: case NV49: case NV4B: nv_card->PMC[0x4038/4] = PLL; nv_card->PMC[0x403c/4] = PLL2; case NV44: case NV46: nv_card->PMC[0x4020/4] = PLL; nv_card->PMC[0x4024/4] = PLL2; } } } static void nv40_reset_gpu_speed() { /* Set the gpu speed */ nv_card->PMC[0x4000/4] = nv_card->nvpll; nv_card->PMC[0x4004/4] = nv_card->nvpll2; } static void nv40_reset_memory_speed() { /* Set the memory speed */ nv_card->PMC[0x4024/4] = nv_card->mpll2; switch(nv_card->arch) { case NV40: case NV41: case NV47: nv_card->PMC[0x402c/4] = nv_card->mpll; nv_card->PMC[0x4030/4] = nv_card->mpll2; nv_card->PMC[0x4044/4] = nv_card->mpll; nv_card->PMC[0x4048/4] = nv_card->mpll2; case NV43: case NV49: case NV4B: nv_card->PMC[0x4038/4] = nv_card->mpll; nv_card->PMC[0x403c/4] = nv_card->mpll2; case NV44: case NV46: nv_card->PMC[0x4020/4] = nv_card->mpll; nv_card->PMC[0x4024/4] = nv_card->mpll2; } } static void nv40_set_state(int state) { #ifdef HAVE_NVCONTROL if(state & (STATE_2D | STATE_3D)) { nv_card->state = state; nv_card->get_gpu_speed = nvcontrol_get_gpu_speed; nv_card->get_memory_speed = nvcontrol_get_memory_speed; nv_card->set_gpu_speed = nvcontrol_set_gpu_speed; nv_card->set_memory_speed = nvcontrol_set_memory_speed; nv_card->reset_gpu_speed = nvcontrol_reset_gpu_speed; nv_card->reset_memory_speed = nvcontrol_reset_memory_speed; } else #endif { nv_card->state = STATE_LOWLEVEL; nv_card->get_gpu_speed = nv40_get_gpu_speed; nv_card->get_memory_speed = nv40_get_memory_speed; nv_card->set_memory_speed = nv40_set_memory_speed; nv_card->set_gpu_speed = nv40_set_gpu_speed; nv_card->reset_gpu_speed = nv40_reset_gpu_speed; nv_card->reset_memory_speed = nv40_reset_memory_speed; } } void nv40_init(void) { nv_card->base_freq = 27000; nv_card->set_state = nv40_set_state; nv_card->set_state(STATE_LOWLEVEL); /* Set the clock function pointers */ nv_card->get_default_mask = nv40_get_default_mask; nv_card->get_hw_masked_units = nv40_get_hw_masked_units; nv_card->get_sw_masked_units = nv40_get_sw_masked_units; nv_card->get_pixel_pipelines = nv40_get_pixel_pipelines; nv_card->get_vertex_pipelines = nv40_get_vertex_pipelines; /* Register I2C busses for hardware monitoring purposes */ if(nv_card->busses[0] == NULL) { nv_card->num_busses = 3; nv_card->busses[0] = NV_I2CCreateBusPtr("BUS0", 0x3e); /* available on riva128 and higher */ nv_card->busses[1] = NV_I2CCreateBusPtr("BUS1", 0x36); /* available on rivatnt hardware and higher */ nv_card->busses[2] = NV_I2CCreateBusPtr("BUS2", 0x50); /* available on geforce4mx/4ti/fx/6/7 */ i2c_sensor_init(); } /* For now enable modding on NV40 cards and NV43 revisions prior to A4; other cards are locked */ if((nv_card->arch & NV40) || ((nv_card->arch & NV43) && (nv_card->get_gpu_revision() < 0xA4))) { nv_card->caps |= PIPELINE_MODDING; nv_card->set_pixel_pipelines = nv40_set_pixel_pipelines; nv_card->set_vertex_pipelines = nv40_set_vertex_pipelines; } /* If the smartdimmer register contains a value (default 21) then smartdimmer is supported on the laptop; This should work on various 6200Go/7600Go cards */ if((nv_card->PMC[0x15f0/4] & 0xff) && nv_card->gpu == MOBILE) { nv_card->caps |= SMARTDIMMER; nv_card->get_smartdimmer = nv44_mobile_get_smartdimmer; nv_card->set_smartdimmer = nv44_mobile_set_smartdimmer; } /* Temperature monitoring; all cards after the NV40 feature an internal temperature sensor. / Only it is disabled on most 6200/6600(GT) cards but we can re-enable it ;) */ if((nv_card->arch & (NV43 | NV44 | NV46 | NV47 | NV49 | NV4B)) && !(nv_card->caps & GPU_TEMP_MONITORING)) { nv_card->caps |= GPU_TEMP_MONITORING; nv_card->sensor_name = (char*)strdup("GPU Internal Sensor"); nv_card->get_gpu_temp = (int(*)(I2CDevPtr))nv43_get_gpu_temp; } /* Fanspeed monitoring; bit 31 is an indication if fanspeed monitoring is available / Note this bit isn't very reliable as it is set on cards with advanced sensors too. / Should the NV44 use the NV43 codepath? */ if(((nv_card->arch & (NV40 | NV49)) && (nv_card->PMC[0x10f0/4] & 0x80000000)) && !(nv_card->caps & I2C_FANSPEED_MONITORING)) { nv_card->caps |= GPU_FANSPEED_MONITORING; nv_card->get_fanspeed = nv40_get_fanspeed; nv_card->set_fanspeed = nv40_set_fanspeed; } else if(((nv_card->arch & (NV41 | NV43 | NV44 | NV47 | NV4B)) && (nv_card->PMC[0x15f4/4] & 0x80000000)) && !(nv_card->caps & I2C_FANSPEED_MONITORING)) { nv_card->caps |= GPU_FANSPEED_MONITORING; nv_card->get_fanspeed = nv43_get_fanspeed; nv_card->set_fanspeed = nv43_set_fanspeed; } /* Mobile GPU check; we don't want to overclock those unless the user wants it */ if(nv_card->gpu == MOBILE) { nv_card->caps = ~(~nv_card->caps | GPU_OVERCLOCKING | MEM_OVERCLOCKING); } else nv_card->caps |= (GPU_OVERCLOCKING | MEM_OVERCLOCKING); /* Set the speed range */ if(nv_card->bios) { /* Most Geforce6 bioses just have one active entry but some Geforce6 6800(Ultra) bioses have 2 entries / in that case the first one contains the highest clocks (3d?). Further there are 6600GT cards with / also two entries for which the second entry contains the 3d clock. */ if((nv_card->bios->perf_entries == 1) || (nv_card->bios->perf_lst[0].nvclk > nv_card->bios->perf_lst[1].nvclk)) { nv_card->memclk_3d = (short)nv_card->bios->perf_lst[0].memclk; nv_card->nvclk_3d = (short)nv_card->bios->perf_lst[0].nvclk; } else { /* 6600GT cards have 2d/3d clocks again; the second entries are the 3d ones. / We use the 2d entries for the minimum clocks and the 3d ones for the maximum ones. */ nv_card->memclk_3d = (short)nv_card->bios->perf_lst[1].memclk; nv_card->nvclk_3d = (short)nv_card->bios->perf_lst[1].nvclk; } nv_card->memclk_min = (short)(nv_card->bios->perf_lst[0].memclk * .75); nv_card->memclk_max = nv_card->memclk_3d * 1.25; nv_card->nvclk_min = (short)(nv_card->bios->perf_lst[0].nvclk * .75); nv_card->nvclk_max = nv_card->nvclk_3d * 1.25; /* FIXME: Divide the memory clocks by two on Geforc7600/7900 cards because we program the 'real' clocks for those instead of the effective DDR ones which are twice as high */ if(nv_card->arch & (NV46 | NV49 | NV4B)) { nv_card->memclk_min /= 2; nv_card->memclk_max /= 2; } } else { float memclk = GetClock_nv40(nv_card->base_freq, nv_card->mpll, nv_card->mpll2); float nvclk = GetClock_nv40(nv_card->base_freq, nv_card->nvpll, nv_card->nvpll2); /* Not great but better than nothing .. */ nv_card->memclk_min = (short)(memclk * .75); nv_card->memclk_max = (short)(memclk * 1.5); nv_card->nvclk_min = (short)(nvclk * .75); nv_card->nvclk_max = (short)(nvclk * 1.5); } }